1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- common code.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52
53
54 /*
55 * Bri-linear factor. Should be greater than one.
56 */
57 #define BRILINEAR_FACTOR 2
58
59
60 /**
61 * Does the given texture wrap mode allow sampling the texture border color?
62 * XXX maybe move this into gallium util code.
63 */
64 bool
lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,enum pipe_tex_filter min_img_filter,enum pipe_tex_filter mag_img_filter)65 lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
66 enum pipe_tex_filter min_img_filter,
67 enum pipe_tex_filter mag_img_filter)
68 {
69 switch (mode) {
70 case PIPE_TEX_WRAP_REPEAT:
71 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
72 case PIPE_TEX_WRAP_MIRROR_REPEAT:
73 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
74 return false;
75 case PIPE_TEX_WRAP_CLAMP:
76 case PIPE_TEX_WRAP_MIRROR_CLAMP:
77 if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
78 mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
79 return false;
80 } else {
81 return true;
82 }
83 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
84 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
85 return true;
86 default:
87 assert(0 && "unexpected wrap mode");
88 return false;
89 }
90 }
91
92
93 /**
94 * Initialize lp_sampler_static_texture_state object with the gallium
95 * texture/sampler_view state (this contains the parts which are
96 * considered static).
97 */
98 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)99 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
100 const struct pipe_sampler_view *view)
101 {
102 memset(state, 0, sizeof *state);
103
104 if (!view || !view->texture)
105 return;
106
107 const struct pipe_resource *texture = view->texture;
108
109 state->format = view->format;
110 state->res_format = texture->format;
111 state->swizzle_r = view->swizzle_r;
112 state->swizzle_g = view->swizzle_g;
113 state->swizzle_b = view->swizzle_b;
114 state->swizzle_a = view->swizzle_a;
115 assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
116 assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
117 assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
118 assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
119
120 /* check if it is a tex2d created from buf */
121 if (view->is_tex2d_from_buf)
122 state->target = PIPE_TEXTURE_2D;
123 else
124 state->target = view->target;
125
126 state->pot_width = util_is_power_of_two_or_zero(texture->width0);
127 state->pot_height = util_is_power_of_two_or_zero(texture->height0);
128 state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
129 state->level_zero_only = !view->u.tex.last_level;
130
131 /*
132 * the layer / element / level parameters are all either dynamic
133 * state or handled transparently wrt execution.
134 */
135 }
136
137
138 /**
139 * Initialize lp_sampler_static_texture_state object with the gallium
140 * texture/sampler_view state (this contains the parts which are
141 * considered static).
142 */
143 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)144 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
145 const struct pipe_image_view *view)
146 {
147 memset(state, 0, sizeof *state);
148
149 if (!view || !view->resource)
150 return;
151
152 const struct pipe_resource *resource = view->resource;
153
154 state->format = view->format;
155 state->res_format = resource->format;
156 state->swizzle_r = PIPE_SWIZZLE_X;
157 state->swizzle_g = PIPE_SWIZZLE_Y;
158 state->swizzle_b = PIPE_SWIZZLE_Z;
159 state->swizzle_a = PIPE_SWIZZLE_W;
160 assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
161 assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
162 assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
163 assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
164
165 state->target = resource->target;
166 state->pot_width = util_is_power_of_two_or_zero(resource->width0);
167 state->pot_height = util_is_power_of_two_or_zero(resource->height0);
168 state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
169 state->level_zero_only = view->u.tex.level == 0;
170
171 /*
172 * the layer / element / level parameters are all either dynamic
173 * state or handled transparently wrt execution.
174 */
175 }
176
177
178 /**
179 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
180 * state (this contains the parts which are considered static).
181 */
182 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)183 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
184 const struct pipe_sampler_state *sampler)
185 {
186 memset(state, 0, sizeof *state);
187
188 if (!sampler)
189 return;
190
191 /*
192 * We don't copy sampler state over unless it is actually enabled, to avoid
193 * spurious recompiles, as the sampler static state is part of the shader
194 * key.
195 *
196 * Ideally gallium frontends or cso_cache module would make all state
197 * canonical, but until that happens it's better to be safe than sorry here.
198 *
199 * XXX: Actually there's much more than can be done here, especially
200 * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
201 */
202
203 state->wrap_s = sampler->wrap_s;
204 state->wrap_t = sampler->wrap_t;
205 state->wrap_r = sampler->wrap_r;
206 state->min_img_filter = sampler->min_img_filter;
207 state->mag_img_filter = sampler->mag_img_filter;
208 state->min_mip_filter = sampler->min_mip_filter;
209 state->seamless_cube_map = sampler->seamless_cube_map;
210 state->reduction_mode = sampler->reduction_mode;
211 state->aniso = sampler->max_anisotropy > 1.0f;
212
213 if (sampler->max_lod > 0.0f) {
214 state->max_lod_pos = 1;
215 }
216
217 if (sampler->lod_bias != 0.0f) {
218 state->lod_bias_non_zero = 1;
219 }
220
221 if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
222 state->min_img_filter != state->mag_img_filter) {
223
224 /* If min_lod == max_lod we can greatly simplify mipmap selection.
225 * This is a case that occurs during automatic mipmap generation.
226 */
227 if (sampler->min_lod == sampler->max_lod) {
228 state->min_max_lod_equal = 1;
229 } else {
230 if (sampler->min_lod > 0.0f) {
231 state->apply_min_lod = 1;
232 }
233
234 /*
235 * XXX this won't do anything with the mesa state tracker which always
236 * sets max_lod to not more than actually present mip maps...
237 */
238 if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
239 state->apply_max_lod = 1;
240 }
241 }
242 }
243
244 state->compare_mode = sampler->compare_mode;
245 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
246 state->compare_func = sampler->compare_func;
247 }
248
249 state->normalized_coords = !sampler->unnormalized_coords;
250 }
251
252
/**
 * Build the anisotropic pmin value (the squared minor-axis length of the
 * pixel footprint, clamped by max_aniso), used as the lod input for
 * anisotropic filtering.
 *
 * \param first_level  selected base mip level (int)
 * \param s, t         texture coordinates (per-element vectors)
 * \param max_aniso    scalar maximum anisotropy
 *
 * Returns pmin squared, in pmin_bld (lodf) layout — per quad or per element
 * depending on the lod build context.
 */
static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context *bld,
              LLVMValueRef first_level,
              LLVMValueRef s,
              LLVMValueRef t,
              LLVMValueRef max_aniso)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *pmin_bld = &bld->lodf_bld;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   /* Packed per-quad derivatives of (s, t): [ds/dx, dt/dx, ds/dy, dt/dy]. */
   LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
   LLVMValueRef int_size, float_size;
   const unsigned length = coord_bld->type.length;
   const unsigned num_quads = length / 4;
   /* If the lod vector is shorter than the coord vector, pmin is per quad. */
   const bool pmin_per_quad = pmin_bld->type.length != length;

   /* Texture size minified to the selected first level, as floats. */
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, true);
   float_size = lp_build_int_to_float(float_size_bld, int_size);
   max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
   /* Work with squared lengths throughout, so square max_aniso too. */
   max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);

   static const unsigned char swizzle01[] = { /* no-op swizzle */
      0, 1,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle23[] = {
      2, 3,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

   /* Broadcast (width, height) to (w, h, w, h) per quad to scale the
    * derivatives into texel units.
    */
   for (unsigned i = 0; i < num_quads; i++) {
      shuffles[i*4+0] = shuffles[i*4+1] = index0;
      shuffles[i*4+2] = shuffles[i*4+3] = index1;
   }
   floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                     LLVMConstVector(shuffles, length), "");
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);

   /* Square each component of the scaled derivatives. */
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);

   ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
   ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);

   /* px2_py2 = (|dx|^2, |dy|^2) — squared lengths of the footprint axes. */
   LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

   static const unsigned char swizzle0[] = { /* no-op swizzle */
      0, LP_BLD_SWIZZLE_DONTCARE,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle1[] = {
      1, LP_BLD_SWIZZLE_DONTCARE,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
   LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);

   LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
   LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);

   /* If pmin2 * max_aniso^2 < pmin2 ... i.e. the anisotropy ratio exceeds
    * max_aniso, clamp by using pmax2 / max_aniso^2 instead.
    */
   LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);

   LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
                                        pmin2, temp);

   LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);

   pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);

   /* Reduce to one value per quad, or replicate the first lane per quad. */
   if (pmin_per_quad)
      pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                        pmin_bld->type, pmin2, 0);
   else
      pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
   return pmin2;
}
336
337
/**
 * Generate code to compute coordinate gradient (rho).
 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
 *
 * The resulting rho has bld->levelf format (per quad or per element).
 * When the no_rho_approx path is taken (and dims > 1), the returned value
 * is rho SQUARED (the sqrt is skipped — callers compensate via log2 math).
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             LLVMValueRef first_level,
             LLVMValueRef s,
             LLVMValueRef t,
             LLVMValueRef r,
             const struct lp_derivatives *derivs)
{
   struct gallivm_state *gallivm = bld->gallivm;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *rho_bld = &bld->lodf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2] = {NULL};
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_vec;
   LLVMValueRef rho;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* rho is per quad when the lod vector is shorter than the coord vector. */
   bool rho_per_quad = rho_bld->type.length != length;
   /* accurate (squared) rho computation requested and meaningful (2D+) */
   bool no_rho_opt = bld->no_rho_approx && (dims > 1);
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef rho_xvec, rho_yvec;

   /* Note that all simplified calculations will only work for isotropic
    * filtering
    */

   /*
    * rho calcs are always per quad except for explicit derivs (excluding
    * the messy cube maps for now) when requested.
    */

   /* Texture dimensions minified to the selected first mip level. */
   LLVMValueRef int_size =
      lp_build_minify(int_size_bld, bld->int_size, first_level, true);
   LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size);

   if (derivs) {
      /* Explicit per-element derivatives supplied by the shader. */
      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
      for (unsigned i = 0; i < dims; i++) {
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);

         /* Broadcast the i-th texture dimension across the coord vector. */
         LLVMValueRef floatdim =
            lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                       coord_bld->type, float_size, indexi);

         /*
          * note that for rho_per_quad case could reduce math (at some shuffle
          * cost), but for now use same code to per-pixel lod case.
          */
         if (no_rho_opt) {
            /* Accurate path: squared, texel-scaled derivatives per axis. */
            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
         } else {
            /* Approximate path: max of |ddx| and |ddy|, texel-scaled. */
            LLVMValueRef tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
            LLVMValueRef tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
         }
      }
      if (no_rho_opt) {
         /* rho^2 = max(|dx|^2, |dy|^2) summed across dimensions. */
         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
         if (dims > 2) {
            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
         }
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         /* skipping sqrt hence returning rho squared */
      } else {
         /* rho = max over all dimensions of the per-axis maxima. */
         rho = ddmax[0];
         if (dims > 1) {
            rho = lp_build_max(coord_bld, rho, ddmax[1]);
            if (dims > 2) {
               rho = lp_build_max(coord_bld, rho, ddmax[2]);
            }
         }
      }

      /* Flush inf/NaN derivatives to zero so downstream lod math is sane. */
      LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm,
                                                       coord_bld->type, rho);
      rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);

      if (rho_per_quad) {
         /*
          * rho_vec contains per-pixel rho, convert to scalar per quad.
          */
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         rho_bld->type, rho, 0);
      }
   } else {
      /*
       * Implicit derivatives: differenced from neighboring pixels in the
       * quad. This looks all a bit complex, but it's not that bad
       * (the shuffle code makes it look worse than it is).
       * Still, might not be ideal for all cases.
       */
      static const unsigned char swizzle0[] = { /* no-op swizzle */
         0, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle1[] = {
         1, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle2[] = {
         2, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };

      if (dims < 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
      } else if (dims >= 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
         }
      }

      if (no_rho_opt) {
         /* Accurate path on packed quad derivatives. */
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];

         /* Expand (w, h) to (w, h, w, h) per quad to scale into texels. */
         for (unsigned i = 0; i < num_quads; i++) {
            shuffles[i*4+0] = shuffles[i*4+1] = index0;
            shuffles[i*4+2] = shuffles[i*4+3] = index1;
         }
         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                           LLVMConstVector(shuffles, length),
                                           "");
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

         if (dims > 2) {
            /* Fold in the squared, scaled r-coordinate derivatives. */
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                                  coord_bld->type, float_size, index2);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
         }

         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (rho_per_quad) {
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            rho_bld->type, rho, 0);
         } else {
            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
         }
         /* skipping sqrt hence returning rho squared */
      } else {
         /* Approximate path: max of absolute packed derivatives. */
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         } else {
            ddx_ddy[1] = NULL; /* silence compiler warning */
         }

         /* Separate the ddx components from the ddy components. */
         if (dims < 2) {
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
         } else if (dims == 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            static const unsigned char swizzle13[] = {
               1, 3,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
         } else {
            /* 3D: gather s/t derivs from ddx_ddy[0] and r derivs from
             * ddx_ddy[1] into per-quad (ds, dt, dr, -) vectors.
             */
            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
            assert(dims == 3);
            for (unsigned i = 0; i < num_quads; i++) {
               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
               shuffles1[4*i + 3] = i32undef;
               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
               shuffles2[4*i + 3] = i32undef;
            }
            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles1, length), "");
            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles2, length), "");
         }

         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (bld->coord_type.length > 4) {
            /* expand size to each quad */
            if (dims > 1) {
               /* could use some broadcast_vector helper for this? */
               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
               for (unsigned i = 0; i < num_quads; i++) {
                  src[i] = float_size;
               }
               float_size = lp_build_concat(bld->gallivm, src,
                                            float_size_bld->type, num_quads);
            } else {
               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
            }
            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  /* Reduce the per-dimension rho values to their maximum. */
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);

                  rho = lp_build_max(coord_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
                     rho = lp_build_max(coord_bld, rho, rho_r);
                  }
               }
            }
            if (rho_per_quad) {
               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                               rho_bld->type, rho, 0);
            } else {
               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
            }
         } else {
            /* Single-quad case: operate with scalar extracts instead. */
            if (dims <= 1) {
               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
            }
            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

                  rho = lp_build_max(float_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
                     rho = lp_build_max(float_bld, rho, rho_r);
                  }
               }
            }
            if (!rho_per_quad) {
               rho = lp_build_broadcast_scalar(rho_bld, rho);
            }
         }
      }
   }

   return rho;
}
633
634
/*
 * Bri-linear lod computation
 *
 * Use a piece-wise linear approximation of log2 such that:
 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
 *   with the steepness specified in 'factor'
 * - exact result for 0.5, 1.5, etc.
 *
 *
 *   1.0 -              /----*
 *                     /
 *                    /
 *                   /
 *   0.5 -          *
 *                 /
 *                /
 *               /
 *   0.0 - *----/
 *
 *         |                 |
 *        2^0               2^1
 *
 * This is a technique also commonly used in hardware:
 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 *
 * TODO: For correctness, this should only be applied when texture is known to
 * have regular mipmaps, i.e., mipmaps derived from the base level.
 *
 * TODO: This could be done in fixed point, where applicable.
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   /* Shift the lod so the flat (rounded) regions land on integer
    * boundaries; compensate below with post_offset.
    */
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   if (0) {
      /* Debug aid, normally disabled. */
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   /* Split into integer mip level and fractional blend weight. */
   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   /* Steepen the fractional part: fpart = fpart * factor + (1 - factor). */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   if (0) {
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}
703
704
/*
 * Combined log2 and brilinear lod computation.
 *
 * It is identical in results to calling lp_build_fast_log2() and
 * lp_build_brilinear_lod() above, but by combining we can compute the
 * integer and fractional part independently (straight from rho's
 * floating-point exponent and mantissa).
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor will make the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the integer
    * part will not need any post adjustments.
    */
   rho = lp_build_mul(bld, rho,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   LLVMValueRef lod_ipart = lp_build_extract_exponent(bld, rho, 0);

   /* fpart = rho / 2**ipart */
   LLVMValueRef lod_fpart = lp_build_extract_mantissa(bld, rho);

   /* Steepen the fractional part: fpart = fpart * factor + (1 - 2*factor). */
   lod_fpart =
      lp_build_mad(bld, lod_fpart,
                   lp_build_const_vec(bld->gallivm, bld->type, factor),
                   lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = lod_ipart;
   *out_lod_fpart = lod_fpart;
}
754
755
756 /**
757 * Fast implementation of iround(log2(sqrt(x))), based on
758 * log2(x^n) == n*log2(x).
759 *
760 * Gives accurate results all the time.
761 * (Could be trivially extended to handle other power-of-two roots.)
762 */
763 static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context * bld,LLVMValueRef x)764 lp_build_ilog2_sqrt(struct lp_build_context *bld,
765 LLVMValueRef x)
766 {
767 LLVMBuilderRef builder = bld->gallivm->builder;
768 struct lp_type i_type = lp_int_type(bld->type);
769 LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);
770
771 assert(bld->type.floating);
772
773 assert(lp_check_value(bld->type, x));
774
775 /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
776 LLVMValueRef ipart = lp_build_extract_exponent(bld, x, 1);
777 ipart = LLVMBuildAShr(builder, ipart, one, "");
778
779 return ipart;
780 }
781
782
783 /**
784 * Generate code to compute texture level of detail (lambda).
785 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
786 * \param lod_bias optional float vector with the shader lod bias
787 * \param explicit_lod optional float vector with the explicit lod
788 * \param out_lod_ipart integer part of lod
789 * \param out_lod_fpart float part of lod (never larger than 1 but may be negative)
790 * \param out_lod_positive (mask) if lod is positive (i.e. texture is minified)
791 *
792 * The resulting lod can be scalar per quad or be per element.
793 */
794 void
lp_build_lod_selector(struct lp_build_sample_context * bld,bool is_lodq,unsigned sampler_unit,LLVMValueRef first_level,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,enum pipe_tex_mipfilter mip_filter,LLVMValueRef max_aniso,LLVMValueRef * out_lod,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart,LLVMValueRef * out_lod_positive)795 lp_build_lod_selector(struct lp_build_sample_context *bld,
796 bool is_lodq,
797 unsigned sampler_unit,
798 LLVMValueRef first_level,
799 LLVMValueRef s,
800 LLVMValueRef t,
801 LLVMValueRef r,
802 const struct lp_derivatives *derivs,
803 LLVMValueRef lod_bias, /* optional */
804 LLVMValueRef explicit_lod, /* optional */
805 enum pipe_tex_mipfilter mip_filter,
806 LLVMValueRef max_aniso,
807 LLVMValueRef *out_lod,
808 LLVMValueRef *out_lod_ipart,
809 LLVMValueRef *out_lod_fpart,
810 LLVMValueRef *out_lod_positive)
811
812 {
813 LLVMBuilderRef builder = bld->gallivm->builder;
814 struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
815 struct lp_build_context *lodf_bld = &bld->lodf_bld;
816 LLVMValueRef lod;
817
818 *out_lod_ipart = bld->lodi_bld.zero;
819 *out_lod_positive = bld->lodi_bld.zero;
820 *out_lod_fpart = lodf_bld->zero;
821
822 /*
823 * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
824 * Magnification: "Implementations may either unconditionally assume c = 0
825 * for the minification vs. magnification switch-over point, or may choose
826 * to make c depend on the combination of minification and magnification
827 * modes as follows: if the magnification filter is given by LINEAR and the
828 * minification filter is given by NEAREST_MIPMAP_NEAREST or
829 * NEAREST_MIPMAP_LINEAR, then c = 0.5. This is done to ensure that a
830 * minified texture does not appear "sharper" than a magnified
831 * texture. Otherwise c = 0." And 3.9.11 Texture Minification: "If lod is
832 * less than or equal to the constant c (see section 3.9.12) the texture is
833 * said to be magnified; if it is greater, the texture is minified." So,
834 * using 0 as switchover point always, and using magnification for lod ==
835 * 0. Note that the always c = 0 behavior is new (first appearing in GL
836 * 3.1 spec), old GL versions required 0.5 for the modes listed above. I
837 * have no clue about the (undocumented) wishes of d3d9/d3d10 here!
838 */
839
840 if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
841 /* User is forcing sampling from a particular mipmap level.
842 * This is hit during mipmap generation.
843 */
844 LLVMValueRef min_lod =
845 dynamic_state->min_lod(bld->gallivm, bld->resources_type,
846 bld->resources_ptr, sampler_unit);
847
848 lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
849 } else {
850 if (explicit_lod) {
851 if (bld->num_lods != bld->coord_type.length)
852 lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
853 lodf_bld->type, explicit_lod, 0);
854 else
855 lod = explicit_lod;
856 } else {
857 LLVMValueRef rho;
858 bool rho_squared = bld->no_rho_approx && (bld->dims > 1);
859
860 if (bld->static_sampler_state->aniso &&
861 !explicit_lod) {
862 rho = lp_build_pmin(bld, first_level, s, t, max_aniso);
863 rho_squared = true;
864 } else {
865 rho = lp_build_rho(bld, first_level, s, t, r, derivs);
866 }
867
868 /*
869 * Compute lod = log2(rho)
870 */
871
872 if (!lod_bias && !is_lodq &&
873 !bld->static_sampler_state->aniso &&
874 !bld->static_sampler_state->lod_bias_non_zero &&
875 !bld->static_sampler_state->apply_max_lod &&
876 !bld->static_sampler_state->apply_min_lod) {
877 /*
878 * Special case when there are no post-log2 adjustments, which
879 * saves instructions but keeping the integer and fractional lod
880 * computations separate from the start.
881 */
882
883 if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
884 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
885 /*
886 * Don't actually need both values all the time, lod_ipart is
887 * needed for nearest mipfilter, lod_positive if min != mag.
888 */
889 if (rho_squared) {
890 *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
891 } else {
892 *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
893 }
894 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
895 rho, lodf_bld->one);
896 return;
897 }
898 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
899 !bld->no_brilinear && !rho_squared &&
900 !bld->static_sampler_state->aniso) {
901 /*
902 * This can't work if rho is squared. Not sure if it could be
903 * fixed while keeping it worthwile, could also do sqrt here
904 * but brilinear and no_rho_opt seems like a combination not
905 * making much sense anyway so just use ordinary path below.
906 */
907 lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
908 out_lod_ipart, out_lod_fpart);
909 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
910 rho, lodf_bld->one);
911 return;
912 }
913 }
914
915 if (0) {
916 lod = lp_build_log2(lodf_bld, rho);
917 } else {
918 /* get more accurate results if we just sqaure rho always */
919 if (!rho_squared)
920 rho = lp_build_mul(lodf_bld, rho, rho);
921 lod = lp_build_fast_log2(lodf_bld, rho);
922 }
923
924 /* log2(x^2) == 0.5*log2(x) */
925 lod = lp_build_mul(lodf_bld, lod,
926 lp_build_const_vec(bld->gallivm,
927 lodf_bld->type, 0.5F));
928
929 /* add shader lod bias */
930 if (lod_bias) {
931 if (bld->num_lods != bld->coord_type.length)
932 lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
933 bld->coord_bld.type,
934 lodf_bld->type,
935 lod_bias, 0);
936 lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
937 }
938 }
939
940 /* add sampler lod bias */
941 if (bld->static_sampler_state->lod_bias_non_zero) {
942 LLVMValueRef sampler_lod_bias =
943 dynamic_state->lod_bias(bld->gallivm, bld->resources_type,
944 bld->resources_ptr, sampler_unit);
945 sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
946 sampler_lod_bias);
947 lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
948 }
949
950 if (is_lodq) {
951 *out_lod = lod;
952 }
953
954 /* clamp lod */
955 if (bld->static_sampler_state->apply_max_lod) {
956 LLVMValueRef max_lod =
957 dynamic_state->max_lod(bld->gallivm, bld->resources_type,
958 bld->resources_ptr, sampler_unit);
959 max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
960
961 lod = lp_build_min(lodf_bld, lod, max_lod);
962 }
963 if (bld->static_sampler_state->apply_min_lod) {
964 LLVMValueRef min_lod =
965 dynamic_state->min_lod(bld->gallivm, bld->resources_type,
966 bld->resources_ptr, sampler_unit);
967 min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
968
969 lod = lp_build_max(lodf_bld, lod, min_lod);
970 }
971
972 if (is_lodq) {
973 *out_lod_fpart = lod;
974 return;
975 }
976 }
977
978 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
979 lod, lodf_bld->zero);
980
981 if (bld->static_sampler_state->aniso) {
982 *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
983 } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
984 if (!bld->no_brilinear) {
985 lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
986 out_lod_ipart, out_lod_fpart);
987 } else {
988 lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
989 }
990
991 lp_build_name(*out_lod_fpart, "lod_fpart");
992 } else {
993 *out_lod_ipart = lp_build_iround(lodf_bld, lod);
994 }
995
996 lp_build_name(*out_lod_ipart, "lod_ipart");
997
998 return;
999 }
1000
1001
1002 /**
1003 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1004 * to actual mip level.
1005 * Note: this is all scalar per quad code.
1006 * \param lod_ipart int texture level of detail
 * \param level_out returns the resulting (clamped or masked) integer mip level
1008 * \param out_of_bounds returns per coord out_of_bounds mask if provided
1009 */
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           LLVMValueRef first_level,
                           LLVMValueRef last_level,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out,
                           LLVMValueRef *out_of_bounds)
{
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   /* lod_ipart is relative to first_level; bias to get an absolute level */
   LLVMValueRef level = lp_build_add(leveli_bld, lod_ipart, first_level);

   if (out_of_bounds) {
      /* Caller wants an out-of-bounds mask instead of clamping
       * (e.g. for explicit-lod fetches which must return zero).
       */
      LLVMValueRef out, out1;
      out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(leveli_bld, out, out1);
      if (bld->num_mips == bld->coord_bld.type.length) {
         /* per-element lod: mask already matches the coord vector width */
         *out_of_bounds = out;
      } else if (bld->num_mips == 1) {
         /* single lod: splat the scalar mask across all coords */
         *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
      } else {
         assert(bld->num_mips == bld->coord_bld.type.length / 4);
         /* per-quad lod: replicate each mask value to its quad's 4 lanes */
         *out_of_bounds =
            lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                  leveli_bld->type,
                                                  bld->int_coord_bld.type,
                                                  out);
      }
      /* zero the level where out of bounds so address math stays harmless */
      level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
      *level_out = level;
   } else {
      /* clamp level to legal range of levels */
      *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);

   }
}
1046
1047
1048 /**
1049 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1050 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1051 * part accordingly.
1052 * Later, we'll sample from those two mipmap levels and interpolate between
1053 * them.
1054 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef first_level,
                           LLVMValueRef last_level,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   assert(bld->num_lods == bld->num_mips);

   /* lod_ipart is relative to first_level; level1 is the next coarser level */
   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level],
    * with the minimum number of comparisons, and zeroing lod_fpart in the
    * extreme ends in the process.
    */

   /* *level0_out < first_level */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             *level0_out, first_level,
                             "clamp_lod_to_first");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level1_out, "");

   /* both levels collapse to first_level, so no interpolation weight */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             *level0_out, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   /* both levels collapse to last_level, so no interpolation weight */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
   lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
}
1114
1115
1116 /**
1117 * A helper function that factorizes this common pattern.
1118 */
1119 LLVMValueRef
lp_sample_load_mip_value(struct gallivm_state * gallivm,LLVMTypeRef ptr_type,LLVMValueRef offsets,LLVMValueRef index1)1120 lp_sample_load_mip_value(struct gallivm_state *gallivm,
1121 LLVMTypeRef ptr_type,
1122 LLVMValueRef offsets,
1123 LLVMValueRef index1)
1124 {
1125 LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1126 LLVMValueRef indexes[2] = {zero, index1};
1127 LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets,
1128 indexes, ARRAY_SIZE(indexes), "");
1129 return LLVMBuildLoad2(gallivm->builder,
1130 LLVMInt32TypeInContext(gallivm->context), ptr, "");
1131 }
1132
1133
1134 /**
1135 * Return pointer to a single mipmap level.
1136 * \param level integer mipmap level
1137 */
1138 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1139 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1140 LLVMValueRef level)
1141 {
1142 LLVMValueRef mip_offset = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1143 bld->mip_offsets, level);
1144 LLVMBuilderRef builder = bld->gallivm->builder;
1145 LLVMValueRef data_ptr =
1146 LLVMBuildGEP2(builder,
1147 LLVMInt8TypeInContext(bld->gallivm->context),
1148 bld->base_ptr, &mip_offset, 1, "");
1149 return data_ptr;
1150 }
1151
1152
1153 /**
1154 * Return (per-pixel) offsets to mip levels.
1155 * \param level integer mipmap level
1156 */
1157 LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context * bld,LLVMValueRef level)1158 lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
1159 LLVMValueRef level)
1160 {
1161 LLVMBuilderRef builder = bld->gallivm->builder;
1162 LLVMValueRef offsets, offset1;
1163
1164 if (bld->num_mips == 1) {
1165 offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level);
1166 offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
1167 } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1168 offsets = bld->int_coord_bld.undef;
1169 for (unsigned i = 0; i < bld->num_mips; i++) {
1170 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1171 offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1172 bld->mip_offsets,
1173 LLVMBuildExtractElement(builder, level,
1174 indexi, ""));
1175 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1176 offsets = LLVMBuildInsertElement(builder, offsets, offset1,
1177 indexo, "");
1178 }
1179 offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld,
1180 offsets, 0, 4);
1181 } else {
1182 assert (bld->num_mips == bld->coord_bld.type.length);
1183
1184 offsets = bld->int_coord_bld.undef;
1185 for (unsigned i = 0; i < bld->num_mips; i++) {
1186 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1187 offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1188 bld->mip_offsets,
1189 LLVMBuildExtractElement(builder, level,
1190 indexi, ""));
1191 offsets = LLVMBuildInsertElement(builder, offsets, offset1,
1192 indexi, "");
1193 }
1194 }
1195 return offsets;
1196 }
1197
1198
1199 /**
1200 * Codegen equivalent for u_minify().
1201 * @param lod_scalar if lod is a (broadcasted) scalar
1202 * Return max(1, base_size >> level);
1203 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level,
                bool lod_scalar)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   } else {
      LLVMValueRef size;
      assert(bld->type.sign);
      /* Use a plain vector shift when the shift count is uniform
       * (broadcast scalar lod) or when the cpu handles per-element
       * shift counts well (avx2, or non-x86 i.e. no sse at all).
       */
      if (lod_scalar ||
         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
         size = LLVMBuildLShr(builder, base_size, level, "minify");
         size = lp_build_max(bld, size, bld->one);
      } else {
         /*
          * emulate shift with float mul, since intel "forgot" shifts with
          * per-element shift count until avx2, which results in terrible
          * scalar extraction (both count and value), scalar shift,
          * vector reinsertion. Should not be an issue on any non-x86 cpu
          * with a vector instruction set.
          * On cpus with AMD's XOP this should also be unnecessary but I'm
          * not sure if llvm would emit this with current flags.
          */
         LLVMValueRef const127, const23, lf;
         struct lp_type ftype;
         struct lp_build_context fbld;
         ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
         lp_build_context_init(&fbld, bld->gallivm, ftype);
         const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
         const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);

         /* calculate 2^(-level) float: build the ieee754 bit pattern
          * directly by placing the biased exponent (127 - level) into
          * the exponent field (bits 23..30), then bitcast to float.
          */
         lf = lp_build_sub(bld, const127, level);
         lf = lp_build_shl(bld, lf, const23);
         lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");

         /* finish shift operation by doing float mul */
         base_size = lp_build_int_to_float(&fbld, base_size);
         size = lp_build_mul(&fbld, base_size, lf);
         /*
          * do the max also with floats because
          * a) non-emulated int max requires sse41
          *    (this is actually a lie as we could cast to 16bit values
          *    as 16bit is sufficient and 16bit int max is sse2)
          * b) with avx we can do int max 4-wide but float max 8-wide
          */
         size = lp_build_max(&fbld, size, fbld.one);
         size = lp_build_itrunc(&fbld, size);
      }
      return size;
   }
}
1263
1264
1265 /*
1266 * Scale image dimensions with block sizes.
1267 *
1268 * tex_blocksize is the resource format blocksize
1269 * view_blocksize is the view format blocksize
1270 *
1271 * This must be applied post-minification, but
1272 * only when blocksizes are different.
1273 *
1274 * ret = (size + (tex_blocksize - 1)) >> log2(tex_blocksize);
1275 * ret *= blocksize;
1276 */
1277 LLVMValueRef
lp_build_scale_view_dims(struct lp_build_context * bld,LLVMValueRef size,LLVMValueRef tex_blocksize,LLVMValueRef tex_blocksize_log2,LLVMValueRef view_blocksize)1278 lp_build_scale_view_dims(struct lp_build_context *bld, LLVMValueRef size,
1279 LLVMValueRef tex_blocksize,
1280 LLVMValueRef tex_blocksize_log2,
1281 LLVMValueRef view_blocksize)
1282 {
1283 LLVMBuilderRef builder = bld->gallivm->builder;
1284 LLVMValueRef ret =
1285 LLVMBuildAdd(builder, size,
1286 LLVMBuildSub(builder, tex_blocksize,
1287 lp_build_const_int_vec(bld->gallivm,
1288 bld->type, 1), ""),
1289 "");
1290 ret = LLVMBuildLShr(builder, ret, tex_blocksize_log2, "");
1291 ret = LLVMBuildMul(builder, ret, view_blocksize, "");
1292 return ret;
1293 }
1294
1295
1296 /*
1297 * Scale a single image dimension.
1298 *
1299 * Scale one image between resource and view blocksizes.
1300 * noop if sizes are the same.
1301 */
1302 LLVMValueRef
lp_build_scale_view_dim(struct gallivm_state * gallivm,LLVMValueRef size,unsigned tex_blocksize,unsigned view_blocksize)1303 lp_build_scale_view_dim(struct gallivm_state *gallivm, LLVMValueRef size,
1304 unsigned tex_blocksize, unsigned view_blocksize)
1305 {
1306 if (tex_blocksize == view_blocksize)
1307 return size;
1308
1309 LLVMBuilderRef builder = gallivm->builder;
1310 LLVMValueRef ret =
1311 LLVMBuildAdd(builder, size,
1312 lp_build_const_int32(gallivm, tex_blocksize - 1), "");
1313 ret = LLVMBuildLShr(builder, ret,
1314 lp_build_const_int32(gallivm,
1315 util_logbase2(tex_blocksize)), "");
1316 ret = LLVMBuildMul(builder, ret,
1317 lp_build_const_int32(gallivm, view_blocksize), "");
1318 return ret;
1319 }
1320
1321
1322 /**
1323 * Dereference stride_array[mipmap_level] array to get a stride.
1324 * Return stride as a vector.
1325 */
1326 static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context * bld,LLVMTypeRef stride_type,LLVMValueRef stride_array,LLVMValueRef level)1327 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1328 LLVMTypeRef stride_type,
1329 LLVMValueRef stride_array, LLVMValueRef level)
1330 {
1331 LLVMBuilderRef builder = bld->gallivm->builder;
1332 LLVMValueRef stride, stride1;
1333
1334 if (bld->num_mips == 1) {
1335 stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array, level);
1336 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1337 } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1338 LLVMValueRef stride1;
1339
1340 stride = bld->int_coord_bld.undef;
1341 for (unsigned i = 0; i < bld->num_mips; i++) {
1342 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1343 stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
1344 LLVMBuildExtractElement(builder, level,
1345 indexi, ""));
1346 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1347 stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1348 }
1349 stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1350 } else {
1351 LLVMValueRef stride1;
1352
1353 assert (bld->num_mips == bld->coord_bld.type.length);
1354
1355 stride = bld->int_coord_bld.undef;
1356 for (unsigned i = 0; i < bld->coord_bld.type.length; i++) {
1357 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1358 stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
1359 LLVMBuildExtractElement(builder, level,
1360 indexi, ""));
1361 stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1362 }
1363 }
1364 return stride;
1365 }
1366
1367
1368 /**
1369 * When sampling a mipmap, we need to compute the width, height, depth
1370 * of the source levels from the level indexes. This helper function
1371 * does that.
1372 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   /*
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   if (bld->num_mips == 1) {
      /* Single level for all pixels: minify the base size once. */
      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
                                  ilevel_vec, true);
      *out_size = lp_build_scale_view_dims(&bld->int_size_bld, *out_size,
                                           bld->int_tex_blocksize,
                                           bld->int_tex_blocksize_log2,
                                           bld->int_view_blocksize);
   } else {
      LLVMValueRef int_size_vec;
      LLVMValueRef int_tex_blocksize_vec, int_tex_blocksize_log2_vec;
      LLVMValueRef int_view_blocksize_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
      const unsigned num_quads = bld->coord_bld.type.length / 4;

      if (bld->num_mips == num_quads) {
         /*
          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
          * intel "forgot" the variable shift count instruction until avx2.
          * A harmless 8x32 shift gets translated into 32 instructions
          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
          * unable to recognize if there are really just 2 different shift
          * count values. So do the shift 4-wide before expansion.
          */
         struct lp_build_context bld4;
         struct lp_type type4;

         /* 4-wide variant of the int coord type for per-quad minify */
         type4 = bld->int_coord_bld.type;
         type4.length = 4;

         lp_build_context_init(&bld4, bld->gallivm, type4);

         if (bld->dims == 1) {
            /* 1D: size is a scalar, splat it to all 4 lanes */
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld4,
                                                     bld->int_size);
            int_tex_blocksize_vec =
               lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize);
            int_tex_blocksize_log2_vec =
               lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize_log2);
            int_view_blocksize_vec =
               lp_build_broadcast_scalar(&bld4, bld->int_view_blocksize);
         } else {
            /* 2D/3D: size is already a 4-wide (w, h, d, _) vector */
            assert(bld->int_size_in_bld.type.length == 4);
            int_size_vec = bld->int_size;
            int_tex_blocksize_vec = bld->int_tex_blocksize;
            int_tex_blocksize_log2_vec = bld->int_tex_blocksize_log2;
            int_view_blocksize_vec = bld->int_view_blocksize;
         }

         /* minify 4-wide per quad, then concatenate the per-quad results */
         for (unsigned i = 0; i < num_quads; i++) {
            LLVMValueRef ileveli;
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            /* splat this quad's level across the 4-wide vector */
            ileveli = lp_build_extract_broadcast(bld->gallivm,
                                                 bld->leveli_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, true);
            tmp[i] = lp_build_scale_view_dims(&bld4, tmp[i],
                                              int_tex_blocksize_vec,
                                              int_tex_blocksize_log2_vec,
                                              int_view_blocksize_vec);
         }
         /*
          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for
          * dims > 1, [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
          */
         *out_size = lp_build_concat(bld->gallivm,
                                     tmp,
                                     bld4.type,
                                     num_quads);
      } else {
         /* FIXME: this is terrible and results in _huge_ vector
          * (for the dims > 1 case).
          * Should refactor this (together with extract_image_sizes) and do
          * something more useful. Could for instance if we have width,height
          * with 4-wide vector pack all elements into a 8xi16 vector
          * (on which we can still do useful math) instead of using a 16xi32
          * vector.
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
          * vector.
          */
         assert(bld->num_mips == bld->coord_bld.type.length);
         if (bld->dims == 1) {
            /* 1D: minify all per-element levels in one full-width op */
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            int_tex_blocksize_vec =
               lp_build_broadcast_scalar(&bld->int_coord_bld,
                                         bld->int_tex_blocksize);
            int_tex_blocksize_log2_vec =
               lp_build_broadcast_scalar(&bld->int_coord_bld,
                                         bld->int_tex_blocksize_log2);
            int_view_blocksize_vec =
               lp_build_broadcast_scalar(&bld->int_coord_bld,
                                         bld->int_view_blocksize);
            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec,
                                        ilevel, false);
            *out_size = lp_build_scale_view_dims(&bld->int_coord_bld,
                                                 *out_size,
                                                 int_tex_blocksize_vec,
                                                 int_tex_blocksize_log2_vec,
                                                 int_view_blocksize_vec);
         } else {
            /* 2D/3D: minify the (w, h, d, _) tuple once per element */
            LLVMValueRef ilevel1;
            for (unsigned i = 0; i < bld->num_mips; i++) {
               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
               ilevel1 = lp_build_extract_broadcast(bld->gallivm,
                                                    bld->int_coord_type,
                                                    bld->int_size_in_bld.type,
                                                    ilevel, indexi);
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i],
                                        ilevel1, true);
               tmp[i] = lp_build_scale_view_dims(&bld->int_size_in_bld,
                                                 tmp[i],
                                                 bld->int_tex_blocksize,
                                                 bld->int_tex_blocksize_log2,
                                                 bld->int_view_blocksize);
            }
            *out_size = lp_build_concat(bld->gallivm, tmp,
                                        bld->int_size_in_bld.type,
                                        bld->num_mips);
         }
      }
   }

   /* row stride is needed for any 2D/3D access, img stride additionally
    * for 3D and array/cube targets (layered addressing).
    */
   if (dims >= 2) {
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_type,
                                                      bld->row_stride_array,
                                                      ilevel);
   }
   if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_type,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
1529
1530
1531 /**
1532 * Extract and broadcast texture size.
1533 *
 * @param size_bld build context for the texture size vector (either
 *                 &bld->int_size_bld or &bld->float_size_bld)
 * @param coord_type type of the returned per-dimension vectors (either
 *                   bld->int_coord_type or bld->coord_type)
 * @param size vector with the texture size (width, height, depth)
1539 */
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                             struct lp_build_context *size_bld,
                             struct lp_type coord_type,
                             LLVMValueRef size,
                             LLVMValueRef *out_width,
                             LLVMValueRef *out_height,
                             LLVMValueRef *out_depth)
{
   const unsigned dims = bld->dims;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   struct lp_type size_type = size_bld->type;

   if (bld->num_mips == 1) {
      /* Single mip: size holds one (w, h, d) tuple; broadcast each
       * component across the whole coord-typed vector.
       */
      *out_width = lp_build_extract_broadcast(bld->gallivm,
                                              size_type,
                                              coord_type,
                                              size,
                                              LLVMConstInt(i32t, 0, 0));
      if (dims >= 2) {
         *out_height = lp_build_extract_broadcast(bld->gallivm,
                                                  size_type,
                                                  coord_type,
                                                  size,
                                                  LLVMConstInt(i32t, 1, 0));
         if (dims == 3) {
            *out_depth = lp_build_extract_broadcast(bld->gallivm,
                                                    size_type,
                                                    coord_type,
                                                    size,
                                                    LLVMConstInt(i32t, 2, 0));
         }
      }
   } else {
      unsigned num_quads = bld->coord_bld.type.length / 4;

      if (dims == 1) {
         /* 1D: size is already the per-element width vector */
         *out_width = size;
      } else if (bld->num_mips == num_quads) {
         /* Per-quad sizes laid out as [w0,h0,d0,_, w1,h1,d1,_, ...];
          * replicate each component across its quad's four lanes.
          */
         *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
         if (dims >= 2) {
            *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
            if (dims == 3) {
               *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
            }
         }
      } else {
         /* Per-element sizes (same aos layout); gather one component
          * per element into full-width vectors.
          */
         assert(bld->num_mips == bld->coord_type.length);
         *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                coord_type, size, 0);
         if (dims >= 2) {
            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                    coord_type, size, 1);
            if (dims == 3) {
               *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                      coord_type, size, 2);
            }
         }
      }
   }
}
1601
1602
1603 /**
1604 * Unnormalize coords.
1605 *
 * @param flt_size vector with the float texture size (width, height, depth)
1607 */
1608 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1609 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1610 LLVMValueRef flt_size,
1611 LLVMValueRef *s,
1612 LLVMValueRef *t,
1613 LLVMValueRef *r)
1614 {
1615 const unsigned dims = bld->dims;
1616 LLVMValueRef width;
1617 LLVMValueRef height = NULL;
1618 LLVMValueRef depth = NULL;
1619
1620 lp_build_extract_image_sizes(bld,
1621 &bld->float_size_bld,
1622 bld->coord_type,
1623 flt_size,
1624 &width,
1625 &height,
1626 &depth);
1627
1628 /* s = s * width, t = t * height */
1629 *s = lp_build_mul(&bld->coord_bld, *s, width);
1630 if (dims >= 2) {
1631 *t = lp_build_mul(&bld->coord_bld, *t, height);
1632 if (dims >= 3) {
1633 *r = lp_build_mul(&bld->coord_bld, *r, depth);
1634 }
1635 }
1636 }
1637
1638
1639 /**
1640 * Generate new coords and faces for cubemap texels falling off the face.
1641 *
1642 * @param face face (center) of the pixel
1643 * @param x0 lower x coord
1644 * @param x1 higher x coord (must be x0 + 1)
1645 * @param y0 lower y coord
 * @param y1 higher y coord (must be y0 + 1)
1647 * @param max_coord texture cube (level) size - 1
1648 * @param next_faces new face values when falling off
1649 * @param next_xcoords new x coord values when falling off
1650 * @param next_ycoords new y coord values when falling off
1651 *
1652 * The arrays hold the new values when under/overflow of
1653 * lower x, higher x, lower y, higher y coord would occur (in this order).
1654 * next_xcoords/next_ycoords have two entries each (for both new lower and
1655 * higher coord).
1656 */
1657 void
lp_build_cube_new_coords(struct lp_build_context * ivec_bld,LLVMValueRef face,LLVMValueRef x0,LLVMValueRef x1,LLVMValueRef y0,LLVMValueRef y1,LLVMValueRef max_coord,LLVMValueRef next_faces[4],LLVMValueRef next_xcoords[4][2],LLVMValueRef next_ycoords[4][2])1658 lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1659 LLVMValueRef face,
1660 LLVMValueRef x0,
1661 LLVMValueRef x1,
1662 LLVMValueRef y0,
1663 LLVMValueRef y1,
1664 LLVMValueRef max_coord,
1665 LLVMValueRef next_faces[4],
1666 LLVMValueRef next_xcoords[4][2],
1667 LLVMValueRef next_ycoords[4][2])
1668 {
1669 /*
1670 * Lookup tables aren't nice for simd code hence try some logic here.
1671 * (Note that while it would not be necessary to do per-sample (4) lookups
1672 * when using a LUT as it's impossible that texels fall off of positive
1673 * and negative edges simultaneously, it would however be necessary to
1674 * do 2 lookups for corner handling as in this case texels both fall off
1675 * of x and y axes.)
1676 */
1677 /*
1678 * Next faces (for face 012345):
1679 * x < 0.0 : 451110
1680 * x >= 1.0 : 540001
1681 * y < 0.0 : 225422
1682 * y >= 1.0 : 334533
1683 * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1684 * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
1685 * nfy+: face & ~4 > 1 ? face + 2 : 3;
1686 * This could also use pshufb instead, but would need (manually coded)
1687 * ssse3 intrinsic (llvm won't do non-constant shuffles).
1688 */
1689 struct gallivm_state *gallivm = ivec_bld->gallivm;
1690 LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1691 LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1692 LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1693 LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1694 LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1695 LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1696
1697 sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1698 tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1699 sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1700 faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1701 tmp = lp_build_add(ivec_bld, faceand1, c4);
1702 next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1703 next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1704
1705 tmp = lp_build_andnot(ivec_bld, face, c4);
1706 sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1707 tmp = lp_build_add(ivec_bld, face, c2);
1708 next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1709 next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1710
1711 /*
1712 * new xcoords (for face 012345):
1713 * x < 0.0 : max max t max-t max max
1714 * x >= 1.0 : 0 0 max-t t 0 0
1715 * y < 0.0 : max 0 max-s s s max-s
1716 * y >= 1.0 : max 0 s max-s s max-s
1717 *
1718 * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1719 * ncx[0] = max - ncx[1]
1720 * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1721 * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1722 */
1723 sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1724 maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1725 tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1726 next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1727 next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1728 maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1729 tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1730 next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1731 next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1732
1733 sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1734
1735 tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1736 maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1737 tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1738 next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1739 tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1740 next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1741 maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1742 tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1743 next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1744 tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1745 next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1746
1747 /*
1748 * new ycoords (for face 012345):
1749 * x < 0.0 : t t 0 max t t
1750 * x >= 1.0 : t t 0 max t t
1751 * y < 0.0 : max-s s 0 max max 0
1752 * y >= 1.0 : s max-s 0 max 0 max
1753 *
1754 * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1755 * ncy[1] = ncy[0]
1756 * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
    * ncy[2] = face & ~4 > 1 ? ncy[3] : max - ncy[3]
1758 */
1759 tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1760 next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1761 next_ycoords[1][0] = next_ycoords[0][0];
1762 next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1763 next_ycoords[1][1] = next_ycoords[0][1];
1764
1765 tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1766 tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1767 next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1768 tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1769 next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1770 tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1771 tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1772 next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1773 tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1774 next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1775 }
1776
1777
1778 /** Helper used by lp_build_cube_lookup() */
1779 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1780 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1781 {
1782 /* ima = +0.5 / abs(coord); */
1783 LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1784 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1785 /* avoid div by zero */
1786 LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1787 LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1788 LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1789 return ima;
1790 }
1791
1792
1793 /** Helper for doing 3-wise selection.
1794 * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1795 */
1796 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1797 lp_build_select3(struct lp_build_context *sel_bld,
1798 LLVMValueRef sel0,
1799 LLVMValueRef sel1,
1800 LLVMValueRef val0,
1801 LLVMValueRef val1,
1802 LLVMValueRef val2)
1803 {
1804 LLVMValueRef tmp = lp_build_select(sel_bld, sel0, val0, val1);
1805 return lp_build_select(sel_bld, sel1, val2, tmp);
1806 }
1807
1808
/**
 * Generate code to do cube face selection and compute per-face texcoords.
 *
 * On input coords[0..2] hold the s,t,r direction vector.  On output
 * coords[0]/coords[1] hold the projected per-face s,t (biased into
 * [0,1] via *0.5 + 0.5) and coords[2] holds the selected face index
 * (positive face index, +1 for the negative face — see the asserts on
 * the PIPE_TEX_FACE_* numbering below).
 *
 * \param derivs_in   optional explicit derivatives of s,t,r
 * \param derivs_out  receives per-face projected ddx/ddy when need_derivs
 * \param need_derivs whether to compute accurate per-pixel derivatives
 */
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef *coords,
                     const struct lp_derivatives *derivs_in, /* optional */
                     struct lp_derivatives *derivs_out, /* optional */
                     bool need_derivs)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMValueRef si, ti, ri;

   /*
    * Do per-pixel face selection. We cannot however (as we used to do)
    * simply calculate the derivs afterwards (which is very bogus for
    * explicit derivs btw) because the values would be "random" when
    * not all pixels lie on the same face.
    */
   struct lp_build_context *cint_bld = &bld->int_coord_bld;
   struct lp_type intctype = cint_bld->type;
   LLVMTypeRef coord_vec_type = coord_bld->vec_type;
   LLVMTypeRef cint_vec_type = cint_bld->vec_type;
   LLVMValueRef as, at, ar, face, face_s, face_t;
   LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
   LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
   LLVMValueRef tnegi, rnegi;
   LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
   LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
   /* sign-bit mask / shift for the integer view of the float coords */
   LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
                                                  1LL << (intctype.width - 1));
   LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
                                                   intctype.width -1);
   LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
   LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
   LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
   LLVMValueRef s = coords[0];
   LLVMValueRef t = coords[1];
   LLVMValueRef r = coords[2];

   /* face index math below relies on NEG == POS + 1 */
   assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
   assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
   assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);

   /*
    * get absolute value (for x/y/z face selection) and sign bit
    * (for mirroring minor coords and pos/neg face selection)
    * of the original coords.
    */
   as = lp_build_abs(&bld->coord_bld, s);
   at = lp_build_abs(&bld->coord_bld, t);
   ar = lp_build_abs(&bld->coord_bld, r);

   /*
    * major face determination: select x if x > y else select y
    * select z if z >= max(x,y) else select previous result
    * if some axis are the same we chose z over y, y over x - the
    * dx10 spec seems to ask for it while OpenGL doesn't care (if we
    * wouldn't care could save a select or two if using different
    * compares and doing at_g_as_ar last since tnewx and tnewz are the
    * same).
    */
   as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
   maxasat = lp_build_max(coord_bld, as, at);
   ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);

   if (need_derivs) {
      /*
       * XXX: This is really really complex.
       * It is a bit overkill to use this for implicit derivatives as well,
       * no way this is worth the cost in practice, but seems to be the
       * only way for getting accurate and per-pixel lod values.
       */
      LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
      LLVMValueRef madx, mady, madxdivma, madydivma;
      LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
      LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
      LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
      LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
      LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
      /*
       * s = 1/2 * (sc / ma + 1)
       * t = 1/2 * (tc / ma + 1)
       *
       * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
       * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
       *
       * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
       * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
       * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
       * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
       */

      /* select ma, calculate ima */
      ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
      mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
      signmabit = LLVMBuildAnd(builder, mai, signmask, "");
      ima = lp_build_div(coord_bld, coord_bld->one, ma);
      imahalf = lp_build_mul(coord_bld, posHalf, ima);
      imahalfpos = lp_build_abs(coord_bld, imahalf);

      /* explicit derivs take precedence; otherwise compute quad derivs */
      if (!derivs_in) {
         ddx[0] = lp_build_ddx(coord_bld, s);
         ddx[1] = lp_build_ddx(coord_bld, t);
         ddx[2] = lp_build_ddx(coord_bld, r);
         ddy[0] = lp_build_ddy(coord_bld, s);
         ddy[1] = lp_build_ddy(coord_bld, t);
         ddy[2] = lp_build_ddy(coord_bld, r);
      } else {
         ddx[0] = derivs_in->ddx[0];
         ddx[1] = derivs_in->ddx[1];
         ddx[2] = derivs_in->ddx[2];
         ddy[0] = derivs_in->ddy[0];
         ddy[1] = derivs_in->ddy[1];
         ddy[2] = derivs_in->ddy[2];
      }

      /* select major derivatives */
      madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
      mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);

      si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
      ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
      ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");

      sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
      tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
      rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");

      sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
      tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
      rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");

      /*
       * compute all possible new s/t coords, which does the mirroring,
       * and do the same for derivs minor axes.
       * snewx = signma * -r;
       * tnewx = -t;
       * snewy = s;
       * tnewy = signma * r;
       * snewz = signma * s;
       * tnewz = -t;
       * (negation and sign-multiply are done as integer xors with the
       * float sign bit)
       */
      tnegi = LLVMBuildXor(builder, ti, signmask, "");
      rnegi = LLVMBuildXor(builder, ri, signmask, "");
      tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
      rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
      tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
      rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");

      snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
      tnewx = tnegi;
      sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
      tdxnewx = tdxnegi;
      sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
      tdynewx = tdynegi;

      snewy = si;
      tnewy = LLVMBuildXor(builder, signmabit, ri, "");
      sdxnewy = sdxi;
      tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
      sdynewy = sdyi;
      tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");

      snewz = LLVMBuildXor(builder, signmabit, si, "");
      tnewz = tnegi;
      sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
      tdxnewz = tdxnegi;
      sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
      tdynewz = tdynegi;

      /* select the mirrored values */
      face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
      face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
      face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
      face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
      face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
      face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
      face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);

      /* back to float view for the arithmetic below */
      face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
      face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
      face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
      face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
      face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
      face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");

      /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
      madxdivma = lp_build_mul(coord_bld, madx, ima);
      tmp = lp_build_mul(coord_bld, madxdivma, face_s);
      tmp = lp_build_sub(coord_bld, face_sdx, tmp);
      derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);

      /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
      tmp = lp_build_mul(coord_bld, madxdivma, face_t);
      tmp = lp_build_sub(coord_bld, face_tdx, tmp);
      derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);

      /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
      madydivma = lp_build_mul(coord_bld, mady, ima);
      tmp = lp_build_mul(coord_bld, madydivma, face_s);
      tmp = lp_build_sub(coord_bld, face_sdy, tmp);
      derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);

      /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
      tmp = lp_build_mul(coord_bld, madydivma, face_t);
      tmp = lp_build_sub(coord_bld, face_tdy, tmp);
      derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);

      /* add +1 for the negative face (sign bit of ma shifted down) */
      signma = LLVMBuildLShr(builder, mai, signshift, "");
      coords[2] = LLVMBuildOr(builder, face, signma, "face");

      /* project coords */
      face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
      face_t = lp_build_mul(coord_bld, face_t, imahalfpos);

      coords[0] = lp_build_add(coord_bld, face_s, posHalf);
      coords[1] = lp_build_add(coord_bld, face_t, posHalf);

      return;
   }

   /* no-derivs path: same face selection/mirroring without the deriv math */
   ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
   mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
   signmabit = LLVMBuildAnd(builder, mai, signmask, "");

   si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
   ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
   ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");

   /*
    * compute all possible new s/t coords, which does the mirroring
    * snewx = signma * -r;
    * tnewx = -t;
    * snewy = s;
    * tnewy = signma * r;
    * snewz = signma * s;
    * tnewz = -t;
    */
   tnegi = LLVMBuildXor(builder, ti, signmask, "");
   rnegi = LLVMBuildXor(builder, ri, signmask, "");

   snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
   tnewx = tnegi;

   snewy = si;
   tnewy = LLVMBuildXor(builder, signmabit, ri, "");

   snewz = LLVMBuildXor(builder, signmabit, si, "");
   tnewz = tnegi;

   /* select the mirrored values */
   face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
   face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
   face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);

   face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
   face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");

   /* add +1 for neg face */
   /* XXX with AVX probably want to use another select here -
    * as long as we ensure vblendvps gets used we can actually
    * skip the comparison and just use sign as a "mask" directly.
    */
   signma = LLVMBuildLShr(builder, mai, signshift, "");
   coords[2] = LLVMBuildOr(builder, face, signma, "face");

   /* project coords */
   imahalfpos = lp_build_cube_imapos(coord_bld, ma);
   face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
   face_t = lp_build_mul(coord_bld, face_t, imahalfpos);

   coords[0] = lp_build_add(coord_bld, face_s, posHalf);
   coords[1] = lp_build_add(coord_bld, face_t, posHalf);
}
2086
2087
2088 /**
2089 * Compute the partial offset of a pixel block along an arbitrary axis.
2090 *
2091 * @param coord coordinate in pixels
2092 * @param stride number of bytes between rows of successive pixel blocks
2093 * @param block_length number of pixels in a pixels block along the coordinate
2094 * axis
2095 * @param out_offset resulting relative offset of the pixel block in bytes
2096 * @param out_subcoord resulting sub-block pixel coordinate
2097 */
2098 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2099 lp_build_sample_partial_offset(struct lp_build_context *bld,
2100 unsigned block_length,
2101 LLVMValueRef coord,
2102 LLVMValueRef stride,
2103 LLVMValueRef *out_offset,
2104 LLVMValueRef *out_subcoord)
2105 {
2106 LLVMBuilderRef builder = bld->gallivm->builder;
2107 LLVMValueRef offset;
2108 LLVMValueRef subcoord;
2109
2110 if (block_length == 1) {
2111 subcoord = bld->zero;
2112 } else {
2113 /*
2114 * Pixel blocks have power of two dimensions. LLVM should convert the
2115 * rem/div to bit arithmetic.
2116 * TODO: Verify this.
2117 * It does indeed BUT it does transform it to scalar (and back) when doing so
2118 * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2119 * The generated code looks seriously unfunny and is quite expensive.
2120 */
2121 #if 0
2122 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2123 subcoord = LLVMBuildURem(builder, coord, block_width, "");
2124 coord = LLVMBuildUDiv(builder, coord, block_width, "");
2125 #else
2126 unsigned logbase2 = util_logbase2(block_length);
2127 LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2128 LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2129 subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2130 coord = LLVMBuildLShr(builder, coord, block_shift, "");
2131 #endif
2132 }
2133
2134 offset = lp_build_mul(bld, coord, stride);
2135
2136 assert(out_offset);
2137 assert(out_subcoord);
2138
2139 *out_offset = offset;
2140 *out_subcoord = subcoord;
2141 }
2142
2143
2144 /**
2145 * Compute the offset of a pixel block.
2146 *
2147 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2148 *
2149 * Returns the relative offset and i,j sub-block coordinates
2150 */
2151 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2152 lp_build_sample_offset(struct lp_build_context *bld,
2153 const struct util_format_description *format_desc,
2154 LLVMValueRef x,
2155 LLVMValueRef y,
2156 LLVMValueRef z,
2157 LLVMValueRef y_stride,
2158 LLVMValueRef z_stride,
2159 LLVMValueRef *out_offset,
2160 LLVMValueRef *out_i,
2161 LLVMValueRef *out_j)
2162 {
2163 LLVMValueRef x_stride;
2164 LLVMValueRef offset;
2165
2166 x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2167 format_desc->block.bits/8);
2168
2169 lp_build_sample_partial_offset(bld,
2170 format_desc->block.width,
2171 x, x_stride,
2172 &offset, out_i);
2173
2174 if (y && y_stride) {
2175 LLVMValueRef y_offset;
2176 lp_build_sample_partial_offset(bld,
2177 format_desc->block.height,
2178 y, y_stride,
2179 &y_offset, out_j);
2180 offset = lp_build_add(bld, offset, y_offset);
2181 } else {
2182 *out_j = bld->zero;
2183 }
2184
2185 if (z && z_stride) {
2186 LLVMValueRef z_offset;
2187 LLVMValueRef k;
2188 lp_build_sample_partial_offset(bld,
2189 1, /* pixel blocks are always 2D */
2190 z, z_stride,
2191 &z_offset, &k);
2192 offset = lp_build_add(bld, offset, z_offset);
2193 }
2194
2195 *out_offset = offset;
2196 }
2197
2198
2199 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2200 lp_build_sample_min(struct lp_build_context *bld,
2201 LLVMValueRef x,
2202 LLVMValueRef v0,
2203 LLVMValueRef v1)
2204 {
2205 /* if the incoming LERP weight is 0 then the min/max
2206 * should ignore that value. */
2207 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2208 bld->type,
2209 PIPE_FUNC_NOTEQUAL,
2210 x, bld->zero);
2211 LLVMValueRef min = lp_build_min(bld, v0, v1);
2212
2213 return lp_build_select(bld, mask, min, v0);
2214 }
2215
2216
2217 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2218 lp_build_sample_max(struct lp_build_context *bld,
2219 LLVMValueRef x,
2220 LLVMValueRef v0,
2221 LLVMValueRef v1)
2222 {
2223 /* if the incoming LERP weight is 0 then the min/max
2224 * should ignore that value. */
2225 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2226 bld->type,
2227 PIPE_FUNC_NOTEQUAL,
2228 x, bld->zero);
2229 LLVMValueRef max = lp_build_max(bld, v0, v1);
2230
2231 return lp_build_select(bld, mask, max, v0);
2232 }
2233
2234
2235 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2236 lp_build_sample_min_2d(struct lp_build_context *bld,
2237 LLVMValueRef x,
2238 LLVMValueRef y,
2239 LLVMValueRef a,
2240 LLVMValueRef b,
2241 LLVMValueRef c,
2242 LLVMValueRef d)
2243 {
2244 LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2245 LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2246 return lp_build_sample_min(bld, y, v0, v1);
2247 }
2248
2249
2250 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2251 lp_build_sample_max_2d(struct lp_build_context *bld,
2252 LLVMValueRef x,
2253 LLVMValueRef y,
2254 LLVMValueRef a,
2255 LLVMValueRef b,
2256 LLVMValueRef c,
2257 LLVMValueRef d)
2258 {
2259 LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2260 LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2261 return lp_build_sample_max(bld, y, v0, v1);
2262 }
2263
2264
2265 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2266 lp_build_sample_min_3d(struct lp_build_context *bld,
2267 LLVMValueRef x,
2268 LLVMValueRef y,
2269 LLVMValueRef z,
2270 LLVMValueRef a, LLVMValueRef b,
2271 LLVMValueRef c, LLVMValueRef d,
2272 LLVMValueRef e, LLVMValueRef f,
2273 LLVMValueRef g, LLVMValueRef h)
2274 {
2275 LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2276 LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2277 return lp_build_sample_min(bld, z, v0, v1);
2278 }
2279
2280
2281 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2282 lp_build_sample_max_3d(struct lp_build_context *bld,
2283 LLVMValueRef x,
2284 LLVMValueRef y,
2285 LLVMValueRef z,
2286 LLVMValueRef a, LLVMValueRef b,
2287 LLVMValueRef c, LLVMValueRef d,
2288 LLVMValueRef e, LLVMValueRef f,
2289 LLVMValueRef g, LLVMValueRef h)
2290 {
2291 LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2292 LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2293 return lp_build_sample_max(bld, z, v0, v1);
2294 }
2295
2296
2297 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2298 lp_build_reduce_filter(struct lp_build_context *bld,
2299 enum pipe_tex_reduction_mode mode,
2300 unsigned flags,
2301 unsigned num_chan,
2302 LLVMValueRef x,
2303 LLVMValueRef *v00,
2304 LLVMValueRef *v01,
2305 LLVMValueRef *out)
2306 {
2307 unsigned chan;
2308 switch (mode) {
2309 case PIPE_TEX_REDUCTION_MIN:
2310 for (chan = 0; chan < num_chan; chan++)
2311 out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2312 break;
2313 case PIPE_TEX_REDUCTION_MAX:
2314 for (chan = 0; chan < num_chan; chan++)
2315 out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2316 break;
2317 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2318 default:
2319 for (chan = 0; chan < num_chan; chan++)
2320 out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2321 break;
2322 }
2323 }
2324
2325
2326 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2327 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2328 enum pipe_tex_reduction_mode mode,
2329 unsigned flags,
2330 unsigned num_chan,
2331 LLVMValueRef x,
2332 LLVMValueRef y,
2333 LLVMValueRef *v00,
2334 LLVMValueRef *v01,
2335 LLVMValueRef *v10,
2336 LLVMValueRef *v11,
2337 LLVMValueRef *out)
2338 {
2339 switch (mode) {
2340 case PIPE_TEX_REDUCTION_MIN:
2341 for (unsigned chan = 0; chan < num_chan; chan++)
2342 out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan],
2343 v10[chan], v11[chan]);
2344 break;
2345 case PIPE_TEX_REDUCTION_MAX:
2346 for (unsigned chan = 0; chan < num_chan; chan++)
2347 out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan],
2348 v10[chan], v11[chan]);
2349 break;
2350 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2351 default:
2352 for (unsigned chan = 0; chan < num_chan; chan++)
2353 out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan],
2354 v10[chan], v11[chan], flags);
2355 break;
2356 }
2357 }
2358
2359
2360 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2361 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2362 enum pipe_tex_reduction_mode mode,
2363 unsigned flags,
2364 unsigned num_chan,
2365 LLVMValueRef x,
2366 LLVMValueRef y,
2367 LLVMValueRef z,
2368 LLVMValueRef *v000,
2369 LLVMValueRef *v001,
2370 LLVMValueRef *v010,
2371 LLVMValueRef *v011,
2372 LLVMValueRef *v100,
2373 LLVMValueRef *v101,
2374 LLVMValueRef *v110,
2375 LLVMValueRef *v111,
2376 LLVMValueRef *out)
2377 {
2378 switch (mode) {
2379 case PIPE_TEX_REDUCTION_MIN:
2380 for (unsigned chan = 0; chan < num_chan; chan++)
2381 out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2382 v000[chan], v001[chan], v010[chan], v011[chan],
2383 v100[chan], v101[chan], v110[chan], v111[chan]);
2384 break;
2385 case PIPE_TEX_REDUCTION_MAX:
2386 for (unsigned chan = 0; chan < num_chan; chan++)
2387 out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2388 v000[chan], v001[chan], v010[chan], v011[chan],
2389 v100[chan], v101[chan], v110[chan], v111[chan]);
2390 break;
2391 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2392 default:
2393 for (unsigned chan = 0; chan < num_chan; chan++)
2394 out[chan] = lp_build_lerp_3d(bld, x, y, z,
2395 v000[chan], v001[chan], v010[chan], v011[chan],
2396 v100[chan], v101[chan], v110[chan], v111[chan],
2397 flags);
2398 break;
2399 }
2400 }
2401
2402
/*
 * Generated from:
 *    const float alpha = 2;
 *    for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
 *       const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
 *       const float weight = (float)expf(-alpha * r2);
 *    }
 */
2410 static const float aniso_filter_table[1024] = {
2411 1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
2412 0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
2413 0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
2414 0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
2415 0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
2416 0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
2417 0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
2418 0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
2419 0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
2420 0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
2421 0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
2422 0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
2423 0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
2424 0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
2425 0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
2426 0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
2427 0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
2428 0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
2429 0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
2430 0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
2431 0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
2432 0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
2433 0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
2434 0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
2435 0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
2436 0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
2437 0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
2438 0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
2439 0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
2440 0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
2441 0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
2442 0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
2443 0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
2444 0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
2445 0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
2446 0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
2447 0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
2448 0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
2449 0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
2450 0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
2451 0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
2452 0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
2453 0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
2454 0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
2455 0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
2456 0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
2457 0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
2458 0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
2459 0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
2460 0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
2461 0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
2462 0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
2463 0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
2464 0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
2465 0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
2466 0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
2467 0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
2468 0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
2469 0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
2470 0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
2471 0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
2472 0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
2473 0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
2474 0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
2475 0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
2476 0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
2477 0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
2478 0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
2479 0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
2480 0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
2481 0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
2482 0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
2483 0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
2484 0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
2485 0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
2486 0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
2487 0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
2488 0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
2489 0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
2490 0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
2491 0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
2492 0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
2493 0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
2494 0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
2495 0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
2496 0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
2497 0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
2498 0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
2499 0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
2500 0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
2501 0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
2502 0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
2503 0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
2504 0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
2505 0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
2506 0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
2507 0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
2508 0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
2509 0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
2510 0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
2511 0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
2512 0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
2513 0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
2514 0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
2515 0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
2516 0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
2517 0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
2518 0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
2519 0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
2520 0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
2521 0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
2522 0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
2523 0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
2524 0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
2525 0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
2526 0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
2527 0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
2528 0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
2529 0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
2530 0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
2531 0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
2532 0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
2533 0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
2534 0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
2535 0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
2536 0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
2537 0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
2538 0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
2539 };
2540
2541
/**
 * Return a pointer to the file-scope aniso_filter_table lookup table
 * defined above (a const float array of precomputed coefficients used
 * by the anisotropic texture sampling code).
 *
 * The returned pointer refers to static storage; callers must not
 * modify or free it, and it remains valid for the lifetime of the
 * program.
 */
const float *
lp_build_sample_aniso_filter_table(void)
{
   return aniso_filter_table;
}
2547