• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Texture sampling -- common code.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52 
53 
54 /*
55  * Bri-linear factor. Should be greater than one.
56  */
57 #define BRILINEAR_FACTOR 2
58 
59 /**
60  * Does the given texture wrap mode allow sampling the texture border color?
61  * XXX maybe move this into gallium util code.
62  */
63 boolean
lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,enum pipe_tex_filter min_img_filter,enum pipe_tex_filter mag_img_filter)64 lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
65                                        enum pipe_tex_filter min_img_filter,
66                                        enum pipe_tex_filter mag_img_filter)
67 {
68    switch (mode) {
69    case PIPE_TEX_WRAP_REPEAT:
70    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
71    case PIPE_TEX_WRAP_MIRROR_REPEAT:
72    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
73       return FALSE;
74    case PIPE_TEX_WRAP_CLAMP:
75    case PIPE_TEX_WRAP_MIRROR_CLAMP:
76       if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
77           mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
78          return FALSE;
79       } else {
80          return TRUE;
81       }
82    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
83    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
84       return TRUE;
85    default:
86       assert(0 && "unexpected wrap mode");
87       return FALSE;
88    }
89 }
90 
91 
92 /**
93  * Initialize lp_sampler_static_texture_state object with the gallium
94  * texture/sampler_view state (this contains the parts which are
95  * considered static).
96  */
97 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)98 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
99                                 const struct pipe_sampler_view *view)
100 {
101    memset(state, 0, sizeof *state);
102 
103    if (!view || !view->texture)
104       return;
105 
106    const struct pipe_resource *texture = view->texture;
107 
108    state->format = view->format;
109    state->swizzle_r = view->swizzle_r;
110    state->swizzle_g = view->swizzle_g;
111    state->swizzle_b = view->swizzle_b;
112    state->swizzle_a = view->swizzle_a;
113    assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
114    assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
115    assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
116    assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
117 
118    state->target = view->target;
119    state->pot_width = util_is_power_of_two_or_zero(texture->width0);
120    state->pot_height = util_is_power_of_two_or_zero(texture->height0);
121    state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
122    state->level_zero_only = !view->u.tex.last_level;
123 
124    /*
125     * the layer / element / level parameters are all either dynamic
126     * state or handled transparently wrt execution.
127     */
128 }
129 
130 /**
131  * Initialize lp_sampler_static_texture_state object with the gallium
132  * texture/sampler_view state (this contains the parts which are
133  * considered static).
134  */
135 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)136 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
137                                       const struct pipe_image_view *view)
138 {
139    memset(state, 0, sizeof *state);
140 
141    if (!view || !view->resource)
142       return;
143 
144    const struct pipe_resource *resource = view->resource;
145 
146    state->format = view->format;
147    state->swizzle_r = PIPE_SWIZZLE_X;
148    state->swizzle_g = PIPE_SWIZZLE_Y;
149    state->swizzle_b = PIPE_SWIZZLE_Z;
150    state->swizzle_a = PIPE_SWIZZLE_W;
151    assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
152    assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
153    assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
154    assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
155 
156    state->target = view->resource->target;
157    state->pot_width = util_is_power_of_two_or_zero(resource->width0);
158    state->pot_height = util_is_power_of_two_or_zero(resource->height0);
159    state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
160    state->level_zero_only = 0;
161 
162    /*
163     * the layer / element / level parameters are all either dynamic
164     * state or handled transparently wrt execution.
165     */
166 }
167 
168 /**
169  * Initialize lp_sampler_static_sampler_state object with the gallium sampler
170  * state (this contains the parts which are considered static).
171  */
172 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)173 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
174                                 const struct pipe_sampler_state *sampler)
175 {
176    memset(state, 0, sizeof *state);
177 
178    if (!sampler)
179       return;
180 
181    /*
182     * We don't copy sampler state over unless it is actually enabled, to avoid
183     * spurious recompiles, as the sampler static state is part of the shader
184     * key.
185     *
186     * Ideally gallium frontends or cso_cache module would make all state
187     * canonical, but until that happens it's better to be safe than sorry here.
188     *
189     * XXX: Actually there's much more than can be done here, especially
190     * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
191     */
192 
193    state->wrap_s            = sampler->wrap_s;
194    state->wrap_t            = sampler->wrap_t;
195    state->wrap_r            = sampler->wrap_r;
196    state->min_img_filter    = sampler->min_img_filter;
197    state->mag_img_filter    = sampler->mag_img_filter;
198    state->min_mip_filter    = sampler->min_mip_filter;
199    state->seamless_cube_map = sampler->seamless_cube_map;
200    state->reduction_mode    = sampler->reduction_mode;
201    state->aniso = sampler->max_anisotropy > 1.0f;
202 
203    if (sampler->max_lod > 0.0f) {
204       state->max_lod_pos = 1;
205    }
206 
207    if (sampler->lod_bias != 0.0f) {
208       state->lod_bias_non_zero = 1;
209    }
210 
211    if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
212        state->min_img_filter != state->mag_img_filter) {
213 
214       /* If min_lod == max_lod we can greatly simplify mipmap selection.
215        * This is a case that occurs during automatic mipmap generation.
216        */
217       if (sampler->min_lod == sampler->max_lod) {
218          state->min_max_lod_equal = 1;
219       } else {
220          if (sampler->min_lod > 0.0f) {
221             state->apply_min_lod = 1;
222          }
223 
224          /*
225           * XXX this won't do anything with the mesa state tracker which always
226           * sets max_lod to not more than actually present mip maps...
227           */
228          if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
229             state->apply_max_lod = 1;
230          }
231       }
232    }
233 
234    state->compare_mode      = sampler->compare_mode;
235    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
236       state->compare_func   = sampler->compare_func;
237    }
238 
239    state->normalized_coords = sampler->normalized_coords;
240 }
241 
/* build aniso pmin value */
/*
 * Emit LLVM IR computing the (squared) minor-axis length "pmin" of the
 * pixel footprint for anisotropic filtering, for the s/t coordinates of
 * the given texture unit.  All math below works on squared lengths, so
 * no sqrt is ever taken and the returned value is pmin squared.  When
 * pmin2 * max_aniso^2 would not cover pmax2, pmax2 / max_aniso^2 is used
 * instead, effectively clamping the anisotropy ratio to max_aniso.
 * Result is per-quad or per-element depending on lodf_bld's vector length.
 */
static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context *bld,
              unsigned texture_unit,
              LLVMValueRef s,
              LLVMValueRef t,
              LLVMValueRef max_aniso)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *pmin_bld = &bld->lodf_bld;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   /* Packed per-quad derivatives of (s, t) wrt window x and y. */
   LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
   LLVMValueRef int_size, float_size;
   LLVMValueRef first_level, first_level_vec;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* If the pmin vector is narrower than the coord vector we reduce to
    * one value per quad at the end.
    */
   boolean pmin_per_quad = pmin_bld->type.length != length;
   unsigned i;

   /* Size of the base (first) mip level, as floats, for scaling the
    * normalized derivatives into texel space.
    */
   first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                 bld->context_ptr, texture_unit, NULL);
   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
   float_size = lp_build_int_to_float(float_size_bld, int_size);
   /* Square max_aniso up front; all comparisons below are on squares. */
   max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
   max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);

   static const unsigned char swizzle01[] = { /* no-op swizzle */
      0, 1,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle23[] = {
      2, 3,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

   /* Broadcast (width, height) over each quad: w,w,h,h per 4 lanes. */
   for (i = 0; i < num_quads; i++) {
      shuffles[i*4+0] = shuffles[i*4+1] = index0;
      shuffles[i*4+2] = shuffles[i*4+3] = index1;
   }
   floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                     LLVMConstVector(shuffles, length), "");
   /* Scale derivatives into texel space, then square them. */
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);

   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);

   /* Split the packed vector into the s-part and t-part, then sum:
    * px2_py2 holds (dsdx^2 + dtdx^2, dsdy^2 + dtdy^2, ...).
    */
   ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
   ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);

   LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

   static const unsigned char swizzle0[] = { /* no-op swizzle */
     0, LP_BLD_SWIZZLE_DONTCARE,
     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle1[] = {
     1, LP_BLD_SWIZZLE_DONTCARE,
     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   /* px2 = squared x-axis footprint length, py2 = squared y-axis length. */
   LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
   LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);

   LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
   LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);

   /* If pmin2 * max_aniso^2 < pmax2, i.e. the required anisotropy exceeds
    * max_aniso, clamp by using pmax2 / max_aniso^2 instead.
    * (comp is true when pmin2 > pmax2 / max_aniso^2 — no clamp needed.)
    */
   LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);

   LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
                                        pmin2, temp);

   LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);

   pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);

   /* Reduce to per-quad scalars, or replicate lane 0 across each quad. */
   if (pmin_per_quad)
      pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                        pmin_bld->type, pmin2, 0);
   else
      pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
   return pmin2;
}
330 
331 /**
332  * Generate code to compute coordinate gradient (rho).
333  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
334  *
335  * The resulting rho has bld->levelf format (per quad or per element).
336  */
337 static LLVMValueRef
lp_build_rho(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs)338 lp_build_rho(struct lp_build_sample_context *bld,
339              unsigned texture_unit,
340              LLVMValueRef s,
341              LLVMValueRef t,
342              LLVMValueRef r,
343              LLVMValueRef cube_rho,
344              const struct lp_derivatives *derivs)
345 {
346    struct gallivm_state *gallivm = bld->gallivm;
347    struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
348    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
349    struct lp_build_context *float_bld = &bld->float_bld;
350    struct lp_build_context *coord_bld = &bld->coord_bld;
351    struct lp_build_context *rho_bld = &bld->lodf_bld;
352    const unsigned dims = bld->dims;
353    LLVMValueRef ddx_ddy[2] = {NULL};
354    LLVMBuilderRef builder = bld->gallivm->builder;
355    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
356    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
357    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
358    LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
359    LLVMValueRef rho_vec;
360    LLVMValueRef int_size, float_size;
361    LLVMValueRef rho;
362    LLVMValueRef first_level, first_level_vec;
363    unsigned length = coord_bld->type.length;
364    unsigned num_quads = length / 4;
365    boolean rho_per_quad = rho_bld->type.length != length;
366    boolean no_rho_opt = bld->no_rho_approx && (dims > 1);
367    unsigned i;
368    LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
369    LLVMValueRef rho_xvec, rho_yvec;
370 
371    /* Note that all simplified calculations will only work for isotropic filtering */
372 
373    /*
374     * rho calcs are always per quad except for explicit derivs (excluding
375     * the messy cube maps for now) when requested.
376     */
377 
378    first_level = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
379                                                  bld->context_ptr, texture_unit, NULL);
380    first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
381    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec, TRUE);
382    float_size = lp_build_int_to_float(float_size_bld, int_size);
383 
384    if (cube_rho) {
385       LLVMValueRef cubesize;
386       LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);
387 
388       /*
389        * Cube map code did already everything except size mul and per-quad extraction.
390        * Luckily cube maps are always quadratic!
391        */
392       if (rho_per_quad) {
393          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
394                                          rho_bld->type, cube_rho, 0);
395       }
396       else {
397          rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
398       }
399       /* Could optimize this for single quad just skip the broadcast */
400       cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
401                                             rho_bld->type, float_size, index0);
402       /* skipping sqrt hence returning rho squared */
403       cubesize = lp_build_mul(rho_bld, cubesize, cubesize);
404       rho = lp_build_mul(rho_bld, cubesize, rho);
405    }
406    else if (derivs) {
407       LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
408       for (i = 0; i < dims; i++) {
409          LLVMValueRef floatdim;
410          LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
411 
412          floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
413                                                coord_bld->type, float_size, indexi);
414 
415          /*
416           * note that for rho_per_quad case could reduce math (at some shuffle
417           * cost), but for now use same code to per-pixel lod case.
418           */
419          if (no_rho_opt) {
420             ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
421             ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
422             ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
423             ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
424          }
425          else {
426             LLVMValueRef tmpx, tmpy;
427             tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
428             tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
429             ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
430             ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
431          }
432       }
433       if (no_rho_opt) {
434          rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
435          rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
436          if (dims > 2) {
437             rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
438             rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
439          }
440          rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
441          /* skipping sqrt hence returning rho squared */
442      }
443       else {
444          rho = ddmax[0];
445          if (dims > 1) {
446             rho = lp_build_max(coord_bld, rho, ddmax[1]);
447             if (dims > 2) {
448                rho = lp_build_max(coord_bld, rho, ddmax[2]);
449             }
450          }
451       }
452 
453       LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm, coord_bld->type, rho);
454       rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);
455 
456       if (rho_per_quad) {
457          /*
458           * rho_vec contains per-pixel rho, convert to scalar per quad.
459           */
460          rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
461                                          rho_bld->type, rho, 0);
462       }
463    }
464    else {
465       /*
466        * This looks all a bit complex, but it's not that bad
467        * (the shuffle code makes it look worse than it is).
468        * Still, might not be ideal for all cases.
469        */
470       static const unsigned char swizzle0[] = { /* no-op swizzle */
471          0, LP_BLD_SWIZZLE_DONTCARE,
472          LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
473       };
474       static const unsigned char swizzle1[] = {
475          1, LP_BLD_SWIZZLE_DONTCARE,
476          LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
477       };
478       static const unsigned char swizzle2[] = {
479          2, LP_BLD_SWIZZLE_DONTCARE,
480          LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
481       };
482 
483       if (dims < 2) {
484          ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
485       }
486       else if (dims >= 2) {
487          ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
488          if (dims > 2) {
489             ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
490          }
491       }
492 
493       if (no_rho_opt) {
494          static const unsigned char swizzle01[] = { /* no-op swizzle */
495             0, 1,
496             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
497          };
498          static const unsigned char swizzle23[] = {
499             2, 3,
500             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
501          };
502          LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
503 
504          for (i = 0; i < num_quads; i++) {
505             shuffles[i*4+0] = shuffles[i*4+1] = index0;
506             shuffles[i*4+2] = shuffles[i*4+3] = index1;
507          }
508          floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
509                                            LLVMConstVector(shuffles, length), "");
510          ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
511          ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
512          ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
513          ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
514          rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
515 
516          if (dims > 2) {
517             static const unsigned char swizzle02[] = {
518                0, 2,
519                LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
520             };
521             floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
522                                                   coord_bld->type, float_size, index2);
523             ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
524             ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
525             ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
526             rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
527          }
528 
529          rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
530          rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
531          rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
532 
533          if (rho_per_quad) {
534             rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
535                                             rho_bld->type, rho, 0);
536          }
537          else {
538             rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
539          }
540          /* skipping sqrt hence returning rho squared */
541       }
542       else {
543          ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
544          if (dims > 2) {
545             ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
546          }
547          else {
548             ddx_ddy[1] = NULL; /* silence compiler warning */
549          }
550 
551          if (dims < 2) {
552             rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
553             rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
554          }
555          else if (dims == 2) {
556             static const unsigned char swizzle02[] = {
557                0, 2,
558                LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
559             };
560             static const unsigned char swizzle13[] = {
561                1, 3,
562                LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
563             };
564             rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
565             rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
566          }
567          else {
568             LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
569             LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
570             assert(dims == 3);
571             for (i = 0; i < num_quads; i++) {
572                shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
573                shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
574                shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
575                shuffles1[4*i + 3] = i32undef;
576                shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
577                shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
578                shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
579                shuffles2[4*i + 3] = i32undef;
580             }
581             rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
582                                               LLVMConstVector(shuffles1, length), "");
583             rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
584                                               LLVMConstVector(shuffles2, length), "");
585          }
586 
587          rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
588 
589          if (bld->coord_type.length > 4) {
590             /* expand size to each quad */
591             if (dims > 1) {
592                /* could use some broadcast_vector helper for this? */
593                LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
594                for (i = 0; i < num_quads; i++) {
595                   src[i] = float_size;
596                }
597                float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
598             }
599             else {
600                float_size = lp_build_broadcast_scalar(coord_bld, float_size);
601             }
602             rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
603 
604             if (dims <= 1) {
605                rho = rho_vec;
606             }
607             else {
608                if (dims >= 2) {
609                   LLVMValueRef rho_s, rho_t, rho_r;
610 
611                   rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
612                   rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
613 
614                   rho = lp_build_max(coord_bld, rho_s, rho_t);
615 
616                   if (dims >= 3) {
617                      rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
618                      rho = lp_build_max(coord_bld, rho, rho_r);
619                   }
620                }
621             }
622             if (rho_per_quad) {
623                rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
624                                                rho_bld->type, rho, 0);
625             }
626             else {
627                rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
628             }
629          }
630          else {
631             if (dims <= 1) {
632                rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
633             }
634             rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
635 
636             if (dims <= 1) {
637                rho = rho_vec;
638             }
639             else {
640                if (dims >= 2) {
641                   LLVMValueRef rho_s, rho_t, rho_r;
642 
643                   rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
644                   rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
645 
646                   rho = lp_build_max(float_bld, rho_s, rho_t);
647 
648                   if (dims >= 3) {
649                      rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
650                      rho = lp_build_max(float_bld, rho, rho_r);
651                   }
652                }
653             }
654             if (!rho_per_quad) {
655                rho = lp_build_broadcast_scalar(rho_bld, rho);
656             }
657          }
658       }
659    }
660 
661    return rho;
662 }
663 
664 
665 /*
666  * Bri-linear lod computation
667  *
668  * Use a piece-wise linear approximation of log2 such that:
669  * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
670  * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
671  *   with the steepness specified in 'factor'
672  * - exact result for 0.5, 1.5, etc.
673  *
674  *
675  *   1.0 -              /----*
676  *                     /
677  *                    /
678  *                   /
679  *   0.5 -          *
680  *                 /
681  *                /
682  *               /
683  *   0.0 - *----/
684  *
685  *         |                 |
686  *        2^0               2^1
687  *
688  * This is a technique also commonly used in hardware:
689  * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
690  *
691  * TODO: For correctness, this should only be applied when texture is known to
692  * have regular mipmaps, i.e., mipmaps derived from the base level.
693  *
694  * TODO: This could be done in fixed point, where applicable.
695  */
696 static void
lp_build_brilinear_lod(struct lp_build_context * bld,LLVMValueRef lod,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)697 lp_build_brilinear_lod(struct lp_build_context *bld,
698                        LLVMValueRef lod,
699                        double factor,
700                        LLVMValueRef *out_lod_ipart,
701                        LLVMValueRef *out_lod_fpart)
702 {
703    LLVMValueRef lod_fpart;
704    double pre_offset = (factor - 0.5)/factor - 0.5;
705    double post_offset = 1 - factor;
706 
707    if (0) {
708       lp_build_printf(bld->gallivm, "lod = %f\n", lod);
709    }
710 
711    lod = lp_build_add(bld, lod,
712                       lp_build_const_vec(bld->gallivm, bld->type, pre_offset));
713 
714    lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
715 
716    lod_fpart = lp_build_mad(bld, lod_fpart,
717                             lp_build_const_vec(bld->gallivm, bld->type, factor),
718                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
719 
720    /*
721     * It's not necessary to clamp lod_fpart since:
722     * - the above expression will never produce numbers greater than one.
723     * - the mip filtering branch is only taken if lod_fpart is positive
724     */
725 
726    *out_lod_fpart = lod_fpart;
727 
728    if (0) {
729       lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
730       lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
731    }
732 }
733 
734 
735 /*
736  * Combined log2 and brilinear lod computation.
737  *
738  * It's in all identical to calling lp_build_fast_log2() and
739  * lp_build_brilinear_lod() above, but by combining we can compute the integer
740  * and fractional part independently.
741  */
742 static void
lp_build_brilinear_rho(struct lp_build_context * bld,LLVMValueRef rho,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)743 lp_build_brilinear_rho(struct lp_build_context *bld,
744                        LLVMValueRef rho,
745                        double factor,
746                        LLVMValueRef *out_lod_ipart,
747                        LLVMValueRef *out_lod_fpart)
748 {
749    LLVMValueRef lod_ipart;
750    LLVMValueRef lod_fpart;
751 
752    const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
753    const double post_offset = 1 - 2*factor;
754 
755    assert(bld->type.floating);
756 
757    assert(lp_check_value(bld->type, rho));
758 
759    /*
760     * The pre factor will make the intersections with the exact powers of two
761     * happen precisely where we want them to be, which means that the integer
762     * part will not need any post adjustments.
763     */
764    rho = lp_build_mul(bld, rho,
765                       lp_build_const_vec(bld->gallivm, bld->type, pre_factor));
766 
767    /* ipart = ifloor(log2(rho)) */
768    lod_ipart = lp_build_extract_exponent(bld, rho, 0);
769 
770    /* fpart = rho / 2**ipart */
771    lod_fpart = lp_build_extract_mantissa(bld, rho);
772 
773    lod_fpart = lp_build_mad(bld, lod_fpart,
774                             lp_build_const_vec(bld->gallivm, bld->type, factor),
775                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
776 
777    /*
778     * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
779     * - the above expression will never produce numbers greater than one.
780     * - the mip filtering branch is only taken if lod_fpart is positive
781     */
782 
783    *out_lod_ipart = lod_ipart;
784    *out_lod_fpart = lod_fpart;
785 }
786 
787 
788 /**
789  * Fast implementation of iround(log2(sqrt(x))), based on
790  * log2(x^n) == n*log2(x).
791  *
792  * Gives accurate results all the time.
793  * (Could be trivially extended to handle other power-of-two roots.)
794  */
795 static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context * bld,LLVMValueRef x)796 lp_build_ilog2_sqrt(struct lp_build_context *bld,
797                     LLVMValueRef x)
798 {
799    LLVMBuilderRef builder = bld->gallivm->builder;
800    LLVMValueRef ipart;
801    struct lp_type i_type = lp_int_type(bld->type);
802    LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);
803 
804    assert(bld->type.floating);
805 
806    assert(lp_check_value(bld->type, x));
807 
808    /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
809    ipart = lp_build_extract_exponent(bld, x, 1);
810    ipart = LLVMBuildAShr(builder, ipart, one, "");
811 
812    return ipart;
813 }
814 
815 
816 /**
817  * Generate code to compute texture level of detail (lambda).
818  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
819  * \param lod_bias  optional float vector with the shader lod bias
820  * \param explicit_lod  optional float vector with the explicit lod
821  * \param cube_rho  rho calculated by cube coord mapping (optional)
822  * \param out_lod_ipart  integer part of lod
823  * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
824  * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
825  *
826  * The resulting lod can be scalar per quad or be per element.
827  */
828 void
lp_build_lod_selector(struct lp_build_sample_context * bld,boolean is_lodq,unsigned texture_unit,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,LLVMValueRef cube_rho,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,enum pipe_tex_mipfilter mip_filter,LLVMValueRef max_aniso,LLVMValueRef * out_lod,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart,LLVMValueRef * out_lod_positive)829 lp_build_lod_selector(struct lp_build_sample_context *bld,
830                       boolean is_lodq,
831                       unsigned texture_unit,
832                       unsigned sampler_unit,
833                       LLVMValueRef s,
834                       LLVMValueRef t,
835                       LLVMValueRef r,
836                       LLVMValueRef cube_rho,
837                       const struct lp_derivatives *derivs,
838                       LLVMValueRef lod_bias, /* optional */
839                       LLVMValueRef explicit_lod, /* optional */
840                       enum pipe_tex_mipfilter mip_filter,
841                       LLVMValueRef max_aniso,
842                       LLVMValueRef *out_lod,
843                       LLVMValueRef *out_lod_ipart,
844                       LLVMValueRef *out_lod_fpart,
845                       LLVMValueRef *out_lod_positive)
846 
847 {
848    LLVMBuilderRef builder = bld->gallivm->builder;
849    struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
850    struct lp_build_context *lodf_bld = &bld->lodf_bld;
851    LLVMValueRef lod;
852 
853    *out_lod_ipart = bld->lodi_bld.zero;
854    *out_lod_positive = bld->lodi_bld.zero;
855    *out_lod_fpart = lodf_bld->zero;
856 
857    /*
858     * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture Magnification:
859     * "Implementations may either unconditionally assume c = 0 for the minification
860     * vs. magnification switch-over point, or may choose to make c depend on the
861     * combination of minification and magnification modes as follows: if the
862     * magnification filter is given by LINEAR and the minification filter is given
863     * by NEAREST_MIPMAP_NEAREST or NEAREST_MIPMAP_LINEAR, then c = 0.5. This is
864     * done to ensure that a minified texture does not appear "sharper" than a
865     * magnified texture. Otherwise c = 0."
866     * And 3.9.11 Texture Minification:
867     * "If lod is less than or equal to the constant c (see section 3.9.12) the
868     * texture is said to be magnified; if it is greater, the texture is minified."
869     * So, using 0 as switchover point always, and using magnification for lod == 0.
870     * Note that the always c = 0 behavior is new (first appearing in GL 3.1 spec),
871     * old GL versions required 0.5 for the modes listed above.
872     * I have no clue about the (undocumented) wishes of d3d9/d3d10 here!
873     */
874 
875    if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
876       /* User is forcing sampling from a particular mipmap level.
877        * This is hit during mipmap generation.
878        */
879       LLVMValueRef min_lod =
880          dynamic_state->min_lod(dynamic_state, bld->gallivm,
881                                 bld->context_ptr, sampler_unit);
882 
883       lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
884    }
885    else {
886       if (explicit_lod) {
887          if (bld->num_lods != bld->coord_type.length)
888             lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
889                                             lodf_bld->type, explicit_lod, 0);
890          else
891             lod = explicit_lod;
892       }
893       else {
894          LLVMValueRef rho;
895          boolean rho_squared = (bld->no_rho_approx &&
896                                 (bld->dims > 1)) || cube_rho;
897 
898          if (bld->static_sampler_state->aniso &&
899              !explicit_lod) {
900             rho = lp_build_pmin(bld, texture_unit, s, t, max_aniso);
901             rho_squared = true;
902          } else
903             rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);
904 
905          /*
906           * Compute lod = log2(rho)
907           */
908 
909          if (!lod_bias && !is_lodq &&
910              !bld->static_sampler_state->aniso &&
911              !bld->static_sampler_state->lod_bias_non_zero &&
912              !bld->static_sampler_state->apply_max_lod &&
913              !bld->static_sampler_state->apply_min_lod) {
914             /*
915              * Special case when there are no post-log2 adjustments, which
916              * saves instructions but keeping the integer and fractional lod
917              * computations separate from the start.
918              */
919 
920             if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
921                 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
922                /*
923                 * Don't actually need both values all the time, lod_ipart is
924                 * needed for nearest mipfilter, lod_positive if min != mag.
925                 */
926                if (rho_squared) {
927                   *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
928                }
929                else {
930                   *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
931                }
932                *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
933                                                 rho, lodf_bld->one);
934                return;
935             }
936             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
937                 !bld->no_brilinear && !rho_squared &&
938                 !bld->static_sampler_state->aniso) {
939                /*
940                 * This can't work if rho is squared. Not sure if it could be
941                 * fixed while keeping it worthwile, could also do sqrt here
942                 * but brilinear and no_rho_opt seems like a combination not
943                 * making much sense anyway so just use ordinary path below.
944                 */
945                lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
946                                       out_lod_ipart, out_lod_fpart);
947                *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
948                                                 rho, lodf_bld->one);
949                return;
950             }
951          }
952 
953          if (0) {
954             lod = lp_build_log2(lodf_bld, rho);
955          }
956          else {
957             /* get more accurate results if we just sqaure rho always */
958             if (!rho_squared)
959                rho = lp_build_mul(lodf_bld, rho, rho);
960             lod = lp_build_fast_log2(lodf_bld, rho);
961          }
962 
963          /* log2(x^2) == 0.5*log2(x) */
964          lod = lp_build_mul(lodf_bld, lod,
965                             lp_build_const_vec(bld->gallivm, lodf_bld->type, 0.5F));
966 
967          /* add shader lod bias */
968          if (lod_bias) {
969             if (bld->num_lods != bld->coord_type.length)
970                lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
971                                                     lodf_bld->type, lod_bias, 0);
972             lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
973          }
974       }
975 
976       /* add sampler lod bias */
977       if (bld->static_sampler_state->lod_bias_non_zero) {
978          LLVMValueRef sampler_lod_bias =
979             dynamic_state->lod_bias(dynamic_state, bld->gallivm,
980                                     bld->context_ptr, sampler_unit);
981          sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
982                                                       sampler_lod_bias);
983          lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
984       }
985 
986       if (is_lodq) {
987          *out_lod = lod;
988       }
989 
990       /* clamp lod */
991       if (bld->static_sampler_state->apply_max_lod) {
992          LLVMValueRef max_lod =
993             dynamic_state->max_lod(dynamic_state, bld->gallivm,
994                                    bld->context_ptr, sampler_unit);
995          max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
996 
997          lod = lp_build_min(lodf_bld, lod, max_lod);
998       }
999       if (bld->static_sampler_state->apply_min_lod) {
1000          LLVMValueRef min_lod =
1001             dynamic_state->min_lod(dynamic_state, bld->gallivm,
1002                                    bld->context_ptr, sampler_unit);
1003          min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
1004 
1005          lod = lp_build_max(lodf_bld, lod, min_lod);
1006       }
1007 
1008       if (is_lodq) {
1009          *out_lod_fpart = lod;
1010          return;
1011       }
1012    }
1013 
1014    *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
1015                                     lod, lodf_bld->zero);
1016 
1017    if (bld->static_sampler_state->aniso) {
1018       *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
1019    } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1020       if (!bld->no_brilinear) {
1021          lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
1022                                 out_lod_ipart, out_lod_fpart);
1023       }
1024       else {
1025          lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
1026       }
1027 
1028       lp_build_name(*out_lod_fpart, "lod_fpart");
1029    }
1030    else {
1031       *out_lod_ipart = lp_build_iround(lodf_bld, lod);
1032    }
1033 
1034    lp_build_name(*out_lod_ipart, "lod_ipart");
1035 
1036    return;
1037 }
1038 
1039 
1040 /**
1041  * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1042  * to actual mip level.
1043  * Note: this is all scalar per quad code.
1044  * \param lod_ipart  int texture level of detail
1045  * \param level_out  returns integer
1046  * \param out_of_bounds returns per coord out_of_bounds mask if provided
1047  */
1048 void
lp_build_nearest_mip_level(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef lod_ipart,LLVMValueRef * level_out,LLVMValueRef * out_of_bounds)1049 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1050                            unsigned texture_unit,
1051                            LLVMValueRef lod_ipart,
1052                            LLVMValueRef *level_out,
1053                            LLVMValueRef *out_of_bounds)
1054 {
1055    struct lp_build_context *leveli_bld = &bld->leveli_bld;
1056    struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1057    LLVMValueRef first_level, last_level, level;
1058 
1059    first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1060                                             bld->context_ptr, texture_unit, NULL);
1061    last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1062                                           bld->context_ptr, texture_unit, NULL);
1063    first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1064    last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1065 
1066    level = lp_build_add(leveli_bld, lod_ipart, first_level);
1067 
1068    if (out_of_bounds) {
1069       LLVMValueRef out, out1;
1070       out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
1071       out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
1072       out = lp_build_or(leveli_bld, out, out1);
1073       if (bld->num_mips == bld->coord_bld.type.length) {
1074          *out_of_bounds = out;
1075       }
1076       else if (bld->num_mips == 1) {
1077          *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
1078       }
1079       else {
1080          assert(bld->num_mips == bld->coord_bld.type.length / 4);
1081          *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1082                                                                 leveli_bld->type,
1083                                                                 bld->int_coord_bld.type,
1084                                                                 out);
1085       }
1086       level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
1087       *level_out = level;
1088    }
1089    else {
1090       /* clamp level to legal range of levels */
1091       *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
1092 
1093    }
1094 }
1095 
1096 
1097 /**
1098  * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1099  * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1100  * part accordingly.
1101  * Later, we'll sample from those two mipmap levels and interpolate between them.
1102  */
1103 void
lp_build_linear_mip_levels(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef lod_ipart,LLVMValueRef * lod_fpart_inout,LLVMValueRef * level0_out,LLVMValueRef * level1_out)1104 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
1105                            unsigned texture_unit,
1106                            LLVMValueRef lod_ipart,
1107                            LLVMValueRef *lod_fpart_inout,
1108                            LLVMValueRef *level0_out,
1109                            LLVMValueRef *level1_out)
1110 {
1111    LLVMBuilderRef builder = bld->gallivm->builder;
1112    struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
1113    struct lp_build_context *leveli_bld = &bld->leveli_bld;
1114    struct lp_build_context *levelf_bld = &bld->levelf_bld;
1115    LLVMValueRef first_level, last_level;
1116    LLVMValueRef clamp_min;
1117    LLVMValueRef clamp_max;
1118 
1119    assert(bld->num_lods == bld->num_mips);
1120 
1121    first_level = dynamic_state->first_level(dynamic_state, bld->gallivm,
1122                                             bld->context_ptr, texture_unit, NULL);
1123    last_level = dynamic_state->last_level(dynamic_state, bld->gallivm,
1124                                           bld->context_ptr, texture_unit, NULL);
1125    first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
1126    last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
1127 
1128    *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
1129    *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
1130 
1131    /*
1132     * Clamp both *level0_out and *level1_out to [first_level, last_level], with
1133     * the minimum number of comparisons, and zeroing lod_fpart in the extreme
1134     * ends in the process.
1135     */
1136 
1137    /* *level0_out < first_level */
1138    clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
1139                              *level0_out, first_level,
1140                              "clamp_lod_to_first");
1141 
1142    *level0_out = LLVMBuildSelect(builder, clamp_min,
1143                                  first_level, *level0_out, "");
1144 
1145    *level1_out = LLVMBuildSelect(builder, clamp_min,
1146                                  first_level, *level1_out, "");
1147 
1148    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
1149                                       levelf_bld->zero, *lod_fpart_inout, "");
1150 
1151    /* *level0_out >= last_level */
1152    clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
1153                              *level0_out, last_level,
1154                              "clamp_lod_to_last");
1155 
1156    *level0_out = LLVMBuildSelect(builder, clamp_max,
1157                                  last_level, *level0_out, "");
1158 
1159    *level1_out = LLVMBuildSelect(builder, clamp_max,
1160                                  last_level, *level1_out, "");
1161 
1162    *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
1163                                       levelf_bld->zero, *lod_fpart_inout, "");
1164 
1165    lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
1166    lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
1167    lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
1168 }
1169 
1170 /**
1171  * A helper function that factorizes this common pattern.
1172  */
1173 static LLVMValueRef
load_mip(struct gallivm_state * gallivm,LLVMValueRef offsets,LLVMValueRef index1)1174 load_mip(struct gallivm_state *gallivm, LLVMValueRef offsets, LLVMValueRef index1) {
1175    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1176    LLVMValueRef indexes[2] = {zero, index1};
1177    LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, offsets, indexes, ARRAY_SIZE(indexes), "");
1178    return LLVMBuildLoad(gallivm->builder, ptr, "");
1179 }
1180 
1181 /**
1182  * Return pointer to a single mipmap level.
1183  * \param level  integer mipmap level
1184  */
1185 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1186 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1187                           LLVMValueRef level)
1188 {
1189    LLVMValueRef mip_offset = load_mip(bld->gallivm, bld->mip_offsets, level);
1190    LLVMBuilderRef builder = bld->gallivm->builder;
1191    LLVMValueRef data_ptr = LLVMBuildGEP(builder, bld->base_ptr, &mip_offset, 1, "");
1192    return data_ptr;
1193 }
1194 
1195 /**
1196  * Return (per-pixel) offsets to mip levels.
1197  * \param level  integer mipmap level
1198  */
1199 LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context * bld,LLVMValueRef level)1200 lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
1201                          LLVMValueRef level)
1202 {
1203    LLVMBuilderRef builder = bld->gallivm->builder;
1204    LLVMValueRef offsets, offset1;
1205 
1206    if (bld->num_mips == 1) {
1207       offset1 = load_mip(bld->gallivm, bld->mip_offsets, level);
1208       offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
1209    }
1210    else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1211       unsigned i;
1212 
1213       offsets = bld->int_coord_bld.undef;
1214       for (i = 0; i < bld->num_mips; i++) {
1215          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1216          offset1 = load_mip(bld->gallivm, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, ""));
1217          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1218          offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
1219       }
1220       offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
1221    }
1222    else {
1223       unsigned i;
1224 
1225       assert (bld->num_mips == bld->coord_bld.type.length);
1226 
1227       offsets = bld->int_coord_bld.undef;
1228       for (i = 0; i < bld->num_mips; i++) {
1229          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1230          offset1 = load_mip(bld->gallivm, bld->mip_offsets, LLVMBuildExtractElement(builder, level, indexi, ""));
1231          offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
1232       }
1233    }
1234    return offsets;
1235 }
1236 
1237 
1238 /**
1239  * Codegen equivalent for u_minify().
1240  * @param lod_scalar  if lod is a (broadcasted) scalar
1241  * Return max(1, base_size >> level);
1242  */
1243 LLVMValueRef
lp_build_minify(struct lp_build_context * bld,LLVMValueRef base_size,LLVMValueRef level,boolean lod_scalar)1244 lp_build_minify(struct lp_build_context *bld,
1245                 LLVMValueRef base_size,
1246                 LLVMValueRef level,
1247                 boolean lod_scalar)
1248 {
1249    LLVMBuilderRef builder = bld->gallivm->builder;
1250    assert(lp_check_value(bld->type, base_size));
1251    assert(lp_check_value(bld->type, level));
1252 
1253    if (level == bld->zero) {
1254       /* if we're using mipmap level zero, no minification is needed */
1255       return base_size;
1256    }
1257    else {
1258       LLVMValueRef size;
1259       assert(bld->type.sign);
1260       if (lod_scalar ||
1261          (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
1262          size = LLVMBuildLShr(builder, base_size, level, "minify");
1263          size = lp_build_max(bld, size, bld->one);
1264       }
1265       else {
1266          /*
1267           * emulate shift with float mul, since intel "forgot" shifts with
1268           * per-element shift count until avx2, which results in terrible
1269           * scalar extraction (both count and value), scalar shift,
1270           * vector reinsertion. Should not be an issue on any non-x86 cpu
1271           * with a vector instruction set.
1272           * On cpus with AMD's XOP this should also be unnecessary but I'm
1273           * not sure if llvm would emit this with current flags.
1274           */
1275          LLVMValueRef const127, const23, lf;
1276          struct lp_type ftype;
1277          struct lp_build_context fbld;
1278          ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
1279          lp_build_context_init(&fbld, bld->gallivm, ftype);
1280          const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
1281          const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);
1282 
1283          /* calculate 2^(-level) float */
1284          lf = lp_build_sub(bld, const127, level);
1285          lf = lp_build_shl(bld, lf, const23);
1286          lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");
1287 
1288          /* finish shift operation by doing float mul */
1289          base_size = lp_build_int_to_float(&fbld, base_size);
1290          size = lp_build_mul(&fbld, base_size, lf);
1291          /*
1292           * do the max also with floats because
1293           * a) non-emulated int max requires sse41
1294           *    (this is actually a lie as we could cast to 16bit values
1295           *    as 16bit is sufficient and 16bit int max is sse2)
1296           * b) with avx we can do int max 4-wide but float max 8-wide
1297           */
1298          size = lp_build_max(&fbld, size, fbld.one);
1299          size = lp_build_itrunc(&fbld, size);
1300       }
1301       return size;
1302    }
1303 }
1304 
1305 
1306 /**
1307  * Dereference stride_array[mipmap_level] array to get a stride.
1308  * Return stride as a vector.
1309  */
1310 static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context * bld,LLVMValueRef stride_array,LLVMValueRef level)1311 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1312                               LLVMValueRef stride_array, LLVMValueRef level)
1313 {
1314    LLVMBuilderRef builder = bld->gallivm->builder;
1315    LLVMValueRef stride, stride1;
1316    if (bld->num_mips == 1) {
1317       stride1 = load_mip(bld->gallivm, stride_array, level);
1318       stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1319    }
1320    else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1321       LLVMValueRef stride1;
1322       unsigned i;
1323 
1324       stride = bld->int_coord_bld.undef;
1325       for (i = 0; i < bld->num_mips; i++) {
1326          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1327          stride1 = load_mip(bld->gallivm, stride_array, LLVMBuildExtractElement(builder, level, indexi, ""));
1328          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1329          stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1330       }
1331       stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1332    }
1333    else {
1334       LLVMValueRef stride1;
1335       unsigned i;
1336 
1337       assert (bld->num_mips == bld->coord_bld.type.length);
1338 
1339       stride = bld->int_coord_bld.undef;
1340       for (i = 0; i < bld->coord_bld.type.length; i++) {
1341          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1342          stride1 = load_mip(bld->gallivm, stride_array, LLVMBuildExtractElement(builder, level, indexi, ""));
1343          stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1344       }
1345    }
1346    return stride;
1347 }
1348 
1349 
1350 /**
1351  * When sampling a mipmap, we need to compute the width, height, depth
1352  * of the source levels from the level indexes.  This helper function
1353  * does that.
1354  */
1355 void
lp_build_mipmap_level_sizes(struct lp_build_sample_context * bld,LLVMValueRef ilevel,LLVMValueRef * out_size,LLVMValueRef * row_stride_vec,LLVMValueRef * img_stride_vec)1356 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
1357                             LLVMValueRef ilevel,
1358                             LLVMValueRef *out_size,
1359                             LLVMValueRef *row_stride_vec,
1360                             LLVMValueRef *img_stride_vec)
1361 {
1362    const unsigned dims = bld->dims;
1363    LLVMValueRef ilevel_vec;
1364 
1365    /*
1366     * Compute width, height, depth at mipmap level 'ilevel'
1367     */
1368    if (bld->num_mips == 1) {
1369       ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
1370       *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec, TRUE);
1371    }
1372    else {
1373       LLVMValueRef int_size_vec;
1374       LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
1375       unsigned num_quads = bld->coord_bld.type.length / 4;
1376       unsigned i;
1377 
1378       if (bld->num_mips == num_quads) {
1379          /*
1380           * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
1381           * intel "forgot" the variable shift count instruction until avx2.
1382           * A harmless 8x32 shift gets translated into 32 instructions
1383           * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
1384           * unable to recognize if there are really just 2 different shift
1385           * count values. So do the shift 4-wide before expansion.
1386           */
1387          struct lp_build_context bld4;
1388          struct lp_type type4;
1389 
1390          type4 = bld->int_coord_bld.type;
1391          type4.length = 4;
1392 
1393          lp_build_context_init(&bld4, bld->gallivm, type4);
1394 
1395          if (bld->dims == 1) {
1396             assert(bld->int_size_in_bld.type.length == 1);
1397             int_size_vec = lp_build_broadcast_scalar(&bld4,
1398                                                      bld->int_size);
1399          }
1400          else {
1401             assert(bld->int_size_in_bld.type.length == 4);
1402             int_size_vec = bld->int_size;
1403          }
1404 
1405          for (i = 0; i < num_quads; i++) {
1406             LLVMValueRef ileveli;
1407             LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1408 
1409             ileveli = lp_build_extract_broadcast(bld->gallivm,
1410                                                  bld->leveli_bld.type,
1411                                                  bld4.type,
1412                                                  ilevel,
1413                                                  indexi);
1414             tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, TRUE);
1415          }
1416          /*
1417           * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
1418           * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
1419           */
1420          *out_size = lp_build_concat(bld->gallivm,
1421                                      tmp,
1422                                      bld4.type,
1423                                      num_quads);
1424       }
1425       else {
1426         /* FIXME: this is terrible and results in _huge_ vector
1427          * (for the dims > 1 case).
1428          * Should refactor this (together with extract_image_sizes) and do
1429          * something more useful. Could for instance if we have width,height
1430          * with 4-wide vector pack all elements into a 8xi16 vector
1431          * (on which we can still do useful math) instead of using a 16xi32
1432          * vector.
1433          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
1434          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
1435          */
1436          assert(bld->num_mips == bld->coord_bld.type.length);
1437          if (bld->dims == 1) {
1438             assert(bld->int_size_in_bld.type.length == 1);
1439             int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
1440                                                      bld->int_size);
1441             *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel, FALSE);
1442          }
1443          else {
1444             LLVMValueRef ilevel1;
1445             for (i = 0; i < bld->num_mips; i++) {
1446                LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1447                ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
1448                                                     bld->int_size_in_bld.type, ilevel, indexi);
1449                tmp[i] = bld->int_size;
1450                tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1, TRUE);
1451             }
1452             *out_size = lp_build_concat(bld->gallivm, tmp,
1453                                         bld->int_size_in_bld.type,
1454                                         bld->num_mips);
1455          }
1456       }
1457    }
1458 
1459    if (dims >= 2) {
1460       *row_stride_vec = lp_build_get_level_stride_vec(bld,
1461                                                       bld->row_stride_array,
1462                                                       ilevel);
1463    }
1464    if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
1465       *img_stride_vec = lp_build_get_level_stride_vec(bld,
1466                                                       bld->img_stride_array,
1467                                                       ilevel);
1468    }
1469 }
1470 
1471 
1472 /**
1473  * Extract and broadcast texture size.
1474  *
1475  * @param size_type   type of the texture size vector (either
1476  *                    bld->int_size_type or bld->float_size_type)
1477  * @param coord_type  type of the texture size vector (either
1478  *                    bld->int_coord_type or bld->coord_type)
1479  * @param size        vector with the texture size (width, height, depth)
1480  */
1481 void
lp_build_extract_image_sizes(struct lp_build_sample_context * bld,struct lp_build_context * size_bld,struct lp_type coord_type,LLVMValueRef size,LLVMValueRef * out_width,LLVMValueRef * out_height,LLVMValueRef * out_depth)1482 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
1483                              struct lp_build_context *size_bld,
1484                              struct lp_type coord_type,
1485                              LLVMValueRef size,
1486                              LLVMValueRef *out_width,
1487                              LLVMValueRef *out_height,
1488                              LLVMValueRef *out_depth)
1489 {
1490    const unsigned dims = bld->dims;
1491    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
1492    struct lp_type size_type = size_bld->type;
1493 
1494    if (bld->num_mips == 1) {
1495       *out_width = lp_build_extract_broadcast(bld->gallivm,
1496                                               size_type,
1497                                               coord_type,
1498                                               size,
1499                                               LLVMConstInt(i32t, 0, 0));
1500       if (dims >= 2) {
1501          *out_height = lp_build_extract_broadcast(bld->gallivm,
1502                                                   size_type,
1503                                                   coord_type,
1504                                                   size,
1505                                                   LLVMConstInt(i32t, 1, 0));
1506          if (dims == 3) {
1507             *out_depth = lp_build_extract_broadcast(bld->gallivm,
1508                                                     size_type,
1509                                                     coord_type,
1510                                                     size,
1511                                                     LLVMConstInt(i32t, 2, 0));
1512          }
1513       }
1514    }
1515    else {
1516       unsigned num_quads = bld->coord_bld.type.length / 4;
1517 
1518       if (dims == 1) {
1519          *out_width = size;
1520       }
1521       else if (bld->num_mips == num_quads) {
1522          *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
1523          if (dims >= 2) {
1524             *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
1525             if (dims == 3) {
1526                *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
1527             }
1528          }
1529       }
1530       else {
1531          assert(bld->num_mips == bld->coord_type.length);
1532          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1533                                                 coord_type, size, 0);
1534          if (dims >= 2) {
1535             *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1536                                                     coord_type, size, 1);
1537             if (dims == 3) {
1538                *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1539                                                       coord_type, size, 2);
1540             }
1541          }
1542       }
1543    }
1544 }
1545 
1546 
1547 /**
1548  * Unnormalize coords.
1549  *
1550  * @param flt_size  vector with the integer texture size (width, height, depth)
1551  */
1552 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1553 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1554                              LLVMValueRef flt_size,
1555                              LLVMValueRef *s,
1556                              LLVMValueRef *t,
1557                              LLVMValueRef *r)
1558 {
1559    const unsigned dims = bld->dims;
1560    LLVMValueRef width;
1561    LLVMValueRef height = NULL;
1562    LLVMValueRef depth = NULL;
1563 
1564    lp_build_extract_image_sizes(bld,
1565                                 &bld->float_size_bld,
1566                                 bld->coord_type,
1567                                 flt_size,
1568                                 &width,
1569                                 &height,
1570                                 &depth);
1571 
1572    /* s = s * width, t = t * height */
1573    *s = lp_build_mul(&bld->coord_bld, *s, width);
1574    if (dims >= 2) {
1575       *t = lp_build_mul(&bld->coord_bld, *t, height);
1576       if (dims >= 3) {
1577          *r = lp_build_mul(&bld->coord_bld, *r, depth);
1578       }
1579    }
1580 }
1581 
1582 /**
1583  * Generate new coords and faces for cubemap texels falling off the face.
1584  *
1585  * @param face   face (center) of the pixel
1586  * @param x0     lower x coord
1587  * @param x1     higher x coord (must be x0 + 1)
1588  * @param y0     lower y coord
1589  * @param y1     higher y coord (must be x0 + 1)
1590  * @param max_coord     texture cube (level) size - 1
1591  * @param next_faces    new face values when falling off
1592  * @param next_xcoords  new x coord values when falling off
1593  * @param next_ycoords  new y coord values when falling off
1594  *
1595  * The arrays hold the new values when under/overflow of
1596  * lower x, higher x, lower y, higher y coord would occur (in this order).
1597  * next_xcoords/next_ycoords have two entries each (for both new lower and
1598  * higher coord).
1599  */
1600 void
lp_build_cube_new_coords(struct lp_build_context * ivec_bld,LLVMValueRef face,LLVMValueRef x0,LLVMValueRef x1,LLVMValueRef y0,LLVMValueRef y1,LLVMValueRef max_coord,LLVMValueRef next_faces[4],LLVMValueRef next_xcoords[4][2],LLVMValueRef next_ycoords[4][2])1601 lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1602                         LLVMValueRef face,
1603                         LLVMValueRef x0,
1604                         LLVMValueRef x1,
1605                         LLVMValueRef y0,
1606                         LLVMValueRef y1,
1607                         LLVMValueRef max_coord,
1608                         LLVMValueRef next_faces[4],
1609                         LLVMValueRef next_xcoords[4][2],
1610                         LLVMValueRef next_ycoords[4][2])
1611 {
1612    /*
1613     * Lookup tables aren't nice for simd code hence try some logic here.
1614     * (Note that while it would not be necessary to do per-sample (4) lookups
1615     * when using a LUT as it's impossible that texels fall off of positive
1616     * and negative edges simultaneously, it would however be necessary to
1617     * do 2 lookups for corner handling as in this case texels both fall off
1618     * of x and y axes.)
1619     */
1620    /*
1621     * Next faces (for face 012345):
1622     * x < 0.0  : 451110
1623     * x >= 1.0 : 540001
1624     * y < 0.0  : 225422
1625     * y >= 1.0 : 334533
1626     * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1627     * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
1628     * nfy+: face & ~4 > 1 ? face + 2 : 3;
1629     * This could also use pshufb instead, but would need (manually coded)
1630     * ssse3 intrinsic (llvm won't do non-constant shuffles).
1631     */
1632    struct gallivm_state *gallivm = ivec_bld->gallivm;
1633    LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1634    LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1635    LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1636    LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1637    LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1638    LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1639 
1640    sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1641    tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1642    sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1643    faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1644    tmp = lp_build_add(ivec_bld, faceand1, c4);
1645    next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1646    next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1647 
1648    tmp = lp_build_andnot(ivec_bld, face, c4);
1649    sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1650    tmp = lp_build_add(ivec_bld, face, c2);
1651    next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1652    next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1653 
1654    /*
1655     * new xcoords (for face 012345):
1656     * x < 0.0  : max   max   t     max-t max  max
1657     * x >= 1.0 : 0     0     max-t t     0    0
1658     * y < 0.0  : max   0     max-s s     s    max-s
1659     * y >= 1.0 : max   0     s     max-s s    max-s
1660     *
1661     * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1662     * ncx[0] = max - ncx[1]
1663     * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1664     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1665     */
1666    sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1667    maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1668    tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1669    next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1670    next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1671    maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1672    tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1673    next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1674    next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1675 
1676    sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1677 
1678    tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1679    maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1680    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1681    next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1682    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1683    next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1684    maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1685    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1686    next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1687    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1688    next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1689 
1690    /*
1691     * new ycoords (for face 012345):
1692     * x < 0.0  : t     t     0     max   t    t
1693     * x >= 1.0 : t     t     0     max   t    t
1694     * y < 0.0  : max-s s     0     max   max  0
1695     * y >= 1.0 : s     max-s 0     max   0    max
1696     *
1697     * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1698     * ncy[1] = ncy[0]
1699     * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
1700     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1701     */
1702    tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1703    next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1704    next_ycoords[1][0] = next_ycoords[0][0];
1705    next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1706    next_ycoords[1][1] = next_ycoords[0][1];
1707 
1708    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1709    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1710    next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1711    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1712    next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1713    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1714    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1715    next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1716    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1717    next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1718 }
1719 
1720 
1721 /** Helper used by lp_build_cube_lookup() */
1722 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1723 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1724 {
1725    /* ima = +0.5 / abs(coord); */
1726    LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1727    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1728    /* avoid div by zero */
1729    LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1730    LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1731    LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1732    return ima;
1733 }
1734 
1735 
1736 /** Helper for doing 3-wise selection.
1737  * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1738  */
1739 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1740 lp_build_select3(struct lp_build_context *sel_bld,
1741                  LLVMValueRef sel0,
1742                  LLVMValueRef sel1,
1743                  LLVMValueRef val0,
1744                  LLVMValueRef val1,
1745                  LLVMValueRef val2)
1746 {
1747    LLVMValueRef tmp;
1748    tmp = lp_build_select(sel_bld, sel0, val0, val1);
1749    return lp_build_select(sel_bld, sel1, val2, tmp);
1750 }
1751 
1752 
1753 /**
1754  * Generate code to do cube face selection and compute per-face texcoords.
1755  */
1756 void
lp_build_cube_lookup(struct lp_build_sample_context * bld,LLVMValueRef * coords,const struct lp_derivatives * derivs_in,LLVMValueRef * rho,struct lp_derivatives * derivs_out,boolean need_derivs)1757 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1758                      LLVMValueRef *coords,
1759                      const struct lp_derivatives *derivs_in, /* optional */
1760                      LLVMValueRef *rho,
1761                      struct lp_derivatives *derivs_out, /* optional */
1762                      boolean need_derivs)
1763 {
1764    struct lp_build_context *coord_bld = &bld->coord_bld;
1765    LLVMBuilderRef builder = bld->gallivm->builder;
1766    struct gallivm_state *gallivm = bld->gallivm;
1767    LLVMValueRef si, ti, ri;
1768 
1769    /*
1770     * Do per-pixel face selection. We cannot however (as we used to do)
1771     * simply calculate the derivs afterwards (which is very bogus for
1772     * explicit derivs btw) because the values would be "random" when
1773     * not all pixels lie on the same face. So what we do here is just
1774     * calculate the derivatives after scaling the coords by the absolute
1775     * value of the inverse major axis, and essentially do rho calculation
1776     * steps as if it were a 3d texture. This is perfect if all pixels hit
1777     * the same face, but not so great at edges, I believe the max error
1778     * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
1779     * the 3d distance between 2 points on the cube instead of measuring up/down
1780     * the edge). Still this is possibly a win over just selecting the same face
1781     * for all pixels. Unfortunately, something like that doesn't work for
1782     * explicit derivatives.
1783     */
1784    struct lp_build_context *cint_bld = &bld->int_coord_bld;
1785    struct lp_type intctype = cint_bld->type;
1786    LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1787    LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1788    LLVMValueRef as, at, ar, face, face_s, face_t;
1789    LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1790    LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1791    LLVMValueRef tnegi, rnegi;
1792    LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1793    LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1794    LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1795                                                   1LL << (intctype.width - 1));
1796    LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1797                                                    intctype.width -1);
1798    LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1799    LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1800    LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1801    LLVMValueRef s = coords[0];
1802    LLVMValueRef t = coords[1];
1803    LLVMValueRef r = coords[2];
1804 
1805    assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1806    assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1807    assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1808 
1809    /*
1810     * get absolute value (for x/y/z face selection) and sign bit
1811     * (for mirroring minor coords and pos/neg face selection)
1812     * of the original coords.
1813     */
1814    as = lp_build_abs(&bld->coord_bld, s);
1815    at = lp_build_abs(&bld->coord_bld, t);
1816    ar = lp_build_abs(&bld->coord_bld, r);
1817 
1818    /*
1819     * major face determination: select x if x > y else select y
1820     * select z if z >= max(x,y) else select previous result
1821     * if some axis are the same we chose z over y, y over x - the
1822     * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1823     * wouldn't care could save a select or two if using different
1824     * compares and doing at_g_as_ar last since tnewx and tnewz are the
1825     * same).
1826     */
1827    as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1828    maxasat = lp_build_max(coord_bld, as, at);
1829    ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1830 
1831    if (need_derivs) {
1832       /*
1833        * XXX: This is really really complex.
1834        * It is a bit overkill to use this for implicit derivatives as well,
1835        * no way this is worth the cost in practice, but seems to be the
1836        * only way for getting accurate and per-pixel lod values.
1837        */
1838       LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1839       LLVMValueRef madx, mady, madxdivma, madydivma;
1840       LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1841       LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1842       LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1843       LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1844       LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1845       /*
1846        * s = 1/2 * ( sc / ma + 1)
1847        * t = 1/2 * ( tc / ma + 1)
1848        *
1849        * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1850        * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1851        *
1852        * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1853        * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1854        * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1855        * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1856        */
1857 
1858       /* select ma, calculate ima */
1859       ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1860       mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1861       signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1862       ima = lp_build_div(coord_bld, coord_bld->one, ma);
1863       imahalf = lp_build_mul(coord_bld, posHalf, ima);
1864       imahalfpos = lp_build_abs(coord_bld, imahalf);
1865 
1866       if (!derivs_in) {
1867          ddx[0] = lp_build_ddx(coord_bld, s);
1868          ddx[1] = lp_build_ddx(coord_bld, t);
1869          ddx[2] = lp_build_ddx(coord_bld, r);
1870          ddy[0] = lp_build_ddy(coord_bld, s);
1871          ddy[1] = lp_build_ddy(coord_bld, t);
1872          ddy[2] = lp_build_ddy(coord_bld, r);
1873       }
1874       else {
1875          ddx[0] = derivs_in->ddx[0];
1876          ddx[1] = derivs_in->ddx[1];
1877          ddx[2] = derivs_in->ddx[2];
1878          ddy[0] = derivs_in->ddy[0];
1879          ddy[1] = derivs_in->ddy[1];
1880          ddy[2] = derivs_in->ddy[2];
1881       }
1882 
1883       /* select major derivatives */
1884       madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1885       mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1886 
1887       si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1888       ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1889       ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1890 
1891       sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1892       tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1893       rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1894 
1895       sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1896       tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1897       rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1898 
1899       /*
1900        * compute all possible new s/t coords, which does the mirroring,
1901        * and do the same for derivs minor axes.
1902        * snewx = signma * -r;
1903        * tnewx = -t;
1904        * snewy = s;
1905        * tnewy = signma * r;
1906        * snewz = signma * s;
1907        * tnewz = -t;
1908        */
1909       tnegi = LLVMBuildXor(builder, ti, signmask, "");
1910       rnegi = LLVMBuildXor(builder, ri, signmask, "");
1911       tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1912       rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1913       tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1914       rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1915 
1916       snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1917       tnewx = tnegi;
1918       sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1919       tdxnewx = tdxnegi;
1920       sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1921       tdynewx = tdynegi;
1922 
1923       snewy = si;
1924       tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1925       sdxnewy = sdxi;
1926       tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1927       sdynewy = sdyi;
1928       tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1929 
1930       snewz = LLVMBuildXor(builder, signmabit, si, "");
1931       tnewz = tnegi;
1932       sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1933       tdxnewz = tdxnegi;
1934       sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1935       tdynewz = tdynegi;
1936 
1937       /* select the mirrored values */
1938       face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1939       face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1940       face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1941       face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1942       face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
1943       face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
1944       face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
1945 
1946       face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
1947       face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
1948       face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
1949       face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
1950       face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
1951       face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
1952 
1953       /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
1954       madxdivma = lp_build_mul(coord_bld, madx, ima);
1955       tmp = lp_build_mul(coord_bld, madxdivma, face_s);
1956       tmp = lp_build_sub(coord_bld, face_sdx, tmp);
1957       derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
1958 
1959       /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
1960       tmp = lp_build_mul(coord_bld, madxdivma, face_t);
1961       tmp = lp_build_sub(coord_bld, face_tdx, tmp);
1962       derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
1963 
1964       /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
1965       madydivma = lp_build_mul(coord_bld, mady, ima);
1966       tmp = lp_build_mul(coord_bld, madydivma, face_s);
1967       tmp = lp_build_sub(coord_bld, face_sdy, tmp);
1968       derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
1969 
1970       /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
1971       tmp = lp_build_mul(coord_bld, madydivma, face_t);
1972       tmp = lp_build_sub(coord_bld, face_tdy, tmp);
1973       derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
1974 
1975       signma = LLVMBuildLShr(builder, mai, signshift, "");
1976       coords[2] = LLVMBuildOr(builder, face, signma, "face");
1977 
1978       /* project coords */
1979       face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
1980       face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
1981 
1982       coords[0] = lp_build_add(coord_bld, face_s, posHalf);
1983       coords[1] = lp_build_add(coord_bld, face_t, posHalf);
1984 
1985       return;
1986    }
1987 
1988    ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1989    mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1990    signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1991 
1992    si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1993    ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1994    ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1995 
1996    /*
1997     * compute all possible new s/t coords, which does the mirroring
1998     * snewx = signma * -r;
1999     * tnewx = -t;
2000     * snewy = s;
2001     * tnewy = signma * r;
2002     * snewz = signma * s;
2003     * tnewz = -t;
2004     */
2005    tnegi = LLVMBuildXor(builder, ti, signmask, "");
2006    rnegi = LLVMBuildXor(builder, ri, signmask, "");
2007 
2008    snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2009    tnewx = tnegi;
2010 
2011    snewy = si;
2012    tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2013 
2014    snewz = LLVMBuildXor(builder, signmabit, si, "");
2015    tnewz = tnegi;
2016 
2017    /* select the mirrored values */
2018    face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2019    face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2020    face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2021 
2022    face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2023    face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2024 
2025    /* add +1 for neg face */
2026    /* XXX with AVX probably want to use another select here -
2027     * as long as we ensure vblendvps gets used we can actually
2028     * skip the comparison and just use sign as a "mask" directly.
2029     */
2030    signma = LLVMBuildLShr(builder, mai, signshift, "");
2031    coords[2] = LLVMBuildOr(builder, face, signma, "face");
2032 
2033    /* project coords */
2034    if (!need_derivs) {
2035       imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2036       face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2037       face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2038    }
2039 
2040    coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2041    coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2042 }
2043 
2044 
2045 /**
2046  * Compute the partial offset of a pixel block along an arbitrary axis.
2047  *
2048  * @param coord   coordinate in pixels
2049  * @param stride  number of bytes between rows of successive pixel blocks
2050  * @param block_length  number of pixels in a pixels block along the coordinate
2051  *                      axis
2052  * @param out_offset    resulting relative offset of the pixel block in bytes
2053  * @param out_subcoord  resulting sub-block pixel coordinate
2054  */
2055 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2056 lp_build_sample_partial_offset(struct lp_build_context *bld,
2057                                unsigned block_length,
2058                                LLVMValueRef coord,
2059                                LLVMValueRef stride,
2060                                LLVMValueRef *out_offset,
2061                                LLVMValueRef *out_subcoord)
2062 {
2063    LLVMBuilderRef builder = bld->gallivm->builder;
2064    LLVMValueRef offset;
2065    LLVMValueRef subcoord;
2066 
2067    if (block_length == 1) {
2068       subcoord = bld->zero;
2069    }
2070    else {
2071       /*
2072        * Pixel blocks have power of two dimensions. LLVM should convert the
2073        * rem/div to bit arithmetic.
2074        * TODO: Verify this.
2075        * It does indeed BUT it does transform it to scalar (and back) when doing so
2076        * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2077        * The generated code looks seriously unfunny and is quite expensive.
2078        */
2079 #if 0
2080       LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2081       subcoord = LLVMBuildURem(builder, coord, block_width, "");
2082       coord    = LLVMBuildUDiv(builder, coord, block_width, "");
2083 #else
2084       unsigned logbase2 = util_logbase2(block_length);
2085       LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2086       LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2087       subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2088       coord = LLVMBuildLShr(builder, coord, block_shift, "");
2089 #endif
2090    }
2091 
2092    offset = lp_build_mul(bld, coord, stride);
2093 
2094    assert(out_offset);
2095    assert(out_subcoord);
2096 
2097    *out_offset = offset;
2098    *out_subcoord = subcoord;
2099 }
2100 
2101 
2102 /**
2103  * Compute the offset of a pixel block.
2104  *
2105  * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2106  *
2107  * Returns the relative offset and i,j sub-block coordinates
2108  */
2109 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2110 lp_build_sample_offset(struct lp_build_context *bld,
2111                        const struct util_format_description *format_desc,
2112                        LLVMValueRef x,
2113                        LLVMValueRef y,
2114                        LLVMValueRef z,
2115                        LLVMValueRef y_stride,
2116                        LLVMValueRef z_stride,
2117                        LLVMValueRef *out_offset,
2118                        LLVMValueRef *out_i,
2119                        LLVMValueRef *out_j)
2120 {
2121    LLVMValueRef x_stride;
2122    LLVMValueRef offset;
2123 
2124    x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2125                                  format_desc->block.bits/8);
2126 
2127    lp_build_sample_partial_offset(bld,
2128                                   format_desc->block.width,
2129                                   x, x_stride,
2130                                   &offset, out_i);
2131 
2132    if (y && y_stride) {
2133       LLVMValueRef y_offset;
2134       lp_build_sample_partial_offset(bld,
2135                                      format_desc->block.height,
2136                                      y, y_stride,
2137                                      &y_offset, out_j);
2138       offset = lp_build_add(bld, offset, y_offset);
2139    }
2140    else {
2141       *out_j = bld->zero;
2142    }
2143 
2144    if (z && z_stride) {
2145       LLVMValueRef z_offset;
2146       LLVMValueRef k;
2147       lp_build_sample_partial_offset(bld,
2148                                      1, /* pixel blocks are always 2D */
2149                                      z, z_stride,
2150                                      &z_offset, &k);
2151       offset = lp_build_add(bld, offset, z_offset);
2152    }
2153 
2154    *out_offset = offset;
2155 }
2156 
2157 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2158 lp_build_sample_min(struct lp_build_context *bld,
2159                     LLVMValueRef x,
2160                     LLVMValueRef v0,
2161                     LLVMValueRef v1)
2162 {
2163    /* if the incoming LERP weight is 0 then the min/max
2164     * should ignore that value. */
2165    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2166                                         bld->type,
2167                                         PIPE_FUNC_NOTEQUAL,
2168                                         x, bld->zero);
2169    LLVMValueRef min = lp_build_min(bld, v0, v1);
2170 
2171    return lp_build_select(bld, mask, min, v0);
2172 }
2173 
2174 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2175 lp_build_sample_max(struct lp_build_context *bld,
2176                     LLVMValueRef x,
2177                     LLVMValueRef v0,
2178                     LLVMValueRef v1)
2179 {
2180    /* if the incoming LERP weight is 0 then the min/max
2181     * should ignore that value. */
2182    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2183                                         bld->type,
2184                                         PIPE_FUNC_NOTEQUAL,
2185                                         x, bld->zero);
2186    LLVMValueRef max = lp_build_max(bld, v0, v1);
2187 
2188    return lp_build_select(bld, mask, max, v0);
2189 }
2190 
2191 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2192 lp_build_sample_min_2d(struct lp_build_context *bld,
2193                        LLVMValueRef x,
2194                        LLVMValueRef y,
2195                        LLVMValueRef a,
2196                        LLVMValueRef b,
2197                        LLVMValueRef c,
2198                        LLVMValueRef d)
2199 {
2200    LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2201    LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2202    return lp_build_sample_min(bld, y, v0, v1);
2203 }
2204 
2205 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2206 lp_build_sample_max_2d(struct lp_build_context *bld,
2207                        LLVMValueRef x,
2208                        LLVMValueRef y,
2209                        LLVMValueRef a,
2210                        LLVMValueRef b,
2211                        LLVMValueRef c,
2212                        LLVMValueRef d)
2213 {
2214    LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2215    LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2216    return lp_build_sample_max(bld, y, v0, v1);
2217 }
2218 
2219 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2220 lp_build_sample_min_3d(struct lp_build_context *bld,
2221                 LLVMValueRef x,
2222                 LLVMValueRef y,
2223                 LLVMValueRef z,
2224                 LLVMValueRef a, LLVMValueRef b,
2225                 LLVMValueRef c, LLVMValueRef d,
2226                 LLVMValueRef e, LLVMValueRef f,
2227                 LLVMValueRef g, LLVMValueRef h)
2228 {
2229    LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2230    LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2231    return lp_build_sample_min(bld, z, v0, v1);
2232 }
2233 
2234 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2235 lp_build_sample_max_3d(struct lp_build_context *bld,
2236                        LLVMValueRef x,
2237                        LLVMValueRef y,
2238                        LLVMValueRef z,
2239                        LLVMValueRef a, LLVMValueRef b,
2240                        LLVMValueRef c, LLVMValueRef d,
2241                        LLVMValueRef e, LLVMValueRef f,
2242                        LLVMValueRef g, LLVMValueRef h)
2243 {
2244    LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2245    LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2246    return lp_build_sample_max(bld, z, v0, v1);
2247 }
2248 
2249 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2250 lp_build_reduce_filter(struct lp_build_context *bld,
2251                        enum pipe_tex_reduction_mode mode,
2252                        unsigned flags,
2253                        unsigned num_chan,
2254                        LLVMValueRef x,
2255                        LLVMValueRef *v00,
2256                        LLVMValueRef *v01,
2257                        LLVMValueRef *out)
2258 {
2259    unsigned chan;
2260    switch (mode) {
2261    case PIPE_TEX_REDUCTION_MIN:
2262       for (chan = 0; chan < num_chan; chan++)
2263          out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2264       break;
2265    case PIPE_TEX_REDUCTION_MAX:
2266       for (chan = 0; chan < num_chan; chan++)
2267          out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2268       break;
2269    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2270    default:
2271       for (chan = 0; chan < num_chan; chan++)
2272          out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2273       break;
2274    }
2275 }
2276 
2277 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2278 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2279                           enum pipe_tex_reduction_mode mode,
2280                           unsigned flags,
2281                           unsigned num_chan,
2282                           LLVMValueRef x,
2283                           LLVMValueRef y,
2284                           LLVMValueRef *v00,
2285                           LLVMValueRef *v01,
2286                           LLVMValueRef *v10,
2287                           LLVMValueRef *v11,
2288                           LLVMValueRef *out)
2289 {
2290    unsigned chan;
2291    switch (mode) {
2292    case PIPE_TEX_REDUCTION_MIN:
2293       for (chan = 0; chan < num_chan; chan++)
2294          out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2295       break;
2296    case PIPE_TEX_REDUCTION_MAX:
2297       for (chan = 0; chan < num_chan; chan++)
2298          out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan]);
2299       break;
2300    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2301    default:
2302       for (chan = 0; chan < num_chan; chan++)
2303          out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan], v10[chan], v11[chan], flags);
2304       break;
2305    }
2306 }
2307 
2308 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2309 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2310                           enum pipe_tex_reduction_mode mode,
2311                           unsigned flags,
2312                           unsigned num_chan,
2313                           LLVMValueRef x,
2314                           LLVMValueRef y,
2315                           LLVMValueRef z,
2316                           LLVMValueRef *v000,
2317                           LLVMValueRef *v001,
2318                           LLVMValueRef *v010,
2319                           LLVMValueRef *v011,
2320                           LLVMValueRef *v100,
2321                           LLVMValueRef *v101,
2322                           LLVMValueRef *v110,
2323                           LLVMValueRef *v111,
2324                           LLVMValueRef *out)
2325 {
2326    unsigned chan;
2327    switch (mode) {
2328    case PIPE_TEX_REDUCTION_MIN:
2329       for (chan = 0; chan < num_chan; chan++)
2330          out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2331                                      v000[chan], v001[chan], v010[chan], v011[chan],
2332                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2333       break;
2334    case PIPE_TEX_REDUCTION_MAX:
2335       for (chan = 0; chan < num_chan; chan++)
2336          out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2337                                      v000[chan], v001[chan], v010[chan], v011[chan],
2338                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2339       break;
2340    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2341    default:
2342       for (chan = 0; chan < num_chan; chan++)
2343          out[chan] = lp_build_lerp_3d(bld, x, y, z,
2344                                       v000[chan], v001[chan], v010[chan], v011[chan],
2345                                       v100[chan], v101[chan], v110[chan], v111[chan],
2346                                       flags);
2347       break;
2348    }
2349 }
2350 
/*
 * Precomputed anisotropic filter weight LUT (1024 entries), generated from:
 *
 *    const float alpha = 2;
 *    for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
 *       const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
 *       const float weight = (float)expf(-alpha * r2);
 *       ...
 *    }
 *
 * i.e. entry i holds expf(-2 * r2) for a normalized squared radius r2
 * in [0, 1]; entry 0 is expf(0) = 1.0 and entry 1023 is expf(-2) ~= 0.135335.
 */
static const float aniso_filter_table[1024] = {
   1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
   0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
   0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
   0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
   0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
   0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
   0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
   0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
   0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
   0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
   0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
   0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
   0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
   0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
   0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
   0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
   0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
   0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
   0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
   0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
   0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
   0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
   0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
   0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
   0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
   0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
   0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
   0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
   0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
   0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
   0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
   0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
   0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
   0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
   0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
   0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
   0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
   0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
   0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
   0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
   0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
   0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
   0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
   0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
   0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
   0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
   0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
   0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
   0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
   0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
   0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
   0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
   0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
   0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
   0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
   0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
   0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
   0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
   0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
   0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
   0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
   0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
   0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
   0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
   0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
   0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
   0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
   0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
   0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
   0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
   0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
   0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
   0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
   0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
   0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
   0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
   0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
   0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
   0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
   0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
   0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
   0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
   0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
   0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
   0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
   0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
   0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
   0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
   0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
   0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
   0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
   0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
   0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
   0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
   0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
   0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
   0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
   0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
   0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
   0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
   0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
   0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
   0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
   0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
   0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
   0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
   0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
   0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
   0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
   0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
   0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
   0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
   0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
   0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
   0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
   0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
   0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
   0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
   0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
   0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
   0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
   0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
   0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
   0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
   0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
   0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
   0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
   0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
};
2488 
/**
 * Return the precomputed anisotropic filter weight table.
 *
 * The returned pointer refers to the static 1024-entry aniso_filter_table
 * defined above; it is read-only and valid for the lifetime of the process.
 */
const float *
lp_build_sample_aniso_filter_table(void)
{
   return aniso_filter_table;
}
2494