/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- common code.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_cpu_detect.h"
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_type.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
#include "lp_bld_quad.h"
#include "lp_bld_bitarit.h"


/*
 * Bri-linear factor. Should be greater than one.
 */
#define BRILINEAR_FACTOR 2


/**
 * Does the given texture wrap mode allow sampling the texture border color?
 * XXX maybe move this into gallium util code.
 */
bool
lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
                                       enum pipe_tex_filter min_img_filter,
                                       enum pipe_tex_filter mag_img_filter)
{
   switch (mode) {
   case PIPE_TEX_WRAP_REPEAT:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      return false;
   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
          mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
         return false;
      } else {
         return true;
      }
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      return true;
   default:
      assert(0 && "unexpected wrap mode");
      return false;
   }
}


/**
 * Initialize lp_sampler_static_texture_state object with the gallium
 * texture/sampler_view state (this contains the parts which are
 * considered static).
 */
void
lp_sampler_static_texture_state(struct lp_static_texture_state *state,
                                const struct pipe_sampler_view *view)
{
   memset(state, 0, sizeof *state);

   if (!view || !view->texture)
      return;

   const struct pipe_resource *texture = view->texture;

   state->format = view->format;
   state->res_format = texture->format;
   state->swizzle_r = view->swizzle_r;
   state->swizzle_g = view->swizzle_g;
   state->swizzle_b = view->swizzle_b;
   state->swizzle_a = view->swizzle_a;
   assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
   assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
   assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
   assert(state->swizzle_a < PIPE_SWIZZLE_NONE);

   /* check if it is a tex2d created from buf */
   if (view->is_tex2d_from_buf)
      state->target = PIPE_TEXTURE_2D;
   else
      state->target = view->target;

   state->pot_width = util_is_power_of_two_or_zero(texture->width0);
   state->pot_height = util_is_power_of_two_or_zero(texture->height0);
   state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
   state->level_zero_only = !view->u.tex.last_level;

   /*
    * the layer / element / level parameters are all either dynamic
    * state or handled transparently wrt execution.
    */
}


/**
 * Initialize lp_sampler_static_texture_state object with the gallium
 * image_view state (this contains the parts which are
 * considered static).
 */
void
lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
                                      const struct pipe_image_view *view)
{
   memset(state, 0, sizeof *state);

   if (!view || !view->resource)
      return;

   const struct pipe_resource *resource = view->resource;

   state->format = view->format;
   state->res_format = resource->format;
   state->swizzle_r = PIPE_SWIZZLE_X;
   state->swizzle_g = PIPE_SWIZZLE_Y;
   state->swizzle_b = PIPE_SWIZZLE_Z;
   state->swizzle_a = PIPE_SWIZZLE_W;
   assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
   assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
   assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
   assert(state->swizzle_a < PIPE_SWIZZLE_NONE);

   state->target = resource->target;
   state->pot_width = util_is_power_of_two_or_zero(resource->width0);
   state->pot_height = util_is_power_of_two_or_zero(resource->height0);
   state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
   state->level_zero_only = view->u.tex.level == 0;

   /*
    * the layer / element / level parameters are all either dynamic
    * state or handled transparently wrt execution.
    */
}


/**
 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
 * state (this contains the parts which are considered static).
 */
void
lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
                                const struct pipe_sampler_state *sampler)
{
   memset(state, 0, sizeof *state);

   if (!sampler)
      return;

   /*
    * We don't copy sampler state over unless it is actually enabled, to avoid
    * spurious recompiles, as the sampler static state is part of the shader
    * key.
    *
    * Ideally gallium frontends or cso_cache module would make all state
    * canonical, but until that happens it's better to be safe than sorry here.
    *
    * XXX: Actually there's much more that could be done here, especially
    * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
    */

   state->wrap_s            = sampler->wrap_s;
   state->wrap_t            = sampler->wrap_t;
   state->wrap_r            = sampler->wrap_r;
   state->min_img_filter    = sampler->min_img_filter;
   state->mag_img_filter    = sampler->mag_img_filter;
   state->min_mip_filter    = sampler->min_mip_filter;
   state->seamless_cube_map = sampler->seamless_cube_map;
   state->reduction_mode    = sampler->reduction_mode;
   state->aniso = sampler->max_anisotropy > 1.0f;

   if (sampler->max_lod > 0.0f) {
      state->max_lod_pos = 1;
   }

   if (sampler->lod_bias != 0.0f) {
      state->lod_bias_non_zero = 1;
   }

   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
       state->min_img_filter != state->mag_img_filter) {

      /* If min_lod == max_lod we can greatly simplify mipmap selection.
       * This is a case that occurs during automatic mipmap generation.
       */
      if (sampler->min_lod == sampler->max_lod) {
         state->min_max_lod_equal = 1;
      } else {
         if (sampler->min_lod > 0.0f) {
            state->apply_min_lod = 1;
         }

         /*
          * XXX this won't do anything with the mesa state tracker which always
          * sets max_lod to not more than actually present mip maps...
          */
         if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
            state->apply_max_lod = 1;
         }
      }
   }

   state->compare_mode      = sampler->compare_mode;
   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
      state->compare_func   = sampler->compare_func;
   }

   state->normalized_coords = !sampler->unnormalized_coords;
}


/* build aniso pmin value */
static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context *bld,
              LLVMValueRef first_level,
              LLVMValueRef s,
              LLVMValueRef t,
              LLVMValueRef max_aniso)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *pmin_bld = &bld->lodf_bld;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
   LLVMValueRef int_size, float_size;
   const unsigned length = coord_bld->type.length;
   const unsigned num_quads = length / 4;
   const bool pmin_per_quad = pmin_bld->type.length != length;

   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, true);
   float_size = lp_build_int_to_float(float_size_bld, int_size);
   max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
   max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);

   static const unsigned char swizzle01[] = { /* no-op swizzle */
      0, 1,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle23[] = {
      2, 3,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

   for (unsigned i = 0; i < num_quads; i++) {
      shuffles[i*4+0] = shuffles[i*4+1] = index0;
      shuffles[i*4+2] = shuffles[i*4+3] = index1;
   }
   floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                     LLVMConstVector(shuffles, length), "");
   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);

   ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);

   ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
   ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);

   LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

   static const unsigned char swizzle0[] = { /* no-op swizzle */
     0, LP_BLD_SWIZZLE_DONTCARE,
     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   static const unsigned char swizzle1[] = {
     1, LP_BLD_SWIZZLE_DONTCARE,
     LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
   LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);

   LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
   LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);

   LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);

   LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
                                        pmin2, temp);

   LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);

   pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);

   if (pmin_per_quad)
      pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                        pmin_bld->type, pmin2, 0);
   else
      pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
   return pmin2;
}


/**
 * Generate code to compute coordinate gradient (rho).
 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
 *
 * The resulting rho has bld->levelf format (per quad or per element).
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             LLVMValueRef first_level,
             LLVMValueRef s,
             LLVMValueRef t,
             LLVMValueRef r,
             const struct lp_derivatives *derivs)
{
   struct gallivm_state *gallivm = bld->gallivm;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *rho_bld = &bld->lodf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2] = {NULL};
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_vec;
   LLVMValueRef rho;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   bool rho_per_quad = rho_bld->type.length != length;
   bool no_rho_opt = bld->no_rho_approx && (dims > 1);
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef rho_xvec, rho_yvec;

   /* Note that all simplified calculations will only work for isotropic
    * filtering
    */

   /*
    * rho calcs are always per quad except for explicit derivs (excluding
    * the messy cube maps for now) when requested.
    */

   LLVMValueRef int_size =
      lp_build_minify(int_size_bld, bld->int_size, first_level, true);
   LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size);

   if (derivs) {
      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
      for (unsigned i = 0; i < dims; i++) {
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);

         LLVMValueRef floatdim =
            lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                       coord_bld->type, float_size, indexi);

         /*
          * note that for the rho_per_quad case we could reduce the math (at
          * some shuffle cost), but for now use the same code as for the
          * per-pixel lod case.
          */
         if (no_rho_opt) {
            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
         } else {
            LLVMValueRef tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
            LLVMValueRef tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
         }
      }
      if (no_rho_opt) {
         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
         if (dims > 2) {
            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
         }
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         /* skipping sqrt hence returning rho squared */
      } else {
         rho = ddmax[0];
         if (dims > 1) {
            rho = lp_build_max(coord_bld, rho, ddmax[1]);
            if (dims > 2) {
               rho = lp_build_max(coord_bld, rho, ddmax[2]);
            }
         }
      }

      LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm,
                                                       coord_bld->type, rho);
      rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);

      if (rho_per_quad) {
         /*
          * rho_vec contains per-pixel rho, convert to scalar per quad.
          */
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         rho_bld->type, rho, 0);
      }
   } else {
      /*
       * This looks all a bit complex, but it's not that bad
       * (the shuffle code makes it look worse than it is).
       * Still, might not be ideal for all cases.
       */
      static const unsigned char swizzle0[] = { /* no-op swizzle */
         0, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle1[] = {
         1, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle2[] = {
         2, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };

      if (dims < 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
      } else if (dims >= 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
         }
      }

      if (no_rho_opt) {
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];

         for (unsigned i = 0; i < num_quads; i++) {
            shuffles[i*4+0] = shuffles[i*4+1] = index0;
            shuffles[i*4+2] = shuffles[i*4+3] = index1;
         }
         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                           LLVMConstVector(shuffles, length),
                                           "");
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

         if (dims > 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                                  coord_bld->type, float_size, index2);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
         }

         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (rho_per_quad) {
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            rho_bld->type, rho, 0);
         } else {
            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
         }
         /* skipping sqrt hence returning rho squared */
      } else {
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         } else {
            ddx_ddy[1] = NULL; /* silence compiler warning */
         }

         if (dims < 2) {
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
         } else if (dims == 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            static const unsigned char swizzle13[] = {
               1, 3,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
         } else {
            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
            assert(dims == 3);
            for (unsigned i = 0; i < num_quads; i++) {
               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
               shuffles1[4*i + 3] = i32undef;
               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
               shuffles2[4*i + 3] = i32undef;
            }
            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles1, length), "");
            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles2, length), "");
         }

         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (bld->coord_type.length > 4) {
            /* expand size to each quad */
            if (dims > 1) {
               /* could use some broadcast_vector helper for this? */
               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
               for (unsigned i = 0; i < num_quads; i++) {
                  src[i] = float_size;
               }
               float_size = lp_build_concat(bld->gallivm, src,
                                            float_size_bld->type, num_quads);
            } else {
               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
            }
            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);

                  rho = lp_build_max(coord_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
                     rho = lp_build_max(coord_bld, rho, rho_r);
                  }
               }
            }
            if (rho_per_quad) {
               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                               rho_bld->type, rho, 0);
            } else {
               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
            }
         } else {
            if (dims <= 1) {
               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
            }
            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

                  rho = lp_build_max(float_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
                     rho = lp_build_max(float_bld, rho, rho_r);
                  }
               }
            }
            if (!rho_per_quad) {
               rho = lp_build_broadcast_scalar(rho_bld, rho);
            }
         }
      }
   }

   return rho;
}


/*
 * Bri-linear lod computation
 *
 * Use a piece-wise linear approximation of log2 such that:
 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 * - linear approximation for values in the neighborhood of 0.5, 1.5, etc.,
 *   with the steepness specified in 'factor'
 * - exact result for 0.5, 1.5, etc.
 *
 *
 *   1.0 -              /----*
 *                     /
 *                    /
 *                   /
 *   0.5 -          *
 *                 /
 *                /
 *               /
 *   0.0 - *----/
 *
 *         |                 |
 *        2^0               2^1
 *
 * This is a technique also commonly used in hardware:
 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 *
 * TODO: For correctness, this should only be applied when texture is known to
 * have regular mipmaps, i.e., mipmaps derived from the base level.
 *
 * TODO: This could be done in fixed point, where applicable.
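 *
 * Worked example (illustrative, using the BRILINEAR_FACTOR of 2 defined
 * above): pre_offset = (2 - 0.5)/2 - 0.5 = 0.25 and post_offset = 1 - 2 = -1,
 * so lod_fpart = fract(lod + 0.25) * 2 - 1.  For lod within 0.25 of an
 * integer n the fractional part comes out negative (the linear mip blend is
 * skipped and level n is used alone), while lod == n + 0.5 yields
 * ipart == n and lod_fpart == 0.5 exactly.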
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   if (0) {
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   if (0) {
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}


/*
 * Combined log2 and brilinear lod computation.
 *
 * It is essentially identical to calling lp_build_fast_log2() and
 * lp_build_brilinear_lod() above, but by combining we can compute the integer
 * and fractional part independently.
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor will make the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the integer
    * part will not need any post adjustments.
    */
   rho = lp_build_mul(bld, rho,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   LLVMValueRef lod_ipart = lp_build_extract_exponent(bld, rho, 0);

   /* fpart = rho / 2**ipart */
   LLVMValueRef lod_fpart = lp_build_extract_mantissa(bld, rho);

   lod_fpart =
      lp_build_mad(bld, lod_fpart,
                   lp_build_const_vec(bld->gallivm, bld->type, factor),
                   lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = lod_ipart;
   *out_lod_fpart = lod_fpart;
}


/**
 * Fast implementation of iround(log2(sqrt(x))), based on
 * log2(x^n) == n*log2(x).
 *
 * Gives accurate results all the time.
 * (Could be trivially extended to handle other power-of-two roots.)
 */
static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context *bld,
                    LLVMValueRef x)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_type i_type = lp_int_type(bld->type);
   LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, x));

   /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
   LLVMValueRef ipart = lp_build_extract_exponent(bld, x, 1);
   ipart = LLVMBuildAShr(builder, ipart, one, "");

   return ipart;
}


/**
 * Generate code to compute texture level of detail (lambda).
 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
 * \param lod_bias  optional float vector with the shader lod bias
 * \param explicit_lod  optional float vector with the explicit lod
 * \param out_lod_ipart  integer part of lod
 * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
 * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
 *
 * The resulting lod can be scalar per quad or be per element.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      bool is_lodq,
                      unsigned sampler_unit,
                      LLVMValueRef first_level,
                      LLVMValueRef s,
                      LLVMValueRef t,
                      LLVMValueRef r,
                      const struct lp_derivatives *derivs,
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      enum pipe_tex_mipfilter mip_filter,
                      LLVMValueRef max_aniso,
                      LLVMValueRef *out_lod,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart,
                      LLVMValueRef *out_lod_positive)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
   struct lp_build_context *lodf_bld = &bld->lodf_bld;
   LLVMValueRef lod;

   *out_lod_ipart = bld->lodi_bld.zero;
   *out_lod_positive = bld->lodi_bld.zero;
   *out_lod_fpart = lodf_bld->zero;

   /*
    * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
    * Magnification: "Implementations may either unconditionally assume c = 0
    * for the minification vs. magnification switch-over point, or may choose
    * to make c depend on the combination of minification and magnification
    * modes as follows: if the magnification filter is given by LINEAR and the
    * minification filter is given by NEAREST_MIPMAP_NEAREST or
    * NEAREST_MIPMAP_LINEAR, then c = 0.5. This is done to ensure that a
    * minified texture does not appear "sharper" than a magnified
    * texture. Otherwise c = 0."  And 3.9.11 Texture Minification: "If lod is
    * less than or equal to the constant c (see section 3.9.12) the texture is
    * said to be magnified; if it is greater, the texture is minified."  So,
    * using 0 as switchover point always, and using magnification for lod ==
    * 0.  Note that the always c = 0 behavior is new (first appearing in GL
    * 3.1 spec), old GL versions required 0.5 for the modes listed above.  I
    * have no clue about the (undocumented) wishes of d3d9/d3d10 here!
    */

   if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      LLVMValueRef min_lod =
         dynamic_state->min_lod(bld->gallivm, bld->resources_type,
                                bld->resources_ptr, sampler_unit);

      lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
   } else {
      if (explicit_lod) {
         if (bld->num_lods != bld->coord_type.length)
            lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
                                            lodf_bld->type, explicit_lod, 0);
         else
            lod = explicit_lod;
      } else {
         LLVMValueRef rho;
         bool rho_squared = bld->no_rho_approx && (bld->dims > 1);

         if (bld->static_sampler_state->aniso &&
             !explicit_lod) {
            rho = lp_build_pmin(bld, first_level, s, t, max_aniso);
            rho_squared = true;
         } else {
            rho = lp_build_rho(bld, first_level, s, t, r, derivs);
         }

         /*
          * Compute lod = log2(rho)
          */

         if (!lod_bias && !is_lodq &&
             !bld->static_sampler_state->aniso &&
             !bld->static_sampler_state->lod_bias_non_zero &&
             !bld->static_sampler_state->apply_max_lod &&
             !bld->static_sampler_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions by keeping the integer and fractional lod
             * computations separate from the start.
             */

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               /*
                * Don't actually need both values all the time, lod_ipart is
                * needed for nearest mipfilter, lod_positive if min != mag.
                */
               if (rho_squared) {
                  *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
               } else {
                  *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
               }
               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                                rho, lodf_bld->one);
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !bld->no_brilinear && !rho_squared &&
                !bld->static_sampler_state->aniso) {
               /*
                * This can't work if rho is squared. Not sure if it could be
                * fixed while keeping it worthwhile; could also do sqrt here,
                * but brilinear and no_rho_opt seems like a combination not
                * making much sense anyway, so just use the ordinary path below.
                */
               lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                                rho, lodf_bld->one);
               return;
            }
         }

         if (0) {
            lod = lp_build_log2(lodf_bld, rho);
         } else {
            /* get more accurate results if we just square rho always */
            if (!rho_squared)
               rho = lp_build_mul(lodf_bld, rho, rho);
            lod = lp_build_fast_log2(lodf_bld, rho);
         }

         /* log2(x) == 0.5*log2(x^2) */
         lod = lp_build_mul(lodf_bld, lod,
                            lp_build_const_vec(bld->gallivm,
                                               lodf_bld->type, 0.5F));

         /* add shader lod bias */
         if (lod_bias) {
            if (bld->num_lods != bld->coord_type.length)
               lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
                                                    bld->coord_bld.type,
                                                    lodf_bld->type,
                                                    lod_bias, 0);
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }

      /* add sampler lod bias */
      if (bld->static_sampler_state->lod_bias_non_zero) {
         LLVMValueRef sampler_lod_bias =
            dynamic_state->lod_bias(bld->gallivm, bld->resources_type,
                                    bld->resources_ptr, sampler_unit);
         sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
                                                      sampler_lod_bias);
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
      }

      if (is_lodq) {
         *out_lod = lod;
      }

      /* clamp lod */
      if (bld->static_sampler_state->apply_max_lod) {
         LLVMValueRef max_lod =
            dynamic_state->max_lod(bld->gallivm, bld->resources_type,
                                   bld->resources_ptr, sampler_unit);
         max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);

         lod = lp_build_min(lodf_bld, lod, max_lod);
      }
      if (bld->static_sampler_state->apply_min_lod) {
         LLVMValueRef min_lod =
            dynamic_state->min_lod(bld->gallivm, bld->resources_type,
                                   bld->resources_ptr, sampler_unit);
         min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);

         lod = lp_build_max(lodf_bld, lod, min_lod);
      }

      if (is_lodq) {
         *out_lod_fpart = lod;
         return;
      }
   }

   *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                    lod, lodf_bld->zero);

   if (bld->static_sampler_state->aniso) {
      *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
   } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!bld->no_brilinear) {
         lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      } else {
         lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   } else {
      *out_lod_ipart = lp_build_iround(lodf_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");

   return;
}


/**
 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
 * to actual mip level.
 * Note: this is all scalar per quad code.
 * \param lod_ipart  int texture level of detail
 * \param level_out  returns integer
 * \param out_of_bounds returns per coord out_of_bounds mask if provided
 */
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           LLVMValueRef first_level,
                           LLVMValueRef last_level,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out,
                           LLVMValueRef *out_of_bounds)
{
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   LLVMValueRef level = lp_build_add(leveli_bld, lod_ipart, first_level);

   if (out_of_bounds) {
      LLVMValueRef out, out1;
      out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(leveli_bld, out, out1);
      if (bld->num_mips == bld->coord_bld.type.length) {
         *out_of_bounds = out;
      } else if (bld->num_mips == 1) {
         *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
      } else {
         assert(bld->num_mips == bld->coord_bld.type.length / 4);
         *out_of_bounds =
            lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                  leveli_bld->type,
                                                  bld->int_coord_bld.type,
                                                  out);
      }
      level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
      *level_out = level;
   } else {
      /* clamp level to legal range of levels */
      *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);

   }
}


/**
 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
 * part accordingly.
 * Later, we'll sample from those two mipmap levels and interpolate between
 * them.
 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef first_level,
                           LLVMValueRef last_level,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   assert(bld->num_lods == bld->num_mips);

   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level],
    * with the minimum number of comparisons, and zeroing lod_fpart in the
    * extreme ends in the process.
    */

   /* *level0_out < first_level */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             *level0_out, first_level,
                             "clamp_lod_to_first");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             *level0_out, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
   lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
}


/**
 * A helper function that factorizes this common pattern.
 */
LLVMValueRef
lp_sample_load_mip_value(struct gallivm_state *gallivm,
                         LLVMTypeRef ptr_type,
                         LLVMValueRef offsets,
                         LLVMValueRef index1)
{
   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
   LLVMValueRef indexes[2] = {zero, index1};
   LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets,
                                    indexes, ARRAY_SIZE(indexes), "");
   return LLVMBuildLoad2(gallivm->builder,
                         LLVMInt32TypeInContext(gallivm->context), ptr, "");
}


/**
 * Return pointer to a single mipmap level.
 * \param level  integer mipmap level
 */
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
                          LLVMValueRef level)
{
   LLVMValueRef mip_offset = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
                                                      bld->mip_offsets, level);
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef data_ptr =
      LLVMBuildGEP2(builder,
                    LLVMInt8TypeInContext(bld->gallivm->context),
                    bld->base_ptr, &mip_offset, 1, "");
   return data_ptr;
}


/**
 * Return (per-pixel) offsets to mip levels.
 * \param level  integer mipmap level
 */
LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
                         LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offsets, offset1;

   if (bld->num_mips == 1) {
      offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level);
      offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
   } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
      offsets = bld->int_coord_bld.undef;
      for (unsigned i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
                                            bld->mip_offsets,
                                            LLVMBuildExtractElement(builder, level,
                                                                    indexi, ""));
         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         offsets = LLVMBuildInsertElement(builder, offsets, offset1,
                                          indexo, "");
      }
      offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld,
                                            offsets, 0, 4);
   } else {
      assert (bld->num_mips == bld->coord_bld.type.length);

      offsets = bld->int_coord_bld.undef;
      for (unsigned i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
                                            bld->mip_offsets,
                                            LLVMBuildExtractElement(builder, level,
                                                                    indexi, ""));
         offsets = LLVMBuildInsertElement(builder, offsets, offset1,
                                          indexi, "");
      }
   }
   return offsets;
}


/**
 * Codegen equivalent for u_minify().
 * @param lod_scalar  if lod is a (broadcasted) scalar
 * Return max(1, base_size >> level);
 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level,
                bool lod_scalar)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   } else {
      LLVMValueRef size;
      assert(bld->type.sign);
      if (lod_scalar ||
         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
         size = LLVMBuildLShr(builder, base_size, level, "minify");
         size = lp_build_max(bld, size, bld->one);
      } else {
         /*
          * emulate shift with float mul, since intel "forgot" shifts with
          * per-element shift count until avx2, which results in terrible
          * scalar extraction (both count and value), scalar shift,
          * vector reinsertion. Should not be an issue on any non-x86 cpu
          * with a vector instruction set.
          * On cpus with AMD's XOP this should also be unnecessary but I'm
          * not sure if llvm would emit this with current flags.
          */
         LLVMValueRef const127, const23, lf;
         struct lp_type ftype;
         struct lp_build_context fbld;
         ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
         lp_build_context_init(&fbld, bld->gallivm, ftype);
         const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
         const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);

         /* calculate 2^(-level) float */
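         /*
          * (127 - level) << 23 is the IEEE-754 single-precision bit pattern
          * of 2^(-level) (biased exponent 127 - level, zero mantissa), so
          * the bitcast below yields the per-element scale factor directly.
          */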
         lf = lp_build_sub(bld, const127, level);
         lf = lp_build_shl(bld, lf, const23);
         lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");

         /* finish shift operation by doing float mul */
         base_size = lp_build_int_to_float(&fbld, base_size);
         size = lp_build_mul(&fbld, base_size, lf);
         /*
          * do the max also with floats because
          * a) non-emulated int max requires sse41
          *    (this is actually a lie as we could cast to 16bit values
          *    as 16bit is sufficient and 16bit int max is sse2)
          * b) with avx we can do int max 4-wide but float max 8-wide
          */
         size = lp_build_max(&fbld, size, fbld.one);
         size = lp_build_itrunc(&fbld, size);
      }
      return size;
   }
}


/*
 * Scale image dimensions with block sizes.
 *
 * tex_blocksize is the resource format blocksize
 * view_blocksize is the view format blocksize
 *
 * This must be applied post-minification, but
 * only when blocksizes are different.
 *
 * ret = (size + (tex_blocksize - 1)) >> log2(tex_blocksize);
 * ret *= view_blocksize;
1276  */
1277 LLVMValueRef
lp_build_scale_view_dims(struct lp_build_context * bld,LLVMValueRef size,LLVMValueRef tex_blocksize,LLVMValueRef tex_blocksize_log2,LLVMValueRef view_blocksize)1278 lp_build_scale_view_dims(struct lp_build_context *bld, LLVMValueRef size,
1279                          LLVMValueRef tex_blocksize,
1280                          LLVMValueRef tex_blocksize_log2,
1281                          LLVMValueRef view_blocksize)
1282 {
1283    LLVMBuilderRef builder = bld->gallivm->builder;
1284    LLVMValueRef ret =
1285       LLVMBuildAdd(builder, size,
1286                    LLVMBuildSub(builder, tex_blocksize,
1287                                 lp_build_const_int_vec(bld->gallivm,
1288                                                        bld->type, 1), ""),
1289                    "");
1290    ret = LLVMBuildLShr(builder, ret, tex_blocksize_log2, "");
1291    ret = LLVMBuildMul(builder, ret, view_blocksize, "");
1292    return ret;
1293 }
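
/*
 * Worked example (illustrative, values assumed): viewing a compressed
 * resource with a block width of 4 through a view format with a block
 * width of 1, a minified size of 13 becomes (13 + 3) >> 2 = 4 blocks,
 * and then 4 * 1 = 4 view-format texels.
 */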
1294 
1295 
1296 /*
1297  * Scale a single image dimension.
1298  *
1299  * Scale one image dimension between resource and view blocksizes.
1300  * No-op if the sizes are the same.
1301  */
1302 LLVMValueRef
1303 lp_build_scale_view_dim(struct gallivm_state *gallivm, LLVMValueRef size,
1304                         unsigned tex_blocksize, unsigned view_blocksize)
1305 {
1306    if (tex_blocksize == view_blocksize)
1307       return size;
1308 
1309    LLVMBuilderRef builder = gallivm->builder;
1310    LLVMValueRef ret =
1311       LLVMBuildAdd(builder, size,
1312                    lp_build_const_int32(gallivm, tex_blocksize - 1), "");
1313    ret = LLVMBuildLShr(builder, ret,
1314                        lp_build_const_int32(gallivm,
1315                                             util_logbase2(tex_blocksize)), "");
1316    ret = LLVMBuildMul(builder, ret,
1317                       lp_build_const_int32(gallivm, view_blocksize), "");
1318    return ret;
1319 }
1320 
1321 
1322 /**
1323  * Dereference stride_array[mipmap_level] array to get a stride.
1324  * Return stride as a vector.
1325  */
1326 static LLVMValueRef
1327 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1328                               LLVMTypeRef stride_type,
1329                               LLVMValueRef stride_array, LLVMValueRef level)
1330 {
1331    LLVMBuilderRef builder = bld->gallivm->builder;
1332    LLVMValueRef stride, stride1;
1333 
1334    if (bld->num_mips == 1) {
1335       stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array, level);
1336       stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1337    } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1338       LLVMValueRef stride1;
1339 
1340       stride = bld->int_coord_bld.undef;
1341       for (unsigned i = 0; i < bld->num_mips; i++) {
1342          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1343          stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
1344                                             LLVMBuildExtractElement(builder, level,
1345                                                                     indexi, ""));
1346          LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1347          stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1348       }
1349       stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1350    } else {
1351       LLVMValueRef stride1;
1352 
1353       assert (bld->num_mips == bld->coord_bld.type.length);
1354 
1355       stride = bld->int_coord_bld.undef;
1356       for (unsigned i = 0; i < bld->coord_bld.type.length; i++) {
1357          LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1358          stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
1359                                             LLVMBuildExtractElement(builder, level,
1360                                                                     indexi, ""));
1361          stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1362       }
1363    }
1364    return stride;
1365 }
1366 
1367 
1368 /**
1369  * When sampling a mipmap, we need to compute the width, height, depth
1370  * of the source levels from the level indexes.  This helper function
1371  * does that.
1372  */
1373 void
1374 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
1375                             LLVMValueRef ilevel,
1376                             LLVMValueRef *out_size,
1377                             LLVMValueRef *row_stride_vec,
1378                             LLVMValueRef *img_stride_vec)
1379 {
1380    const unsigned dims = bld->dims;
1381    LLVMValueRef ilevel_vec;
1382 
1383    /*
1384     * Compute width, height, depth at mipmap level 'ilevel'
1385     */
1386    if (bld->num_mips == 1) {
1387       ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
1388       *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
1389                                   ilevel_vec, true);
1390       *out_size = lp_build_scale_view_dims(&bld->int_size_bld, *out_size,
1391                                            bld->int_tex_blocksize,
1392                                            bld->int_tex_blocksize_log2,
1393                                            bld->int_view_blocksize);
1394    } else {
1395       LLVMValueRef int_size_vec;
1396       LLVMValueRef int_tex_blocksize_vec, int_tex_blocksize_log2_vec;
1397       LLVMValueRef int_view_blocksize_vec;
1398       LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
1399       const unsigned num_quads = bld->coord_bld.type.length / 4;
1400 
1401       if (bld->num_mips == num_quads) {
1402          /*
1403           * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
1404           * intel "forgot" the variable shift count instruction until avx2.
1405           * A harmless 8x32 shift gets translated into 32 instructions
1406           * (16 extracts, 8 scalar shifts, 8 inserts); llvm is apparently
1407           * unable to recognize that there are really just 2 different shift
1408           * count values. So do the shift 4-wide before expansion.
1409           */
1410          struct lp_build_context bld4;
1411          struct lp_type type4;
1412 
1413          type4 = bld->int_coord_bld.type;
1414          type4.length = 4;
1415 
1416          lp_build_context_init(&bld4, bld->gallivm, type4);
1417 
1418          if (bld->dims == 1) {
1419             assert(bld->int_size_in_bld.type.length == 1);
1420             int_size_vec = lp_build_broadcast_scalar(&bld4,
1421                                                      bld->int_size);
1422             int_tex_blocksize_vec =
1423                lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize);
1424             int_tex_blocksize_log2_vec =
1425                lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize_log2);
1426             int_view_blocksize_vec =
1427                lp_build_broadcast_scalar(&bld4, bld->int_view_blocksize);
1428          } else {
1429             assert(bld->int_size_in_bld.type.length == 4);
1430             int_size_vec = bld->int_size;
1431             int_tex_blocksize_vec = bld->int_tex_blocksize;
1432             int_tex_blocksize_log2_vec = bld->int_tex_blocksize_log2;
1433             int_view_blocksize_vec = bld->int_view_blocksize;
1434          }
1435 
1436          for (unsigned i = 0; i < num_quads; i++) {
1437             LLVMValueRef ileveli;
1438             LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1439 
1440             ileveli = lp_build_extract_broadcast(bld->gallivm,
1441                                                  bld->leveli_bld.type,
1442                                                  bld4.type,
1443                                                  ilevel,
1444                                                  indexi);
1445             tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, true);
1446             tmp[i] = lp_build_scale_view_dims(&bld4, tmp[i],
1447                                               int_tex_blocksize_vec,
1448                                               int_tex_blocksize_log2_vec,
1449                                               int_view_blocksize_vec);
1450          }
1451          /*
1452           * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for
1453           * dims > 1, [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
1454           */
1455          *out_size = lp_build_concat(bld->gallivm,
1456                                      tmp,
1457                                      bld4.type,
1458                                      num_quads);
1459       } else {
1460          /* FIXME: this is terrible and results in a _huge_ vector
1461           * (for the dims > 1 case).
1462           * Should refactor this (together with extract_image_sizes) and do
1463           * something more useful. For instance, with width and height in a
1464           * 4-wide vector we could pack all elements into an 8xi16 vector
1465           * (on which we can still do useful math) instead of using a
1466           * 16xi32 vector.
1467           * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
1468           * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
1469           * vector.
1470           */
1471          assert(bld->num_mips == bld->coord_bld.type.length);
1472          if (bld->dims == 1) {
1473             assert(bld->int_size_in_bld.type.length == 1);
1474             int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
1475                                                      bld->int_size);
1476             int_tex_blocksize_vec =
1477                lp_build_broadcast_scalar(&bld->int_coord_bld,
1478                                          bld->int_tex_blocksize);
1479             int_tex_blocksize_log2_vec =
1480                lp_build_broadcast_scalar(&bld->int_coord_bld,
1481                                          bld->int_tex_blocksize_log2);
1482             int_view_blocksize_vec =
1483                lp_build_broadcast_scalar(&bld->int_coord_bld,
1484                                          bld->int_view_blocksize);
1485             *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec,
1486                                         ilevel, false);
1487             *out_size = lp_build_scale_view_dims(&bld->int_coord_bld,
1488                                                  *out_size,
1489                                                  int_tex_blocksize_vec,
1490                                                  int_tex_blocksize_log2_vec,
1491                                                  int_view_blocksize_vec);
1492          } else {
1493             LLVMValueRef ilevel1;
1494             for (unsigned i = 0; i < bld->num_mips; i++) {
1495                LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1496                ilevel1 = lp_build_extract_broadcast(bld->gallivm,
1497                                                     bld->int_coord_type,
1498                                                     bld->int_size_in_bld.type,
1499                                                     ilevel, indexi);
1500                tmp[i] = bld->int_size;
1501                tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i],
1502                                         ilevel1, true);
1503                tmp[i] = lp_build_scale_view_dims(&bld->int_size_in_bld,
1504                                                  tmp[i],
1505                                                  bld->int_tex_blocksize,
1506                                                  bld->int_tex_blocksize_log2,
1507                                                  bld->int_view_blocksize);
1508             }
1509             *out_size = lp_build_concat(bld->gallivm, tmp,
1510                                         bld->int_size_in_bld.type,
1511                                         bld->num_mips);
1512          }
1513       }
1514    }
1515 
1516    if (dims >= 2) {
1517       *row_stride_vec = lp_build_get_level_stride_vec(bld,
1518                                                       bld->row_stride_type,
1519                                                       bld->row_stride_array,
1520                                                       ilevel);
1521    }
1522    if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
1523       *img_stride_vec = lp_build_get_level_stride_vec(bld,
1524                                                       bld->img_stride_type,
1525                                                       bld->img_stride_array,
1526                                                       ilevel);
1527    }
1528 }
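
/*
 * Illustrative example (values assumed): a 64x32 2D texture sampled with
 * two quads at mip levels 1 and 2 takes the per-quad path above and yields
 * out_size = [w0, h0, d0, _, w1, h1, d1, _] = [32, 16, -, -, 16, 8, -, -],
 * i.e. the minified size of each quad's level, with the depth and padding
 * slots unused for a 2D texture.
 */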
1529 
1530 
1531 /**
1532  * Extract and broadcast texture size.
1533  *
1534  * @param size_bld    build context of the texture size vector (either
1535  *                    bld->int_size_bld or bld->float_size_bld)
1536  * @param coord_type  type of the per-dimension output vectors (either
1537  *                    bld->int_coord_type or bld->coord_type)
1538  * @param size        vector with the texture size (width, height, depth)
1539  */
1540 void
1541 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
1542                              struct lp_build_context *size_bld,
1543                              struct lp_type coord_type,
1544                              LLVMValueRef size,
1545                              LLVMValueRef *out_width,
1546                              LLVMValueRef *out_height,
1547                              LLVMValueRef *out_depth)
1548 {
1549    const unsigned dims = bld->dims;
1550    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
1551    struct lp_type size_type = size_bld->type;
1552 
1553    if (bld->num_mips == 1) {
1554       *out_width = lp_build_extract_broadcast(bld->gallivm,
1555                                               size_type,
1556                                               coord_type,
1557                                               size,
1558                                               LLVMConstInt(i32t, 0, 0));
1559       if (dims >= 2) {
1560          *out_height = lp_build_extract_broadcast(bld->gallivm,
1561                                                   size_type,
1562                                                   coord_type,
1563                                                   size,
1564                                                   LLVMConstInt(i32t, 1, 0));
1565          if (dims == 3) {
1566             *out_depth = lp_build_extract_broadcast(bld->gallivm,
1567                                                     size_type,
1568                                                     coord_type,
1569                                                     size,
1570                                                     LLVMConstInt(i32t, 2, 0));
1571          }
1572       }
1573    } else {
1574       unsigned num_quads = bld->coord_bld.type.length / 4;
1575 
1576       if (dims == 1) {
1577          *out_width = size;
1578       } else if (bld->num_mips == num_quads) {
1579          *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
1580          if (dims >= 2) {
1581             *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
1582             if (dims == 3) {
1583                *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
1584             }
1585          }
1586       } else {
1587          assert(bld->num_mips == bld->coord_type.length);
1588          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1589                                                 coord_type, size, 0);
1590          if (dims >= 2) {
1591             *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1592                                                     coord_type, size, 1);
1593             if (dims == 3) {
1594                *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1595                                                       coord_type, size, 2);
1596             }
1597          }
1598       }
1599    }
1600 }
1601 
1602 
1603 /**
1604  * Unnormalize coords.
1605  *
1606  * @param flt_size  vector with the texture size as floats (width, height, depth)
1607  */
1608 void
1609 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1610                              LLVMValueRef flt_size,
1611                              LLVMValueRef *s,
1612                              LLVMValueRef *t,
1613                              LLVMValueRef *r)
1614 {
1615    const unsigned dims = bld->dims;
1616    LLVMValueRef width;
1617    LLVMValueRef height = NULL;
1618    LLVMValueRef depth = NULL;
1619 
1620    lp_build_extract_image_sizes(bld,
1621                                 &bld->float_size_bld,
1622                                 bld->coord_type,
1623                                 flt_size,
1624                                 &width,
1625                                 &height,
1626                                 &depth);
1627 
1628    /* s = s * width, t = t * height */
1629    *s = lp_build_mul(&bld->coord_bld, *s, width);
1630    if (dims >= 2) {
1631       *t = lp_build_mul(&bld->coord_bld, *t, height);
1632       if (dims >= 3) {
1633          *r = lp_build_mul(&bld->coord_bld, *r, depth);
1634       }
1635    }
1636 }
1637 
1638 
1639 /**
1640  * Generate new coords and faces for cubemap texels falling off the face.
1641  *
1642  * @param face   face (center) of the pixel
1643  * @param x0     lower x coord
1644  * @param x1     higher x coord (must be x0 + 1)
1645  * @param y0     lower y coord
1646  * @param y1     higher y coord (must be y0 + 1)
1647  * @param max_coord     texture cube (level) size - 1
1648  * @param next_faces    new face values when falling off
1649  * @param next_xcoords  new x coord values when falling off
1650  * @param next_ycoords  new y coord values when falling off
1651  *
1652  * The arrays hold the new values when under/overflow of
1653  * lower x, higher x, lower y, higher y coord would occur (in this order).
1654  * next_xcoords/next_ycoords have two entries each (for both new lower and
1655  * higher coord).
1656  */
1657 void
1658 lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1659                         LLVMValueRef face,
1660                         LLVMValueRef x0,
1661                         LLVMValueRef x1,
1662                         LLVMValueRef y0,
1663                         LLVMValueRef y1,
1664                         LLVMValueRef max_coord,
1665                         LLVMValueRef next_faces[4],
1666                         LLVMValueRef next_xcoords[4][2],
1667                         LLVMValueRef next_ycoords[4][2])
1668 {
1669    /*
1670     * Lookup tables aren't nice for simd code, hence try some logic here.
1671     * (Note that with a LUT the per-sample (4) lookups would not be
1672     * necessary, since texels cannot fall off the positive and negative
1673     * edges simultaneously; however, 2 lookups would still be needed for
1674     * corner handling, as in that case texels fall off both the x and
1675     * y axes.)
1676     */
1677    /*
1678     * Next faces (for face 012345):
1679     * x < 0.0  : 451110
1680     * x >= 1.0 : 540001
1681     * y < 0.0  : 225422
1682     * y >= 1.0 : 334533
1683     * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1684     * nfx-: face > 1 ? (face == 5 ? 0 : 1) : 4 + (face & 1)
1685     * nfy+: face & ~4 > 1 ? face + 2 : 3;
1686     * This could also use pshufb instead, but would need (manually coded)
1687     * ssse3 intrinsic (llvm won't do non-constant shuffles).
1688     */
1689    struct gallivm_state *gallivm = ivec_bld->gallivm;
1690    LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1691    LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1692    LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1693    LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1694    LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1695    LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1696 
1697    sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1698    tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1699    sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1700    faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1701    tmp = lp_build_add(ivec_bld, faceand1, c4);
1702    next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1703    next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1704 
1705    tmp = lp_build_andnot(ivec_bld, face, c4);
1706    sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1707    tmp = lp_build_add(ivec_bld, face, c2);
1708    next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1709    next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1710 
1711    /*
1712     * new xcoords (for face 012345):
1713     * x < 0.0  : max   max   t     max-t max  max
1714     * x >= 1.0 : 0     0     max-t t     0    0
1715     * y < 0.0  : max   0     max-s s     s    max-s
1716     * y >= 1.0 : max   0     s     max-s s    max-s
1717     *
1718     * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1719     * ncx[0] = max - ncx[1]
1720     * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1721     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1722     */
1723    sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1724    maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1725    tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1726    next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1727    next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1728    maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1729    tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1730    next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1731    next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1732 
1733    sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1734 
1735    tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1736    maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1737    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1738    next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1739    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1740    next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1741    maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1742    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1743    next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1744    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1745    next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1746 
1747    /*
1748     * new ycoords (for face 012345):
1749     * x < 0.0  : t     t     0     max   t    t
1750     * x >= 1.0 : t     t     0     max   t    t
1751     * y < 0.0  : max-s s     0     max   max  0
1752     * y >= 1.0 : s     max-s 0     max   0    max
1753     *
1754     * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1755     * ncy[1] = ncy[0]
1756     * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1 ? max-s : s)
1757     * ncy[2] = face & ~4 > 1 ? ncy[3] : max - ncy[3]
1758     */
1759    tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1760    next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1761    next_ycoords[1][0] = next_ycoords[0][0];
1762    next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1763    next_ycoords[1][1] = next_ycoords[0][1];
1764 
1765    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1766    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1767    next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1768    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1769    next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1770    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1771    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1772    next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1773    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1774    next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1775 }
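
/*
 * Scalar reference (illustrative only, helper name hypothetical) for the
 * neighbour-face formulas used above; faces are numbered +x,-x,+y,-y,+z,-z
 * as 0..5 and the output array is ordered {x-, x+, y-, y+} like
 * next_faces[].
 */
#if 0
static void
cube_next_faces_sketch(unsigned face, unsigned next[4])
{
   /* nfx-: 4 5 1 1 1 0,  nfx+ = nfx- ^ 1 */
   next[0] = face > 1 ? (face == 5 ? 0 : 1) : 4 + (face & 1);
   next[1] = next[0] ^ 1;
   /* nfy+: 3 3 4 5 3 3,  nfy- = nfy+ ^ 1 */
   next[3] = (face & ~4u) > 1 ? face + 2 : 3;
   next[2] = next[3] ^ 1;
}
#endif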
1776 
1777 
1778 /** Helper used by lp_build_cube_lookup() */
1779 static LLVMValueRef
1780 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1781 {
1782    /* ima = +0.5 / abs(coord); */
1783    LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1784    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1785    /* avoid div by zero */
1786    LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1787    LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1788    LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1789    return ima;
1790 }
1791 
1792 
1793 /** Helper for doing 3-wise selection.
1794  * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1795  */
1796 static LLVMValueRef
1797 lp_build_select3(struct lp_build_context *sel_bld,
1798                  LLVMValueRef sel0,
1799                  LLVMValueRef sel1,
1800                  LLVMValueRef val0,
1801                  LLVMValueRef val1,
1802                  LLVMValueRef val2)
1803 {
1804    LLVMValueRef tmp = lp_build_select(sel_bld, sel0, val0, val1);
1805    return lp_build_select(sel_bld, sel1, val2, tmp);
1806 }
1807 
1808 
1809 /**
1810  * Generate code to do cube face selection and compute per-face texcoords.
1811  */
1812 void
1813 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1814                      LLVMValueRef *coords,
1815                      const struct lp_derivatives *derivs_in, /* optional */
1816                      struct lp_derivatives *derivs_out, /* optional */
1817                      bool need_derivs)
1818 {
1819    struct lp_build_context *coord_bld = &bld->coord_bld;
1820    LLVMBuilderRef builder = bld->gallivm->builder;
1821    struct gallivm_state *gallivm = bld->gallivm;
1822    LLVMValueRef si, ti, ri;
1823 
1824    /*
1825     * Do per-pixel face selection. We cannot however (as we used to do)
1826     * simply calculate the derivs afterwards (which is very bogus for
1827     * explicit derivs btw) because the values would be "random" when
1828     * not all pixels lie on the same face.
1829     */
1830    struct lp_build_context *cint_bld = &bld->int_coord_bld;
1831    struct lp_type intctype = cint_bld->type;
1832    LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1833    LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1834    LLVMValueRef as, at, ar, face, face_s, face_t;
1835    LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1836    LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1837    LLVMValueRef tnegi, rnegi;
1838    LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1839    LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1840    LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1841                                                   1LL << (intctype.width - 1));
1842    LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1843                                                    intctype.width -1);
1844    LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1845    LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1846    LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1847    LLVMValueRef s = coords[0];
1848    LLVMValueRef t = coords[1];
1849    LLVMValueRef r = coords[2];
1850 
1851    assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1852    assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1853    assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1854 
1855    /*
1856     * get absolute value (for x/y/z face selection) and sign bit
1857     * (for mirroring minor coords and pos/neg face selection)
1858     * of the original coords.
1859     */
1860    as = lp_build_abs(&bld->coord_bld, s);
1861    at = lp_build_abs(&bld->coord_bld, t);
1862    ar = lp_build_abs(&bld->coord_bld, r);
1863 
1864    /*
1865     * major face determination: select x if x > y else select y
1866     * select z if z >= max(x,y) else select previous result
1867     * if some axes are equal we choose z over y and y over x - the
1868     * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1869     * didn't care we could save a select or two by using different
1870     * compares and doing at_g_as_ar last, since tnewx and tnewz are the
1871     * same).
1872     */
1873    as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1874    maxasat = lp_build_max(coord_bld, as, at);
1875    ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1876 
1877    if (need_derivs) {
1878       /*
1879        * XXX: This is really really complex.
1880        * It is a bit overkill to use this for implicit derivatives as well,
1881        * no way this is worth the cost in practice, but it seems to be the
1882        * only way to get accurate, per-pixel lod values.
1883        */
1884       LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1885       LLVMValueRef madx, mady, madxdivma, madydivma;
1886       LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1887       LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1888       LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1889       LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1890       LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1891       /*
1892        * s = 1/2 * (sc / ma + 1)
1893        * t = 1/2 * (tc / ma + 1)
1894        *
1895        * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1896        * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1897        *
1898        * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1899        * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1900        * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1901        * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1902        */
1903 
1904       /* select ma, calculate ima */
1905       ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1906       mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1907       signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1908       ima = lp_build_div(coord_bld, coord_bld->one, ma);
1909       imahalf = lp_build_mul(coord_bld, posHalf, ima);
1910       imahalfpos = lp_build_abs(coord_bld, imahalf);
1911 
1912       if (!derivs_in) {
1913          ddx[0] = lp_build_ddx(coord_bld, s);
1914          ddx[1] = lp_build_ddx(coord_bld, t);
1915          ddx[2] = lp_build_ddx(coord_bld, r);
1916          ddy[0] = lp_build_ddy(coord_bld, s);
1917          ddy[1] = lp_build_ddy(coord_bld, t);
1918          ddy[2] = lp_build_ddy(coord_bld, r);
1919       } else {
1920          ddx[0] = derivs_in->ddx[0];
1921          ddx[1] = derivs_in->ddx[1];
1922          ddx[2] = derivs_in->ddx[2];
1923          ddy[0] = derivs_in->ddy[0];
1924          ddy[1] = derivs_in->ddy[1];
1925          ddy[2] = derivs_in->ddy[2];
1926       }
1927 
1928       /* select major derivatives */
1929       madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1930       mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1931 
1932       si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1933       ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1934       ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1935 
1936       sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1937       tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1938       rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1939 
1940       sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1941       tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1942       rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1943 
1944       /*
1945        * compute all possible new s/t coords, which does the mirroring,
1946        * and do the same for derivs minor axes.
1947        * snewx = signma * -r;
1948        * tnewx = -t;
1949        * snewy = s;
1950        * tnewy = signma * r;
1951        * snewz = signma * s;
1952        * tnewz = -t;
1953        */
1954       tnegi = LLVMBuildXor(builder, ti, signmask, "");
1955       rnegi = LLVMBuildXor(builder, ri, signmask, "");
1956       tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1957       rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1958       tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1959       rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1960 
1961       snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1962       tnewx = tnegi;
1963       sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1964       tdxnewx = tdxnegi;
1965       sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1966       tdynewx = tdynegi;
1967 
1968       snewy = si;
1969       tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1970       sdxnewy = sdxi;
1971       tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1972       sdynewy = sdyi;
1973       tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1974 
1975       snewz = LLVMBuildXor(builder, signmabit, si, "");
1976       tnewz = tnegi;
1977       sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1978       tdxnewz = tdxnegi;
1979       sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1980       tdynewz = tdynegi;
1981 
1982       /* select the mirrored values */
1983       face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1984       face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1985       face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1986       face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1987       face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
1988       face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
1989       face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
1990 
1991       face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
1992       face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
1993       face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
1994       face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
1995       face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
1996       face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
1997 
1998       /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
1999       madxdivma = lp_build_mul(coord_bld, madx, ima);
2000       tmp = lp_build_mul(coord_bld, madxdivma, face_s);
2001       tmp = lp_build_sub(coord_bld, face_sdx, tmp);
2002       derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
2003 
2004       /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
2005       tmp = lp_build_mul(coord_bld, madxdivma, face_t);
2006       tmp = lp_build_sub(coord_bld, face_tdx, tmp);
2007       derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
2008 
2009       /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
2010       madydivma = lp_build_mul(coord_bld, mady, ima);
2011       tmp = lp_build_mul(coord_bld, madydivma, face_s);
2012       tmp = lp_build_sub(coord_bld, face_sdy, tmp);
2013       derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
2014 
2015       /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
2016       tmp = lp_build_mul(coord_bld, madydivma, face_t);
2017       tmp = lp_build_sub(coord_bld, face_tdy, tmp);
2018       derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
2019 
2020       signma = LLVMBuildLShr(builder, mai, signshift, "");
2021       coords[2] = LLVMBuildOr(builder, face, signma, "face");
2022 
2023       /* project coords */
2024       face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2025       face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2026 
2027       coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2028       coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2029 
2030       return;
2031    }
2032 
2033    ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
2034    mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
2035    signmabit = LLVMBuildAnd(builder, mai, signmask, "");
2036 
2037    si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
2038    ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
2039    ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
2040 
2041    /*
2042     * compute all possible new s/t coords, which does the mirroring
2043     * snewx = signma * -r;
2044     * tnewx = -t;
2045     * snewy = s;
2046     * tnewy = signma * r;
2047     * snewz = signma * s;
2048     * tnewz = -t;
2049     */
2050    tnegi = LLVMBuildXor(builder, ti, signmask, "");
2051    rnegi = LLVMBuildXor(builder, ri, signmask, "");
2052 
2053    snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2054    tnewx = tnegi;
2055 
2056    snewy = si;
2057    tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2058 
2059    snewz = LLVMBuildXor(builder, signmabit, si, "");
2060    tnewz = tnegi;
2061 
2062    /* select the mirrored values */
2063    face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2064    face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2065    face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2066 
2067    face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2068    face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2069 
2070    /* add +1 for neg face */
2071    /* XXX with AVX probably want to use another select here -
2072     * as long as we ensure vblendvps gets used we can actually
2073     * skip the comparison and just use sign as a "mask" directly.
2074     */
2075    signma = LLVMBuildLShr(builder, mai, signshift, "");
2076    coords[2] = LLVMBuildOr(builder, face, signma, "face");
2077 
2078    /* project coords */
2079    imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2080    face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2081    face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2082 
2083    coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2084    coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2085 }
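
/*
 * Scalar sketch (illustrative only, not part of the original code) of the
 * face selection and projection performed above for a single texel; the
 * helper name is hypothetical and fabsf() from <math.h> is assumed.
 * Ties pick z over y and y over x, matching the selects above.
 */
#if 0
static void
cube_lookup_sketch(float s, float t, float r,
                   unsigned *face, float *new_s, float *new_t)
{
   const float as = fabsf(s), at = fabsf(t), ar = fabsf(r);
   float ma, sc, tc;

   if (ar >= as && ar >= at) {                 /* major axis z */
      ma = r;  sc = r >= 0.0f ? s : -s;  tc = -t;  *face = PIPE_TEX_FACE_POS_Z;
   } else if (at >= as) {                      /* major axis y */
      ma = t;  sc = s;  tc = t >= 0.0f ? r : -r;   *face = PIPE_TEX_FACE_POS_Y;
   } else {                                    /* major axis x */
      ma = s;  sc = s >= 0.0f ? -r : r;  tc = -t;  *face = PIPE_TEX_FACE_POS_X;
   }
   *face += ma < 0.0f;                         /* NEG_* face is POS_* + 1 */

   const float imahalfpos = 0.5f / fabsf(ma);  /* +0.5 / |ma| */
   *new_s = sc * imahalfpos + 0.5f;
   *new_t = tc * imahalfpos + 0.5f;
}
#endif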
2086 
2087 
2088 /**
2089  * Compute the partial offset of a pixel block along an arbitrary axis.
2090  *
2091  * @param coord   coordinate in pixels
2092  * @param stride  number of bytes between rows of successive pixel blocks
2093  * @param block_length  number of pixels in a pixel block along the coordinate
2094  *                      axis
2095  * @param out_offset    resulting relative offset of the pixel block in bytes
2096  * @param out_subcoord  resulting sub-block pixel coordinate
2097  */
2098 void
2099 lp_build_sample_partial_offset(struct lp_build_context *bld,
2100                                unsigned block_length,
2101                                LLVMValueRef coord,
2102                                LLVMValueRef stride,
2103                                LLVMValueRef *out_offset,
2104                                LLVMValueRef *out_subcoord)
2105 {
2106    LLVMBuilderRef builder = bld->gallivm->builder;
2107    LLVMValueRef offset;
2108    LLVMValueRef subcoord;
2109 
2110    if (block_length == 1) {
2111       subcoord = bld->zero;
2112    } else {
2113       /*
2114        * Pixel blocks have power of two dimensions. LLVM should convert the
2115        * rem/div to bit arithmetic.
2116        * TODO: Verify this.
2117        * It does indeed, but it transforms the operation to scalar (and back)
2118        * when doing so (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2119        * The generated code looks seriously unfunny and is quite expensive.
2120        */
2121 #if 0
2122       LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2123       subcoord = LLVMBuildURem(builder, coord, block_width, "");
2124       coord    = LLVMBuildUDiv(builder, coord, block_width, "");
2125 #else
2126       unsigned logbase2 = util_logbase2(block_length);
2127       LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2128       LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2129       subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2130       coord = LLVMBuildLShr(builder, coord, block_shift, "");
2131 #endif
2132    }
2133 
2134    offset = lp_build_mul(bld, coord, stride);
2135 
2136    assert(out_offset);
2137    assert(out_subcoord);
2138 
2139    *out_offset = offset;
2140    *out_subcoord = subcoord;
2141 }
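
/*
 * Illustrative scalar equivalent of the power-of-two path above (values
 * assumed): with block_length = 4, coord = 13 and stride = 8 bytes,
 *    subcoord = 13 & 3        = 1
 *    offset   = (13 >> 2) * 8 = 24
 * i.e. the texel lives in the fourth block along this axis, one pixel in.
 */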
2142 
2143 
2144 /**
2145  * Compute the offset of a pixel block.
2146  *
2147  * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2148  *
2149  * Returns the relative offset and i,j sub-block coordinates
2150  */
2151 void
2152 lp_build_sample_offset(struct lp_build_context *bld,
2153                        const struct util_format_description *format_desc,
2154                        LLVMValueRef x,
2155                        LLVMValueRef y,
2156                        LLVMValueRef z,
2157                        LLVMValueRef y_stride,
2158                        LLVMValueRef z_stride,
2159                        LLVMValueRef *out_offset,
2160                        LLVMValueRef *out_i,
2161                        LLVMValueRef *out_j)
2162 {
2163    LLVMValueRef x_stride;
2164    LLVMValueRef offset;
2165 
2166    x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2167                                  format_desc->block.bits/8);
2168 
2169    lp_build_sample_partial_offset(bld,
2170                                   format_desc->block.width,
2171                                   x, x_stride,
2172                                   &offset, out_i);
2173 
2174    if (y && y_stride) {
2175       LLVMValueRef y_offset;
2176       lp_build_sample_partial_offset(bld,
2177                                      format_desc->block.height,
2178                                      y, y_stride,
2179                                      &y_offset, out_j);
2180       offset = lp_build_add(bld, offset, y_offset);
2181    } else {
2182       *out_j = bld->zero;
2183    }
2184 
2185    if (z && z_stride) {
2186       LLVMValueRef z_offset;
2187       LLVMValueRef k;
2188       lp_build_sample_partial_offset(bld,
2189                                      1, /* pixel blocks are always 2D */
2190                                      z, z_stride,
2191                                      &z_offset, &k);
2192       offset = lp_build_add(bld, offset, z_offset);
2193    }
2194 
2195    *out_offset = offset;
2196 }
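
/*
 * Illustrative composition (format assumed to use 1x1 pixel blocks): the
 * partial offsets above simply add up to
 *    offset = x * bytes_per_pixel + y * y_stride + z * z_stride
 * with out_i = out_j = 0, since no sub-block coordinate remains.
 */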
2197 
2198 
2199 static LLVMValueRef
2200 lp_build_sample_min(struct lp_build_context *bld,
2201                     LLVMValueRef x,
2202                     LLVMValueRef v0,
2203                     LLVMValueRef v1)
2204 {
2205    /* if the incoming LERP weight is 0 then the min/max
2206     * should ignore that value. */
2207    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2208                                         bld->type,
2209                                         PIPE_FUNC_NOTEQUAL,
2210                                         x, bld->zero);
2211    LLVMValueRef min = lp_build_min(bld, v0, v1);
2212 
2213    return lp_build_select(bld, mask, min, v0);
2214 }
2215 
2216 
2217 static LLVMValueRef
2218 lp_build_sample_max(struct lp_build_context *bld,
2219                     LLVMValueRef x,
2220                     LLVMValueRef v0,
2221                     LLVMValueRef v1)
2222 {
2223    /* if the incoming LERP weight is 0 then the min/max
2224     * should ignore that value. */
2225    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2226                                         bld->type,
2227                                         PIPE_FUNC_NOTEQUAL,
2228                                         x, bld->zero);
2229    LLVMValueRef max = lp_build_max(bld, v0, v1);
2230 
2231    return lp_build_select(bld, mask, max, v0);
2232 }
2233 
2234 
2235 static LLVMValueRef
2236 lp_build_sample_min_2d(struct lp_build_context *bld,
2237                        LLVMValueRef x,
2238                        LLVMValueRef y,
2239                        LLVMValueRef a,
2240                        LLVMValueRef b,
2241                        LLVMValueRef c,
2242                        LLVMValueRef d)
2243 {
2244    LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2245    LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2246    return lp_build_sample_min(bld, y, v0, v1);
2247 }
2248 
2249 
2250 static LLVMValueRef
2251 lp_build_sample_max_2d(struct lp_build_context *bld,
2252                        LLVMValueRef x,
2253                        LLVMValueRef y,
2254                        LLVMValueRef a,
2255                        LLVMValueRef b,
2256                        LLVMValueRef c,
2257                        LLVMValueRef d)
2258 {
2259    LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2260    LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2261    return lp_build_sample_max(bld, y, v0, v1);
2262 }
2263 
2264 
2265 static LLVMValueRef
2266 lp_build_sample_min_3d(struct lp_build_context *bld,
2267                 LLVMValueRef x,
2268                 LLVMValueRef y,
2269                 LLVMValueRef z,
2270                 LLVMValueRef a, LLVMValueRef b,
2271                 LLVMValueRef c, LLVMValueRef d,
2272                 LLVMValueRef e, LLVMValueRef f,
2273                 LLVMValueRef g, LLVMValueRef h)
2274 {
2275    LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2276    LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2277    return lp_build_sample_min(bld, z, v0, v1);
2278 }
2279 
2280 
2281 static LLVMValueRef
2282 lp_build_sample_max_3d(struct lp_build_context *bld,
2283                        LLVMValueRef x,
2284                        LLVMValueRef y,
2285                        LLVMValueRef z,
2286                        LLVMValueRef a, LLVMValueRef b,
2287                        LLVMValueRef c, LLVMValueRef d,
2288                        LLVMValueRef e, LLVMValueRef f,
2289                        LLVMValueRef g, LLVMValueRef h)
2290 {
2291    LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2292    LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2293    return lp_build_sample_max(bld, z, v0, v1);
2294 }
2295 
2296 
2297 void
2298 lp_build_reduce_filter(struct lp_build_context *bld,
2299                        enum pipe_tex_reduction_mode mode,
2300                        unsigned flags,
2301                        unsigned num_chan,
2302                        LLVMValueRef x,
2303                        LLVMValueRef *v00,
2304                        LLVMValueRef *v01,
2305                        LLVMValueRef *out)
2306 {
2307    unsigned chan;
2308    switch (mode) {
2309    case PIPE_TEX_REDUCTION_MIN:
2310       for (chan = 0; chan < num_chan; chan++)
2311          out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2312       break;
2313    case PIPE_TEX_REDUCTION_MAX:
2314       for (chan = 0; chan < num_chan; chan++)
2315          out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2316       break;
2317    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2318    default:
2319       for (chan = 0; chan < num_chan; chan++)
2320          out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2321       break;
2322    }
2323 }
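
/*
 * Scalar sketch (illustrative only, helper name hypothetical) of the 1-D
 * reduction above: a zero filter weight means the second neighbour must
 * not influence a MIN/MAX reduction, while WEIGHTED_AVERAGE is an
 * ordinary lerp.
 */
#if 0
static float
reduce_filter_1d_sketch(enum pipe_tex_reduction_mode mode,
                        float x, float v0, float v1)
{
   switch (mode) {
   case PIPE_TEX_REDUCTION_MIN:
      return x != 0.0f ? (v0 < v1 ? v0 : v1) : v0;
   case PIPE_TEX_REDUCTION_MAX:
      return x != 0.0f ? (v0 > v1 ? v0 : v1) : v0;
   case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
   default:
      return v0 + x * (v1 - v0);
   }
}
#endif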
2324 
2325 
2326 void
2327 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2328                           enum pipe_tex_reduction_mode mode,
2329                           unsigned flags,
2330                           unsigned num_chan,
2331                           LLVMValueRef x,
2332                           LLVMValueRef y,
2333                           LLVMValueRef *v00,
2334                           LLVMValueRef *v01,
2335                           LLVMValueRef *v10,
2336                           LLVMValueRef *v11,
2337                           LLVMValueRef *out)
2338 {
2339    switch (mode) {
2340    case PIPE_TEX_REDUCTION_MIN:
2341       for (unsigned chan = 0; chan < num_chan; chan++)
2342          out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan],
2343                                             v10[chan], v11[chan]);
2344       break;
2345    case PIPE_TEX_REDUCTION_MAX:
2346       for (unsigned chan = 0; chan < num_chan; chan++)
2347          out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan],
2348                                             v10[chan], v11[chan]);
2349       break;
2350    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2351    default:
2352       for (unsigned chan = 0; chan < num_chan; chan++)
2353          out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan],
2354                                       v10[chan], v11[chan], flags);
2355       break;
2356    }
2357 }
2358 
2359 
2360 void
2361 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2362                           enum pipe_tex_reduction_mode mode,
2363                           unsigned flags,
2364                           unsigned num_chan,
2365                           LLVMValueRef x,
2366                           LLVMValueRef y,
2367                           LLVMValueRef z,
2368                           LLVMValueRef *v000,
2369                           LLVMValueRef *v001,
2370                           LLVMValueRef *v010,
2371                           LLVMValueRef *v011,
2372                           LLVMValueRef *v100,
2373                           LLVMValueRef *v101,
2374                           LLVMValueRef *v110,
2375                           LLVMValueRef *v111,
2376                           LLVMValueRef *out)
2377 {
2378    switch (mode) {
2379    case PIPE_TEX_REDUCTION_MIN:
2380       for (unsigned chan = 0; chan < num_chan; chan++)
2381          out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2382                                      v000[chan], v001[chan], v010[chan], v011[chan],
2383                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2384       break;
2385    case PIPE_TEX_REDUCTION_MAX:
2386       for (unsigned chan = 0; chan < num_chan; chan++)
2387          out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2388                                      v000[chan], v001[chan], v010[chan], v011[chan],
2389                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2390       break;
2391    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2392    default:
2393       for (unsigned chan = 0; chan < num_chan; chan++)
2394          out[chan] = lp_build_lerp_3d(bld, x, y, z,
2395                                       v000[chan], v001[chan], v010[chan], v011[chan],
2396                                       v100[chan], v101[chan], v110[chan], v111[chan],
2397                                       flags);
2398       break;
2399    }
2400 }
2401 
2402 
2403 /*
2404  * generated from
2405  * const float alpha = 2;
2406  * for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
2407  *    const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
2408  *    const float weight = (float)expf(-alpha * r2);
 * }
 * with WEIGHT_LUT_SIZE == 1024, matching the size of the table below.
2409  */
2410 static const float aniso_filter_table[1024] = {
2411    1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
2412    0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
2413    0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
2414    0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
2415    0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
2416    0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
2417    0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
2418    0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
2419    0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
2420    0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
2421    0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
2422    0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
2423    0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
2424    0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
2425    0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
2426    0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
2427    0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
2428    0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
2429    0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
2430    0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
2431    0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
2432    0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
2433    0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
2434    0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
2435    0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
2436    0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
2437    0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
2438    0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
2439    0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
2440    0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
2441    0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
2442    0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
2443    0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
2444    0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
2445    0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
2446    0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
2447    0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
2448    0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
2449    0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
2450    0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
2451    0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
2452    0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
2453    0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
2454    0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
2455    0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
2456    0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
2457    0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
2458    0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
2459    0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
2460    0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
2461    0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
2462    0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
2463    0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
2464    0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
2465    0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
2466    0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
2467    0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
2468    0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
2469    0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
2470    0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
2471    0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
2472    0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
2473    0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
2474    0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
2475    0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
2476    0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
2477    0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
2478    0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
2479    0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
2480    0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
2481    0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
2482    0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
2483    0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
2484    0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
2485    0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
2486    0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
2487    0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
2488    0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
2489    0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
2490    0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
2491    0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
2492    0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
2493    0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
2494    0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
2495    0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
2496    0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
2497    0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
2498    0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
2499    0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
2500    0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
2501    0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
2502    0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
2503    0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
2504    0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
2505    0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
2506    0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
2507    0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
2508    0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
2509    0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
2510    0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
2511    0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
2512    0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
2513    0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
2514    0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
2515    0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
2516    0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
2517    0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
2518    0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
2519    0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
2520    0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
2521    0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
2522    0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
2523    0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
2524    0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
2525    0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
2526    0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
2527    0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
2528    0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
2529    0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
2530    0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
2531    0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
2532    0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
2533    0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
2534    0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
2535    0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
2536    0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
2537    0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
2538    0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
2539 };
2540 
2541 
2542 const float *
2543 lp_build_sample_aniso_filter_table(void)
2544 {
2545    return aniso_filter_table;
2546 }
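
/*
 * Usage sketch (illustrative only; r2 below is a hypothetical caller-side
 * value, not something defined in this file): the table lets a caller
 * approximate expf(-2.0f * r2) for a normalized squared radius r2 in
 * [0, 1] with a single load instead of an exp() evaluation:
 *
 *    const float *lut = lp_build_sample_aniso_filter_table();
 *    unsigned idx = (unsigned)(MIN2(r2, 1.0f) * 1023.0f);  // 1023 == 1024 - 1
 *    float weight = lut[idx];
 */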
2547