• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Texture sampling -- common code.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  */
34 
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52 
53 
54 /*
55  * Bri-linear factor. Should be greater than one.
56  */
57 #define BRILINEAR_FACTOR 2
58 
59 
60 /**
61  * Does the given texture wrap mode allow sampling the texture border color?
62  * XXX maybe move this into gallium util code.
63  */
64 bool
lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,enum pipe_tex_filter min_img_filter,enum pipe_tex_filter mag_img_filter)65 lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
66                                        enum pipe_tex_filter min_img_filter,
67                                        enum pipe_tex_filter mag_img_filter)
68 {
69    switch (mode) {
70    case PIPE_TEX_WRAP_REPEAT:
71    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
72    case PIPE_TEX_WRAP_MIRROR_REPEAT:
73    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
74       return false;
75    case PIPE_TEX_WRAP_CLAMP:
76    case PIPE_TEX_WRAP_MIRROR_CLAMP:
77       if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
78           mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
79          return false;
80       } else {
81          return true;
82       }
83    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
84    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
85       return true;
86    default:
87       assert(0 && "unexpected wrap mode");
88       return false;
89    }
90 }
91 
92 
93 /**
94  * Initialize lp_sampler_static_texture_state object with the gallium
95  * texture/sampler_view state (this contains the parts which are
96  * considered static).
97  */
98 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)99 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
100                                 const struct pipe_sampler_view *view)
101 {
102    memset(state, 0, sizeof *state);
103 
104    if (!view || !view->texture)
105       return;
106 
107    const struct pipe_resource *texture = view->texture;
108 
109    state->format = view->format;
110    state->res_format = texture->format;
111    state->swizzle_r = view->swizzle_r;
112    state->swizzle_g = view->swizzle_g;
113    state->swizzle_b = view->swizzle_b;
114    state->swizzle_a = view->swizzle_a;
115    assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
116    assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
117    assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
118    assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
119 
120    /* check if it is a tex2d created from buf */
121    if (view->is_tex2d_from_buf)
122       state->target = PIPE_TEXTURE_2D;
123    else
124       state->target = view->target;
125 
126    state->res_target = texture->target;
127 
128    state->pot_width = util_is_power_of_two_or_zero(texture->width0);
129    state->pot_height = util_is_power_of_two_or_zero(texture->height0);
130    state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
131    state->level_zero_only = !view->u.tex.last_level;
132    state->tiled = !!(texture->flags & PIPE_RESOURCE_FLAG_SPARSE);
133    if (state->tiled)
134       state->tiled_samples = texture->nr_samples;
135 
136    /*
137     * the layer / element / level parameters are all either dynamic
138     * state or handled transparently wrt execution.
139     */
140 }
141 
142 
/**
 * Initialize lp_sampler_static_texture_state object with the gallium
 * image view state (this contains the parts which are considered
 * static).
 */
148 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)149 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
150                                       const struct pipe_image_view *view)
151 {
152    memset(state, 0, sizeof *state);
153 
154    if (!view || !view->resource)
155       return;
156 
157    const struct pipe_resource *resource = view->resource;
158 
159    state->format = view->format;
160    state->res_format = resource->format;
161    state->swizzle_r = PIPE_SWIZZLE_X;
162    state->swizzle_g = PIPE_SWIZZLE_Y;
163    state->swizzle_b = PIPE_SWIZZLE_Z;
164    state->swizzle_a = PIPE_SWIZZLE_W;
165    assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
166    assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
167    assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
168    assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
169 
170    state->target = resource->target;
171    state->res_target = resource->target;
172    state->pot_width = util_is_power_of_two_or_zero(resource->width0);
173    state->pot_height = util_is_power_of_two_or_zero(resource->height0);
174    state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
175    state->level_zero_only = view->u.tex.level == 0;
176    state->tiled = !!(resource->flags & PIPE_RESOURCE_FLAG_SPARSE);
177    if (state->tiled) {
178       state->tiled_samples = resource->nr_samples;
179       if (view->u.tex.is_2d_view_of_3d)
180          state->target = PIPE_TEXTURE_2D;
181    }
182 
183    /*
184     * the layer / element / level parameters are all either dynamic
185     * state or handled transparently wrt execution.
186     */
187 }
188 
189 
190 /**
191  * Initialize lp_sampler_static_sampler_state object with the gallium sampler
192  * state (this contains the parts which are considered static).
193  */
194 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)195 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
196                                 const struct pipe_sampler_state *sampler)
197 {
198    memset(state, 0, sizeof *state);
199 
200    if (!sampler)
201       return;
202 
203    /*
204     * We don't copy sampler state over unless it is actually enabled, to avoid
205     * spurious recompiles, as the sampler static state is part of the shader
206     * key.
207     *
208     * Ideally gallium frontends or cso_cache module would make all state
209     * canonical, but until that happens it's better to be safe than sorry here.
210     *
211     * XXX: Actually there's much more than can be done here, especially
212     * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
213     */
214 
215    state->wrap_s            = sampler->wrap_s;
216    state->wrap_t            = sampler->wrap_t;
217    state->wrap_r            = sampler->wrap_r;
218    state->min_img_filter    = sampler->min_img_filter;
219    state->mag_img_filter    = sampler->mag_img_filter;
220    state->min_mip_filter    = sampler->min_mip_filter;
221    state->seamless_cube_map = sampler->seamless_cube_map;
222    state->reduction_mode    = sampler->reduction_mode;
223    if (sampler->max_anisotropy > 1)
224       state->aniso = sampler->max_anisotropy;
225 
226    if (sampler->max_lod > 0.0f) {
227       state->max_lod_pos = 1;
228    }
229 
230    if (sampler->lod_bias != 0.0f) {
231       state->lod_bias_non_zero = 1;
232    }
233 
234    if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
235        state->min_img_filter != state->mag_img_filter) {
236 
237       /* If min_lod == max_lod we can greatly simplify mipmap selection.
238        * This is a case that occurs during automatic mipmap generation.
239        */
240       if (sampler->min_lod == sampler->max_lod) {
241          state->min_max_lod_equal = 1;
242       } else {
243          if (sampler->min_lod > 0.0f) {
244             state->apply_min_lod = 1;
245          }
246 
247          /*
248           * XXX this won't do anything with the mesa state tracker which always
249           * sets max_lod to not more than actually present mip maps...
250           */
251          if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
252             state->apply_max_lod = 1;
253          }
254       }
255    }
256 
257    state->compare_mode      = sampler->compare_mode;
258    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
259       state->compare_func   = sampler->compare_func;
260    }
261 
262    state->normalized_coords = !sampler->unnormalized_coords;
263 }
264 
265 
266 /* build aniso pmin value */
267 static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context * bld,LLVMValueRef first_level,LLVMValueRef s,LLVMValueRef t)268 lp_build_pmin(struct lp_build_sample_context *bld,
269               LLVMValueRef first_level,
270               LLVMValueRef s,
271               LLVMValueRef t)
272 {
273    struct gallivm_state *gallivm = bld->gallivm;
274    LLVMBuilderRef builder = bld->gallivm->builder;
275    struct lp_build_context *coord_bld = &bld->coord_bld;
276    struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
277    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
278    struct lp_build_context *pmin_bld = &bld->lodf_bld;
279    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
280    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
281    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
282    LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
283    LLVMValueRef int_size, float_size;
284    const unsigned length = coord_bld->type.length;
285    const unsigned num_quads = length / 4;
286    const bool pmin_per_quad = pmin_bld->type.length != length;
287 
288    int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, true);
289    float_size = lp_build_int_to_float(float_size_bld, int_size);
290 
291    static const unsigned char swizzle01[] = { /* no-op swizzle */
292       0, 1,
293       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
294    };
295    static const unsigned char swizzle23[] = {
296       2, 3,
297       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
298    };
299    LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
300 
301    for (unsigned i = 0; i < num_quads; i++) {
302       shuffles[i*4+0] = shuffles[i*4+1] = index0;
303       shuffles[i*4+2] = shuffles[i*4+3] = index1;
304    }
305    floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
306                                      LLVMConstVector(shuffles, length), "");
307    ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
308 
309    ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);
310 
311    ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
312    ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
313 
314    LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
315 
316    static const unsigned char swizzle0[] = { /* no-op swizzle */
317      0, LP_BLD_SWIZZLE_DONTCARE,
318      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
319    };
320    static const unsigned char swizzle1[] = {
321      1, LP_BLD_SWIZZLE_DONTCARE,
322      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
323    };
324    LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
325    LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);
326 
327    LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
328    LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);
329 
330    LLVMValueRef temp = lp_build_mul(
331       coord_bld, pmin2, lp_build_const_vec(gallivm, coord_bld->type, bld->static_sampler_state->aniso *
332                                            bld->static_sampler_state->aniso));
333 
334    LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
335                                         pmax2, temp);
336 
337    LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2,
338       lp_build_const_vec(gallivm, coord_bld->type, bld->static_sampler_state->aniso));
339 
340    pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);
341 
342    if (pmin_per_quad)
343       pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
344                                         pmin_bld->type, pmin2, 0);
345    else
346       pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
347    return pmin2;
348 }
349 
350 
351 /**
352  * Generate code to compute coordinate gradient (rho).
353  * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
354  *
355  * The resulting rho has bld->levelf format (per quad or per element).
356  */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             LLVMValueRef first_level,
             LLVMValueRef s,
             LLVMValueRef t,
             LLVMValueRef r,
             const struct lp_derivatives *derivs)
{
   struct gallivm_state *gallivm = bld->gallivm;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *rho_bld = &bld->lodf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2] = {NULL};
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_vec;
   LLVMValueRef rho;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* rho comes out per quad when the lodf context is narrower than the
    * coord context, per element otherwise. */
   bool rho_per_quad = rho_bld->type.length != length;
   /* Accurate (squared-length, no sqrt) path vs cheap max-abs path. */
   bool no_rho_opt = bld->no_rho_approx && (dims > 1);
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef rho_xvec, rho_yvec;

   /* Note that all simplified calculations will only work for isotropic
    * filtering
    */

   /*
    * rho calcs are always per quad except for explicit derivs (excluding
    * the messy cube maps for now) when requested.
    */

   /* Texture size at the base level, as float, to scale derivatives into
    * texel space. */
   LLVMValueRef int_size =
      lp_build_minify(int_size_bld, bld->int_size, first_level, true);
   LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size);

   if (derivs) {
      /* Explicit derivatives: one value per coordinate dimension. */
      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
      for (unsigned i = 0; i < dims; i++) {
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);

         /* Broadcast the i-th size component across the coord vector. */
         LLVMValueRef floatdim =
            lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                       coord_bld->type, float_size, indexi);

         /*
          * note that for rho_per_quad case could reduce math (at some shuffle
          * cost), but for now use same code to per-pixel lod case.
          */
         if (no_rho_opt) {
            /* Accurate: accumulate squared scaled derivatives. */
            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
         } else {
            /* Cheap: max(|ddx|, |ddy|) per dimension, scaled. */
            LLVMValueRef tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
            LLVMValueRef tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
         }
      }
      if (no_rho_opt) {
         /* rho = max(|dx|^2, |dy|^2) -- squared vector lengths. */
         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
         if (dims > 2) {
            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
         }
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         /* skipping sqrt hence returning rho squared */
      } else {
         /* rho = max over dimensions of the per-dim maxima. */
         rho = ddmax[0];
         if (dims > 1) {
            rho = lp_build_max(coord_bld, rho, ddmax[1]);
            if (dims > 2) {
               rho = lp_build_max(coord_bld, rho, ddmax[2]);
            }
         }
      }

      /* Inf/NaN derivatives would poison the lod computation downstream;
       * replace them with zero. */
      LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm,
                                                       coord_bld->type, rho);
      rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);

      if (rho_per_quad) {
         /*
          * rho_vec contains per-pixel rho, convert to scalar per quad.
          */
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         rho_bld->type, rho, 0);
      }
   } else {
      /*
       * Implicit derivatives, computed from the packed per-quad
       * differences.  This looks all a bit complex, but it's not that bad
       * (the shuffle code makes it look worse than it is).
       * Still, might not be ideal for all cases.
       */
      static const unsigned char swizzle0[] = { /* no-op swizzle */
         0, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle1[] = {
         1, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle2[] = {
         2, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };

      /* ddx_ddy[0] packs s (and t) derivatives, ddx_ddy[1] packs r's. */
      if (dims < 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
      } else if (dims >= 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
         }
      }

      if (no_rho_opt) {
         /* Accurate path: squared scaled derivatives, summed per axis. */
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim;
         /* NOTE(review): the loop below writes 4 entries per quad, i.e.
          * 'length' entries total, but the array holds only
          * LP_MAX_VECTOR_LENGTH / 4; verify length can never exceed that,
          * or widen to LP_MAX_VECTOR_LENGTH (as shuffles1/shuffles2 below
          * are sized). */
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];

         /* Broadcast (width, height) across each quad's 4 lanes. */
         for (unsigned i = 0; i < num_quads; i++) {
            shuffles[i*4+0] = shuffles[i*4+1] = index0;
            shuffles[i*4+2] = shuffles[i*4+3] = index1;
         }
         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                           LLVMConstVector(shuffles, length),
                                           "");
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

         if (dims > 2) {
            /* Fold in the r-coordinate contribution. */
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                                  coord_bld->type, float_size, index2);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
         }

         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (rho_per_quad) {
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            rho_bld->type, rho, 0);
         } else {
            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
         }
         /* skipping sqrt hence returning rho squared */
      } else {
         /* Cheap path: work with absolute derivative values. */
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         } else {
            ddx_ddy[1] = NULL; /* silence compiler warning */
         }

         /* Separate the packed vector into x-derivatives and
          * y-derivatives, layout depending on dimensionality. */
         if (dims < 2) {
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
         } else if (dims == 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            static const unsigned char swizzle13[] = {
               1, 3,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
         } else {
            /* 3D: gather (s, t, r) derivatives from both packed vectors. */
            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
            assert(dims == 3);
            for (unsigned i = 0; i < num_quads; i++) {
               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
               shuffles1[4*i + 3] = i32undef;
               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
               shuffles2[4*i + 3] = i32undef;
            }
            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles1, length), "");
            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles2, length), "");
         }

         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (bld->coord_type.length > 4) {
            /* expand size to each quad */
            if (dims > 1) {
               /* could use some broadcast_vector helper for this? */
               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
               for (unsigned i = 0; i < num_quads; i++) {
                  src[i] = float_size;
               }
               float_size = lp_build_concat(bld->gallivm, src,
                                            float_size_bld->type, num_quads);
            } else {
               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
            }
            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  /* Reduce over dimensions: rho = max(rho_s, rho_t[, rho_r]). */
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);

                  rho = lp_build_max(coord_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
                     rho = lp_build_max(coord_bld, rho, rho_r);
                  }
               }
            }
            if (rho_per_quad) {
               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                               rho_bld->type, rho, 0);
            } else {
               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
            }
         } else {
            /* Single-quad case: scalar extraction instead of swizzles. */
            if (dims <= 1) {
               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
            }
            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

                  rho = lp_build_max(float_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
                     rho = lp_build_max(float_bld, rho, rho_r);
                  }
               }
            }
            if (!rho_per_quad) {
               rho = lp_build_broadcast_scalar(rho_bld, rho);
            }
         }
      }
   }

   return rho;
}
646 
647 
648 /*
649  * Bri-linear lod computation
650  *
651  * Use a piece-wise linear approximation of log2 such that:
652  * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
653  * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
654  *   with the steepness specified in 'factor'
655  * - exact result for 0.5, 1.5, etc.
656  *
657  *
658  *   1.0 -              /----*
659  *                     /
660  *                    /
661  *                   /
662  *   0.5 -          *
663  *                 /
664  *                /
665  *               /
666  *   0.0 - *----/
667  *
668  *         |                 |
669  *        2^0               2^1
670  *
671  * This is a technique also commonly used in hardware:
672  * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
673  *
674  * TODO: For correctness, this should only be applied when texture is known to
675  * have regular mipmaps, i.e., mipmaps derived from the base level.
676  *
677  * TODO: This could be done in fixed point, where applicable.
678  */
679 static void
lp_build_brilinear_lod(struct lp_build_context * bld,LLVMValueRef lod,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)680 lp_build_brilinear_lod(struct lp_build_context *bld,
681                        LLVMValueRef lod,
682                        double factor,
683                        LLVMValueRef *out_lod_ipart,
684                        LLVMValueRef *out_lod_fpart)
685 {
686    LLVMValueRef lod_fpart;
687    double pre_offset = (factor - 0.5)/factor - 0.5;
688    double post_offset = 1 - factor;
689 
690    if (0) {
691       lp_build_printf(bld->gallivm, "lod = %f\n", lod);
692    }
693 
694    lod = lp_build_add(bld, lod,
695                       lp_build_const_vec(bld->gallivm, bld->type, pre_offset));
696 
697    lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
698 
699    lod_fpart = lp_build_mad(bld, lod_fpart,
700                             lp_build_const_vec(bld->gallivm, bld->type, factor),
701                             lp_build_const_vec(bld->gallivm, bld->type, post_offset));
702 
703    /*
704     * It's not necessary to clamp lod_fpart since:
705     * - the above expression will never produce numbers greater than one.
706     * - the mip filtering branch is only taken if lod_fpart is positive
707     */
708 
709    *out_lod_fpart = lod_fpart;
710 
711    if (0) {
712       lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
713       lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
714    }
715 }
716 
717 
718 /*
719  * Combined log2 and brilinear lod computation.
720  *
721  * It's in all identical to calling lp_build_fast_log2() and
722  * lp_build_brilinear_lod() above, but by combining we can compute the integer
723  * and fractional part independently.
724  */
725 static void
lp_build_brilinear_rho(struct lp_build_context * bld,LLVMValueRef rho,double factor,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart)726 lp_build_brilinear_rho(struct lp_build_context *bld,
727                        LLVMValueRef rho,
728                        double factor,
729                        LLVMValueRef *out_lod_ipart,
730                        LLVMValueRef *out_lod_fpart)
731 {
732    const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
733    const double post_offset = 1 - 2*factor;
734 
735    assert(bld->type.floating);
736 
737    assert(lp_check_value(bld->type, rho));
738 
739    /*
740     * The pre factor will make the intersections with the exact powers of two
741     * happen precisely where we want them to be, which means that the integer
742     * part will not need any post adjustments.
743     */
744    rho = lp_build_mul(bld, rho,
745                       lp_build_const_vec(bld->gallivm, bld->type, pre_factor));
746 
747    /* ipart = ifloor(log2(rho)) */
748    LLVMValueRef lod_ipart = lp_build_extract_exponent(bld, rho, 0);
749 
750    /* fpart = rho / 2**ipart */
751    LLVMValueRef lod_fpart = lp_build_extract_mantissa(bld, rho);
752 
753    lod_fpart =
754       lp_build_mad(bld, lod_fpart,
755                    lp_build_const_vec(bld->gallivm, bld->type, factor),
756                    lp_build_const_vec(bld->gallivm, bld->type, post_offset));
757 
758    /*
759     * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
760     * - the above expression will never produce numbers greater than one.
761     * - the mip filtering branch is only taken if lod_fpart is positive
762     */
763 
764    *out_lod_ipart = lod_ipart;
765    *out_lod_fpart = lod_fpart;
766 }
767 
768 
769 /**
770  * Fast implementation of iround(log2(sqrt(x))), based on
771  * log2(x^n) == n*log2(x).
772  *
773  * Gives accurate results all the time.
774  * (Could be trivially extended to handle other power-of-two roots.)
775  */
776 static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context * bld,LLVMValueRef x)777 lp_build_ilog2_sqrt(struct lp_build_context *bld,
778                     LLVMValueRef x)
779 {
780    LLVMBuilderRef builder = bld->gallivm->builder;
781    struct lp_type i_type = lp_int_type(bld->type);
782    LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);
783 
784    assert(bld->type.floating);
785 
786    assert(lp_check_value(bld->type, x));
787 
788    /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
789    LLVMValueRef ipart = lp_build_extract_exponent(bld, x, 1);
790    ipart = LLVMBuildAShr(builder, ipart, one, "");
791 
792    return ipart;
793 }
794 
795 
/**
 * Generate code to compute texture level of detail (lambda).
 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
 * \param lod_bias  optional float vector with the shader lod bias
 * \param explicit_lod  optional float vector with the explicit lod
 * \param out_lod_ipart  integer part of lod
 * \param out_lod_fpart  float part of lod (never larger than 1 but may be negative)
 * \param out_lod_positive  (mask) if lod is positive (i.e. texture is minified)
 *
 * The resulting lod can be scalar per quad or be per element.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      bool is_lodq,
                      unsigned sampler_unit,
                      LLVMValueRef first_level,
                      LLVMValueRef s,
                      LLVMValueRef t,
                      LLVMValueRef r,
                      const struct lp_derivatives *derivs,
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      enum pipe_tex_mipfilter mip_filter,
                      LLVMValueRef *out_lod,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart,
                      LLVMValueRef *out_lod_positive)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
   struct lp_build_context *lodf_bld = &bld->lodf_bld;
   LLVMValueRef lod;

   /* Default all outputs to zero; the paths below overwrite as needed. */
   *out_lod_ipart = bld->lodi_bld.zero;
   *out_lod_positive = bld->lodi_bld.zero;
   *out_lod_fpart = lodf_bld->zero;

   /*
    * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
    * Magnification: "Implementations may either unconditionally assume c = 0
    * for the minification vs. magnification switch-over point, or may choose
    * to make c depend on the combination of minification and magnification
    * modes as follows: if the magnification filter is given by LINEAR and the
    * minification filter is given by NEAREST_MIPMAP_NEAREST or
    * NEAREST_MIPMAP_LINEAR, then c = 0.5. This is done to ensure that a
    * minified texture does not appear "sharper" than a magnified
    * texture. Otherwise c = 0."  And 3.9.11 Texture Minification: "If lod is
    * less than or equal to the constant c (see section 3.9.12) the texture is
    * said to be magnified; if it is greater, the texture is minified."  So,
    * using 0 as switchover point always, and using magnification for lod ==
    * 0.  Note that the always c = 0 behavior is new (first appearing in GL
    * 3.1 spec), old GL versions required 0.5 for the modes listed above.  I
    * have no clue about the (undocumented) wishes of d3d9/d3d10 here!
    */

   if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      LLVMValueRef min_lod =
         dynamic_state->min_lod(bld->gallivm, bld->resources_type,
                                bld->resources_ptr, sampler_unit);

      lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
   } else {
      if (explicit_lod) {
         /* Shader supplied the lod directly; repack if the lod vector is
          * narrower than the coord vector (per-quad lods). */
         if (bld->num_lods != bld->coord_type.length)
            lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
                                            lodf_bld->type, explicit_lod, 0);
         else
            lod = explicit_lod;
      } else {
         LLVMValueRef rho;
         /* rho may come back squared when the accurate (no-approximation)
          * path is used for multi-dimensional textures. */
         bool rho_squared = bld->no_rho_approx && (bld->dims > 1);

         /* NOTE(review): explicit_lod is necessarily NULL on this branch,
          * so the !explicit_lod test below is redundant. */
         if (bld->static_sampler_state->aniso &&
             !explicit_lod) {
            rho = lp_build_pmin(bld, first_level, s, t);
            rho_squared = true;
         } else {
            rho = lp_build_rho(bld, first_level, s, t, r, derivs);
         }

         /*
          * Compute lod = log2(rho)
          */

         /* Fast path: no post-log2 adjustments of any kind are needed. */
         if (!lod_bias && !is_lodq &&
             !bld->static_sampler_state->aniso &&
             !bld->static_sampler_state->lod_bias_non_zero &&
             !bld->static_sampler_state->apply_max_lod &&
             !bld->static_sampler_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions but keeping the integer and fractional lod
             * computations separate from the start.
             */

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               /*
                * Don't actually need both values all the time, lod_ipart is
                * needed for nearest mipfilter, lod_positive if min != mag.
                */
               if (rho_squared) {
                  *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
               } else {
                  *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
               }
               /* lod > 0 <=> rho > 1 (log2 is monotonic) */
               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                                rho, lodf_bld->one);
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !bld->no_brilinear && !rho_squared &&
                !bld->static_sampler_state->aniso) {
               /*
                * This can't work if rho is squared. Not sure if it could be
                * fixed while keeping it worthwhile, could also do sqrt here
                * but brilinear and no_rho_opt seems like a combination not
                * making much sense anyway so just use ordinary path below.
                */
               lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                                rho, lodf_bld->one);
               return;
            }
         }

         /* get more accurate results if we just square rho always */
         if (!rho_squared)
            rho = lp_build_mul(lodf_bld, rho, rho);

         /* textureQueryLod needs the accurate log2; otherwise the fast
          * approximation suffices. */
         if (is_lodq)
            lod = lp_build_log2(lodf_bld, rho);
         else
            lod = lp_build_fast_log2(lodf_bld, rho);

         /* log2(x^2) == 0.5*log2(x) */
         lod = lp_build_mul(lodf_bld, lod,
                            lp_build_const_vec(bld->gallivm,
                                               lodf_bld->type, 0.5F));

         /* add shader lod bias */
         if (lod_bias) {
            if (bld->num_lods != bld->coord_type.length)
               lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
                                                    bld->coord_bld.type,
                                                    lodf_bld->type,
                                                    lod_bias, 0);
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }

      /* add sampler lod bias */
      if (bld->static_sampler_state->lod_bias_non_zero) {
         LLVMValueRef sampler_lod_bias =
            dynamic_state->lod_bias(bld->gallivm, bld->resources_type,
                                    bld->resources_ptr, sampler_unit);
         sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
                                                      sampler_lod_bias);
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
      }

      /* for textureQueryLod, report the unclamped lod as well */
      if (is_lodq) {
         *out_lod = lod;
      }

      /* clamp lod */
      if (bld->static_sampler_state->apply_max_lod) {
         LLVMValueRef max_lod =
            dynamic_state->max_lod(bld->gallivm, bld->resources_type,
                                   bld->resources_ptr, sampler_unit);
         max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);

         lod = lp_build_min(lodf_bld, lod, max_lod);
      }
      if (bld->static_sampler_state->apply_min_lod) {
         LLVMValueRef min_lod =
            dynamic_state->min_lod(bld->gallivm, bld->resources_type,
                                   bld->resources_ptr, sampler_unit);
         min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);

         lod = lp_build_max(lodf_bld, lod, min_lod);
      }

      /* textureQueryLod: fpart carries the clamped float lod, done here */
      if (is_lodq) {
         *out_lod_fpart = lod;
         return;
      }
   }

   *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
                                    lod, lodf_bld->zero);

   /* Split the float lod into integer/fractional parts per mip filter. */
   if (bld->static_sampler_state->aniso) {
      *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
   } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!bld->no_brilinear) {
         lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      } else {
         lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   } else {
      /* NONE/NEAREST: only the rounded integer lod is needed */
      *out_lod_ipart = lp_build_iround(lodf_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");

   return;
}
1012 
1013 
1014 /**
1015  * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1016  * to actual mip level.
1017  * Note: this is all scalar per quad code.
1018  * \param lod_ipart  int texture level of detail
1019  * \param level_out  returns integer
1020  * \param out_of_bounds returns per coord out_of_bounds mask if provided
1021  */
1022 void
lp_build_nearest_mip_level(struct lp_build_sample_context * bld,LLVMValueRef first_level,LLVMValueRef last_level,LLVMValueRef lod_ipart,LLVMValueRef * level_out,LLVMValueRef * out_of_bounds)1023 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1024                            LLVMValueRef first_level,
1025                            LLVMValueRef last_level,
1026                            LLVMValueRef lod_ipart,
1027                            LLVMValueRef *level_out,
1028                            LLVMValueRef *out_of_bounds)
1029 {
1030    struct lp_build_context *leveli_bld = &bld->leveli_bld;
1031    LLVMValueRef level = lp_build_add(leveli_bld, lod_ipart, first_level);
1032 
1033    if (out_of_bounds) {
1034       LLVMValueRef out, out1;
1035       out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
1036       out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
1037       out = lp_build_or(leveli_bld, out, out1);
1038       if (bld->num_mips == bld->coord_bld.type.length) {
1039          *out_of_bounds = out;
1040       } else if (bld->num_mips == 1) {
1041          *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
1042       } else {
1043          assert(bld->num_mips == bld->coord_bld.type.length / 4);
1044          *out_of_bounds =
1045             lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1046                                                   leveli_bld->type,
1047                                                   bld->int_coord_bld.type,
1048                                                   out);
1049       }
1050       level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
1051       *level_out = level;
1052    } else {
1053       /* clamp level to legal range of levels */
1054       *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
1055 
1056    }
1057 }
1058 
1059 
/**
 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
 * part accordingly.
 * Later, we'll sample from those two mipmap levels and interpolate between
 * them.
 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef first_level,
                           LLVMValueRef last_level,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   assert(bld->num_lods == bld->num_mips);

   /* level0 = first_level + lod_ipart; level1 is the adjacent finer level */
   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level],
    * with the minimum number of comparisons, and zeroing lod_fpart in the
    * extreme ends in the process.
    */

   /* *level0_out < first_level */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             *level0_out, first_level,
                             "clamp_lod_to_first");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level1_out, "");

   /* both levels collapse to first_level, so no interpolation weight */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             *level0_out, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   /* both levels collapse to last_level, so no interpolation weight */
   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
   lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
}
1126 
1127 
1128 /**
1129  * A helper function that factorizes this common pattern.
1130  */
1131 LLVMValueRef
lp_sample_load_mip_value(struct gallivm_state * gallivm,LLVMTypeRef ptr_type,LLVMValueRef offsets,LLVMValueRef index1)1132 lp_sample_load_mip_value(struct gallivm_state *gallivm,
1133                          LLVMTypeRef ptr_type,
1134                          LLVMValueRef offsets,
1135                          LLVMValueRef index1)
1136 {
1137    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1138    LLVMValueRef indexes[2] = {zero, index1};
1139    LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets,
1140                                     indexes, ARRAY_SIZE(indexes), "");
1141    return LLVMBuildLoad2(gallivm->builder,
1142                          LLVMInt32TypeInContext(gallivm->context), ptr, "");
1143 }
1144 
1145 
1146 /**
1147  * Return pointer to a single mipmap level.
1148  * \param level  integer mipmap level
1149  */
1150 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1151 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1152                           LLVMValueRef level)
1153 {
1154    LLVMValueRef mip_offset = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1155                                                       bld->mip_offsets, level);
1156    LLVMBuilderRef builder = bld->gallivm->builder;
1157    LLVMValueRef data_ptr =
1158       LLVMBuildGEP2(builder,
1159                     LLVMInt8TypeInContext(bld->gallivm->context),
1160                     bld->base_ptr, &mip_offset, 1, "");
1161    return data_ptr;
1162 }
1163 
1164 
/**
 * Return (per-pixel) offsets to mip levels.
 * \param level  integer mipmap level
 */
LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
                         LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offsets, offset1;

   if (bld->num_mips == 1) {
      /* Single level for the whole vector: load once and broadcast. */
      offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level);
      offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
   } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
      /* One level per quad: place each loaded offset at lane 4*i, then
       * replicate it across its quad with the scalar-aos swizzle. */
      offsets = bld->int_coord_bld.undef;
      for (unsigned i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
                                            bld->mip_offsets,
                                            LLVMBuildExtractElement(builder, level,
                                                                    indexi, ""));
         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         offsets = LLVMBuildInsertElement(builder, offsets, offset1,
                                          indexo, "");
      }
      offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld,
                                            offsets, 0, 4);
   } else {
      /* Fully per-element levels: one scalar load and insert per lane. */
      assert (bld->num_mips == bld->coord_bld.type.length);

      offsets = bld->int_coord_bld.undef;
      for (unsigned i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
                                            bld->mip_offsets,
                                            LLVMBuildExtractElement(builder, level,
                                                                    indexi, ""));
         offsets = LLVMBuildInsertElement(builder, offsets, offset1,
                                          indexi, "");
      }
   }
   return offsets;
}
1209 
1210 
/**
 * Codegen equivalent for u_minify().
 * @param lod_scalar  if lod is a (broadcasted) scalar
 * Return max(1, base_size >> level);
 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level,
                bool lod_scalar)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   } else {
      LLVMValueRef size;
      assert(bld->type.sign);
      /* Use a plain shift when the count is scalar, or on CPUs where
       * per-element variable shifts are fine (AVX2+, or no SSE at all). */
      if (lod_scalar ||
         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
         size = LLVMBuildLShr(builder, base_size, level, "minify");
         size = lp_build_max(bld, size, bld->one);
      } else {
         /*
          * emulate shift with float mul, since intel "forgot" shifts with
          * per-element shift count until avx2, which results in terrible
          * scalar extraction (both count and value), scalar shift,
          * vector reinsertion. Should not be an issue on any non-x86 cpu
          * with a vector instruction set.
          * On cpus with AMD's XOP this should also be unnecessary but I'm
          * not sure if llvm would emit this with current flags.
          */
         LLVMValueRef const127, const23, lf;
         struct lp_type ftype;
         struct lp_build_context fbld;
         /* a float type with the same total bit width as the int vector */
         ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
         lp_build_context_init(&fbld, bld->gallivm, ftype);
         const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
         const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);

         /* calculate 2^(-level) float: build the IEEE-754 bit pattern by
          * placing the biased exponent (127 - level) in bits 23..30 */
         lf = lp_build_sub(bld, const127, level);
         lf = lp_build_shl(bld, lf, const23);
         lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");

         /* finish shift operation by doing float mul */
         base_size = lp_build_int_to_float(&fbld, base_size);
         size = lp_build_mul(&fbld, base_size, lf);
         /*
          * do the max also with floats because
          * a) non-emulated int max requires sse41
          *    (this is actually a lie as we could cast to 16bit values
          *    as 16bit is sufficient and 16bit int max is sse2)
          * b) with avx we can do int max 4-wide but float max 8-wide
          */
         size = lp_build_max(&fbld, size, fbld.one);
         size = lp_build_itrunc(&fbld, size);
      }
      return size;
   }
}
1275 
1276 
1277 /*
1278  * Scale image dimensions with block sizes.
1279  *
1280  * tex_blocksize is the resource format blocksize
1281  * view_blocksize is the view format blocksize
1282  *
1283  * This must be applied post-minification, but
1284  * only when blocksizes are different.
1285  *
1286  * ret = (size + (tex_blocksize - 1)) >> log2(tex_blocksize);
1287  * ret *= blocksize;
1288  */
1289 LLVMValueRef
lp_build_scale_view_dims(struct lp_build_context * bld,LLVMValueRef size,LLVMValueRef tex_blocksize,LLVMValueRef tex_blocksize_log2,LLVMValueRef view_blocksize)1290 lp_build_scale_view_dims(struct lp_build_context *bld, LLVMValueRef size,
1291                          LLVMValueRef tex_blocksize,
1292                          LLVMValueRef tex_blocksize_log2,
1293                          LLVMValueRef view_blocksize)
1294 {
1295    LLVMBuilderRef builder = bld->gallivm->builder;
1296    LLVMValueRef ret =
1297       LLVMBuildAdd(builder, size,
1298                    LLVMBuildSub(builder, tex_blocksize,
1299                                 lp_build_const_int_vec(bld->gallivm,
1300                                                        bld->type, 1), ""),
1301                    "");
1302    ret = LLVMBuildLShr(builder, ret, tex_blocksize_log2, "");
1303    ret = LLVMBuildMul(builder, ret, view_blocksize, "");
1304    return ret;
1305 }
1306 
1307 
1308 /*
1309  * Scale a single image dimension.
1310  *
1311  * Scale one image between resource and view blocksizes.
1312  * noop if sizes are the same.
1313  */
1314 LLVMValueRef
lp_build_scale_view_dim(struct gallivm_state * gallivm,LLVMValueRef size,unsigned tex_blocksize,unsigned view_blocksize)1315 lp_build_scale_view_dim(struct gallivm_state *gallivm, LLVMValueRef size,
1316                         unsigned tex_blocksize, unsigned view_blocksize)
1317 {
1318    if (tex_blocksize == view_blocksize)
1319       return size;
1320 
1321    LLVMBuilderRef builder = gallivm->builder;
1322    LLVMValueRef ret =
1323       LLVMBuildAdd(builder, size,
1324                    lp_build_const_int32(gallivm, tex_blocksize - 1), "");
1325    ret = LLVMBuildLShr(builder, ret,
1326                        lp_build_const_int32(gallivm,
1327                                             util_logbase2(tex_blocksize)), "");
1328    ret = LLVMBuildMul(builder, ret,
1329                       lp_build_const_int32(gallivm, view_blocksize), "");
1330    return ret;
1331 }
1332 
1333 
/**
 * Dereference stride_array[mipmap_level] array to get a stride.
 * Return stride as a vector.
 */
static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
                              LLVMTypeRef stride_type,
                              LLVMValueRef stride_array, LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef stride, stride1;

   if (bld->num_mips == 1) {
      /* Single level for the whole vector: load once and broadcast. */
      stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array, level);
      stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
   } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
      /* NOTE: this stride1 shadows the outer declaration. */
      LLVMValueRef stride1;

      /* One level per quad: place each stride at lane 4*i, then replicate
       * it across its quad with the scalar-aos swizzle. */
      stride = bld->int_coord_bld.undef;
      for (unsigned i = 0; i < bld->num_mips; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
                                            LLVMBuildExtractElement(builder, level,
                                                                    indexi, ""));
         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
      }
      stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
   } else {
      /* NOTE: this stride1 shadows the outer declaration. */
      LLVMValueRef stride1;

      assert (bld->num_mips == bld->coord_bld.type.length);

      /* Fully per-element levels: one scalar load and insert per lane. */
      stride = bld->int_coord_bld.undef;
      for (unsigned i = 0; i < bld->coord_bld.type.length; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
                                            LLVMBuildExtractElement(builder, level,
                                                                    indexi, ""));
         stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
      }
   }
   return stride;
}
1378 
1379 
1380 /**
1381  * When sampling a mipmap, we need to compute the width, height, depth
1382  * of the source levels from the level indexes.  This helper function
1383  * does that.
1384  */
1385 void
lp_build_mipmap_level_sizes(struct lp_build_sample_context * bld,LLVMValueRef ilevel,LLVMValueRef * out_size,LLVMValueRef * row_stride_vec,LLVMValueRef * img_stride_vec)1386 lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
1387                             LLVMValueRef ilevel,
1388                             LLVMValueRef *out_size,
1389                             LLVMValueRef *row_stride_vec,
1390                             LLVMValueRef *img_stride_vec)
1391 {
1392    const unsigned dims = bld->dims;
1393    LLVMValueRef ilevel_vec;
1394 
1395    /*
1396     * Compute width, height, depth at mipmap level 'ilevel'
1397     */
1398    if (bld->num_mips == 1) {
1399       ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
1400       *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
1401                                   ilevel_vec, true);
1402       *out_size = lp_build_scale_view_dims(&bld->int_size_bld, *out_size,
1403                                            bld->int_tex_blocksize,
1404                                            bld->int_tex_blocksize_log2,
1405                                            bld->int_view_blocksize);
1406    } else {
1407       LLVMValueRef int_size_vec;
1408       LLVMValueRef int_tex_blocksize_vec, int_tex_blocksize_log2_vec;
1409       LLVMValueRef int_view_blocksize_vec;
1410       LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
1411       const unsigned num_quads = bld->coord_bld.type.length / 4;
1412 
1413       if (bld->num_mips == num_quads) {
1414          /*
1415           * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
1416           * intel "forgot" the variable shift count instruction until avx2.
1417           * A harmless 8x32 shift gets translated into 32 instructions
1418           * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
1419           * unable to recognize if there are really just 2 different shift
1420           * count values. So do the shift 4-wide before expansion.
1421           */
1422          struct lp_build_context bld4;
1423          struct lp_type type4;
1424 
1425          type4 = bld->int_coord_bld.type;
1426          type4.length = 4;
1427 
1428          lp_build_context_init(&bld4, bld->gallivm, type4);
1429 
1430          if (bld->dims == 1) {
1431             assert(bld->int_size_in_bld.type.length == 1);
1432             int_size_vec = lp_build_broadcast_scalar(&bld4,
1433                                                      bld->int_size);
1434             int_tex_blocksize_vec =
1435                lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize);
1436             int_tex_blocksize_log2_vec =
1437                lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize_log2);
1438             int_view_blocksize_vec =
1439                lp_build_broadcast_scalar(&bld4, bld->int_view_blocksize);
1440          } else {
1441             assert(bld->int_size_in_bld.type.length == 4);
1442             int_size_vec = bld->int_size;
1443             int_tex_blocksize_vec = bld->int_tex_blocksize;
1444             int_tex_blocksize_log2_vec = bld->int_tex_blocksize_log2;
1445             int_view_blocksize_vec = bld->int_view_blocksize;
1446          }
1447 
1448          for (unsigned i = 0; i < num_quads; i++) {
1449             LLVMValueRef ileveli;
1450             LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1451 
1452             ileveli = lp_build_extract_broadcast(bld->gallivm,
1453                                                  bld->leveli_bld.type,
1454                                                  bld4.type,
1455                                                  ilevel,
1456                                                  indexi);
1457             tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, true);
1458             tmp[i] = lp_build_scale_view_dims(&bld4, tmp[i],
1459                                               int_tex_blocksize_vec,
1460                                               int_tex_blocksize_log2_vec,
1461                                               int_view_blocksize_vec);
1462          }
1463          /*
1464           * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for
1465           * dims > 1, [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
1466           */
1467          *out_size = lp_build_concat(bld->gallivm,
1468                                      tmp,
1469                                      bld4.type,
1470                                      num_quads);
1471       } else {
1472          /* FIXME: this is terrible and results in _huge_ vector
1473           * (for the dims > 1 case).
1474           * Should refactor this (together with extract_image_sizes) and do
1475           * something more useful. Could for instance if we have width,height
1476           * with 4-wide vector pack all elements into a 8xi16 vector
1477           * (on which we can still do useful math) instead of using a 16xi32
1478           * vector.
1479           * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
1480           * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
1481           * vector.
1482           */
1483          assert(bld->num_mips == bld->coord_bld.type.length);
1484          if (bld->dims == 1) {
1485             assert(bld->int_size_in_bld.type.length == 1);
1486             int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
1487                                                      bld->int_size);
1488             int_tex_blocksize_vec =
1489                lp_build_broadcast_scalar(&bld->int_coord_bld,
1490                                          bld->int_tex_blocksize);
1491             int_tex_blocksize_log2_vec =
1492                lp_build_broadcast_scalar(&bld->int_coord_bld,
1493                                          bld->int_tex_blocksize_log2);
1494             int_view_blocksize_vec =
1495                lp_build_broadcast_scalar(&bld->int_coord_bld,
1496                                          bld->int_view_blocksize);
1497             *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec,
1498                                         ilevel, false);
1499             *out_size = lp_build_scale_view_dims(&bld->int_coord_bld,
1500                                                  *out_size,
1501                                                  int_tex_blocksize_vec,
1502                                                  int_tex_blocksize_log2_vec,
1503                                                  int_view_blocksize_vec);
1504          } else {
1505             LLVMValueRef ilevel1;
1506             for (unsigned i = 0; i < bld->num_mips; i++) {
1507                LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1508                ilevel1 = lp_build_extract_broadcast(bld->gallivm,
1509                                                     bld->int_coord_type,
1510                                                     bld->int_size_in_bld.type,
1511                                                     ilevel, indexi);
1512                tmp[i] = bld->int_size;
1513                tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i],
1514                                         ilevel1, true);
1515                tmp[i] = lp_build_scale_view_dims(&bld->int_size_in_bld,
1516                                                  tmp[i],
1517                                                  bld->int_tex_blocksize,
1518                                                  bld->int_tex_blocksize_log2,
1519                                                  bld->int_view_blocksize);
1520             }
1521             *out_size = lp_build_concat(bld->gallivm, tmp,
1522                                         bld->int_size_in_bld.type,
1523                                         bld->num_mips);
1524          }
1525       }
1526    }
1527 
1528    if (dims >= 2) {
1529       *row_stride_vec = lp_build_get_level_stride_vec(bld,
1530                                                       bld->row_stride_type,
1531                                                       bld->row_stride_array,
1532                                                       ilevel);
1533    }
1534    if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
1535       *img_stride_vec = lp_build_get_level_stride_vec(bld,
1536                                                       bld->img_stride_type,
1537                                                       bld->img_stride_array,
1538                                                       ilevel);
1539    }
1540 }
1541 
1542 
1543 /**
1544  * Extract and broadcast texture size.
1545  *
1546  * @param size_type   type of the texture size vector (either
1547  *                    bld->int_size_type or bld->float_size_type)
1548  * @param coord_type  type of the texture size vector (either
1549  *                    bld->int_coord_type or bld->coord_type)
1550  * @param size        vector with the texture size (width, height, depth)
1551  */
1552 void
lp_build_extract_image_sizes(struct lp_build_sample_context * bld,struct lp_build_context * size_bld,struct lp_type coord_type,LLVMValueRef size,LLVMValueRef * out_width,LLVMValueRef * out_height,LLVMValueRef * out_depth)1553 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
1554                              struct lp_build_context *size_bld,
1555                              struct lp_type coord_type,
1556                              LLVMValueRef size,
1557                              LLVMValueRef *out_width,
1558                              LLVMValueRef *out_height,
1559                              LLVMValueRef *out_depth)
1560 {
1561    const unsigned dims = bld->dims;
1562    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
1563    struct lp_type size_type = size_bld->type;
1564 
1565    if (bld->num_mips == 1) {
1566       *out_width = lp_build_extract_broadcast(bld->gallivm,
1567                                               size_type,
1568                                               coord_type,
1569                                               size,
1570                                               LLVMConstInt(i32t, 0, 0));
1571       if (dims >= 2) {
1572          *out_height = lp_build_extract_broadcast(bld->gallivm,
1573                                                   size_type,
1574                                                   coord_type,
1575                                                   size,
1576                                                   LLVMConstInt(i32t, 1, 0));
1577          if (dims == 3) {
1578             *out_depth = lp_build_extract_broadcast(bld->gallivm,
1579                                                     size_type,
1580                                                     coord_type,
1581                                                     size,
1582                                                     LLVMConstInt(i32t, 2, 0));
1583          }
1584       }
1585    } else {
1586       unsigned num_quads = bld->coord_bld.type.length / 4;
1587 
1588       if (dims == 1) {
1589          *out_width = size;
1590       } else if (bld->num_mips == num_quads) {
1591          *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
1592          if (dims >= 2) {
1593             *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
1594             if (dims == 3) {
1595                *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
1596             }
1597          }
1598       } else {
1599          assert(bld->num_mips == bld->coord_type.length);
1600          *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1601                                                 coord_type, size, 0);
1602          if (dims >= 2) {
1603             *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1604                                                     coord_type, size, 1);
1605             if (dims == 3) {
1606                *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1607                                                       coord_type, size, 2);
1608             }
1609          }
1610       }
1611    }
1612 }
1613 
1614 
1615 /**
1616  * Unnormalize coords.
1617  *
1618  * @param flt_size  vector with the integer texture size (width, height, depth)
1619  */
1620 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1621 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1622                              LLVMValueRef flt_size,
1623                              LLVMValueRef *s,
1624                              LLVMValueRef *t,
1625                              LLVMValueRef *r)
1626 {
1627    const unsigned dims = bld->dims;
1628    LLVMValueRef width;
1629    LLVMValueRef height = NULL;
1630    LLVMValueRef depth = NULL;
1631 
1632    lp_build_extract_image_sizes(bld,
1633                                 &bld->float_size_bld,
1634                                 bld->coord_type,
1635                                 flt_size,
1636                                 &width,
1637                                 &height,
1638                                 &depth);
1639 
1640    /* s = s * width, t = t * height */
1641    *s = lp_build_mul(&bld->coord_bld, *s, width);
1642    if (dims >= 2) {
1643       *t = lp_build_mul(&bld->coord_bld, *t, height);
1644       if (dims >= 3) {
1645          *r = lp_build_mul(&bld->coord_bld, *r, depth);
1646       }
1647    }
1648 }
1649 
1650 
1651 /**
1652  * Generate new coords and faces for cubemap texels falling off the face.
1653  *
1654  * @param face   face (center) of the pixel
1655  * @param x0     lower x coord
1656  * @param x1     higher x coord (must be x0 + 1)
1657  * @param y0     lower y coord
1658  * @param y1     higher y coord (must be x0 + 1)
1659  * @param max_coord     texture cube (level) size - 1
1660  * @param next_faces    new face values when falling off
1661  * @param next_xcoords  new x coord values when falling off
1662  * @param next_ycoords  new y coord values when falling off
1663  *
1664  * The arrays hold the new values when under/overflow of
1665  * lower x, higher x, lower y, higher y coord would occur (in this order).
1666  * next_xcoords/next_ycoords have two entries each (for both new lower and
1667  * higher coord).
1668  */
1669 void
lp_build_cube_new_coords(struct lp_build_context * ivec_bld,LLVMValueRef face,LLVMValueRef x0,LLVMValueRef x1,LLVMValueRef y0,LLVMValueRef y1,LLVMValueRef max_coord,LLVMValueRef next_faces[4],LLVMValueRef next_xcoords[4][2],LLVMValueRef next_ycoords[4][2])1670 lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1671                         LLVMValueRef face,
1672                         LLVMValueRef x0,
1673                         LLVMValueRef x1,
1674                         LLVMValueRef y0,
1675                         LLVMValueRef y1,
1676                         LLVMValueRef max_coord,
1677                         LLVMValueRef next_faces[4],
1678                         LLVMValueRef next_xcoords[4][2],
1679                         LLVMValueRef next_ycoords[4][2])
1680 {
1681    /*
1682     * Lookup tables aren't nice for simd code hence try some logic here.
1683     * (Note that while it would not be necessary to do per-sample (4) lookups
1684     * when using a LUT as it's impossible that texels fall off of positive
1685     * and negative edges simultaneously, it would however be necessary to
1686     * do 2 lookups for corner handling as in this case texels both fall off
1687     * of x and y axes.)
1688     */
1689    /*
1690     * Next faces (for face 012345):
1691     * x < 0.0  : 451110
1692     * x >= 1.0 : 540001
1693     * y < 0.0  : 225422
1694     * y >= 1.0 : 334533
1695     * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1696     * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
1697     * nfy+: face & ~4 > 1 ? face + 2 : 3;
1698     * This could also use pshufb instead, but would need (manually coded)
1699     * ssse3 intrinsic (llvm won't do non-constant shuffles).
1700     */
1701    struct gallivm_state *gallivm = ivec_bld->gallivm;
1702    LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1703    LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1704    LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1705    LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1706    LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1707    LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1708 
1709    sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1710    tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1711    sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1712    faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1713    tmp = lp_build_add(ivec_bld, faceand1, c4);
1714    next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1715    next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1716 
1717    tmp = lp_build_andnot(ivec_bld, face, c4);
1718    sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1719    tmp = lp_build_add(ivec_bld, face, c2);
1720    next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1721    next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1722 
1723    /*
1724     * new xcoords (for face 012345):
1725     * x < 0.0  : max   max   t     max-t max  max
1726     * x >= 1.0 : 0     0     max-t t     0    0
1727     * y < 0.0  : max   0     max-s s     s    max-s
1728     * y >= 1.0 : max   0     s     max-s s    max-s
1729     *
1730     * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1731     * ncx[0] = max - ncx[1]
1732     * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1733     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1734     */
1735    sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1736    maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1737    tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1738    next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1739    next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1740    maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1741    tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1742    next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1743    next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1744 
1745    sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1746 
1747    tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1748    maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1749    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1750    next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1751    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1752    next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1753    maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1754    tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1755    next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1756    tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1757    next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1758 
1759    /*
1760     * new ycoords (for face 012345):
1761     * x < 0.0  : t     t     0     max   t    t
1762     * x >= 1.0 : t     t     0     max   t    t
1763     * y < 0.0  : max-s s     0     max   max  0
1764     * y >= 1.0 : s     max-s 0     max   0    max
1765     *
1766     * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1767     * ncy[1] = ncy[0]
1768     * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
1769     * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1770     */
1771    tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1772    next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1773    next_ycoords[1][0] = next_ycoords[0][0];
1774    next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1775    next_ycoords[1][1] = next_ycoords[0][1];
1776 
1777    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1778    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1779    next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1780    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1781    next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1782    tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1783    tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1784    next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1785    tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1786    next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1787 }
1788 
1789 
1790 /** Helper used by lp_build_cube_lookup() */
1791 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1792 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1793 {
1794    /* ima = +0.5 / abs(coord); */
1795    LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1796    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1797    /* avoid div by zero */
1798    LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1799    LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1800    LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1801    return ima;
1802 }
1803 
1804 
1805 /** Helper for doing 3-wise selection.
1806  * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1807  */
1808 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1809 lp_build_select3(struct lp_build_context *sel_bld,
1810                  LLVMValueRef sel0,
1811                  LLVMValueRef sel1,
1812                  LLVMValueRef val0,
1813                  LLVMValueRef val1,
1814                  LLVMValueRef val2)
1815 {
1816    LLVMValueRef tmp = lp_build_select(sel_bld, sel0, val0, val1);
1817    return lp_build_select(sel_bld, sel1, val2, tmp);
1818 }
1819 
1820 
1821 /**
1822  * Generate code to do cube face selection and compute per-face texcoords.
1823  */
1824 void
lp_build_cube_lookup(struct lp_build_sample_context * bld,LLVMValueRef * coords,const struct lp_derivatives * derivs_in,struct lp_derivatives * derivs_out,bool need_derivs)1825 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1826                      LLVMValueRef *coords,
1827                      const struct lp_derivatives *derivs_in, /* optional */
1828                      struct lp_derivatives *derivs_out, /* optional */
1829                      bool need_derivs)
1830 {
1831    struct lp_build_context *coord_bld = &bld->coord_bld;
1832    LLVMBuilderRef builder = bld->gallivm->builder;
1833    struct gallivm_state *gallivm = bld->gallivm;
1834    LLVMValueRef si, ti, ri;
1835 
1836    /*
1837     * Do per-pixel face selection. We cannot however (as we used to do)
1838     * simply calculate the derivs afterwards (which is very bogus for
1839     * explicit derivs btw) because the values would be "random" when
1840     * not all pixels lie on the same face.
1841     */
1842    struct lp_build_context *cint_bld = &bld->int_coord_bld;
1843    struct lp_type intctype = cint_bld->type;
1844    LLVMTypeRef coord_vec_type = coord_bld->vec_type;
1845    LLVMTypeRef cint_vec_type = cint_bld->vec_type;
1846    LLVMValueRef as, at, ar, face, face_s, face_t;
1847    LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
1848    LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
1849    LLVMValueRef tnegi, rnegi;
1850    LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
1851    LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
1852    LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
1853                                                   1LL << (intctype.width - 1));
1854    LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
1855                                                    intctype.width -1);
1856    LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
1857    LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
1858    LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
1859    LLVMValueRef s = coords[0];
1860    LLVMValueRef t = coords[1];
1861    LLVMValueRef r = coords[2];
1862 
1863    assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
1864    assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
1865    assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);
1866 
1867    /*
1868     * get absolute value (for x/y/z face selection) and sign bit
1869     * (for mirroring minor coords and pos/neg face selection)
1870     * of the original coords.
1871     */
1872    as = lp_build_abs(&bld->coord_bld, s);
1873    at = lp_build_abs(&bld->coord_bld, t);
1874    ar = lp_build_abs(&bld->coord_bld, r);
1875 
1876    /*
1877     * major face determination: select x if x > y else select y
1878     * select z if z >= max(x,y) else select previous result
1879     * if some axis are the same we chose z over y, y over x - the
1880     * dx10 spec seems to ask for it while OpenGL doesn't care (if we
1881     * wouldn't care could save a select or two if using different
1882     * compares and doing at_g_as_ar last since tnewx and tnewz are the
1883     * same).
1884     */
1885    as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
1886    maxasat = lp_build_max(coord_bld, as, at);
1887    ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);
1888 
1889    if (need_derivs) {
1890       /*
1891        * XXX: This is really really complex.
1892        * It is a bit overkill to use this for implicit derivatives as well,
1893        * no way this is worth the cost in practice, but seems to be the
1894        * only way for getting accurate and per-pixel lod values.
1895        */
1896       LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
1897       LLVMValueRef madx, mady, madxdivma, madydivma;
1898       LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
1899       LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
1900       LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
1901       LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
1902       LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
1903       /*
1904        * s = 1/2 * (sc / ma + 1)
1905        * t = 1/2 * (tc / ma + 1)
1906        *
1907        * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
1908        * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
1909        *
1910        * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
1911        * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
1912        * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
1913        * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
1914        */
1915 
1916       /* select ma, calculate ima */
1917       ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
1918       mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
1919       signmabit = LLVMBuildAnd(builder, mai, signmask, "");
1920       ima = lp_build_div(coord_bld, coord_bld->one, ma);
1921       imahalf = lp_build_mul(coord_bld, posHalf, ima);
1922       imahalfpos = lp_build_abs(coord_bld, imahalf);
1923 
1924       if (!derivs_in) {
1925          ddx[0] = lp_build_ddx(coord_bld, s);
1926          ddx[1] = lp_build_ddx(coord_bld, t);
1927          ddx[2] = lp_build_ddx(coord_bld, r);
1928          ddy[0] = lp_build_ddy(coord_bld, s);
1929          ddy[1] = lp_build_ddy(coord_bld, t);
1930          ddy[2] = lp_build_ddy(coord_bld, r);
1931       } else {
1932          ddx[0] = derivs_in->ddx[0];
1933          ddx[1] = derivs_in->ddx[1];
1934          ddx[2] = derivs_in->ddx[2];
1935          ddy[0] = derivs_in->ddy[0];
1936          ddy[1] = derivs_in->ddy[1];
1937          ddy[2] = derivs_in->ddy[2];
1938       }
1939 
1940       /* select major derivatives */
1941       madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
1942       mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);
1943 
1944       si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
1945       ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
1946       ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
1947 
1948       sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
1949       tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
1950       rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");
1951 
1952       sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
1953       tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
1954       rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");
1955 
1956       /*
1957        * compute all possible new s/t coords, which does the mirroring,
1958        * and do the same for derivs minor axes.
1959        * snewx = signma * -r;
1960        * tnewx = -t;
1961        * snewy = s;
1962        * tnewy = signma * r;
1963        * snewz = signma * s;
1964        * tnewz = -t;
1965        */
1966       tnegi = LLVMBuildXor(builder, ti, signmask, "");
1967       rnegi = LLVMBuildXor(builder, ri, signmask, "");
1968       tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
1969       rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
1970       tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
1971       rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");
1972 
1973       snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
1974       tnewx = tnegi;
1975       sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
1976       tdxnewx = tdxnegi;
1977       sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
1978       tdynewx = tdynegi;
1979 
1980       snewy = si;
1981       tnewy = LLVMBuildXor(builder, signmabit, ri, "");
1982       sdxnewy = sdxi;
1983       tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
1984       sdynewy = sdyi;
1985       tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");
1986 
1987       snewz = LLVMBuildXor(builder, signmabit, si, "");
1988       tnewz = tnegi;
1989       sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
1990       tdxnewz = tdxnegi;
1991       sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
1992       tdynewz = tdynegi;
1993 
1994       /* select the mirrored values */
1995       face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
1996       face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
1997       face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
1998       face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
1999       face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
2000       face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
2001       face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);
2002 
2003       face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2004       face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2005       face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
2006       face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
2007       face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
2008       face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");
2009 
2010       /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
2011       madxdivma = lp_build_mul(coord_bld, madx, ima);
2012       tmp = lp_build_mul(coord_bld, madxdivma, face_s);
2013       tmp = lp_build_sub(coord_bld, face_sdx, tmp);
2014       derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);
2015 
2016       /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
2017       tmp = lp_build_mul(coord_bld, madxdivma, face_t);
2018       tmp = lp_build_sub(coord_bld, face_tdx, tmp);
2019       derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);
2020 
2021       /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
2022       madydivma = lp_build_mul(coord_bld, mady, ima);
2023       tmp = lp_build_mul(coord_bld, madydivma, face_s);
2024       tmp = lp_build_sub(coord_bld, face_sdy, tmp);
2025       derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);
2026 
2027       /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
2028       tmp = lp_build_mul(coord_bld, madydivma, face_t);
2029       tmp = lp_build_sub(coord_bld, face_tdy, tmp);
2030       derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);
2031 
2032       signma = LLVMBuildLShr(builder, mai, signshift, "");
2033       coords[2] = LLVMBuildOr(builder, face, signma, "face");
2034 
2035       /* project coords */
2036       face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2037       face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2038 
2039       coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2040       coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2041 
2042       return;
2043    }
2044 
2045    ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
2046    mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
2047    signmabit = LLVMBuildAnd(builder, mai, signmask, "");
2048 
2049    si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
2050    ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
2051    ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");
2052 
2053    /*
2054     * compute all possible new s/t coords, which does the mirroring
2055     * snewx = signma * -r;
2056     * tnewx = -t;
2057     * snewy = s;
2058     * tnewy = signma * r;
2059     * snewz = signma * s;
2060     * tnewz = -t;
2061     */
2062    tnegi = LLVMBuildXor(builder, ti, signmask, "");
2063    rnegi = LLVMBuildXor(builder, ri, signmask, "");
2064 
2065    snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
2066    tnewx = tnegi;
2067 
2068    snewy = si;
2069    tnewy = LLVMBuildXor(builder, signmabit, ri, "");
2070 
2071    snewz = LLVMBuildXor(builder, signmabit, si, "");
2072    tnewz = tnegi;
2073 
2074    /* select the mirrored values */
2075    face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
2076    face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
2077    face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
2078 
2079    face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
2080    face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
2081 
2082    /* add +1 for neg face */
2083    /* XXX with AVX probably want to use another select here -
2084     * as long as we ensure vblendvps gets used we can actually
2085     * skip the comparison and just use sign as a "mask" directly.
2086     */
2087    signma = LLVMBuildLShr(builder, mai, signshift, "");
2088    coords[2] = LLVMBuildOr(builder, face, signma, "face");
2089 
2090    /* project coords */
2091    imahalfpos = lp_build_cube_imapos(coord_bld, ma);
2092    face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
2093    face_t = lp_build_mul(coord_bld, face_t, imahalfpos);
2094 
2095    coords[0] = lp_build_add(coord_bld, face_s, posHalf);
2096    coords[1] = lp_build_add(coord_bld, face_t, posHalf);
2097 }
2098 
2099 
2100 /**
2101  * Compute the partial offset of a pixel block along an arbitrary axis.
2102  *
2103  * @param coord   coordinate in pixels
2104  * @param stride  number of bytes between rows of successive pixel blocks
2105  * @param block_length  number of pixels in a pixels block along the coordinate
2106  *                      axis
2107  * @param out_offset    resulting relative offset of the pixel block in bytes
2108  * @param out_subcoord  resulting sub-block pixel coordinate
2109  */
2110 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2111 lp_build_sample_partial_offset(struct lp_build_context *bld,
2112                                unsigned block_length,
2113                                LLVMValueRef coord,
2114                                LLVMValueRef stride,
2115                                LLVMValueRef *out_offset,
2116                                LLVMValueRef *out_subcoord)
2117 {
2118    LLVMBuilderRef builder = bld->gallivm->builder;
2119    LLVMValueRef offset;
2120    LLVMValueRef subcoord;
2121 
2122    if (block_length == 1) {
2123       subcoord = bld->zero;
2124    } else {
2125       /*
2126        * Pixel blocks have power of two dimensions. LLVM should convert the
2127        * rem/div to bit arithmetic.
2128        * TODO: Verify this.
2129        * It does indeed BUT it does transform it to scalar (and back) when doing so
2130        * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2131        * The generated code looks seriously unfunny and is quite expensive.
2132        */
2133 #if 0
2134       LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2135       subcoord = LLVMBuildURem(builder, coord, block_width, "");
2136       coord    = LLVMBuildUDiv(builder, coord, block_width, "");
2137 #else
2138       unsigned logbase2 = util_logbase2(block_length);
2139       LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2140       LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2141       subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2142       coord = LLVMBuildLShr(builder, coord, block_shift, "");
2143 #endif
2144    }
2145 
2146    offset = lp_build_mul(bld, coord, stride);
2147 
2148    assert(out_offset);
2149    assert(out_subcoord);
2150 
2151    *out_offset = offset;
2152    *out_subcoord = subcoord;
2153 }
2154 
2155 
2156 /**
2157  * Compute the offset of a pixel block.
2158  *
2159  * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2160  *
2161  * Returns the relative offset and i,j sub-block coordinates
2162  */
2163 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2164 lp_build_sample_offset(struct lp_build_context *bld,
2165                        const struct util_format_description *format_desc,
2166                        LLVMValueRef x,
2167                        LLVMValueRef y,
2168                        LLVMValueRef z,
2169                        LLVMValueRef y_stride,
2170                        LLVMValueRef z_stride,
2171                        LLVMValueRef *out_offset,
2172                        LLVMValueRef *out_i,
2173                        LLVMValueRef *out_j)
2174 {
2175    LLVMValueRef x_stride;
2176    LLVMValueRef offset;
2177 
2178    x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2179                                  format_desc->block.bits/8);
2180 
2181    lp_build_sample_partial_offset(bld,
2182                                   format_desc->block.width,
2183                                   x, x_stride,
2184                                   &offset, out_i);
2185 
2186    if (y && y_stride) {
2187       LLVMValueRef y_offset;
2188       lp_build_sample_partial_offset(bld,
2189                                      format_desc->block.height,
2190                                      y, y_stride,
2191                                      &y_offset, out_j);
2192       offset = lp_build_add(bld, offset, y_offset);
2193    } else {
2194       *out_j = bld->zero;
2195    }
2196 
2197    if (z && z_stride) {
2198       LLVMValueRef z_offset;
2199       LLVMValueRef k;
2200       lp_build_sample_partial_offset(bld,
2201                                      1, /* pixel blocks are always 2D */
2202                                      z, z_stride,
2203                                      &z_offset, &k);
2204       offset = lp_build_add(bld, offset, z_offset);
2205    }
2206 
2207    *out_offset = offset;
2208 }
2209 
2210 
2211 
2212 void
lp_build_tiled_sample_offset(struct lp_build_context * bld,enum pipe_format format,const struct lp_static_texture_state * static_texture_state,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef width,LLVMValueRef height,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2213 lp_build_tiled_sample_offset(struct lp_build_context *bld,
2214                              enum pipe_format format,
2215                              const struct lp_static_texture_state *static_texture_state,
2216                              LLVMValueRef x,
2217                              LLVMValueRef y,
2218                              LLVMValueRef z,
2219                              LLVMValueRef width,
2220                              LLVMValueRef height,
2221                              LLVMValueRef z_stride,
2222                              LLVMValueRef *out_offset,
2223                              LLVMValueRef *out_i,
2224                              LLVMValueRef *out_j)
2225 {
2226    struct gallivm_state *gallivm = bld->gallivm;
2227    LLVMBuilderRef builder = gallivm->builder;
2228 
2229    assert(static_texture_state->tiled);
2230 
2231    uint32_t res_dimensions = 1;
2232    switch (static_texture_state->res_target) {
2233    case PIPE_TEXTURE_2D:
2234    case PIPE_TEXTURE_CUBE:
2235    case PIPE_TEXTURE_RECT:
2236    case PIPE_TEXTURE_2D_ARRAY:
2237       res_dimensions = 2;
2238       break;
2239    case PIPE_TEXTURE_3D:
2240       res_dimensions = 3;
2241       break;
2242    default:
2243       break;
2244    }
2245 
2246    uint32_t dimensions = 1;
2247    switch (static_texture_state->target) {
2248    case PIPE_TEXTURE_2D:
2249    case PIPE_TEXTURE_CUBE:
2250    case PIPE_TEXTURE_RECT:
2251    case PIPE_TEXTURE_2D_ARRAY:
2252       dimensions = 2;
2253       break;
2254    case PIPE_TEXTURE_3D:
2255       dimensions = 3;
2256       break;
2257    default:
2258       break;
2259    }
2260 
2261    uint32_t block_size[3] = {
2262       util_format_get_blockwidth(format),
2263       util_format_get_blockheight(format),
2264       util_format_get_blockdepth(format),
2265    };
2266 
2267    uint32_t sparse_tile_size[3] = {
2268       util_format_get_tilesize(format, res_dimensions, static_texture_state->tiled_samples, 0) * block_size[0],
2269       util_format_get_tilesize(format, res_dimensions, static_texture_state->tiled_samples, 1) * block_size[1],
2270       util_format_get_tilesize(format, res_dimensions, static_texture_state->tiled_samples, 2) * block_size[2],
2271    };
2272 
2273    LLVMValueRef sparse_tile_size_log2[3] = {
2274       lp_build_const_vec(gallivm, bld->type, util_logbase2(sparse_tile_size[0])),
2275       lp_build_const_vec(gallivm, bld->type, util_logbase2(sparse_tile_size[1])),
2276       lp_build_const_vec(gallivm, bld->type, util_logbase2(sparse_tile_size[2])),
2277    };
2278 
2279    LLVMValueRef tile_index = LLVMBuildLShr(builder, x, sparse_tile_size_log2[0], "");
2280 
2281    if (y && dimensions > 1) {
2282       LLVMValueRef x_tile_count = lp_build_add(bld, width, lp_build_const_vec(gallivm, bld->type, sparse_tile_size[0] - 1));
2283       x_tile_count = LLVMBuildLShr(builder, x_tile_count, sparse_tile_size_log2[0], "");
2284       LLVMValueRef y_tile = LLVMBuildLShr(builder, y, sparse_tile_size_log2[1], "");
2285       tile_index = lp_build_add(bld, tile_index, lp_build_mul(bld, y_tile, x_tile_count));
2286 
2287       if (z && dimensions > 2) {
2288          LLVMValueRef y_tile_count = lp_build_add(bld, height, lp_build_const_vec(gallivm, bld->type, sparse_tile_size[1] - 1));
2289          y_tile_count = LLVMBuildLShr(builder, y_tile_count, sparse_tile_size_log2[1], "");
2290          LLVMValueRef z_tile = LLVMBuildLShr(builder, z, sparse_tile_size_log2[2], "");
2291          tile_index = lp_build_add(bld, tile_index, lp_build_mul(bld, z_tile, lp_build_mul(bld, x_tile_count, y_tile_count)));
2292       }
2293    }
2294 
2295    LLVMValueRef offset = LLVMBuildShl(builder, tile_index, lp_build_const_vec(gallivm, bld->type, 16), "");
2296 
2297    LLVMValueRef sparse_tile_masks[3] = {
2298       lp_build_const_vec(gallivm, bld->type, sparse_tile_size[0] - 1),
2299       lp_build_const_vec(gallivm, bld->type, sparse_tile_size[1] - 1),
2300       lp_build_const_vec(gallivm, bld->type, sparse_tile_size[2] - 1),
2301    };
2302 
2303    x = LLVMBuildAnd(builder, x, sparse_tile_masks[0], "");
2304    LLVMValueRef x_stride = lp_build_const_vec(gallivm, bld->type, util_format_get_blocksize(format));
2305 
2306    LLVMValueRef x_offset;
2307    lp_build_sample_partial_offset(bld, block_size[0],
2308                                   x, x_stride, &x_offset, out_i);
2309    offset = lp_build_add(bld, offset, x_offset);
2310 
2311    if (y && dimensions > 1) {
2312       y = LLVMBuildAnd(builder, y, sparse_tile_masks[1], "");
2313       LLVMValueRef y_stride = lp_build_const_vec(gallivm, bld->type, util_format_get_blocksize(format) *
2314                                                  sparse_tile_size[0] / block_size[0]);
2315 
2316       LLVMValueRef y_offset;
2317       lp_build_sample_partial_offset(bld, block_size[1],
2318                                      y, y_stride, &y_offset, out_j);
2319       offset = lp_build_add(bld, offset, y_offset);
2320    } else {
2321       *out_j = bld->zero;
2322    }
2323 
2324    if (z && (z_stride || dimensions > 2)) {
2325       if (dimensions > 2) {
2326          z = LLVMBuildAnd(builder, z, sparse_tile_masks[2], "");
2327          z_stride = lp_build_const_vec(gallivm, bld->type, util_format_get_blocksize(format) *
2328                                        sparse_tile_size[0] / block_size[0] *
2329                                        sparse_tile_size[1] / block_size[1]);
2330       }
2331 
2332       LLVMValueRef z_offset;
2333       LLVMValueRef k;
2334       lp_build_sample_partial_offset(bld, 1, z, z_stride, &z_offset, &k);
2335       offset = lp_build_add(bld, offset, z_offset);
2336    }
2337 
2338    *out_offset = offset;
2339 }
2340 
2341 
2342 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2343 lp_build_sample_min(struct lp_build_context *bld,
2344                     LLVMValueRef x,
2345                     LLVMValueRef v0,
2346                     LLVMValueRef v1)
2347 {
2348    /* if the incoming LERP weight is 0 then the min/max
2349     * should ignore that value. */
2350    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2351                                         bld->type,
2352                                         PIPE_FUNC_NOTEQUAL,
2353                                         x, bld->zero);
2354    LLVMValueRef min = lp_build_min(bld, v0, v1);
2355 
2356    return lp_build_select(bld, mask, min, v0);
2357 }
2358 
2359 
2360 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2361 lp_build_sample_max(struct lp_build_context *bld,
2362                     LLVMValueRef x,
2363                     LLVMValueRef v0,
2364                     LLVMValueRef v1)
2365 {
2366    /* if the incoming LERP weight is 0 then the min/max
2367     * should ignore that value. */
2368    LLVMValueRef mask = lp_build_compare(bld->gallivm,
2369                                         bld->type,
2370                                         PIPE_FUNC_NOTEQUAL,
2371                                         x, bld->zero);
2372    LLVMValueRef max = lp_build_max(bld, v0, v1);
2373 
2374    return lp_build_select(bld, mask, max, v0);
2375 }
2376 
2377 
2378 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2379 lp_build_sample_min_2d(struct lp_build_context *bld,
2380                        LLVMValueRef x,
2381                        LLVMValueRef y,
2382                        LLVMValueRef a,
2383                        LLVMValueRef b,
2384                        LLVMValueRef c,
2385                        LLVMValueRef d)
2386 {
2387    LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2388    LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2389    return lp_build_sample_min(bld, y, v0, v1);
2390 }
2391 
2392 
2393 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2394 lp_build_sample_max_2d(struct lp_build_context *bld,
2395                        LLVMValueRef x,
2396                        LLVMValueRef y,
2397                        LLVMValueRef a,
2398                        LLVMValueRef b,
2399                        LLVMValueRef c,
2400                        LLVMValueRef d)
2401 {
2402    LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2403    LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2404    return lp_build_sample_max(bld, y, v0, v1);
2405 }
2406 
2407 
2408 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2409 lp_build_sample_min_3d(struct lp_build_context *bld,
2410                 LLVMValueRef x,
2411                 LLVMValueRef y,
2412                 LLVMValueRef z,
2413                 LLVMValueRef a, LLVMValueRef b,
2414                 LLVMValueRef c, LLVMValueRef d,
2415                 LLVMValueRef e, LLVMValueRef f,
2416                 LLVMValueRef g, LLVMValueRef h)
2417 {
2418    LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2419    LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2420    return lp_build_sample_min(bld, z, v0, v1);
2421 }
2422 
2423 
2424 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2425 lp_build_sample_max_3d(struct lp_build_context *bld,
2426                        LLVMValueRef x,
2427                        LLVMValueRef y,
2428                        LLVMValueRef z,
2429                        LLVMValueRef a, LLVMValueRef b,
2430                        LLVMValueRef c, LLVMValueRef d,
2431                        LLVMValueRef e, LLVMValueRef f,
2432                        LLVMValueRef g, LLVMValueRef h)
2433 {
2434    LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2435    LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2436    return lp_build_sample_max(bld, z, v0, v1);
2437 }
2438 
2439 
2440 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2441 lp_build_reduce_filter(struct lp_build_context *bld,
2442                        enum pipe_tex_reduction_mode mode,
2443                        unsigned flags,
2444                        unsigned num_chan,
2445                        LLVMValueRef x,
2446                        LLVMValueRef *v00,
2447                        LLVMValueRef *v01,
2448                        LLVMValueRef *out)
2449 {
2450    unsigned chan;
2451    switch (mode) {
2452    case PIPE_TEX_REDUCTION_MIN:
2453       for (chan = 0; chan < num_chan; chan++)
2454          out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2455       break;
2456    case PIPE_TEX_REDUCTION_MAX:
2457       for (chan = 0; chan < num_chan; chan++)
2458          out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2459       break;
2460    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2461    default:
2462       for (chan = 0; chan < num_chan; chan++)
2463          out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2464       break;
2465    }
2466 }
2467 
2468 
2469 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2470 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2471                           enum pipe_tex_reduction_mode mode,
2472                           unsigned flags,
2473                           unsigned num_chan,
2474                           LLVMValueRef x,
2475                           LLVMValueRef y,
2476                           LLVMValueRef *v00,
2477                           LLVMValueRef *v01,
2478                           LLVMValueRef *v10,
2479                           LLVMValueRef *v11,
2480                           LLVMValueRef *out)
2481 {
2482    switch (mode) {
2483    case PIPE_TEX_REDUCTION_MIN:
2484       for (unsigned chan = 0; chan < num_chan; chan++)
2485          out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan],
2486                                             v10[chan], v11[chan]);
2487       break;
2488    case PIPE_TEX_REDUCTION_MAX:
2489       for (unsigned chan = 0; chan < num_chan; chan++)
2490          out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan],
2491                                             v10[chan], v11[chan]);
2492       break;
2493    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2494    default:
2495       for (unsigned chan = 0; chan < num_chan; chan++)
2496          out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan],
2497                                       v10[chan], v11[chan], flags);
2498       break;
2499    }
2500 }
2501 
2502 
2503 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2504 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2505                           enum pipe_tex_reduction_mode mode,
2506                           unsigned flags,
2507                           unsigned num_chan,
2508                           LLVMValueRef x,
2509                           LLVMValueRef y,
2510                           LLVMValueRef z,
2511                           LLVMValueRef *v000,
2512                           LLVMValueRef *v001,
2513                           LLVMValueRef *v010,
2514                           LLVMValueRef *v011,
2515                           LLVMValueRef *v100,
2516                           LLVMValueRef *v101,
2517                           LLVMValueRef *v110,
2518                           LLVMValueRef *v111,
2519                           LLVMValueRef *out)
2520 {
2521    switch (mode) {
2522    case PIPE_TEX_REDUCTION_MIN:
2523       for (unsigned chan = 0; chan < num_chan; chan++)
2524          out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2525                                      v000[chan], v001[chan], v010[chan], v011[chan],
2526                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2527       break;
2528    case PIPE_TEX_REDUCTION_MAX:
2529       for (unsigned chan = 0; chan < num_chan; chan++)
2530          out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2531                                      v000[chan], v001[chan], v010[chan], v011[chan],
2532                                      v100[chan], v101[chan], v110[chan], v111[chan]);
2533       break;
2534    case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2535    default:
2536       for (unsigned chan = 0; chan < num_chan; chan++)
2537          out[chan] = lp_build_lerp_3d(bld, x, y, z,
2538                                       v000[chan], v001[chan], v010[chan], v011[chan],
2539                                       v100[chan], v101[chan], v110[chan], v111[chan],
2540                                       flags);
2541       break;
2542    }
2543 }
2544