• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Texture sampling -- SoA.
31  *
32  * @author Jose Fonseca <jfonseca@vmware.com>
33  * @author Brian Paul <brianp@vmware.com>
34  */
35 
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/compiler.h"
40 #include "util/u_debug.h"
41 #include "util/u_dump.h"
42 #include "util/u_memory.h"
43 #include "util/u_math.h"
44 #include "util/format/u_format.h"
45 #include "util/u_cpu_detect.h"
46 #include "util/format_rgb9e5.h"
47 #include "lp_bld_debug.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_conv.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_printf.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_format.h"
59 #include "lp_bld_sample.h"
60 #include "lp_bld_sample_aos.h"
61 #include "lp_bld_struct.h"
62 #include "lp_bld_quad.h"
63 #include "lp_bld_pack.h"
64 #include "lp_bld_intr.h"
65 #include "lp_bld_misc.h"
66 #include "lp_bld_jit_types.h"
67 
68 
69 /**
70  * Generate code to fetch a texel from a texture at int coords (x, y, z).
71  * The computation depends on whether the texture is 1D, 2D or 3D.
72  * The result, texel, will be float vectors:
73  *   texel[0] = red values
74  *   texel[1] = green values
75  *   texel[2] = blue values
76  *   texel[3] = alpha values
77  */
78 static void
lp_build_sample_texel_soa(struct lp_build_sample_context * bld,LLVMValueRef width,LLVMValueRef height,LLVMValueRef depth,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef texel_out[4])79 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
80                           LLVMValueRef width,
81                           LLVMValueRef height,
82                           LLVMValueRef depth,
83                           LLVMValueRef x,
84                           LLVMValueRef y,
85                           LLVMValueRef z,
86                           LLVMValueRef y_stride,
87                           LLVMValueRef z_stride,
88                           LLVMValueRef data_ptr,
89                           LLVMValueRef mipoffsets,
90                           LLVMValueRef texel_out[4])
91 {
92    const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
93    const unsigned dims = bld->dims;
94    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
95    LLVMBuilderRef builder = bld->gallivm->builder;
96    LLVMValueRef offset;
97    LLVMValueRef i, j;
98    LLVMValueRef use_border = NULL;
99 
100    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
101    if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
102                                               static_state->min_img_filter,
103                                               static_state->mag_img_filter)) {
104       LLVMValueRef b1, b2;
105       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
106       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
107       use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
108    }
109 
110    if (dims >= 2 &&
111        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
112                                               static_state->min_img_filter,
113                                               static_state->mag_img_filter)) {
114       LLVMValueRef b1, b2;
115       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
116       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
117       if (use_border) {
118          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
119          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
120       } else {
121          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
122       }
123    }
124 
125    if (dims == 3 &&
126        lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
127                                               static_state->min_img_filter,
128                                               static_state->mag_img_filter)) {
129       LLVMValueRef b1, b2;
130       b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
131       b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
132       if (use_border) {
133          use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
134          use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
135       } else {
136          use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
137       }
138    }
139 
140    /* convert x,y,z coords to linear offset from start of texture, in bytes */
141    lp_build_sample_offset(&bld->int_coord_bld,
142                           bld->format_desc,
143                           x, y, z, y_stride, z_stride,
144                           &offset, &i, &j);
145    if (mipoffsets) {
146       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
147    }
148 
149    if (use_border) {
150       /* If we can sample the border color, it means that texcoords may
151        * lie outside the bounds of the texture image.  We need to do
152        * something to prevent reading out of bounds and causing a segfault.
153        *
154        * Simply AND the texture coords with !use_border.  This will cause
155        * coords which are out of bounds to become zero.  Zero's guaranteed
156        * to be inside the texture image.
157        */
158       offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
159    }
160 
161    lp_build_fetch_rgba_soa(bld->gallivm,
162                            bld->format_desc,
163                            bld->texel_type, true,
164                            data_ptr, offset,
165                            i, j,
166                            bld->cache,
167                            texel_out);
168 
169    /*
170     * Note: if we find an app which frequently samples the texture border
171     * we might want to implement a true conditional here to avoid sampling
172     * the texture whenever possible (since that's quite a bit of code).
173     * Ex:
174     *   if (use_border) {
175     *      texel = border_color;
176     *   } else {
177     *      texel = sample_texture(coord);
178     *   }
179     * As it is now, we always sample the texture, then selectively replace
180     * the texel color results with the border color.
181     */
182 
183    if (use_border) {
184       /* select texel color or border color depending on use_border. */
185       const struct util_format_description *format_desc = bld->format_desc;
186       struct lp_type border_type = bld->texel_type;
187       border_type.length = 4;
188       /*
189        * Only replace channels which are actually present. The others should
190        * get optimized away eventually by sampler_view swizzle anyway but it's
191        * easier too.
192        */
193       for (unsigned chan = 0; chan < 4; chan++) {
194          unsigned chan_s;
195          /* reverse-map channel... */
196          if (util_format_has_stencil(format_desc)) {
197             if (chan == 0)
198                chan_s = 0;
199             else
200                break;
201          } else {
202             for (chan_s = 0; chan_s < 4; chan_s++) {
203                if (chan_s == format_desc->swizzle[chan]) {
204                   break;
205                }
206             }
207          }
208          if (chan_s <= 3) {
209             /* use the already clamped color */
210             LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
211             LLVMValueRef border_chan;
212 
213             border_chan = lp_build_extract_broadcast(bld->gallivm,
214                                                      border_type,
215                                                      bld->texel_type,
216                                                      bld->border_color_clamped,
217                                                      idx);
218             texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
219                                               border_chan, texel_out[chan]);
220          }
221       }
222    }
223 }
224 
225 static LLVMValueRef
get_first_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)226 get_first_level(struct gallivm_state *gallivm,
227                 LLVMTypeRef resources_type,
228                 LLVMValueRef resources_ptr,
229                 unsigned texture_unit,
230                 LLVMValueRef texture_unit_offset,
231                 const struct lp_static_texture_state *static_state,
232                 struct lp_sampler_dynamic_state *dynamic_state)
233 {
234    if (static_state->level_zero_only)
235       return lp_build_const_int32(gallivm, 0);
236    else {
237       LLVMValueRef first_level;
238 
239       first_level = dynamic_state->first_level(gallivm, resources_type,
240                                                resources_ptr, texture_unit,
241                                                texture_unit_offset);
242       first_level = LLVMBuildZExt(gallivm->builder, first_level,
243                                   LLVMInt32TypeInContext(gallivm->context), "");
244       return first_level;
245    }
246 }
247 
248 
249 static LLVMValueRef
get_last_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)250 get_last_level(struct gallivm_state *gallivm,
251                LLVMTypeRef resources_type,
252                LLVMValueRef resources_ptr,
253                unsigned texture_unit,
254                LLVMValueRef texture_unit_offset,
255                const struct lp_static_texture_state *static_state,
256                struct lp_sampler_dynamic_state *dynamic_state)
257 {
258    if (static_state->level_zero_only)
259       return lp_build_const_int32(gallivm, 0);
260    else {
261       LLVMValueRef last_level;
262 
263       last_level = dynamic_state->last_level(gallivm, resources_type,
264                                              resources_ptr, texture_unit,
265                                              texture_unit_offset);
266       last_level = LLVMBuildZExt(gallivm->builder, last_level,
267                                  LLVMInt32TypeInContext(gallivm->context), "");
268       return last_level;
269    }
270 }
271 
272 /**
273  * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
274  * (Note that with pot sizes could do this much more easily post-scale
275  * with some bit arithmetic.)
276  */
277 static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context * bld,LLVMValueRef coord,bool posOnly)278 lp_build_coord_mirror(struct lp_build_sample_context *bld,
279                       LLVMValueRef coord, bool posOnly)
280 {
281    struct lp_build_context *coord_bld = &bld->coord_bld;
282    LLVMValueRef fract;
283    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
284 
285    /*
286     * We can just use 2*(x - round(0.5*x)) to do all the mirroring,
287     * it all works out. (The result is in range [-1, 1.0], negative if
288     * the coord is in the "odd" section, otherwise positive.)
289     */
290 
291    coord = lp_build_mul(coord_bld, coord, half);
292    fract = lp_build_round(coord_bld, coord);
293    fract = lp_build_sub(coord_bld, coord, fract);
294    coord = lp_build_add(coord_bld, fract, fract);
295 
296    if (posOnly) {
297       /*
298        * Theoretically it's not quite 100% accurate because the spec says
299        * that ultimately a scaled coord of -x.0 should map to int coord
300        * -x + 1 with mirroring, not -x (this does not matter for bilinear
301        * filtering).
302        */
303       coord = lp_build_abs(coord_bld, coord);
304       /* kill off NaNs */
305       /* XXX: not safe without arch rounding, fract can be anything. */
306       coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
307                                GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
308    }
309 
310    return coord;
311 }
312 
313 
314 /**
315  * Helper to compute the first coord and the weight for
316  * linear wrap repeat npot textures
317  */
318 void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_f)319 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
320                                   LLVMValueRef coord_f,
321                                   LLVMValueRef length_i,
322                                   LLVMValueRef length_f,
323                                   LLVMValueRef *coord0_i,
324                                   LLVMValueRef *weight_f)
325 {
326    struct lp_build_context *coord_bld = &bld->coord_bld;
327    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
328    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
329    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
330                                                 int_coord_bld->one);
331    LLVMValueRef mask;
332    /* wrap with normalized floats is just fract */
333    coord_f = lp_build_fract(coord_bld, coord_f);
334    /* mul by size and subtract 0.5 */
335    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
336    coord_f = lp_build_sub(coord_bld, coord_f, half);
337    /*
338     * we avoided the 0.5/length division before the repeat wrap,
339     * now need to fix up edge cases with selects
340     */
341    /*
342     * Note we do a float (unordered) compare so we can eliminate NaNs.
343     * (Otherwise would need fract_safe above).
344     */
345    mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
346                            PIPE_FUNC_LESS, coord_f, coord_bld->zero);
347 
348    /* convert to int, compute lerp weight */
349    lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
350    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
351 }
352 
353 
354 /**
355  * Build LLVM code for texture wrap mode for linear filtering.
356  * \param x0_out  returns first integer texcoord
357  * \param x1_out  returns second integer texcoord
358  * \param weight_out  returns linear interpolation weight
359  */
360 static void
lp_build_sample_wrap_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef length_f,LLVMValueRef offset,bool is_pot,unsigned wrap_mode,LLVMValueRef * x0_out,LLVMValueRef * x1_out,LLVMValueRef * weight_out)361 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
362                             bool is_gather,
363                             LLVMValueRef coord,
364                             LLVMValueRef length,
365                             LLVMValueRef length_f,
366                             LLVMValueRef offset,
367                             bool is_pot,
368                             unsigned wrap_mode,
369                             LLVMValueRef *x0_out,
370                             LLVMValueRef *x1_out,
371                             LLVMValueRef *weight_out)
372 {
373    struct lp_build_context *coord_bld = &bld->coord_bld;
374    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
375    LLVMBuilderRef builder = bld->gallivm->builder;
376    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
377    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
378    LLVMValueRef coord0, coord1, weight;
379 
380    switch (wrap_mode) {
381    case PIPE_TEX_WRAP_REPEAT:
382       if (is_pot) {
383          /* mul by size and subtract 0.5 */
384          coord = lp_build_mul(coord_bld, coord, length_f);
385          coord = lp_build_sub(coord_bld, coord, half);
386          if (offset) {
387             offset = lp_build_int_to_float(coord_bld, offset);
388             coord = lp_build_add(coord_bld, coord, offset);
389          }
390          /* convert to int, compute lerp weight */
391          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
392          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
393          /* repeat wrap */
394          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
395          coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
396       } else {
397          LLVMValueRef mask;
398          if (offset) {
399             offset = lp_build_int_to_float(coord_bld, offset);
400             offset = lp_build_div(coord_bld, offset, length_f);
401             coord = lp_build_add(coord_bld, coord, offset);
402          }
403          lp_build_coord_repeat_npot_linear(bld, coord,
404                                            length, length_f,
405                                            &coord0, &weight);
406          mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
407                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
408          coord1 = LLVMBuildAnd(builder,
409                                lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
410                                mask, "");
411       }
412       break;
413 
414    case PIPE_TEX_WRAP_CLAMP:
415       if (bld->static_sampler_state->normalized_coords) {
416          /* scale coord to length */
417          coord = lp_build_mul(coord_bld, coord, length_f);
418       }
419       if (offset) {
420          offset = lp_build_int_to_float(coord_bld, offset);
421          coord = lp_build_add(coord_bld, coord, offset);
422       }
423 
424       /*
425        * clamp to [0, length]
426        *
427        * Unlike some other wrap modes, this should be correct for gather
428        * too. GL_CLAMP explicitly does this clamp on the coord prior to
429        * actual wrapping (which is per sample).
430        */
431       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
432 
433       coord = lp_build_sub(coord_bld, coord, half);
434 
435       /* convert to int, compute lerp weight */
436       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
437       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
438       break;
439 
440    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
441       {
442          struct lp_build_context abs_coord_bld = bld->coord_bld;
443          abs_coord_bld.type.sign = false;
444 
445          if (bld->static_sampler_state->normalized_coords) {
446             /* mul by tex size */
447             coord = lp_build_mul(coord_bld, coord, length_f);
448          }
449          if (offset) {
450             offset = lp_build_int_to_float(coord_bld, offset);
451             coord = lp_build_add(coord_bld, coord, offset);
452          }
453 
454          /* clamp to length max */
455          coord = lp_build_min_ext(coord_bld, coord, length_f,
456                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
457          if (!is_gather) {
458             /* subtract 0.5 */
459             coord = lp_build_sub(coord_bld, coord, half);
460             /* clamp to [0, length - 0.5] */
461             coord = lp_build_max(coord_bld, coord, coord_bld->zero);
462             /* convert to int, compute lerp weight */
463             lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
464             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
465          } else {
466             /*
467              * The non-gather path will end up with coords 0, 1 if coord was
468              * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
469              * really matter what the second coord is). But for gather, we
470              * really need to end up with coords 0, 0.
471              */
472             coord = lp_build_max(coord_bld, coord, coord_bld->zero);
473             coord0 = lp_build_sub(coord_bld, coord, half);
474             coord1 = lp_build_add(coord_bld, coord, half);
475             /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */
476             coord0 = lp_build_itrunc(coord_bld, coord0);
477             coord1 = lp_build_itrunc(coord_bld, coord1);
478             weight = coord_bld->undef;
479          }
480          /* coord1 = min(coord1, length-1) */
481          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
482          break;
483       }
484 
485    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
486       if (bld->static_sampler_state->normalized_coords) {
487          /* scale coord to length */
488          coord = lp_build_mul(coord_bld, coord, length_f);
489       }
490       if (offset) {
491          offset = lp_build_int_to_float(coord_bld, offset);
492          coord = lp_build_add(coord_bld, coord, offset);
493       }
494       /*
495        * We don't need any clamp. Technically, for very large (pos or neg)
496        * (or infinite) values, clamp against [-length, length] would be
497        * correct, but we don't need to guarantee any specific
498        * result for such coords (the ifloor will be undefined, but for modes
499        * requiring border all resulting coords are safe).
500        */
501       coord = lp_build_sub(coord_bld, coord, half);
502       /* convert to int, compute lerp weight */
503       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
504       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
505       break;
506 
507    case PIPE_TEX_WRAP_MIRROR_REPEAT:
508       if (offset) {
509          offset = lp_build_int_to_float(coord_bld, offset);
510          offset = lp_build_div(coord_bld, offset, length_f);
511          coord = lp_build_add(coord_bld, coord, offset);
512       }
513       if (!is_gather) {
514          /* compute mirror function */
515          coord = lp_build_coord_mirror(bld, coord, true);
516 
517          /* scale coord to length */
518          coord = lp_build_mul(coord_bld, coord, length_f);
519          coord = lp_build_sub(coord_bld, coord, half);
520 
521          /* convert to int, compute lerp weight */
522          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
523          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
524 
525          /* coord0 = max(coord0, 0) */
526          coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
527          /* coord1 = min(coord1, length-1) */
528          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
529       } else {
530          /*
531           * This is pretty reasonable in the end,  all what the tests care
532           * about is nasty edge cases (scaled coords x.5, so the individual
533           * coords are actually integers, which is REALLY tricky to get right
534           * due to this working differently both for negative numbers as well
535           * as for even/odd cases). But with enough magic it's not too complex
536           * after all.
537           * Maybe should try a bit arithmetic one though for POT textures...
538           */
539          LLVMValueRef isNeg;
540          /*
541           * Wrapping just once still works, even though it means we can
542           * get "wrong" sign due to performing mirror in the middle of the
543           * two coords (because this can only happen very near the odd/even
544           * edges, so both coords will actually end up as 0 or length - 1
545           * in the end).
546           * For GL4 gather with per-sample offsets we'd need to the mirroring
547           * per coord too.
548           */
549          coord = lp_build_coord_mirror(bld, coord, false);
550          coord = lp_build_mul(coord_bld, coord, length_f);
551 
552          /*
553           * NaNs should be safe here, we'll do away with them with
554           * the ones' complement plus min.
555           */
556          coord0 = lp_build_sub(coord_bld, coord, half);
557          coord0 = lp_build_ifloor(coord_bld, coord0);
558          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
559          /* ones complement for neg numbers (mirror(negX) = X - 1)  */
560          isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
561                               coord0, int_coord_bld->zero);
562          coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
563          isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
564                               coord1, int_coord_bld->zero);
565          coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
566          coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
567          coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
568 
569          weight = coord_bld->undef;
570       }
571       break;
572 
573    case PIPE_TEX_WRAP_MIRROR_CLAMP:
574       if (bld->static_sampler_state->normalized_coords) {
575          /* scale coord to length */
576          coord = lp_build_mul(coord_bld, coord, length_f);
577       }
578       if (offset) {
579          offset = lp_build_int_to_float(coord_bld, offset);
580          coord = lp_build_add(coord_bld, coord, offset);
581       }
582       /*
583        * XXX: probably not correct for gather, albeit I'm not
584        * entirely sure as it's poorly specified. The wrapping looks
585        * correct according to the spec which is against gl 1.2.1,
586        * however negative values will be swapped - gl re-specified
587        * wrapping with newer versions (no more pre-clamp except with
588        * GL_CLAMP).
589        */
590       coord = lp_build_abs(coord_bld, coord);
591 
592       /* clamp to [0, length] */
593       coord = lp_build_min_ext(coord_bld, coord, length_f,
594                                GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
595 
596       coord = lp_build_sub(coord_bld, coord, half);
597 
598       /* convert to int, compute lerp weight */
599       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
600       coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
601       break;
602 
603    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
604       {
605          struct lp_build_context abs_coord_bld = bld->coord_bld;
606          abs_coord_bld.type.sign = false;
607 
608          if (bld->static_sampler_state->normalized_coords) {
609             /* scale coord to length */
610             coord = lp_build_mul(coord_bld, coord, length_f);
611          }
612          if (offset) {
613             offset = lp_build_int_to_float(coord_bld, offset);
614             coord = lp_build_add(coord_bld, coord, offset);
615          }
616          if (!is_gather) {
617             coord = lp_build_abs(coord_bld, coord);
618 
619             /* clamp to length max */
620             coord = lp_build_min_ext(coord_bld, coord, length_f,
621                                      GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
622             /* subtract 0.5 */
623             coord = lp_build_sub(coord_bld, coord, half);
624             /* clamp to [0, length - 0.5] */
625             coord = lp_build_max(coord_bld, coord, coord_bld->zero);
626 
627             /* convert to int, compute lerp weight */
628             lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
629             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
630             /* coord1 = min(coord1, length-1) */
631             coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
632          } else {
633             /*
634              * The non-gather path will swap coord0/1 if coord was negative,
635              * which is ok for filtering since the filter weight matches
636              * accordingly. Also, if coord is close to zero, coord0/1 will
637              * be 0 and 1, instead of 0 and 0 (again ok due to filter
638              * weight being 0.0). Both issues need to be fixed for gather.
639              */
640             LLVMValueRef isNeg;
641 
642             /*
643              * Actually wanted to cheat here and use:
644              * coord1 = lp_build_iround(coord_bld, coord);
645              * but it's not good enough for some tests (even piglit
646              * textureGather is set up in a way so the coords area always
647              * .5, that is right at the crossover points).
648              * So do ordinary sub/floor, then do ones' complement
649              * for negative numbers.
650              * (Note can't just do sub|add/abs/itrunc per coord neither -
651              * because the spec demands that mirror(3.0) = 3 but
652              * mirror(-3.0) = 2.)
653              */
654             coord = lp_build_sub(coord_bld, coord, half);
655             coord0 = lp_build_ifloor(coord_bld, coord);
656             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
657             isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
658                                  int_coord_bld->zero);
659             coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
660             coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
661 
662             isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
663                                  int_coord_bld->zero);
664             coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
665             coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
666 
667             weight = coord_bld->undef;
668          }
669       }
670       break;
671 
672    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
673       {
674          if (bld->static_sampler_state->normalized_coords) {
675             /* scale coord to length */
676             coord = lp_build_mul(coord_bld, coord, length_f);
677          }
678          if (offset) {
679             offset = lp_build_int_to_float(coord_bld, offset);
680             coord = lp_build_add(coord_bld, coord, offset);
681          }
682          /*
683           * XXX: probably not correct for gather due to swapped
684           * order if coord is negative (same rationale as for
685           * MIRROR_CLAMP).
686           */
687          coord = lp_build_abs(coord_bld, coord);
688 
689          /*
690           * We don't need any clamp. Technically, for very large
691           * (or infinite) values, clamp against length would be
692           * correct, but we don't need to guarantee any specific
693           * result for such coords (the ifloor will be undefined, but
694           * for modes requiring border all resulting coords are safe).
695           */
696          coord = lp_build_sub(coord_bld, coord, half);
697 
698          /* convert to int, compute lerp weight */
699          lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
700          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
701       }
702       break;
703 
704    default:
705       assert(0);
706       coord0 = NULL;
707       coord1 = NULL;
708       weight = NULL;
709    }
710 
711    *x0_out = coord0;
712    *x1_out = coord1;
713    *weight_out = weight;
714 }
715 
716 
717 /**
718  * Build LLVM code for texture wrap mode for nearest filtering.
719  * \param coord  the incoming texcoord (nominally in [0,1])
720  * \param length  the texture size along one dimension, as int vector
721  * \param length_f  the texture size along one dimension, as float vector
722  * \param offset  texel offset along one dimension (as int vector)
723  * \param is_pot  if TRUE, length is a power of two
724  * \param wrap_mode  one of PIPE_TEX_WRAP_x
725  */
726 static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context * bld,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef length_f,LLVMValueRef offset,bool is_pot,unsigned wrap_mode)727 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
728                              LLVMValueRef coord,
729                              LLVMValueRef length,
730                              LLVMValueRef length_f,
731                              LLVMValueRef offset,
732                              bool is_pot,
733                              unsigned wrap_mode)
734 {
735    struct lp_build_context *coord_bld = &bld->coord_bld;
736    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
737    LLVMBuilderRef builder = bld->gallivm->builder;
738    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
739    LLVMValueRef icoord;
740 
741    switch (wrap_mode) {
742    case PIPE_TEX_WRAP_REPEAT:
743       if (is_pot) {
744          coord = lp_build_mul(coord_bld, coord, length_f);
745          icoord = lp_build_ifloor(coord_bld, coord);
746          if (offset) {
747             icoord = lp_build_add(int_coord_bld, icoord, offset);
748          }
749          icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
750       } else {
751           if (offset) {
752              offset = lp_build_int_to_float(coord_bld, offset);
753              offset = lp_build_div(coord_bld, offset, length_f);
754              coord = lp_build_add(coord_bld, coord, offset);
755           }
756           /* take fraction, unnormalize */
757           coord = lp_build_fract_safe(coord_bld, coord);
758           coord = lp_build_mul(coord_bld, coord, length_f);
759           icoord = lp_build_itrunc(coord_bld, coord);
760       }
761       break;
762 
763    case PIPE_TEX_WRAP_CLAMP:
764    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
765       if (bld->static_sampler_state->normalized_coords) {
766          /* scale coord to length */
767          coord = lp_build_mul(coord_bld, coord, length_f);
768       }
769 
770       if (offset) {
771          offset = lp_build_int_to_float(coord_bld, offset);
772          coord = lp_build_add(coord_bld, coord, offset);
773       }
774       /* floor */
775       /* use itrunc instead since we clamp to 0 anyway */
776       icoord = lp_build_itrunc(coord_bld, coord);
777 
778       /* clamp to [0, length - 1]. */
779       icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
780                               length_minus_one);
781       break;
782 
783    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
784       if (bld->static_sampler_state->normalized_coords) {
785          /* scale coord to length */
786          coord = lp_build_mul(coord_bld, coord, length_f);
787       }
788       /* no clamp necessary, border masking will handle this */
789       icoord = lp_build_ifloor(coord_bld, coord);
790       if (offset) {
791          icoord = lp_build_add(int_coord_bld, icoord, offset);
792       }
793       break;
794 
795    case PIPE_TEX_WRAP_MIRROR_REPEAT:
796       if (offset) {
797          offset = lp_build_int_to_float(coord_bld, offset);
798          offset = lp_build_div(coord_bld, offset, length_f);
799          coord = lp_build_add(coord_bld, coord, offset);
800       }
801       /* compute mirror function */
802       coord = lp_build_coord_mirror(bld, coord, true);
803 
804       /* scale coord to length */
805       assert(bld->static_sampler_state->normalized_coords);
806       coord = lp_build_mul(coord_bld, coord, length_f);
807 
808       /* itrunc == ifloor here */
809       icoord = lp_build_itrunc(coord_bld, coord);
810 
811       /* clamp to [0, length - 1] */
812       icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
813       break;
814 
815    case PIPE_TEX_WRAP_MIRROR_CLAMP:
816    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
817       if (bld->static_sampler_state->normalized_coords) {
818          /* scale coord to length */
819          coord = lp_build_mul(coord_bld, coord, length_f);
820       }
821       if (offset) {
822          offset = lp_build_int_to_float(coord_bld, offset);
823          coord = lp_build_add(coord_bld, coord, offset);
824       }
825       coord = lp_build_abs(coord_bld, coord);
826 
827       /* itrunc == ifloor here */
828       icoord = lp_build_itrunc(coord_bld, coord);
829       /*
830        * Use unsigned min due to possible undef values (NaNs, overflow)
831        */
832       {
833          struct lp_build_context abs_coord_bld = *int_coord_bld;
834          abs_coord_bld.type.sign = false;
835          /* clamp to [0, length - 1] */
836          icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
837       }
838       break;
839 
840    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
841       if (bld->static_sampler_state->normalized_coords) {
842          /* scale coord to length */
843          coord = lp_build_mul(coord_bld, coord, length_f);
844       }
845       if (offset) {
846          offset = lp_build_int_to_float(coord_bld, offset);
847          coord = lp_build_add(coord_bld, coord, offset);
848       }
849       coord = lp_build_abs(coord_bld, coord);
850 
851       /* itrunc == ifloor here */
852       icoord = lp_build_itrunc(coord_bld, coord);
853       break;
854 
855    default:
856       assert(0);
857       icoord = NULL;
858    }
859 
860    return icoord;
861 }
862 
863 
864 /**
865  * Do shadow test/comparison.
866  * \param p shadow ref value
867  * \param texel  the texel to compare against
868  */
869 static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context * bld,LLVMValueRef p,LLVMValueRef texel)870 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
871                             LLVMValueRef p,
872                             LLVMValueRef texel)
873 {
874    struct lp_build_context *texel_bld = &bld->texel_bld;
875    LLVMValueRef res;
876 
877    if (0) {
878       //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
879       lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
880    }
881 
882    /* result = (p FUNC texel) ? 1 : 0 */
883    /*
884     * honor d3d10 floating point rules here, which state that comparisons
885     * are ordered except NOT_EQUAL which is unordered.
886     */
887    if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
888       res = lp_build_cmp_ordered(texel_bld,
889                                  bld->static_sampler_state->compare_func,
890                                  p, texel);
891    } else {
892       res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
893                          p, texel);
894    }
895    return res;
896 }
897 
898 
899 /**
900  * Generate code to sample a mipmap level with nearest filtering.
901  * If sampling a cube texture, r = cube face in [0,5].
902  */
903 static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])904 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
905                               LLVMValueRef size,
906                               LLVMValueRef row_stride_vec,
907                               LLVMValueRef img_stride_vec,
908                               LLVMValueRef data_ptr,
909                               LLVMValueRef mipoffsets,
910                               const LLVMValueRef *coords,
911                               const LLVMValueRef *offsets,
912                               LLVMValueRef colors_out[4])
913 {
914    const unsigned dims = bld->dims;
915    LLVMValueRef width_vec;
916    LLVMValueRef height_vec;
917    LLVMValueRef depth_vec;
918    LLVMValueRef flt_size;
919    LLVMValueRef flt_width_vec;
920    LLVMValueRef flt_height_vec;
921    LLVMValueRef flt_depth_vec;
922    LLVMValueRef x, y = NULL, z = NULL;
923 
924    lp_build_extract_image_sizes(bld,
925                                 &bld->int_size_bld,
926                                 bld->int_coord_type,
927                                 size,
928                                 &width_vec, &height_vec, &depth_vec);
929 
930    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
931 
932    lp_build_extract_image_sizes(bld,
933                                 &bld->float_size_bld,
934                                 bld->coord_type,
935                                 flt_size,
936                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
937 
938    /*
939     * Compute integer texcoords.
940     */
941    x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
942                                     flt_width_vec, offsets[0],
943                                     bld->static_texture_state->pot_width,
944                                     bld->static_sampler_state->wrap_s);
945    lp_build_name(x, "tex.x.wrapped");
946 
947    if (dims >= 2) {
948       y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
949                                        flt_height_vec, offsets[1],
950                                        bld->static_texture_state->pot_height,
951                                        bld->static_sampler_state->wrap_t);
952       lp_build_name(y, "tex.y.wrapped");
953 
954       if (dims == 3) {
955          z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
956                                           flt_depth_vec, offsets[2],
957                                           bld->static_texture_state->pot_depth,
958                                           bld->static_sampler_state->wrap_r);
959          lp_build_name(z, "tex.z.wrapped");
960       }
961    }
962    if (has_layer_coord(bld->static_texture_state->target)) {
963       if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
964          /* add cube layer to face */
965          z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
966       } else {
967          z = coords[2];
968       }
969       lp_build_name(z, "tex.z.layer");
970    }
971 
972    /*
973     * Get texture colors.
974     */
975    lp_build_sample_texel_soa(bld,
976                              width_vec, height_vec, depth_vec,
977                              x, y, z,
978                              row_stride_vec, img_stride_vec,
979                              data_ptr, mipoffsets, colors_out);
980 
981    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
982       LLVMValueRef cmpval;
983       cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
984       /* this is really just a AND 1.0, cmpval but llvm is clever enough */
985       colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
986                                       bld->texel_bld.one, bld->texel_bld.zero);
987       colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
988    }
989 
990 }
991 
992 
993 /**
994  * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
995  */
996 static LLVMValueRef
lp_build_masklerp(struct lp_build_context * bld,LLVMValueRef weight,LLVMValueRef mask0,LLVMValueRef mask1)997 lp_build_masklerp(struct lp_build_context *bld,
998                  LLVMValueRef weight,
999                  LLVMValueRef mask0,
1000                  LLVMValueRef mask1)
1001 {
1002    struct gallivm_state *gallivm = bld->gallivm;
1003    LLVMBuilderRef builder = gallivm->builder;
1004    LLVMValueRef weight2;
1005 
1006    weight2 = lp_build_sub(bld, bld->one, weight);
1007    weight = LLVMBuildBitCast(builder, weight,
1008                               lp_build_int_vec_type(gallivm, bld->type), "");
1009    weight2 = LLVMBuildBitCast(builder, weight2,
1010                               lp_build_int_vec_type(gallivm, bld->type), "");
1011    weight = LLVMBuildAnd(builder, weight, mask1, "");
1012    weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
1013    weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
1014    weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
1015    return lp_build_add(bld, weight, weight2);
1016 }
1017 
1018 /**
1019  * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
1020  */
1021 static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context * bld,LLVMValueRef weight0,LLVMValueRef weight1,LLVMValueRef mask00,LLVMValueRef mask01,LLVMValueRef mask10,LLVMValueRef mask11)1022 lp_build_masklerp2d(struct lp_build_context *bld,
1023                     LLVMValueRef weight0,
1024                     LLVMValueRef weight1,
1025                     LLVMValueRef mask00,
1026                     LLVMValueRef mask01,
1027                     LLVMValueRef mask10,
1028                     LLVMValueRef mask11)
1029 {
1030    LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
1031    LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
1032    return lp_build_lerp(bld, weight1, val0, val1, 0);
1033 }
1034 
1035 /*
1036  * Accurate cube corner filtering takes a fair amount of extra code for
1037  * something OpenGL merely recommends but does not require.
1038  */
1039 #define ACCURATE_CUBE_CORNERS 1
1040 
1041 /**
1042  * Generate code to sample a mipmap level with linear filtering.
1043  * If sampling a cube texture, r = cube face in [0,5].
1044  * If linear_mask is present, only pixels having their mask set
1045  * will receive linear filtering, the rest will use nearest.
1046  */
1047 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef size,LLVMValueRef linear_mask,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])1048 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1049                              bool is_gather,
1050                              LLVMValueRef size,
1051                              LLVMValueRef linear_mask,
1052                              LLVMValueRef row_stride_vec,
1053                              LLVMValueRef img_stride_vec,
1054                              LLVMValueRef data_ptr,
1055                              LLVMValueRef mipoffsets,
1056                              const LLVMValueRef *coords,
1057                              const LLVMValueRef *offsets,
1058                              LLVMValueRef colors_out[4])
1059 {
1060    LLVMBuilderRef builder = bld->gallivm->builder;
1061    struct lp_build_context *ivec_bld = &bld->int_coord_bld;
1062    struct lp_build_context *coord_bld = &bld->coord_bld;
1063    struct lp_build_context *texel_bld = &bld->texel_bld;
1064    const unsigned dims = bld->dims;
1065    LLVMValueRef width_vec;
1066    LLVMValueRef height_vec;
1067    LLVMValueRef depth_vec;
1068    LLVMValueRef flt_size;
1069    LLVMValueRef flt_width_vec;
1070    LLVMValueRef flt_height_vec;
1071    LLVMValueRef flt_depth_vec;
1072    LLVMValueRef fall_off[4] = { 0 }, have_corners = NULL;
1073    LLVMValueRef z1 = NULL;
1074    LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
1075    LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
1076    LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
1077    LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
1078    LLVMValueRef xs[4], ys[4], zs[4];
1079    LLVMValueRef neighbors[2][2][4];
1080    bool seamless_cube_filter, accurate_cube_corners;
1081    unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1082 
1083    if (is_gather) {
1084       switch (bld->gather_comp) {
1085       case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
1086       case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
1087       case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
1088       case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
1089       default:
1090          break;
1091       }
1092    }
1093 
1094    seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
1095                            bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
1096                           bld->static_sampler_state->seamless_cube_map;
1097 
1098    /*
1099     * Disable accurate cube corners for integer textures, which should only
1100     * get here in the gather path.
1101     */
1102    accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
1103      !util_format_is_pure_integer(bld->static_texture_state->format);
1104 
1105    lp_build_extract_image_sizes(bld,
1106                                 &bld->int_size_bld,
1107                                 bld->int_coord_type,
1108                                 size,
1109                                 &width_vec, &height_vec, &depth_vec);
1110 
1111    flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
1112 
1113    lp_build_extract_image_sizes(bld,
1114                                 &bld->float_size_bld,
1115                                 bld->coord_type,
1116                                 flt_size,
1117                                 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
1118 
1119    LLVMTypeRef int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1120 
1121    /*
1122     * Compute integer texcoords.
1123     */
1124 
1125    if (!seamless_cube_filter) {
1126       lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
1127                                   flt_width_vec, offsets[0],
1128                                   bld->static_texture_state->pot_width,
1129                                   bld->static_sampler_state->wrap_s,
1130                                   &x00, &x01, &s_fpart);
1131       lp_build_name(x00, "tex.x0.wrapped");
1132       lp_build_name(x01, "tex.x1.wrapped");
1133       x10 = x00;
1134       x11 = x01;
1135 
1136       if (dims >= 2) {
1137          lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
1138                                      flt_height_vec, offsets[1],
1139                                      bld->static_texture_state->pot_height,
1140                                      bld->static_sampler_state->wrap_t,
1141                                      &y00, &y10, &t_fpart);
1142          lp_build_name(y00, "tex.y0.wrapped");
1143          lp_build_name(y10, "tex.y1.wrapped");
1144          y01 = y00;
1145          y11 = y10;
1146 
1147          if (dims == 3) {
1148             lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
1149                                         flt_depth_vec, offsets[2],
1150                                         bld->static_texture_state->pot_depth,
1151                                         bld->static_sampler_state->wrap_r,
1152                                         &z00, &z1, &r_fpart);
1153             z01 = z10 = z11 = z00;
1154             lp_build_name(z00, "tex.z0.wrapped");
1155             lp_build_name(z1, "tex.z1.wrapped");
1156          }
1157       }
1158       if (has_layer_coord(bld->static_texture_state->target)) {
1159          if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1160             /* add cube layer to face */
1161             z00 = z01 = z10 = z11 = z1 =
1162                lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
1163          } else {
1164             z00 = z01 = z10 = z11 = z1 = coords[2];  /* cube face or layer */
1165          }
1166          lp_build_name(z00, "tex.z0.layer");
1167          lp_build_name(z1, "tex.z1.layer");
1168       }
1169    } else {
1170       struct lp_build_if_state edge_if;
1171       LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
1172       LLVMValueRef coord0, coord1, have_edge, have_corner;
1173       LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
1174       LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
1175       LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
1176       LLVMValueRef face = coords[2];
1177       LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
1178       LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
1179       /* XXX drop height calcs. Could (should) do this without seamless filtering too */
1180       height_vec = width_vec;
1181       flt_height_vec = flt_width_vec;
1182 
1183       /* XXX the overflow logic is actually sort of duplicated with trilinear,
1184        * since an overflow in one mip should also have a corresponding overflow
1185        * in another.
1186        */
1187       /* should always have normalized coords, and offsets are undefined */
1188       assert(bld->static_sampler_state->normalized_coords);
1189       /*
1190        * The coords should all be between [0,1] however we can have NaNs,
1191        * which will wreak havoc. In particular the y1_clamped value below
1192        * can be -INT_MAX (on x86) and be propagated right through (probably
1193        * other values might be bogus in the end too).
1194        * So kill off the NaNs here.
1195        */
1196       coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
1197                                 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1198       coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
1199       /* instead of clamp, build mask if overflowed */
1200       coord0 = lp_build_sub(coord_bld, coord0, half);
1201       /* convert to int, compute lerp weight */
1202       /* not ideal with AVX (and no AVX2) */
1203       lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
1204       x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
1205       coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
1206                                 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1207       coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
1208       coord1 = lp_build_sub(coord_bld, coord1, half);
1209       lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
1210       y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
1211 
1212       fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
1213       fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
1214       fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
1215       fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
1216 
1217       fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
1218       fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
1219       have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
1220       have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
1221 
1222       /* needed for accurate corner filtering branch later, rely on 0 init */
1223       have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1224 
1225       for (unsigned texel_index = 0; texel_index < 4; texel_index++) {
1226          xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1227          ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1228          zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1229       }
1230 
1231       lp_build_if(&edge_if, bld->gallivm, have_edge);
1232 
1233       have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1234       have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1235       LLVMBuildStore(builder, have_corner, have_corners);
1236 
1237       /*
1238        * Need to feed clamped values here for cheap corner handling,
1239        * but only for y coord (as when falling off both edges we only
1240        * fall off the x one) - this should be sufficient.
1241        */
1242       y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1243       y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1244 
1245       /*
1246        * Get all possible new coords.
1247        */
1248       lp_build_cube_new_coords(ivec_bld, face,
1249                                x0, x1, y0_clamped, y1_clamped,
1250                                length_minus_one,
1251                                new_faces, new_xcoords, new_ycoords);
1252 
1253       /* handle fall off x-, x+ direction */
1254       /* determine new coords, face (not both fall_off vars can be true at same time) */
1255       x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1256       y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1257       x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1258       y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1259       x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1260       y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1261       x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1262       y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1263 
1264       z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1265       z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1266 
1267       /* handle fall off y-, y+ direction */
1268       /*
1269        * Cheap corner logic: just hack up things so a texel doesn't fall
1270        * off both sides (which means filter weights will be wrong but we'll only
1271        * use valid texels in the filter).
1272        * This means however (y) coords must additionally be clamped (see above).
1273        * This corner handling should be fully OpenGL (but not d3d10) compliant.
1274        */
1275       fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1276       fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1277       fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1278       fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1279 
1280       x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1281       y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1282       x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1283       y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1284       x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1285       y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1286       x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1287       y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1288 
1289       z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1290       z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1291       z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1292       z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1293 
1294       if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1295          /* now can add cube layer to face (per sample) */
1296          z00 = lp_build_add(ivec_bld, z00, coords[3]);
1297          z01 = lp_build_add(ivec_bld, z01, coords[3]);
1298          z10 = lp_build_add(ivec_bld, z10, coords[3]);
1299          z11 = lp_build_add(ivec_bld, z11, coords[3]);
1300       }
1301 
1302       LLVMBuildStore(builder, x00, xs[0]);
1303       LLVMBuildStore(builder, x01, xs[1]);
1304       LLVMBuildStore(builder, x10, xs[2]);
1305       LLVMBuildStore(builder, x11, xs[3]);
1306       LLVMBuildStore(builder, y00, ys[0]);
1307       LLVMBuildStore(builder, y01, ys[1]);
1308       LLVMBuildStore(builder, y10, ys[2]);
1309       LLVMBuildStore(builder, y11, ys[3]);
1310       LLVMBuildStore(builder, z00, zs[0]);
1311       LLVMBuildStore(builder, z01, zs[1]);
1312       LLVMBuildStore(builder, z10, zs[2]);
1313       LLVMBuildStore(builder, z11, zs[3]);
1314 
1315       lp_build_else(&edge_if);
1316 
1317       LLVMBuildStore(builder, x0, xs[0]);
1318       LLVMBuildStore(builder, x1, xs[1]);
1319       LLVMBuildStore(builder, x0, xs[2]);
1320       LLVMBuildStore(builder, x1, xs[3]);
1321       LLVMBuildStore(builder, y0, ys[0]);
1322       LLVMBuildStore(builder, y0, ys[1]);
1323       LLVMBuildStore(builder, y1, ys[2]);
1324       LLVMBuildStore(builder, y1, ys[3]);
1325       if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1326          LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1327          LLVMBuildStore(builder, cube_layer, zs[0]);
1328          LLVMBuildStore(builder, cube_layer, zs[1]);
1329          LLVMBuildStore(builder, cube_layer, zs[2]);
1330          LLVMBuildStore(builder, cube_layer, zs[3]);
1331       } else {
1332          LLVMBuildStore(builder, face, zs[0]);
1333          LLVMBuildStore(builder, face, zs[1]);
1334          LLVMBuildStore(builder, face, zs[2]);
1335          LLVMBuildStore(builder, face, zs[3]);
1336       }
1337 
1338       lp_build_endif(&edge_if);
1339 
1340       LLVMTypeRef type = ivec_bld->vec_type;
1341       x00 = LLVMBuildLoad2(builder, type, xs[0], "");
1342       x01 = LLVMBuildLoad2(builder, type, xs[1], "");
1343       x10 = LLVMBuildLoad2(builder, type, xs[2], "");
1344       x11 = LLVMBuildLoad2(builder, type, xs[3], "");
1345       y00 = LLVMBuildLoad2(builder, type, ys[0], "");
1346       y01 = LLVMBuildLoad2(builder, type, ys[1], "");
1347       y10 = LLVMBuildLoad2(builder, type, ys[2], "");
1348       y11 = LLVMBuildLoad2(builder, type, ys[3], "");
1349       z00 = LLVMBuildLoad2(builder, type, zs[0], "");
1350       z01 = LLVMBuildLoad2(builder, type, zs[1], "");
1351       z10 = LLVMBuildLoad2(builder, type, zs[2], "");
1352       z11 = LLVMBuildLoad2(builder, type, zs[3], "");
1353    }
1354 
1355    if (linear_mask) {
1356       /*
1357        * Whack filter weights into place. Whatever texel had more weight is
1358        * the one which should have been selected by nearest filtering hence
1359        * just use 100% weight for it.
1360        */
1361       struct lp_build_context *c_bld = &bld->coord_bld;
1362       LLVMValueRef w1_mask, w1_weight;
1363       LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1364 
1365       w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1366       /* this select is really just a "and" */
1367       w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1368       s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1369       if (dims >= 2) {
1370          w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1371          w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1372          t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1373          if (dims == 3) {
1374             w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1375             w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1376             r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1377          }
1378       }
1379    }
1380 
1381    /*
1382     * Get texture colors.
1383     */
1384    /* get x0/x1 texels */
1385    lp_build_sample_texel_soa(bld,
1386                              width_vec, height_vec, depth_vec,
1387                              x00, y00, z00,
1388                              row_stride_vec, img_stride_vec,
1389                              data_ptr, mipoffsets, neighbors[0][0]);
1390    lp_build_sample_texel_soa(bld,
1391                              width_vec, height_vec, depth_vec,
1392                              x01, y01, z01,
1393                              row_stride_vec, img_stride_vec,
1394                              data_ptr, mipoffsets, neighbors[0][1]);
1395 
1396    if (dims == 1) {
1397       assert(!is_gather);
1398       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1399          lp_build_reduce_filter(texel_bld,
1400                                 bld->static_sampler_state->reduction_mode,
1401                                 0,
1402                                 4,
1403                                 s_fpart,
1404                                 neighbors[0][0],
1405                                 neighbors[0][1],
1406                                 colors_out);
1407       } else {
1408          LLVMValueRef cmpval0, cmpval1;
1409          cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1410          cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1411          /* simplified lerp, AND mask with weight and add */
1412          colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1413                                            cmpval0, cmpval1);
1414          colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1415       }
1416    } else {
1417       /* 2D/3D texture */
1418       struct lp_build_if_state corner_if;
1419       LLVMValueRef colors0[4], colorss[4] = { 0 };
1420 
1421       /* get x0/x1 texels at y1 */
1422       lp_build_sample_texel_soa(bld,
1423                                 width_vec, height_vec, depth_vec,
1424                                 x10, y10, z10,
1425                                 row_stride_vec, img_stride_vec,
1426                                 data_ptr, mipoffsets, neighbors[1][0]);
1427       lp_build_sample_texel_soa(bld,
1428                                 width_vec, height_vec, depth_vec,
1429                                 x11, y11, z11,
1430                                 row_stride_vec, img_stride_vec,
1431                                 data_ptr, mipoffsets, neighbors[1][1]);
1432 
1433       /*
1434        * To avoid having to duplicate linear_mask / fetch code use
1435        * another branch (with corner condition though edge would work
1436        * as well) here.
1437        */
1438       if (have_corners && accurate_cube_corners &&
1439           bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1440          LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
1441          LLVMValueRef have_corner, one_third;
1442 
1443          colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
1444          colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
1445          colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
1446          colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
1447 
1448          have_corner = LLVMBuildLoad2(builder, int1t, have_corners, "");
1449 
1450          lp_build_if(&corner_if, bld->gallivm, have_corner);
1451 
1452          one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
1453                                         1.0f/3.0f);
1454 
1455          /* find corner */
1456          c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1457          c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1458          c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1459          c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1460          c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1461          c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1462          c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1463          c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1464 
1465          if (!is_gather) {
1466             /*
1467              * we can't use standard 2d lerp as we need per-element weight
1468              * in case of corners, so just calculate bilinear result as
1469              * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1470              * (This is actually less work than using 2d lerp, 7 vs. 9
1471              * instructions, however calculating the weights needs another 6,
1472              * so actually probably not slower than 2d lerp only for 4 channels
1473              * as weights only need to be calculated once - of course fixing
1474              * the weights has additional cost.)
1475              */
1476             LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
1477             wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1478             wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1479             w00 = lp_build_mul(coord_bld, wx0, wy0);
1480             w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1481             w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1482             w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1483 
1484             /* find corner weight */
1485             c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1486             c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1487             c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1488             c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1489 
1490             /*
1491              * add 1/3 of the corner weight to the weight of the 3 other
1492              * samples and null out corner weight.
1493              */
1494             c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1495             w00 = lp_build_add(coord_bld, w00, c_weight);
1496             w00 = lp_build_andnot(coord_bld, w00, c00f);
1497             w01 = lp_build_add(coord_bld, w01, c_weight);
1498             w01 = lp_build_andnot(coord_bld, w01, c01f);
1499             w10 = lp_build_add(coord_bld, w10, c_weight);
1500             w10 = lp_build_andnot(coord_bld, w10, c10f);
1501             w11 = lp_build_add(coord_bld, w11, c_weight);
1502             w11 = lp_build_andnot(coord_bld, w11, c11f);
1503 
1504             if (bld->static_sampler_state->compare_mode ==
1505                 PIPE_TEX_COMPARE_NONE) {
1506                for (unsigned chan = 0; chan < 4; chan++) {
1507                   colors0[chan] = lp_build_mul(coord_bld, w00,
1508                                                neighbors[0][0][chan]);
1509                   tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1510                   colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1511                   tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1512                   colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1513                   tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1514                   colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1515                }
1516             } else {
1517                LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1518                cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
1519                                                       neighbors[0][0][0]);
1520                cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
1521                                                       neighbors[0][1][0]);
1522                cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
1523                                                       neighbors[1][0][0]);
1524                cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
1525                                                       neighbors[1][1][0]);
1526                /*
1527                 * inputs to interpolation are just masks so just add
1528                 * masked weights together
1529                 */
1530                cmpval00 = LLVMBuildBitCast(builder, cmpval00,
1531                                            coord_bld->vec_type, "");
1532                cmpval01 = LLVMBuildBitCast(builder, cmpval01,
1533                                            coord_bld->vec_type, "");
1534                cmpval10 = LLVMBuildBitCast(builder, cmpval10,
1535                                            coord_bld->vec_type, "");
1536                cmpval11 = LLVMBuildBitCast(builder, cmpval11,
1537                                            coord_bld->vec_type, "");
1538                colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1539                tmp = lp_build_and(coord_bld, w01, cmpval01);
1540                colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1541                tmp = lp_build_and(coord_bld, w10, cmpval10);
1542                colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1543                tmp = lp_build_and(coord_bld, w11, cmpval11);
1544                colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1545                colors0[1] = colors0[2] = colors0[3] = colors0[0];
1546             }
1547          } else {
1548             /*
1549              * We don't have any weights to adjust, so instead calculate
1550              * the fourth texel as simply the average of the other 3.
1551              * (This would work for non-gather too, however we'd have
1552              * a boatload more of the select stuff due to there being
1553              * 4 times as many colors as weights.)
1554              */
1555             LLVMValueRef col00, col01, col10, col11;
1556             LLVMValueRef colc, colc0, colc1;
1557             col10 = lp_build_swizzle_soa_channel(texel_bld,
1558                                                  neighbors[1][0], chan_swiz);
1559             col11 = lp_build_swizzle_soa_channel(texel_bld,
1560                                                  neighbors[1][1], chan_swiz);
1561             col01 = lp_build_swizzle_soa_channel(texel_bld,
1562                                                  neighbors[0][1], chan_swiz);
1563             col00 = lp_build_swizzle_soa_channel(texel_bld,
1564                                                  neighbors[0][0], chan_swiz);
1565 
1566             /*
1567              * The spec says for comparison filtering, the comparison
1568              * must happen before synthesizing the new value.
1569              * This means all gathered values are always 0 or 1,
1570              * except for the non-existing texel, which can be 0,1/3,2/3,1...
1571              * Seems like we'd be allowed to just return 0 or 1 too, so we
1572              * could simplify and pass down the compare mask values to the
1573              * end (using int arithmetic/compare on the mask values to
1574              * construct the fourth texel) and only there convert to floats
1575              * but it's probably not worth it (it might be easier for the cpu
1576              * but not for the code)...
1577              */
1578             if (bld->static_sampler_state->compare_mode !=
1579                 PIPE_TEX_COMPARE_NONE) {
1580                LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1581                cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
1582                cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
1583                cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
1584                cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
1585                col00 = lp_build_select(texel_bld, cmpval00,
1586                                        texel_bld->one, texel_bld->zero);
1587                col01 = lp_build_select(texel_bld, cmpval01,
1588                                        texel_bld->one, texel_bld->zero);
1589                col10 = lp_build_select(texel_bld, cmpval10,
1590                                        texel_bld->one, texel_bld->zero);
1591                col11 = lp_build_select(texel_bld, cmpval11,
1592                                        texel_bld->one, texel_bld->zero);
1593             }
1594 
1595             /*
1596              * Null out corner color.
1597              */
1598             col00 = lp_build_andnot(coord_bld, col00, c00f);
1599             col01 = lp_build_andnot(coord_bld, col01, c01f);
1600             col10 = lp_build_andnot(coord_bld, col10, c10f);
1601             col11 = lp_build_andnot(coord_bld, col11, c11f);
1602 
1603             /*
1604              * New corner texel color is all colors added / 3.
1605              */
1606             colc0 = lp_build_add(coord_bld, col00, col01);
1607             colc1 = lp_build_add(coord_bld, col10, col11);
1608             colc = lp_build_add(coord_bld, colc0, colc1);
1609             colc = lp_build_mul(coord_bld, one_third, colc);
1610 
1611             /*
1612              * Replace the corner texel color with the new value.
1613              */
1614             col00 = lp_build_select(coord_bld, c00, colc, col00);
1615             col01 = lp_build_select(coord_bld, c01, colc, col01);
1616             col10 = lp_build_select(coord_bld, c10, colc, col10);
1617             col11 = lp_build_select(coord_bld, c11, colc, col11);
1618 
1619             colors0[0] = col10;
1620             colors0[1] = col11;
1621             colors0[2] = col01;
1622             colors0[3] = col00;
1623          }
1624 
1625          LLVMBuildStore(builder, colors0[0], colorss[0]);
1626          LLVMBuildStore(builder, colors0[1], colorss[1]);
1627          LLVMBuildStore(builder, colors0[2], colorss[2]);
1628          LLVMBuildStore(builder, colors0[3], colorss[3]);
1629 
1630          lp_build_else(&corner_if);
1631       }
1632 
1633       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1634          if (is_gather) {
1635             /*
1636              * Just assign the red channel (no component selection yet).
1637              * This is a bit hackish, we usually do the swizzle at the
1638              * end of sampling (much less values to swizzle), but this
1639              * obviously cannot work when using gather.
1640              */
1641             colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1642                                                       neighbors[1][0],
1643                                                       chan_swiz);
1644             colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1645                                                       neighbors[1][1],
1646                                                       chan_swiz);
1647             colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1648                                                       neighbors[0][1],
1649                                                       chan_swiz);
1650             colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1651                                                       neighbors[0][0],
1652                                                       chan_swiz);
1653          } else {
1654             /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1655             lp_build_reduce_filter_2d(texel_bld,
1656                                       bld->static_sampler_state->reduction_mode,
1657                                       0,
1658                                       4,
1659                                       s_fpart,
1660                                       t_fpart,
1661                                       neighbors[0][0],
1662                                       neighbors[0][1],
1663                                       neighbors[1][0],
1664                                       neighbors[1][1],
1665                                       colors0);
1666          }
1667       } else {
1668          LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1669          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1670          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1671          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1672          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1673 
1674          if (is_gather) {
1675             /* more hacks for swizzling, should be X, ONE or ZERO... */
1676             colors0[0] = lp_build_select(texel_bld, cmpval10,
1677                                          texel_bld->one, texel_bld->zero);
1678             colors0[1] = lp_build_select(texel_bld, cmpval11,
1679                                          texel_bld->one, texel_bld->zero);
1680             colors0[2] = lp_build_select(texel_bld, cmpval01,
1681                                          texel_bld->one, texel_bld->zero);
1682             colors0[3] = lp_build_select(texel_bld, cmpval00,
1683                                          texel_bld->one, texel_bld->zero);
1684          } else {
1685             colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1686                                              cmpval00, cmpval01, cmpval10, cmpval11);
1687             colors0[1] = colors0[2] = colors0[3] = colors0[0];
1688          }
1689       }
1690 
1691       if (have_corners && accurate_cube_corners &&
1692           bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1693          LLVMBuildStore(builder, colors0[0], colorss[0]);
1694          LLVMBuildStore(builder, colors0[1], colorss[1]);
1695          LLVMBuildStore(builder, colors0[2], colorss[2]);
1696          LLVMBuildStore(builder, colors0[3], colorss[3]);
1697 
1698          lp_build_endif(&corner_if);
1699 
1700          colors0[0] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[0], "");
1701          colors0[1] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[1], "");
1702          colors0[2] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[2], "");
1703          colors0[3] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[3], "");
1704       }
1705 
1706       if (dims == 3) {
1707          LLVMValueRef neighbors1[2][2][4];
1708          LLVMValueRef colors1[4];
1709 
1710          assert(!is_gather);
1711 
1712          /* get x0/x1/y0/y1 texels at z1 */
1713          lp_build_sample_texel_soa(bld,
1714                                    width_vec, height_vec, depth_vec,
1715                                    x00, y00, z1,
1716                                    row_stride_vec, img_stride_vec,
1717                                    data_ptr, mipoffsets, neighbors1[0][0]);
1718          lp_build_sample_texel_soa(bld,
1719                                    width_vec, height_vec, depth_vec,
1720                                    x01, y01, z1,
1721                                    row_stride_vec, img_stride_vec,
1722                                    data_ptr, mipoffsets, neighbors1[0][1]);
1723          lp_build_sample_texel_soa(bld,
1724                                    width_vec, height_vec, depth_vec,
1725                                    x10, y10, z1,
1726                                    row_stride_vec, img_stride_vec,
1727                                    data_ptr, mipoffsets, neighbors1[1][0]);
1728          lp_build_sample_texel_soa(bld,
1729                                    width_vec, height_vec, depth_vec,
1730                                    x11, y11, z1,
1731                                    row_stride_vec, img_stride_vec,
1732                                    data_ptr, mipoffsets, neighbors1[1][1]);
1733 
1734          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1735             /* Bilinear interpolate the four samples from the second Z slice */
1736             lp_build_reduce_filter_2d(texel_bld,
1737                                       bld->static_sampler_state->reduction_mode,
1738                                       0,
1739                                       4,
1740                                       s_fpart,
1741                                       t_fpart,
1742                                       neighbors1[0][0],
1743                                       neighbors1[0][1],
1744                                       neighbors1[1][0],
1745                                       neighbors1[1][1],
1746                                       colors1);
1747 
1748             /* Linearly interpolate the two samples from the two 3D slices */
1749             lp_build_reduce_filter(texel_bld,
1750                                    bld->static_sampler_state->reduction_mode,
1751                                    0,
1752                                    4,
1753                                    r_fpart,
1754                                    colors0,
1755                                    colors1,
1756                                    colors_out);
1757          } else {
1758             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1759             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1760             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1761             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1762             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1763             colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1764                                              cmpval00, cmpval01, cmpval10, cmpval11);
1765             /* Linearly interpolate the two samples from the two 3D slices */
1766             colors_out[0] = lp_build_lerp(texel_bld,
1767                                           r_fpart,
1768                                           colors0[0], colors1[0],
1769                                           0);
1770             colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1771          }
1772       } else {
1773          /* 2D tex */
1774          for (unsigned chan = 0; chan < 4; chan++) {
1775             colors_out[chan] = colors0[chan];
1776          }
1777       }
1778    }
1779    if (is_gather) {
1780       /*
1781        * For gather, we can't do our usual channel swizzling done later,
1782        * so do it here. It only really matters for 0/1 swizzles in case
1783        * of comparison filtering, since in this case the results would be
1784        * wrong, without comparison it should all work out alright but it
1785        * can't hurt to do that here, since it will instantly drop all
1786        * calculations above, though it's a rather stupid idea to do
1787        * gather on a channel which will always return 0 or 1 in any case...
1788        */
1789       if (chan_swiz == PIPE_SWIZZLE_1) {
1790          for (unsigned chan = 0; chan < 4; chan++) {
1791             colors_out[chan] = texel_bld->one;
1792          }
1793       } else if (chan_swiz == PIPE_SWIZZLE_0) {
1794          for (unsigned chan = 0; chan < 4; chan++) {
1795             colors_out[chan] = texel_bld->zero;
1796          }
1797       }
1798    }
1799 }
1800 
1801 
1802 /**
1803  * Sample the texture/mipmap using given image filter and mip filter.
1804  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1805  * from (vectors or scalars).
1806  * If we're using nearest miplevel sampling the '1' values will be null/unused.
 *
 * The first level (ilevel0) is always sampled and its four channel values
 * are stored through colors_out[0..3]. When mip_filter is
 * PIPE_TEX_MIPFILTER_LINEAR, a conditional region is additionally emitted
 * that samples ilevel1, lerps the two results by lod_fpart and stores the
 * blended values through the same colors_out[] destinations.
 *
 * @param bld         sample build context; supplies the builder, num_mips,
 *                    num_lods, base_ptr, coord_type and the lodf/lodi/texel
 *                    build contexts used below
 * @param img_filter  PIPE_TEX_FILTER_NEAREST selects the nearest image
 *                    sampler; otherwise the linear image sampler is used
 *                    (asserted to be PIPE_TEX_FILTER_LINEAR for level 0)
 * @param mip_filter  the second level is only sampled and blended when this
 *                    is PIPE_TEX_MIPFILTER_LINEAR
 * @param is_gather   forwarded to the linear image sampler for the first
 *                    level only; the second level always passes false
 * @param coords      forwarded unchanged to the image samplers
 * @param offsets     forwarded unchanged to the image samplers
 * @param ilevel0     first mipmap level to sample
 * @param ilevel1     second mipmap level; only read on the linear mip path
 * @param lod_fpart   weight used to lerp between the two levels; only read
 *                    on the linear mip path
 * @param colors_out  four store destinations (used as the pointer operand
 *                    of LLVMBuildStore) receiving the resulting channels
1807  */
1808 static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef * colors_out)1809 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1810                        unsigned img_filter,
1811                        unsigned mip_filter,
1812                        bool is_gather,
1813                        const LLVMValueRef *coords,
1814                        const LLVMValueRef *offsets,
1815                        LLVMValueRef ilevel0,
1816                        LLVMValueRef ilevel1,
1817                        LLVMValueRef lod_fpart,
1818                        LLVMValueRef *colors_out)
1819 {
1820    LLVMBuilderRef builder = bld->gallivm->builder;
        /*
         * The '0' variables describe ilevel0, the '1' variables ilevel1.
         * The '1' set is only filled in on the linear mip filter path.
         */
1821    LLVMValueRef size0 = NULL;
1822    LLVMValueRef size1 = NULL;
1823    LLVMValueRef row_stride0_vec = NULL;
1824    LLVMValueRef row_stride1_vec = NULL;
1825    LLVMValueRef img_stride0_vec = NULL;
1826    LLVMValueRef img_stride1_vec = NULL;
1827    LLVMValueRef data_ptr0 = NULL;
1828    LLVMValueRef data_ptr1 = NULL;
1829    LLVMValueRef mipoff0 = NULL;
1830    LLVMValueRef mipoff1 = NULL;
1831    LLVMValueRef colors0[4], colors1[4];
1832 
1833    /* sample the first mipmap level */
1834    lp_build_mipmap_level_sizes(bld, ilevel0,
1835                                &size0,
1836                                &row_stride0_vec, &img_stride0_vec);
        /*
         * With a single mip the level's data pointer is fetched directly and
         * mipoff0 stays NULL; otherwise the base pointer is used together
         * with per-level mip offsets.
         */
1837    if (bld->num_mips == 1) {
1838       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1839    } else {
1840       /* This path should work for num_lods 1 too but slightly less efficient */
1841       data_ptr0 = bld->base_ptr;
1842       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1843    }
1844 
1845    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1846       lp_build_sample_image_nearest(bld, size0,
1847                                     row_stride0_vec, img_stride0_vec,
1848                                     data_ptr0, mipoff0, coords, offsets,
1849                                     colors0);
1850    } else {
1851       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
           /*
            * The NULL argument sits in the position where
            * lp_build_sample_mipmap_both passes its linear_mask.
            */
1852       lp_build_sample_image_linear(bld, is_gather, size0, NULL,
1853                                    row_stride0_vec, img_stride0_vec,
1854                                    data_ptr0, mipoff0, coords, offsets,
1855                                    colors0);
1856    }
1857 
1858    /* Store the first level's colors in the output variables */
1859    for (unsigned chan = 0; chan < 4; chan++) {
1860        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1861    }
1862 
1863    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1864       struct lp_build_if_state if_ctx;
1865       LLVMValueRef need_lerp;
1866 
1867       /* need_lerp = lod_fpart > 0 */
1868       if (bld->num_lods == 1) {
              /*
               * Single lod: one direct float compare using the
               * unordered-or-greater-than predicate.
               * NOTE(review): presumably lod_fpart is a scalar here, since
               * the result is used directly as the lp_build_if condition.
               */
1869          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1870                                    lod_fpart, bld->lodf_bld.zero,
1871                                    "need_lerp");
1872       } else {
1873          /*
1874           * We'll do mip filtering if any of the quads (or individual
1875           * pixel in case of per-pixel lod) need it.
1876           * It might be better to split the vectors here and only fetch/filter
1877           * quads which need it (if there's one lod per quad).
1878           */
1879          need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1880                                       PIPE_FUNC_GREATER,
1881                                       lod_fpart, bld->lodf_bld.zero);
              /* Reduce the per-lod compare result to the single condition used below. */
1882          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1883          lp_build_name(need_lerp, "need_lerp");
1884       }
1885 
           /*
            * Everything up to lp_build_endif is emitted inside the
            * need_lerp conditional; when it is not taken, the first-level
            * colors stored above remain in colors_out[].
            */
1886       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1887       {
1888          /*
1889           * We unfortunately need to clamp lod_fpart here since we can get
1890           * negative values which would screw up filtering if not all
1891           * lod_fpart values have same sign.
1892           */
1893          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1894                                   bld->lodf_bld.zero);
1895          /* sample the second mipmap level */
1896          lp_build_mipmap_level_sizes(bld, ilevel1,
1897                                      &size1,
1898                                      &row_stride1_vec, &img_stride1_vec);
1899          if (bld->num_mips == 1) {
1900             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1901          } else {
1902             data_ptr1 = bld->base_ptr;
1903             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1904          }
1905          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1906             lp_build_sample_image_nearest(bld, size1,
1907                                           row_stride1_vec, img_stride1_vec,
1908                                           data_ptr1, mipoff1, coords, offsets,
1909                                           colors1);
1910          } else {
                 /* is_gather is not forwarded for the second level; false is passed. */
1911             lp_build_sample_image_linear(bld, false, size1, NULL,
1912                                          row_stride1_vec, img_stride1_vec,
1913                                          data_ptr1, mipoff1, coords, offsets,
1914                                          colors1);
1915          }
1916 
1917          /* interpolate samples from the two mipmap levels */
1918 
              /*
               * When there are fewer lods than coordinate vector elements,
               * expand lod_fpart from the lodf_bld type to the texel_bld
               * type before it is used as the lerp weight.
               */
1919          if (bld->num_lods != bld->coord_type.length)
1920             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1921                                                               bld->lodf_bld.type,
1922                                                               bld->texel_bld.type,
1923                                                               lod_fpart);
1924 
              /* Blend per channel and overwrite the first-level values stored earlier. */
1925          for (unsigned chan = 0; chan < 4; chan++) {
1926             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1927                                           colors0[chan], colors1[chan],
1928                                           0);
1929             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1930          }
1931       }
1932       lp_build_endif(&if_ctx);
1933    }
1934 }
1935 
1936 
1937 /**
1938  * Sample the texture/mipmap using given mip filter, and using
1939  * both nearest and linear filtering at the same time depending
1940  * on linear_mask.
1941  * lod can be per quad but linear_mask is always per pixel.
1942  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1943  * from (vectors or scalars).
1944  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1945  */
1946 static void
lp_build_sample_mipmap_both(struct lp_build_sample_context * bld,LLVMValueRef linear_mask,unsigned mip_filter,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef lod_positive,LLVMValueRef * colors_out)1947 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1948                             LLVMValueRef linear_mask,
1949                             unsigned mip_filter,
1950                             const LLVMValueRef *coords,
1951                             const LLVMValueRef *offsets,
1952                             LLVMValueRef ilevel0,
1953                             LLVMValueRef ilevel1,
1954                             LLVMValueRef lod_fpart,
1955                             LLVMValueRef lod_positive,
1956                             LLVMValueRef *colors_out)
1957 {
1958    LLVMBuilderRef builder = bld->gallivm->builder;
1959    LLVMValueRef size0 = NULL;
1960    LLVMValueRef size1 = NULL;
1961    LLVMValueRef row_stride0_vec = NULL;
1962    LLVMValueRef row_stride1_vec = NULL;
1963    LLVMValueRef img_stride0_vec = NULL;
1964    LLVMValueRef img_stride1_vec = NULL;
1965    LLVMValueRef data_ptr0 = NULL;
1966    LLVMValueRef data_ptr1 = NULL;
1967    LLVMValueRef mipoff0 = NULL;
1968    LLVMValueRef mipoff1 = NULL;
1969    LLVMValueRef colors0[4], colors1[4];
1970 
1971    /* sample the first mipmap level */
1972    lp_build_mipmap_level_sizes(bld, ilevel0,
1973                                &size0,
1974                                &row_stride0_vec, &img_stride0_vec);
1975    if (bld->num_mips == 1) {
1976       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1977    } else {
1978       /* This path should work for num_lods 1 too but slightly less efficient */
1979       data_ptr0 = bld->base_ptr;
1980       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1981    }
1982 
1983    lp_build_sample_image_linear(bld, false, size0, linear_mask,
1984                                 row_stride0_vec, img_stride0_vec,
1985                                 data_ptr0, mipoff0, coords, offsets,
1986                                 colors0);
1987 
1988    /* Store the first level's colors in the output variables */
1989    for (unsigned chan = 0; chan < 4; chan++) {
1990        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1991    }
1992 
1993    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1994       struct lp_build_if_state if_ctx;
1995       LLVMValueRef need_lerp;
1996 
1997       /*
1998        * We'll do mip filtering if any of the quads (or individual
1999        * pixel in case of per-pixel lod) need it.
2000        * Note using lod_positive here not lod_fpart since it may be the same
2001        * condition as that used in the outer "if" in the caller hence llvm
2002        * should be able to merge the branches in this case.
2003        */
2004       need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
2005       lp_build_name(need_lerp, "need_lerp");
2006 
2007       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
2008       {
2009          /*
2010           * We unfortunately need to clamp lod_fpart here since we can get
2011           * negative values which would screw up filtering if not all
2012           * lod_fpart values have same sign.
2013           */
2014          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
2015                                   bld->lodf_bld.zero);
2016          /* sample the second mipmap level */
2017          lp_build_mipmap_level_sizes(bld, ilevel1,
2018                                      &size1,
2019                                      &row_stride1_vec, &img_stride1_vec);
2020          if (bld->num_mips == 1) {
2021             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
2022          } else {
2023             data_ptr1 = bld->base_ptr;
2024             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
2025          }
2026 
2027          lp_build_sample_image_linear(bld, false, size1, linear_mask,
2028                                       row_stride1_vec, img_stride1_vec,
2029                                       data_ptr1, mipoff1, coords, offsets,
2030                                       colors1);
2031 
2032          /* interpolate samples from the two mipmap levels */
2033 
2034          if (bld->num_lods != bld->coord_type.length)
2035             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2036                                                               bld->lodf_bld.type,
2037                                                               bld->texel_bld.type,
2038                                                               lod_fpart);
2039 
2040          for (unsigned chan = 0; chan < 4; chan++) {
2041             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
2042                                           colors0[chan], colors1[chan],
2043                                           0);
2044             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
2045          }
2046       }
2047       lp_build_endif(&if_ctx);
2048    }
2049 }
2050 
2051 
2052 /**
2053  * Build (per-coord) layer value.
2054  * Either clamp layer to valid values or fill in optional out_of_bounds
2055  * value and just return value unclamped.
2056  */
2057 static LLVMValueRef
lp_build_layer_coord(struct lp_build_sample_context * bld,unsigned texture_unit,bool is_cube_array,LLVMValueRef layer,LLVMValueRef * out_of_bounds)2058 lp_build_layer_coord(struct lp_build_sample_context *bld,
2059                      unsigned texture_unit,
2060                      bool is_cube_array,
2061                      LLVMValueRef layer,
2062                      LLVMValueRef *out_of_bounds)
2063 {
2064    LLVMValueRef num_layers;
2065    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2066 
2067    num_layers = bld->dynamic_state->depth(bld->gallivm, bld->resources_type,
2068                                           bld->resources_ptr, texture_unit, NULL);
2069    num_layers = LLVMBuildZExt(bld->gallivm->builder, num_layers,
2070                               bld->int_bld.elem_type, "");
2071    if (out_of_bounds) {
2072       LLVMValueRef out1, out;
2073       assert(!is_cube_array);
2074       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
2075       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
2076       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
2077       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
2078       return layer;
2079    } else {
2080       LLVMValueRef maxlayer;
2081       LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
2082                                        bld->int_bld.one;
2083       maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
2084       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
2085       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
2086    }
2087 }
2088 
2089 static void
lp_build_sample_ms_offset(struct lp_build_context * int_coord_bld,LLVMValueRef ms_index,LLVMValueRef num_samples,LLVMValueRef sample_stride,LLVMValueRef * offset,LLVMValueRef * out_of_bounds)2090 lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld,
2091                           LLVMValueRef ms_index,
2092                           LLVMValueRef num_samples,
2093                           LLVMValueRef sample_stride,
2094                           LLVMValueRef *offset,
2095                           LLVMValueRef *out_of_bounds)
2096 {
2097    LLVMValueRef out1;
2098    num_samples = lp_build_broadcast_scalar(int_coord_bld, num_samples);
2099    sample_stride = lp_build_broadcast_scalar(int_coord_bld, sample_stride);
2100    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
2101    *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2102    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, num_samples);
2103    *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2104    LLVMValueRef sample_offset = lp_build_mul(int_coord_bld,
2105                                              sample_stride, ms_index);
2106    *offset = lp_build_add(int_coord_bld, *offset, sample_offset);
2107 }
2108 
2109 
2110 #define WEIGHT_LUT_SIZE 1024
2111 
2112 
2113 static void
lp_build_sample_aniso(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef * colors_out)2114 lp_build_sample_aniso(struct lp_build_sample_context *bld,
2115                       unsigned img_filter,
2116                       unsigned mip_filter,
2117                       bool is_gather,
2118                       const LLVMValueRef *coords,
2119                       const LLVMValueRef *offsets,
2120                       LLVMValueRef ilevel0,
2121                       LLVMValueRef ilevel1,
2122                       LLVMValueRef lod_fpart,
2123                       LLVMValueRef *colors_out)
2124 {
2125    struct gallivm_state *gallivm = bld->gallivm;
2126    LLVMBuilderRef builder = gallivm->builder;
2127    struct lp_build_context *coord_bld = &bld->coord_bld;
2128    struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
2129    LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, coords[0], coords[1]);
2130    LLVMValueRef float_size;
2131    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2132    LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
2133    LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
2134    const unsigned length = bld->coord_bld.type.length;
2135    const unsigned num_quads = length / 4;
2136    LLVMValueRef filter_table = bld->aniso_filter_table;
2137    LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
2138    LLVMValueRef data_ptr0, mipoff0 = NULL;
2139 
2140    lp_build_mipmap_level_sizes(bld, ilevel0,
2141                                &size0,
2142                                &row_stride0_vec, &img_stride0_vec);
2143    if (bld->num_mips == 1) {
2144       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
2145    } else {
2146       /* This path should work for num_lods 1 too but slightly less efficient */
2147       data_ptr0 = bld->base_ptr;
2148       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
2149    }
2150 
2151    float_size = lp_build_int_to_float(&bld->float_size_in_bld, bld->int_size);
2152 
2153    LLVMValueRef float_size_lvl = lp_build_int_to_float(&bld->float_size_bld, size0);
2154    /* extract width and height into vectors for use later */
2155    static const unsigned char swizzle15[] = { /* no-op swizzle */
2156       1, 1, 1, 1, 5, 5, 5, 5
2157    };
2158    static const unsigned char swizzle04[] = { /* no-op swizzle */
2159       0, 0, 0, 0, 4, 4, 4, 4
2160    };
2161    LLVMValueRef width_dim, height_dim;
2162 
2163    width_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle04,
2164                                       bld->float_size_bld.type.length,
2165                                       bld->coord_bld.type.length);
2166    height_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle15,
2167                                        bld->float_size_bld.type.length,
2168                                        bld->coord_bld.type.length);
2169 
2170 
2171    /* shuffle width/height for ddx/ddy calculations. */
2172    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
2173 
2174    for (unsigned i = 0; i < num_quads; i++) {
2175       shuffles[i*4+0] = shuffles[i*4+1] = index0;
2176       shuffles[i*4+2] = shuffles[i*4+3] = index1;
2177    }
2178 
2179    LLVMValueRef floatdim =
2180       LLVMBuildShuffleVector(builder, float_size, float_size,
2181                              LLVMConstVector(shuffles, length), "");
2182 
2183    ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
2184 
2185    LLVMValueRef scaling =
2186       lp_build_shl(&bld->leveli_bld, bld->leveli_bld.one, ilevel0);
2187    scaling = lp_build_int_to_float(&bld->levelf_bld, scaling);
2188    scaling = lp_build_rcp(&bld->levelf_bld, scaling);
2189 
2190    if (bld->levelf_bld.type.length != length) {
2191       if (bld->levelf_bld.type.length == 1) {
2192          scaling = lp_build_broadcast_scalar(coord_bld,
2193                                              scaling);
2194       } else {
2195          scaling = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2196                                                          bld->levelf_bld.type,
2197                                                          coord_bld->type,
2198                                                          scaling);
2199       }
2200    }
2201 
2202    ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, scaling);
2203 
2204    static const unsigned char swizzle01[] = { /* no-op swizzle */
2205       0, 1, 0, 1,
2206    };
2207    static const unsigned char swizzle23[] = {
2208       2, 3, 2, 3,
2209    };
2210 
2211    LLVMValueRef ddx_ddys, ddx_ddyt;
2212    ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
2213    ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
2214 
2215    /* compute ellipse coefficients */
2216    /* * A*x*x + B*x*y + C*y*y = F.*/
2217    /* float A = vx*vx+vy*vy+1; */
2218    LLVMValueRef A = lp_build_mul(coord_bld, ddx_ddyt, ddx_ddyt);
2219 
2220    LLVMValueRef Ay = lp_build_swizzle_aos(coord_bld, A, swizzle15);
2221    A = lp_build_add(coord_bld, A, Ay);
2222    A = lp_build_add(coord_bld, A, coord_bld->one);
2223    A = lp_build_swizzle_aos(coord_bld, A, swizzle04);
2224 
2225    /* float B = -2*(ux*vx+uy*vy); */
2226    LLVMValueRef B = lp_build_mul(coord_bld, ddx_ddys, ddx_ddyt);
2227    LLVMValueRef By = lp_build_swizzle_aos(coord_bld, B, swizzle15);
2228    B = lp_build_add(coord_bld, B, By);
2229    B = lp_build_mul_imm(coord_bld, B, -2);
2230    B = lp_build_swizzle_aos(coord_bld, B, swizzle04);
2231 
2232    /* float C = ux*ux+uy*uy+1; */
2233    LLVMValueRef C = lp_build_mul(coord_bld, ddx_ddys, ddx_ddys);
2234    LLVMValueRef Cy = lp_build_swizzle_aos(coord_bld, C, swizzle15);
2235    C = lp_build_add(coord_bld, C, Cy);
2236    C = lp_build_add(coord_bld, C, coord_bld->one);
2237    C = lp_build_swizzle_aos(coord_bld, C, swizzle04);
2238 
2239    /* float F = A*C-B*B/4.0f; */
2240    LLVMValueRef F = lp_build_mul(coord_bld, B, B);
2241    F = lp_build_div(coord_bld, F, lp_build_const_vec(gallivm, coord_bld->type, 4.0));
2242    LLVMValueRef F_p2 = lp_build_mul(coord_bld, A, C);
2243    F = lp_build_sub(coord_bld, F_p2, F);
2244 
2245    /* compute ellipse bounding box in texture space */
2246    /* const float d = -B*B+4.0f*C*A; */
2247    LLVMValueRef d = lp_build_sub(coord_bld, coord_bld->zero, lp_build_mul(coord_bld, B, B));
2248    LLVMValueRef d_p2 = lp_build_mul(coord_bld, A, C);
2249    d_p2 = lp_build_mul_imm(coord_bld, d_p2, 4);
2250    d = lp_build_add(coord_bld, d, d_p2);
2251 
2252    /* const float box_u = 2.0f / d * sqrtf(d*C*F); */
2253    /* box_u -> half of bbox with   */
2254    LLVMValueRef temp;
2255    temp = lp_build_mul(coord_bld, d, C);
2256    temp = lp_build_mul(coord_bld, temp, F);
2257    temp = lp_build_sqrt(coord_bld, temp);
2258 
2259    LLVMValueRef box_u = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2260    box_u = lp_build_mul(coord_bld, box_u, temp);
2261 
2262    /* const float box_v = 2.0f / d * sqrtf(A*d*F); */
2263    /* box_v -> half of bbox height */
2264    temp = lp_build_mul(coord_bld, A, d);
2265    temp = lp_build_mul(coord_bld, temp, F);
2266    temp = lp_build_sqrt(coord_bld, temp);
2267 
2268    LLVMValueRef box_v = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2269    box_v = lp_build_mul(coord_bld, box_v, temp);
2270 
2271    /* Scale ellipse formula to directly index the Filter Lookup Table.
2272     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2273     */
2274    LLVMValueRef formScale = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, WEIGHT_LUT_SIZE - 1), F);
2275 
2276    A = lp_build_mul(coord_bld, A, formScale);
2277    B = lp_build_mul(coord_bld, B, formScale);
2278    C = lp_build_mul(coord_bld, C, formScale);
2279    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2280 
2281    LLVMValueRef ddq = lp_build_mul_imm(coord_bld, A, 2);
2282 
2283    /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2284     * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2285     * value, q, is less than F, we're inside the ellipse
2286     */
2287 
2288    LLVMValueRef float_size0 = lp_build_int_to_float(float_size_bld, bld->int_size);
2289    LLVMValueRef width0 = lp_build_extract_broadcast(gallivm,
2290                                                     float_size_bld->type,
2291                                                     coord_bld->type,
2292                                                     float_size0, index0);
2293    LLVMValueRef height0 = lp_build_extract_broadcast(gallivm,
2294                                                      float_size_bld->type,
2295                                                      coord_bld->type,
2296                                                      float_size0, index1);
2297 
2298    /* texture->width0 * scaling */
2299    width0 = lp_build_mul(coord_bld, width0, scaling);
2300    /* texture->height0 * scaling */
2301    height0 = lp_build_mul(coord_bld, height0, scaling);
2302 
2303    /* tex_u = -0.5f * s[j] * texture->width0 * scaling */
2304    LLVMValueRef tex_u = lp_build_mul(coord_bld, coords[0], width0);
2305    tex_u = lp_build_add(coord_bld, tex_u, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2306 
2307    /* tex_v = -0.5f * t[j] * texture->height0 * scaling */
2308    LLVMValueRef tex_v = lp_build_mul(coord_bld, coords[1], height0);
2309    tex_v = lp_build_add(coord_bld, tex_v, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2310 
2311    /* const int u0 = (int) floorf(tex_u - box_u); */
2312    LLVMValueRef u0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_u, box_u)));
2313    /* const int u1 = (int) ceilf(tex_u + box_u); */
2314    LLVMValueRef u1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_u, box_u)));
2315 
2316    /* const int v0 = (int) floorf(tex_v - box_v); */
2317    LLVMValueRef v0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_v, box_v)));
2318    /* const int v1 = (int) ceilf(tex_v + box_v); */
2319    LLVMValueRef v1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_v, box_v)));
2320 
2321    /* const float U = u0 - tex_u; */
2322    LLVMValueRef U = lp_build_sub(coord_bld, lp_build_int_to_float(coord_bld, u0), tex_u);
2323 
2324    /* A * (2 * U + 1) */
2325    LLVMValueRef dq_base = lp_build_mul_imm(coord_bld, U, 2);
2326    dq_base = lp_build_add(coord_bld, dq_base, coord_bld->one);
2327    dq_base = lp_build_mul(coord_bld, dq_base, A);
2328 
2329    /* A * U * U */
2330    LLVMValueRef q_base = lp_build_mul(coord_bld, U, U);
2331    q_base = lp_build_mul(coord_bld, q_base, A);
2332 
2333    LLVMValueRef colors0[4];
2334    LLVMValueRef den_store = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "den");
2335 
2336    for (unsigned chan = 0; chan < 4; chan++)
2337       colors0[chan] = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "colors");
2338 
2339    LLVMValueRef q_store, dq_store;
2340    q_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "q");
2341    dq_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "dq");
2342 
2343    LLVMValueRef v_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "v_limiter");
2344    LLVMValueRef u_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "u_limiter");
2345 
2346    LLVMBuildStore(builder, v0, v_limiter);
2347 
2348    /* create an LLVM loop block for the V iterator */
2349    LLVMBasicBlockRef v_loop_block = lp_build_insert_new_block(gallivm, "vloop");
2350 
2351    LLVMBuildBr(builder, v_loop_block);
2352    LLVMPositionBuilderAtEnd(builder, v_loop_block);
2353 
2354    LLVMValueRef v_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, v_limiter, "");
2355    LLVMValueRef v_mask = LLVMBuildICmp(builder, LLVMIntSLE, v_val, v1, "");
2356 
2357    /* loop over V values. */
2358    {
2359       /*  const float V = v - tex_v; */
2360       LLVMValueRef V =
2361          lp_build_sub(coord_bld,
2362                       lp_build_int_to_float(coord_bld, v_val), tex_v);
2363 
2364       /* float dq = dq_base + B * V; */
2365       LLVMValueRef dq = lp_build_mul(coord_bld, V, B);
2366       dq = lp_build_add(coord_bld, dq, dq_base);
2367 
2368       /* float q = (C * V + B * U) * V + q_base */
2369       LLVMValueRef q = lp_build_mul(coord_bld, C, V);
2370       q = lp_build_add(coord_bld, q, lp_build_mul(coord_bld, B, U));
2371       q = lp_build_mul(coord_bld, q, V);
2372       q = lp_build_add(coord_bld, q, q_base);
2373 
2374       LLVMBuildStore(builder, q, q_store);
2375       LLVMBuildStore(builder, dq, dq_store);
2376 
2377       LLVMBuildStore(builder, u0, u_limiter);
2378 
2379       /* create an LLVM loop block for the V iterator */
2380       LLVMBasicBlockRef u_loop_block = lp_build_insert_new_block(gallivm, "uloop");
2381 
2382       LLVMBuildBr(builder, u_loop_block);
2383       LLVMPositionBuilderAtEnd(builder, u_loop_block);
2384 
2385       LLVMValueRef u_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type,
2386                                           u_limiter, "");
2387       LLVMValueRef u_mask = LLVMBuildICmp(builder,
2388                                           LLVMIntSLE,
2389                                           u_val,
2390                                           u1, "");
2391 
2392       /* loop over U values */
2393       {
2394          /* q = (int)q */
2395          q = lp_build_itrunc(coord_bld,
2396                              LLVMBuildLoad2(builder, bld->coord_bld.vec_type,
2397                                             q_store, ""));
2398 
2399          /*
2400           * avoid OOB access to filter table, generate a mask for q > 1024,
2401           * then truncate it.
2402           */
2403          LLVMValueRef q_mask = LLVMBuildICmp(builder,
2404                                              LLVMIntSLE,
2405                                              q,
2406                                              lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff), "");
2407          q_mask = LLVMBuildSExt(builder, q_mask, bld->int_coord_bld.vec_type, "");
2408 
2409          q = lp_build_max(&bld->int_coord_bld, q, bld->int_coord_bld.zero);
2410          q = lp_build_and(&bld->int_coord_bld, q, lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff));
2411 
2412          /* update the offsets to deal with float size. */
2413          q = lp_build_mul_imm(&bld->int_coord_bld, q, 4);
2414          filter_table = LLVMBuildBitCast(gallivm->builder, filter_table, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
2415 
2416          /* Lookup weights in filter table */
2417          LLVMValueRef weights = lp_build_gather(gallivm, coord_bld->type.length,
2418                                                 coord_bld->type.width,
2419                                                 lp_elem_type(coord_bld->type),
2420                                                 true, filter_table, q, true);
2421 
2422          /*
2423           * Mask off the weights here which should ensure no-op for loops
2424           * where some of the u/v values are not being calculated.
2425           */
2426          weights = LLVMBuildBitCast(builder, weights, bld->int_coord_bld.vec_type, "");
2427          weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, ""));
2428          weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, v_mask, bld->int_coord_bld.vec_type, ""));
2429          weights = lp_build_and(&bld->int_coord_bld, weights, q_mask);
2430          weights = LLVMBuildBitCast(builder, weights, bld->coord_bld.vec_type, "");
2431 
2432          /* if the weights are all 0 avoid doing the sampling at all. */
2433          struct lp_build_if_state noloadw0;
2434 
2435          LLVMValueRef wnz = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
2436                                           weights, bld->coord_bld.zero, "");
2437          wnz = LLVMBuildSExt(builder, wnz, bld->int_coord_bld.vec_type, "");
2438          wnz = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, wnz);
2439          lp_build_if(&noloadw0, gallivm, wnz);
2440          LLVMValueRef new_coords[4];
2441          new_coords[0] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, u_val), width_dim);
2442          new_coords[1] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, v_val), height_dim);
2443          new_coords[2] = coords[2];
2444          new_coords[3] = coords[3];
2445 
2446          /* lookup q in filter table */
2447          LLVMValueRef temp_colors[4];
2448          lp_build_sample_image_nearest(bld, size0,
2449                                        row_stride0_vec, img_stride0_vec,
2450                                        data_ptr0, mipoff0, new_coords, offsets,
2451                                        temp_colors);
2452 
2453          for (unsigned chan = 0; chan < 4; chan++) {
2454             LLVMValueRef tcolor = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, colors0[chan], "");
2455 
2456             tcolor = lp_build_add(&bld->texel_bld, tcolor, lp_build_mul(&bld->texel_bld, temp_colors[chan], weights));
2457             LLVMBuildStore(builder, tcolor, colors0[chan]);
2458          }
2459 
2460          /* multiple colors by weight and add in. */
2461          /* den += weight; */
2462          LLVMValueRef den = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, den_store, "");
2463          den = lp_build_add(&bld->texel_bld, den, weights);
2464          LLVMBuildStore(builder, den, den_store);
2465 
2466          lp_build_endif(&noloadw0);
2467          /* q += dq; */
2468          /* dq += ddq; */
2469          q = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, q_store, "");
2470          dq = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, dq_store, "");
2471          q = lp_build_add(coord_bld, q, dq);
2472          dq = lp_build_add(coord_bld, dq, ddq);
2473          LLVMBuildStore(builder, q, q_store);
2474          LLVMBuildStore(builder, dq, dq_store);
2475       }
2476       /* u += 1 */
2477       u_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, u_limiter, "");
2478       u_val = lp_build_add(&bld->int_coord_bld, u_val, bld->int_coord_bld.one);
2479       LLVMBuildStore(builder, u_val, u_limiter);
2480 
2481       u_mask = LLVMBuildICmp(builder,
2482                              LLVMIntSLE,
2483                              u_val,
2484                              u1, "");
2485       LLVMValueRef u_end_cond = LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, "");
2486       u_end_cond = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, u_end_cond);
2487 
2488       LLVMBasicBlockRef u_end_loop = lp_build_insert_new_block(gallivm, "u_end_loop");
2489 
2490       LLVMBuildCondBr(builder, u_end_cond,
2491                       u_loop_block, u_end_loop);
2492 
2493       LLVMPositionBuilderAtEnd(builder, u_end_loop);
2494 
2495    }
2496 
2497    /* v += 1 */
2498    v_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, v_limiter, "");
2499    v_val = lp_build_add(&bld->int_coord_bld, v_val, bld->int_coord_bld.one);
2500    LLVMBuildStore(builder, v_val, v_limiter);
2501 
2502    v_mask = LLVMBuildICmp(builder,
2503                           LLVMIntSLE,
2504                           v_val,
2505                           v1, "");
2506    LLVMValueRef v_end_cond = LLVMBuildSExt(builder, v_mask,
2507                                            bld->int_coord_bld.vec_type, "");
2508    v_end_cond = lp_build_any_true_range(&bld->coord_bld,
2509                                         bld->coord_bld.type.length, v_end_cond);
2510 
2511    LLVMBasicBlockRef v_end_loop = lp_build_insert_new_block(gallivm, "v_end_loop");
2512 
2513    LLVMBuildCondBr(builder, v_end_cond,
2514                    v_loop_block, v_end_loop);
2515 
2516    LLVMPositionBuilderAtEnd(builder, v_end_loop);
2517 
2518    LLVMValueRef den = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, den_store, "");
2519 
2520    for (unsigned chan = 0; chan < 4; chan++) {
2521       colors0[chan] =
2522          lp_build_div(&bld->texel_bld,
2523                       LLVMBuildLoad2(builder, bld->texel_bld.vec_type,
2524                                      colors0[chan], ""), den);
2525    }
2526 
2527    LLVMValueRef den0 = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_EQUAL,
2528                                     den, bld->coord_bld.zero);
2529 
2530    LLVMValueRef den0_any =
2531       lp_build_any_true_range(&bld->coord_bld,
2532                               bld->coord_bld.type.length, den0);
2533 
2534    struct lp_build_if_state den0_fallback;
2535    lp_build_if(&den0_fallback, gallivm, den0_any);
2536    {
2537       LLVMValueRef colors_den0[4];
2538       lp_build_sample_image_linear(bld, false, size0, NULL,
2539                                    row_stride0_vec, img_stride0_vec,
2540                                    data_ptr0, mipoff0, coords, offsets,
2541                                    colors_den0);
2542       for (unsigned chan = 0; chan < 4; chan++) {
2543          LLVMValueRef chan_val =
2544             lp_build_select(&bld->texel_bld, den0,
2545                             colors_den0[chan], colors0[chan]);
2546          LLVMBuildStore(builder, chan_val, colors_out[chan]);
2547       }
2548    }
2549    lp_build_else(&den0_fallback);
2550    {
2551       for (unsigned chan = 0; chan < 4; chan++) {
2552          LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
2553       }
2554    }
2555    lp_build_endif(&den0_fallback);
2556 }
2557 
2558 
2559 /**
2560  * Calculate cube face, lod, mip levels.
2561  */
2562 static void
lp_build_sample_common(struct lp_build_sample_context * bld,bool is_lodq,unsigned texture_index,unsigned sampler_index,LLVMValueRef * coords,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,LLVMValueRef * lod_pos_or_zero,LLVMValueRef * lod,LLVMValueRef * lod_fpart,LLVMValueRef * ilevel0,LLVMValueRef * ilevel1)2563 lp_build_sample_common(struct lp_build_sample_context *bld,
2564                        bool is_lodq,
2565                        unsigned texture_index,
2566                        unsigned sampler_index,
2567                        LLVMValueRef *coords,
2568                        const struct lp_derivatives *derivs, /* optional */
2569                        LLVMValueRef lod_bias, /* optional */
2570                        LLVMValueRef explicit_lod, /* optional */
2571                        LLVMValueRef *lod_pos_or_zero,
2572                        LLVMValueRef *lod,
2573                        LLVMValueRef *lod_fpart,
2574                        LLVMValueRef *ilevel0,
2575                        LLVMValueRef *ilevel1)
2576 {
2577    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
2578    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
2579    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
2580    const unsigned target = bld->static_texture_state->target;
2581    const bool aniso = bld->static_sampler_state->aniso;
2582    LLVMValueRef first_level, last_level;
2583    LLVMValueRef lod_ipart = NULL;
2584    struct lp_derivatives cube_derivs;
2585 
2586    /*
2587    printf("%s mip %d  min %d  mag %d\n", __func__,
2588           mip_filter, min_filter, mag_filter);
2589    */
2590 
2591    first_level = get_first_level(bld->gallivm,
2592                                  bld->resources_type,
2593                                  bld->resources_ptr,
2594                                  texture_index, NULL,
2595                                  bld->static_texture_state,
2596                                  bld->dynamic_state);
2597    last_level = get_last_level(bld->gallivm,
2598                                bld->resources_type,
2599                                bld->resources_ptr,
2600                                texture_index, NULL,
2601                                bld->static_texture_state,
2602                                bld->dynamic_state);
2603 
2604    /*
2605     * Choose cube face, recompute texcoords for the chosen face and
2606     * calculate / transform derivatives.
2607     */
2608    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
2609       bool need_derivs = ((min_filter != mag_filter ||
2610                            mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
2611                           !bld->static_sampler_state->min_max_lod_equal &&
2612                           !explicit_lod);
2613       lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs);
2614       if (need_derivs)
2615          derivs = &cube_derivs;
2616 
2617       if (target == PIPE_TEXTURE_CUBE_ARRAY && !is_lodq) {
2618          /* calculate cube layer coord now */
2619          LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
2620          LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
2621          layer = lp_build_mul(&bld->int_coord_bld, layer, six);
2622          coords[3] = lp_build_layer_coord(bld, texture_index, true, layer, NULL);
2623          /* because of seamless filtering can't add it to face (coords[2]) here. */
2624       }
2625    } else if ((target == PIPE_TEXTURE_1D_ARRAY ||
2626              target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) {
2627       coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
2628       coords[2] = lp_build_layer_coord(bld, texture_index, false, coords[2], NULL);
2629    }
2630 
2631    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
2632       /*
2633        * Clamp p coords to [0,1] for fixed function depth texture format here.
2634        * Technically this is not entirely correct for unorm depth as the ref
2635        * value should be converted to the depth format (quantization!) and
2636        * comparison then done in texture format. This would actually help
2637        * performance (since only need to do it once and could save the
2638        * per-sample conversion of texels to floats instead), but it would need
2639        * more messy code (would need to push at least some bits down to actual
2640        * fetch so conversion could be skipped, and would have ugly interaction
2641        * with border color, would need to convert border color to that format
2642        * too or do some other tricks to make it work).
2643        */
2644       const struct util_format_description *format_desc = bld->format_desc;
2645       /* not entirely sure we couldn't end up with non-valid swizzle here */
2646       const enum util_format_type chan_type =
2647          format_desc->swizzle[0] <= PIPE_SWIZZLE_W
2648            ? format_desc->channel[format_desc->swizzle[0]].type
2649            : UTIL_FORMAT_TYPE_FLOAT;
2650       if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2651          coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2652                                     bld->coord_bld.zero, bld->coord_bld.one);
2653       }
2654    }
2655 
2656    /*
2657     * Compute the level of detail (float).
2658     */
2659    if (min_filter != mag_filter ||
2660        mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2661       LLVMValueRef max_aniso = NULL;
2662 
2663       if (aniso)
2664          max_aniso = bld->dynamic_state->max_aniso(bld->gallivm,
2665                                                    bld->resources_type,
2666                                                    bld->resources_ptr,
2667                                                    sampler_index);
2668 
2669       /* Need to compute lod either to choose mipmap levels or to
2670        * distinguish between minification/magnification with one mipmap level.
2671        */
2672       LLVMValueRef first_level_vec =
2673          lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level);
2674       lp_build_lod_selector(bld, is_lodq, sampler_index,
2675                             first_level_vec,
2676                             coords[0], coords[1], coords[2],
2677                             derivs, lod_bias, explicit_lod,
2678                             mip_filter, max_aniso, lod,
2679                             &lod_ipart, lod_fpart, lod_pos_or_zero);
2680       if (is_lodq) {
2681          last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2682          last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2683          last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2684 
2685          switch (mip_filter) {
2686          case PIPE_TEX_MIPFILTER_NONE:
2687             *lod_fpart = bld->lodf_bld.zero;
2688             break;
2689          case PIPE_TEX_MIPFILTER_NEAREST:
2690             *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2691             FALLTHROUGH;
2692          case PIPE_TEX_MIPFILTER_LINEAR:
2693             *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2694                                         bld->lodf_bld.zero, last_level);
2695             break;
2696          }
2697          return;
2698       }
2699    } else {
2700       lod_ipart = bld->lodi_bld.zero;
2701       *lod_pos_or_zero = bld->lodi_bld.zero;
2702    }
2703 
2704    if ((bld->num_lods != bld->num_mips || bld->num_lods == 1) &&
2705        bld->lodi_bld.type.length != 1) {
2706       /* only makes sense if there's just a single mip level */
2707       assert(bld->num_mips == 1);
2708       lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2709    }
2710 
2711    first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2712    last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
2713 
2714    /*
2715     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2716     */
2717 
2718    if (aniso) {
2719       lp_build_nearest_mip_level(bld,
2720                                  first_level, last_level,
2721                                  lod_ipart, ilevel0, NULL);
2722       return;
2723    }
2724 
2725    switch (mip_filter) {
2726    default:
2727       unreachable("Bad mip_filter value in lp_build_sample_soa()");
2728    case PIPE_TEX_MIPFILTER_NONE:
2729       /* always use mip level 0 */
2730       *ilevel0 = first_level;
2731       break;
2732    case PIPE_TEX_MIPFILTER_NEAREST:
2733       assert(lod_ipart);
2734       lp_build_nearest_mip_level(bld,
2735                                  first_level, last_level,
2736                                  lod_ipart, ilevel0, NULL);
2737       break;
2738    case PIPE_TEX_MIPFILTER_LINEAR:
2739       assert(lod_ipart);
2740       assert(*lod_fpart);
2741 
2742       lp_build_linear_mip_levels(bld, texture_index,
2743                                  first_level, last_level,
2744                                  lod_ipart, lod_fpart,
2745                                  ilevel0, ilevel1);
2746       break;
2747    }
2748 }
2749 
2750 
2751 static void
lp_build_clamp_border_color(struct lp_build_sample_context * bld,unsigned sampler_unit)2752 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
2753                             unsigned sampler_unit)
2754 {
2755    struct gallivm_state *gallivm = bld->gallivm;
2756    LLVMBuilderRef builder = gallivm->builder;
2757    LLVMValueRef border_color_ptr =
2758       bld->dynamic_state->border_color(gallivm,
2759                                        bld->resources_type,
2760                                        bld->resources_ptr, sampler_unit);
2761    LLVMValueRef border_color;
2762    const struct util_format_description *format_desc = bld->format_desc;
2763    struct lp_type vec4_type = bld->texel_type;
2764    struct lp_build_context vec4_bld;
2765    LLVMValueRef min_clamp = NULL;
2766    LLVMValueRef max_clamp = NULL;
2767 
2768    /*
2769     * For normalized format need to clamp border color (technically
2770     * probably should also quantize the data). Really sucks doing this
2771     * here but can't avoid at least for now since this is part of
2772     * sampler state and texture format is part of sampler_view state.
2773     * GL expects also expects clamping for uint/sint formats too so
2774     * do that as well (d3d10 can't end up here with uint/sint since it
2775     * only supports them with ld).
2776     */
2777    vec4_type.length = 4;
2778    lp_build_context_init(&vec4_bld, gallivm, vec4_type);
2779 
2780    /*
2781     * Vectorized clamping of border color. Loading is a bit of a hack since
2782     * we just cast the pointer to float array to pointer to vec4
2783     * (int or float).
2784     */
2785    LLVMTypeRef border_color_type = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
2786    border_color_ptr = lp_build_array_get_ptr2(gallivm, border_color_type, border_color_ptr,
2787                                               lp_build_const_int32(gallivm, 0));
2788    border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
2789                                        LLVMPointerType(vec4_bld.vec_type, 0), "");
2790    border_color = LLVMBuildLoad2(builder, vec4_bld.vec_type, border_color_ptr, "");
2791    /* we don't have aligned type in the dynamic state unfortunately */
2792    LLVMSetAlignment(border_color, 4);
2793 
2794    /*
2795     * Instead of having some incredibly complex logic which will try to figure
2796     * out clamping necessary for each channel, simply use the first channel,
2797     * and treat mixed signed/unsigned normalized formats specially.  (Mixed
2798     * non-normalized, which wouldn't work at all here, do not exist for a good
2799     * reason.)
2800     */
2801    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2802       int chan;
2803       /* d/s needs special handling because both present means just sampling depth */
2804       if (util_format_is_depth_and_stencil(format_desc->format)) {
2805          chan = format_desc->swizzle[0];
2806       } else {
2807          chan = util_format_get_first_non_void_channel(format_desc->format);
2808       }
2809       if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
2810          unsigned chan_type = format_desc->channel[chan].type;
2811          unsigned chan_norm = format_desc->channel[chan].normalized;
2812          unsigned chan_pure = format_desc->channel[chan].pure_integer;
2813          if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
2814             if (chan_norm) {
2815                min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2816                max_clamp = vec4_bld.one;
2817             } else if (chan_pure) {
2818                /*
2819                 * Border color was stored as int, hence need min/max clamp
2820                 * only if chan has less than 32 bits..
2821                 */
2822                unsigned chan_size = format_desc->channel[chan].size;
2823                if (chan_size < 32) {
2824                   min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2825                                                      0 - (1 << (chan_size - 1)));
2826                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2827                                                      (1 << (chan_size - 1)) - 1);
2828                }
2829             }
2830             /* TODO: no idea about non-pure, non-normalized! */
2831          } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
2832             if (chan_norm) {
2833                min_clamp = vec4_bld.zero;
2834                max_clamp = vec4_bld.one;
2835             } else if (chan_pure) {
2836                /*
2837                 * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
2838                 * we use Z32_FLOAT_S8X24 to imply sampling depth component and
2839                 * ignoring stencil, which will blow up here if we try to do a
2840                 * uint clamp in a float texel build...  And even if we had
2841                 * that format, mesa st also thinks using z24s8 means depth
2842                 * sampling ignoring stencil.
2843                 */
2844 
2845                /*
2846                 * Border color was stored as uint, hence never need min clamp,
2847                 * and only need max clamp if chan has less than 32 bits.
2848                 */
2849                unsigned chan_size = format_desc->channel[chan].size;
2850                if (chan_size < 32) {
2851                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2852                                                      (1 << chan_size) - 1);
2853                }
2854                /* TODO: no idea about non-pure, non-normalized! */
2855             }
2856          } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
2857             /* TODO: I have no idea what clamp this would need if any! */
2858          }
2859       }
2860       /* mixed plain formats (or different pure size) */
2861       switch (format_desc->format) {
2862       case PIPE_FORMAT_B10G10R10A2_UINT:
2863       case PIPE_FORMAT_R10G10B10A2_UINT:
2864          {
2865             unsigned max10 = (1 << 10) - 1;
2866             max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
2867                                            max10, (1 << 2) - 1, NULL);
2868          }
2869          break;
2870       case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
2871          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2872                                         -1.0F, 0.0F, NULL);
2873          max_clamp = vec4_bld.one;
2874          break;
2875       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
2876       case PIPE_FORMAT_R5SG5SB6U_NORM:
2877          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2878                                         0.0F, 0.0F, NULL);
2879          max_clamp = vec4_bld.one;
2880          break;
2881       default:
2882          break;
2883       }
2884    } else {
2885       /* cannot figure this out from format description */
2886       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2887          /* s3tc formats are always unorm */
2888          min_clamp = vec4_bld.zero;
2889          max_clamp = vec4_bld.one;
2890       } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
2891                  format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
2892                  format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
2893          switch (format_desc->format) {
2894          case PIPE_FORMAT_RGTC1_UNORM:
2895          case PIPE_FORMAT_RGTC2_UNORM:
2896          case PIPE_FORMAT_LATC1_UNORM:
2897          case PIPE_FORMAT_LATC2_UNORM:
2898          case PIPE_FORMAT_ETC1_RGB8:
2899          case PIPE_FORMAT_BPTC_RGBA_UNORM:
2900          case PIPE_FORMAT_BPTC_SRGBA:
2901             min_clamp = vec4_bld.zero;
2902             max_clamp = vec4_bld.one;
2903             break;
2904          case PIPE_FORMAT_RGTC1_SNORM:
2905          case PIPE_FORMAT_RGTC2_SNORM:
2906          case PIPE_FORMAT_LATC1_SNORM:
2907          case PIPE_FORMAT_LATC2_SNORM:
2908             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2909             max_clamp = vec4_bld.one;
2910             break;
2911          case PIPE_FORMAT_BPTC_RGB_FLOAT:
2912             /* not sure if we should clamp to max half float? */
2913             break;
2914          case PIPE_FORMAT_BPTC_RGB_UFLOAT:
2915             min_clamp = vec4_bld.zero;
2916             break;
2917          default:
2918             assert(0);
2919             break;
2920          }
2921       } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
2922          /*
2923           * all others from subsampled/other group, though we don't care
2924           * about yuv (and should not have any from zs here)
2925           */
2926          switch (format_desc->format) {
2927          case PIPE_FORMAT_R8G8_B8G8_UNORM:
2928          case PIPE_FORMAT_G8R8_G8B8_UNORM:
2929          case PIPE_FORMAT_G8R8_B8R8_UNORM:
2930          case PIPE_FORMAT_R8G8_R8B8_UNORM:
2931          case PIPE_FORMAT_G8B8_G8R8_UNORM:
2932          case PIPE_FORMAT_B8G8_R8G8_UNORM:
2933          case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
2934             min_clamp = vec4_bld.zero;
2935             max_clamp = vec4_bld.one;
2936             break;
2937          case PIPE_FORMAT_R8G8Bx_SNORM:
2938             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2939             max_clamp = vec4_bld.one;
2940             break;
2941             /*
2942              * Note smallfloat formats usually don't need clamping
2943              * (they still have infinite range) however this is not
2944              * true for r11g11b10 and r9g9b9e5, which can't represent
2945              * negative numbers (and additionally r9g9b9e5 can't represent
2946              * very large numbers). d3d10 seems happy without clamping in
2947              * this case, but gl spec is pretty clear: "for floating
2948              * point and integer formats, border values are clamped to
2949              * the representable range of the format" so do that here.
2950              */
2951          case PIPE_FORMAT_R11G11B10_FLOAT:
2952             min_clamp = vec4_bld.zero;
2953             break;
2954          case PIPE_FORMAT_R9G9B9E5_FLOAT:
2955             min_clamp = vec4_bld.zero;
2956             max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
2957             break;
2958          default:
2959             assert(0);
2960             break;
2961          }
2962       }
2963    }
2964 
2965    if (min_clamp) {
2966       border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
2967    }
2968    if (max_clamp) {
2969       border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
2970    }
2971 
2972    bld->border_color_clamped = border_color;
2973 }
2974 
2975 
2976 /**
2977  * General texture sampling codegen.
2978  * This function handles texture sampling for all texture targets (1D,
2979  * 2D, 3D, cube) and all filtering modes.
2980  */
2981 static void
lp_build_sample_general(struct lp_build_sample_context * bld,unsigned sampler_unit,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef * colors_out)2982 lp_build_sample_general(struct lp_build_sample_context *bld,
2983                         unsigned sampler_unit,
2984                         bool is_gather,
2985                         const LLVMValueRef *coords,
2986                         const LLVMValueRef *offsets,
2987                         LLVMValueRef lod_positive,
2988                         LLVMValueRef lod_fpart,
2989                         LLVMValueRef ilevel0,
2990                         LLVMValueRef ilevel1,
2991                         LLVMValueRef *colors_out)
2992 {
2993    LLVMBuilderRef builder = bld->gallivm->builder;
2994    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2995    const unsigned mip_filter = sampler_state->min_mip_filter;
2996    const unsigned min_filter = sampler_state->min_img_filter;
2997    const unsigned mag_filter = sampler_state->mag_img_filter;
2998    LLVMValueRef texels[4];
2999    unsigned chan;
3000 
3001    /* if we need border color, (potentially) clamp it now */
3002    if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
3003                                               min_filter,
3004                                               mag_filter) ||
3005        (bld->dims > 1 &&
3006            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
3007                                                   min_filter,
3008                                                   mag_filter)) ||
3009        (bld->dims > 2 &&
3010            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
3011                                                   min_filter,
3012                                                   mag_filter))) {
3013       lp_build_clamp_border_color(bld, sampler_unit);
3014    }
3015 
3016 
3017    /*
3018     * Get/interpolate texture colors.
3019     */
3020 
3021    for (chan = 0; chan < 4; ++chan) {
3022      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
3023      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
3024    }
3025 
3026    if (sampler_state->aniso) {
3027       lp_build_sample_aniso(bld, PIPE_TEX_FILTER_NEAREST, mip_filter,
3028                             false, coords, offsets, ilevel0,
3029                             ilevel1, lod_fpart, texels);
3030    } else if (min_filter == mag_filter) {
3031       /* no need to distinguish between minification and magnification */
3032       lp_build_sample_mipmap(bld, min_filter, mip_filter,
3033                              is_gather,
3034                              coords, offsets,
3035                              ilevel0, ilevel1, lod_fpart,
3036                              texels);
3037    } else {
3038       /*
3039        * Could also get rid of the if-logic and always use mipmap_both, both
3040        * for the single lod and multi-lod case if nothing really uses this.
3041        */
3042       if (bld->num_lods == 1) {
3043          /* Emit conditional to choose min image filter or mag image filter
3044           * depending on the lod being > 0 or <= 0, respectively.
3045           */
3046          struct lp_build_if_state if_ctx;
3047 
3048          lod_positive = LLVMBuildTrunc(builder, lod_positive,
3049                                        LLVMInt1TypeInContext(bld->gallivm->context),
3050                                        "lod_pos");
3051 
3052          lp_build_if(&if_ctx, bld->gallivm, lod_positive);
3053          {
3054             /* Use the minification filter */
3055             lp_build_sample_mipmap(bld, min_filter, mip_filter, false,
3056                                    coords, offsets,
3057                                    ilevel0, ilevel1, lod_fpart,
3058                                    texels);
3059          }
3060          lp_build_else(&if_ctx);
3061          {
3062             /* Use the magnification filter */
3063             lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
3064                                    false,
3065                                    coords, offsets,
3066                                    ilevel0, NULL, NULL,
3067                                    texels);
3068          }
3069          lp_build_endif(&if_ctx);
3070       } else {
3071          LLVMValueRef need_linear, linear_mask;
3072          unsigned mip_filter_for_nearest;
3073          struct lp_build_if_state if_ctx;
3074 
3075          if (min_filter == PIPE_TEX_FILTER_LINEAR) {
3076             linear_mask = lod_positive;
3077             mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
3078          } else {
3079             linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
3080             mip_filter_for_nearest = mip_filter;
3081          }
3082          need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
3083                                                linear_mask);
3084          lp_build_name(need_linear, "need_linear");
3085 
3086          if (bld->num_lods != bld->coord_type.length) {
3087             linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
3088                                                                 bld->lodi_type,
3089                                                                 bld->int_coord_type,
3090                                                                 linear_mask);
3091          }
3092 
3093          lp_build_if(&if_ctx, bld->gallivm, need_linear);
3094          {
3095             /*
3096              * Do sampling with both filters simultaneously. This means using
3097              * a linear filter and doing some tricks (with weights) for the
3098              * pixels which need nearest filter.
3099              * Note that it's probably rare some pixels need nearest and some
3100              * linear filter but the fixups required for the nearest pixels
3101              * aren't all that complicated so just always run a combined path
3102              * if at least some pixels require linear.
3103              */
3104             lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
3105                                         coords, offsets,
3106                                         ilevel0, ilevel1,
3107                                         lod_fpart, lod_positive,
3108                                         texels);
3109          }
3110          lp_build_else(&if_ctx);
3111          {
3112             /*
3113              * All pixels require just nearest filtering, which is way
3114              * cheaper than linear, hence do a separate path for that.
3115              */
3116             lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
3117                                    mip_filter_for_nearest, false,
3118                                    coords, offsets,
3119                                    ilevel0, ilevel1, lod_fpart,
3120                                    texels);
3121          }
3122          lp_build_endif(&if_ctx);
3123       }
3124    }
3125 
3126    for (chan = 0; chan < 4; ++chan) {
3127      colors_out[chan] = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, texels[chan], "");
3128      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
3129    }
3130 }
3131 
3132 
3133 /**
3134  * Texel fetch function.  In contrast to general sampling there is no
3135  * filtering, no coord minification, lod (if any) is always explicit uint,
3136  * coords are uints (in terms of texel units) directly to be applied to the
3137  * selected mip level (after adding texel offsets).  This function handles
3138  * texel fetch for all targets where texel fetch is supported (no cube maps,
3139  * but 1d, 2d, 3d are supported, arrays and buffers should be too).
3140  */
3141 static void
lp_build_fetch_texel(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef ms_index,const LLVMValueRef * coords,LLVMValueRef explicit_lod,const LLVMValueRef * offsets,LLVMValueRef * colors_out)3142 lp_build_fetch_texel(struct lp_build_sample_context *bld,
3143                      unsigned texture_unit,
3144                      LLVMValueRef ms_index,
3145                      const LLVMValueRef *coords,
3146                      LLVMValueRef explicit_lod,
3147                      const LLVMValueRef *offsets,
3148                      LLVMValueRef *colors_out)
3149 {
3150    struct lp_build_context *perquadi_bld = &bld->lodi_bld;
3151    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
3152    unsigned dims = bld->dims, chan;
3153    unsigned target = bld->static_texture_state->target;
3154    bool out_of_bound_ret_zero = true;
3155    LLVMValueRef size, ilevel;
3156    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
3157    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
3158    LLVMValueRef width, height, depth, i, j;
3159    LLVMValueRef offset, out_of_bounds, out1;
3160 
3161    LLVMValueRef first_level;
3162 
3163    first_level = get_first_level(bld->gallivm,
3164                                  bld->resources_type,
3165                                  bld->resources_ptr,
3166                                  texture_unit, NULL,
3167                                  bld->static_texture_state,
3168                                  bld->dynamic_state);
3169    out_of_bounds = int_coord_bld->zero;
3170 
3171    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
3172       if (bld->num_mips != int_coord_bld->type.length) {
3173          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
3174                                             perquadi_bld->type, explicit_lod, 0);
3175       } else {
3176          ilevel = explicit_lod;
3177       }
3178 
3179       LLVMValueRef last_level;
3180 
3181       last_level = get_last_level(bld->gallivm,
3182                                   bld->resources_type,
3183                                   bld->resources_ptr,
3184                                   texture_unit, NULL,
3185                                   bld->static_texture_state,
3186                                   bld->dynamic_state);
3187 
3188       first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
3189       last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
3190       lp_build_nearest_mip_level(bld,
3191                                  first_level, last_level,
3192                                  ilevel, &ilevel,
3193                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
3194    } else {
3195       assert(bld->num_mips == 1);
3196       if (bld->static_texture_state->target != PIPE_BUFFER) {
3197          ilevel = first_level;
3198       } else {
3199          ilevel = lp_build_const_int32(bld->gallivm, 0);
3200       }
3201    }
3202    lp_build_mipmap_level_sizes(bld, ilevel,
3203                                &size,
3204                                &row_stride_vec, &img_stride_vec);
3205    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
3206                                 size, &width, &height, &depth);
3207 
3208    if (target == PIPE_TEXTURE_1D_ARRAY ||
3209        target == PIPE_TEXTURE_2D_ARRAY) {
3210       if (out_of_bound_ret_zero) {
3211          z = lp_build_layer_coord(bld, texture_unit, false, z, &out1);
3212          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3213       } else {
3214          z = lp_build_layer_coord(bld, texture_unit, false, z, NULL);
3215       }
3216    }
3217 
3218    /* This is a lot like border sampling */
3219    if (offsets[0]) {
3220       /*
3221        * coords are really unsigned, offsets are signed, but I don't think
3222        * exceeding 31 bits is possible
3223        */
3224       x = lp_build_add(int_coord_bld, x, offsets[0]);
3225    }
3226    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
3227    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3228    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
3229    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3230 
3231    if (dims >= 2) {
3232       if (offsets[1]) {
3233          y = lp_build_add(int_coord_bld, y, offsets[1]);
3234       }
3235       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
3236       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3237       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
3238       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3239 
3240       if (dims >= 3) {
3241          if (offsets[2]) {
3242             z = lp_build_add(int_coord_bld, z, offsets[2]);
3243          }
3244          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
3245          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3246          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
3247          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3248       }
3249    }
3250 
3251    lp_build_sample_offset(int_coord_bld,
3252                           bld->format_desc,
3253                           x, y, z, row_stride_vec, img_stride_vec,
3254                           &offset, &i, &j);
3255 
3256    if (bld->static_texture_state->target != PIPE_BUFFER) {
3257       offset = lp_build_add(int_coord_bld, offset,
3258                             lp_build_get_mip_offsets(bld, ilevel));
3259    }
3260 
3261    if (bld->fetch_ms && bld->static_texture_state->level_zero_only) {
3262       LLVMValueRef num_samples = bld->dynamic_state->last_level(bld->gallivm,
3263                                                                 bld->resources_type,
3264                                                                 bld->resources_ptr,
3265                                                                 texture_unit, NULL);
3266       num_samples = LLVMBuildZExt(bld->gallivm->builder, num_samples,
3267                                   bld->int_bld.elem_type, "");
3268       LLVMValueRef sample_stride = lp_sample_load_mip_value(bld->gallivm,
3269                                                             bld->mip_offsets_type,
3270                                                             bld->mip_offsets,
3271                                                             lp_build_const_int32(bld->gallivm, LP_JIT_TEXTURE_SAMPLE_STRIDE));
3272       lp_build_sample_ms_offset(int_coord_bld, ms_index, num_samples, sample_stride,
3273                                 &offset, &out_of_bounds);
3274    }
3275 
3276    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
3277 
3278    lp_build_fetch_rgba_soa(bld->gallivm,
3279                            bld->format_desc,
3280                            bld->texel_type, true,
3281                            bld->base_ptr, offset,
3282                            i, j,
3283                            bld->cache,
3284                            colors_out);
3285 
3286    if (out_of_bound_ret_zero) {
3287       /*
3288        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
3289        * Could use min/max above instead of out-of-bounds comparisons
3290        * if we don't care about the result returned for out-of-bounds.
3291        */
3292       LLVMValueRef oob[4] = {
3293          bld->texel_bld.zero,
3294          bld->texel_bld.zero,
3295          bld->texel_bld.zero,
3296          bld->texel_bld.zero,
3297       };
3298       lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, oob, oob);
3299       for (chan = 0; chan < 4; chan++) {
3300          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
3301                                             oob[chan], colors_out[chan]);
3302       }
3303    }
3304 }
3305 
3306 
3307 /**
3308  * Just set texels to white instead of actually sampling the texture.
3309  * For debugging.
3310  */
3311 void
lp_build_sample_nop(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * coords,LLVMValueRef texel_out[4])3312 lp_build_sample_nop(struct gallivm_state *gallivm,
3313                     struct lp_type type,
3314                     const LLVMValueRef *coords,
3315                     LLVMValueRef texel_out[4])
3316 {
3317    LLVMValueRef one = lp_build_one(gallivm, type);
3318    for (unsigned chan = 0; chan < 4; chan++) {
3319       texel_out[chan] = one;
3320    }
3321 }
3322 
3323 
3324 struct lp_type
lp_build_texel_type(struct lp_type texel_type,const struct util_format_description * format_desc)3325 lp_build_texel_type(struct lp_type texel_type,
3326                     const struct util_format_description *format_desc)
3327 {
3328    /* always using the first channel hopefully should be safe,
3329     * if not things WILL break in other places anyway.
3330     */
3331    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
3332        format_desc->channel[0].pure_integer) {
3333       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
3334          texel_type = lp_type_int_vec(texel_type.width, texel_type.width * texel_type.length);
3335       } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
3336          texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3337       }
3338    } else if (util_format_has_stencil(format_desc) &&
3339        !util_format_has_depth(format_desc)) {
3340       /* for stencil only formats, sample stencil (uint) */
3341       texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3342    }
3343    return texel_type;
3344 }
3345 
3346 
3347 /**
3348  * Build the actual texture sampling code.
3349  * 'texel_out' will return a vector of four LLVMValueRefs corresponding to
3350  * R, G, B, A.
3351  * \param type  vector float type to use for coords, etc.
3352  * \param sample_key
3353  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
3354  */
3355 void
lp_build_sample_soa_code(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,unsigned sample_key,unsigned texture_index,unsigned sampler_index,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,LLVMTypeRef thread_data_type,LLVMValueRef thread_data_ptr,const LLVMValueRef * coords,const LLVMValueRef * offsets,const struct lp_derivatives * derivs,LLVMValueRef lod,LLVMValueRef ms_index,LLVMValueRef aniso_filter_table,LLVMValueRef texel_out[4])3356 lp_build_sample_soa_code(struct gallivm_state *gallivm,
3357                          const struct lp_static_texture_state *static_texture_state,
3358                          const struct lp_static_sampler_state *static_sampler_state,
3359                          struct lp_sampler_dynamic_state *dynamic_state,
3360                          struct lp_type type,
3361                          unsigned sample_key,
3362                          unsigned texture_index,
3363                          unsigned sampler_index,
3364                          LLVMTypeRef resources_type,
3365                          LLVMValueRef resources_ptr,
3366                          LLVMTypeRef thread_data_type,
3367                          LLVMValueRef thread_data_ptr,
3368                          const LLVMValueRef *coords,
3369                          const LLVMValueRef *offsets,
3370                          const struct lp_derivatives *derivs, /* optional */
3371                          LLVMValueRef lod, /* optional */
3372                          LLVMValueRef ms_index, /* optional */
3373                          LLVMValueRef aniso_filter_table,
3374                          LLVMValueRef texel_out[4])
3375 {
3376    assert(static_texture_state);
3377    assert(static_texture_state->format < PIPE_FORMAT_COUNT);
3378    assert(static_sampler_state);
3379 
3380    const enum pipe_texture_target target = static_texture_state->target;
3381    const unsigned dims = texture_dims(target);
3382    const unsigned num_quads = type.length / 4;
3383    struct lp_build_sample_context bld;
3384    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
3385    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
3386    LLVMBuilderRef builder = gallivm->builder;
3387    const struct util_format_description *res_format_desc;
3388 
3389    if (0) {
3390       enum pipe_format fmt = static_texture_state->format;
3391       debug_printf("Sample from %s\n", util_format_name(fmt));
3392    }
3393 
3394    const enum lp_sampler_lod_property lod_property =
3395       (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
3396       LP_SAMPLER_LOD_PROPERTY_SHIFT;
3397    const enum lp_sampler_lod_control lod_control =
3398       (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3399       LP_SAMPLER_LOD_CONTROL_SHIFT;
3400    const enum lp_sampler_op_type op_type =
3401       (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3402       LP_SAMPLER_OP_TYPE_SHIFT;
3403 
3404    const bool fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
3405    const bool op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
3406    const bool op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
3407    const bool op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
3408 
3409    LLVMValueRef lod_bias = NULL;
3410    LLVMValueRef explicit_lod = NULL;
3411    if (lod_control == LP_SAMPLER_LOD_BIAS) {
3412       lod_bias = lod;
3413       assert(lod);
3414       assert(derivs == NULL);
3415    } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3416       explicit_lod = lod;
3417       assert(lod);
3418       assert(derivs == NULL);
3419    } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3420       assert(derivs);
3421       assert(lod == NULL);
3422    } else {
3423       assert(derivs == NULL);
3424       assert(lod == NULL);
3425    }
3426 
3427    if (static_texture_state->format == PIPE_FORMAT_NONE) {
3428       /*
3429        * If there's nothing bound, format is NONE, and we must return
3430        * all zero as mandated by d3d10 in this case.
3431        */
3432       LLVMValueRef zero = lp_build_zero(gallivm, type);
3433       for (unsigned chan = 0; chan < 4; chan++) {
3434          texel_out[chan] = zero;
3435       }
3436       return;
3437    }
3438 
3439    assert(type.floating);
3440 
3441    /* Setup our build context */
3442    memset(&bld, 0, sizeof bld);
3443    bld.gallivm = gallivm;
3444    bld.resources_type = resources_type;
3445    bld.resources_ptr = resources_ptr;
3446    bld.aniso_filter_table = aniso_filter_table;
3447    bld.static_sampler_state = &derived_sampler_state;
3448    bld.static_texture_state = static_texture_state;
3449    bld.dynamic_state = dynamic_state;
3450    bld.format_desc = util_format_description(static_texture_state->format);
3451    bld.dims = dims;
3452 
3453    res_format_desc = util_format_description(static_texture_state->res_format);
3454 
3455    if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
3456       bld.no_quad_lod = true;
3457    }
3458    if (!(gallivm_perf & GALLIVM_PERF_RHO_APPROX) || op_is_lodq) {
3459       bld.no_rho_approx = true;
3460    }
3461    if (!(gallivm_perf & GALLIVM_PERF_BRILINEAR) || op_is_lodq || lod_bias || explicit_lod) {
3462       bld.no_brilinear = true;
3463    }
3464 
3465    bld.vector_width = lp_type_width(type);
3466 
3467    bld.float_type = lp_type_float(32);
3468    bld.int_type = lp_type_int(32);
3469    bld.coord_type = type;
3470    bld.int_coord_type = lp_int_type(type);
3471    bld.float_size_in_type = lp_type_float(32);
3472    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
3473    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
3474 
3475    bld.texel_type = lp_build_texel_type(type, bld.format_desc);
3476 
3477    if (!static_texture_state->level_zero_only ||
3478        !static_sampler_state->max_lod_pos || op_is_lodq) {
3479       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
3480    } else {
3481       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3482    }
3483 
3484    if (op_is_gather) {
3485       /*
3486        * gather4 is exactly like GL_LINEAR filtering but in the end skipping
3487        * the actual filtering. Using mostly the same paths, so cube face
3488        * selection, coord wrapping etc. all naturally uses the same code.
3489        */
3490       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3491       derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
3492       derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
3493    }
3494 
3495    const enum pipe_tex_mipfilter mip_filter =
3496       derived_sampler_state.min_mip_filter;
3497 
3498    if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3499        static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3500       /*
3501        * Seamless filtering ignores wrap modes.
3502        * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
3503        * bilinear it's not correct but way better than using for instance
3504        * repeat.  Note we even set this for non-seamless. Technically GL
3505        * allows any wrap mode, which made sense when supporting true borders
3506        * (can get seamless effect with border and CLAMP_TO_BORDER), but
3507        * gallium doesn't support borders and d3d9 requires wrap modes to be
3508        * ignored and it's a pain to fix up the sampler state (as it makes it
3509        * texture dependent).
3510        */
3511       derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3512       derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3513    }
3514 
3515    /*
3516     * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
3517     * so AoS path could be used. Not sure it's worth the trouble...
3518     */
3519    const enum pipe_tex_filter min_img_filter =
3520       derived_sampler_state.min_img_filter;
3521    const enum pipe_tex_filter mag_img_filter =
3522       derived_sampler_state.mag_img_filter;
3523 
3524    /*
3525     * This is all a bit complicated different paths are chosen for performance
3526     * reasons.
3527     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
3528     * everything (the last two options are equivalent for 4-wide case).
3529     * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
3530     * lod is calculated then the lod value extracted afterwards so making this
3531     * case basically the same as far as lod handling is concerned for the
3532     * further sample/filter code as the 1 lod for everything case.
3533     * Different lod handling mostly shows up when building mipmap sizes
3534     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
3535     * (getting the fractional part of the lod to the right texels).
3536     */
3537 
3538    /*
3539     * There are other situations where at least the multiple int lods could be
3540     * avoided like min and max lod being equal.
3541     */
3542    bld.num_mips = bld.num_lods = 1;
3543 
3544    if ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
3545          (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3546           static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
3547         op_is_lodq) {
3548       /*
3549        * special case for using per-pixel lod even for implicit lod,
3550        * which is generally never required (ok by APIs) except to please
3551        * some (somewhat broken imho) tests (because per-pixel face selection
3552        * can cause derivatives to be different for pixels outside the primitive
3553        * due to the major axis division even if pre-project derivatives are
3554        * looking normal).
3555        * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
3556        * cube maps we do indeed get per-pixel lod values).
3557        */
3558       bld.num_mips = type.length;
3559       bld.num_lods = type.length;
3560    } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
3561        (explicit_lod || lod_bias || derivs)) {
3562       if ((!op_is_tex && target != PIPE_BUFFER) ||
3563           (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3564          bld.num_mips = type.length;
3565          bld.num_lods = type.length;
3566       } else if (op_is_tex && min_img_filter != mag_img_filter) {
3567          bld.num_mips = 1;
3568          bld.num_lods = type.length;
3569       }
3570    }
3571    /* TODO: for true scalar_lod should only use 1 lod value */
3572    else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
3573             (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3574       bld.num_mips = num_quads;
3575       bld.num_lods = num_quads;
3576    } else if (op_is_tex && min_img_filter != mag_img_filter) {
3577       bld.num_mips = 1;
3578       bld.num_lods = num_quads;
3579    }
3580 
3581    bld.fetch_ms = fetch_ms;
3582    if (op_is_gather)
3583       bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
3584    bld.lodf_type = type;
3585    /* we want native vector size to be able to use our intrinsics */
3586    if (bld.num_lods != type.length) {
3587       /* TODO: this currently always has to be per-quad or per-element */
3588       bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
3589    }
3590    bld.lodi_type = lp_int_type(bld.lodf_type);
3591    bld.levelf_type = bld.lodf_type;
3592    if (bld.num_mips == 1) {
3593       bld.levelf_type.length = 1;
3594    }
3595    bld.leveli_type = lp_int_type(bld.levelf_type);
3596    bld.float_size_type = bld.float_size_in_type;
3597 
3598    /* Note: size vectors may not be native. They contain minified w/h/d/_
3599     * values, with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to
3600     * 8x4f32
3601     */
3602    if (bld.num_mips > 1) {
3603       bld.float_size_type.length = bld.num_mips == type.length ?
3604                                       bld.num_mips * bld.float_size_in_type.length :
3605                                       type.length;
3606    }
3607    bld.int_size_type = lp_int_type(bld.float_size_type);
3608 
3609    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
3610    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
3611    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
3612    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
3613    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
3614    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
3615    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
3616    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
3617    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
3618    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
3619    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
3620    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
3621    lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
3622    lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
3623 
3624    /* Get the dynamic state */
3625    LLVMValueRef tex_width = dynamic_state->width(gallivm, resources_type,
3626                                                  resources_ptr, texture_index,
3627                                                  NULL);
3628    bld.row_stride_array = dynamic_state->row_stride(gallivm, resources_type,
3629                                                     resources_ptr, texture_index, NULL,
3630                                                     &bld.row_stride_type);
3631    bld.img_stride_array = dynamic_state->img_stride(gallivm, resources_type,
3632                                                     resources_ptr, texture_index, NULL,
3633                                                     &bld.img_stride_type);
3634    bld.base_ptr = dynamic_state->base_ptr(gallivm, resources_type,
3635                                           resources_ptr, texture_index, NULL);
3636    bld.mip_offsets = dynamic_state->mip_offsets(gallivm, resources_type,
3637                                                 resources_ptr, texture_index, NULL,
3638                                                 &bld.mip_offsets_type);
3639 
3640    /* Note that mip_offsets is an array[level] of offsets to texture images */
3641 
3642    if (dynamic_state->cache_ptr && thread_data_ptr) {
3643       bld.cache = dynamic_state->cache_ptr(gallivm, thread_data_type,
3644                                            thread_data_ptr, texture_index);
3645    }
3646 
3647    uint32_t res_bw = res_format_desc->block.width;
3648    uint32_t res_bh = res_format_desc->block.height;
3649    uint32_t bw = bld.format_desc->block.width;
3650    uint32_t bh = bld.format_desc->block.height;
3651 
3652    /* only scale if the blocksizes are different. */
3653    if (res_bw == bw)
3654       res_bw = bw = 1;
3655    if (res_bh == bh)
3656       res_bh = bh = 1;
3657 
3658    /* width, height, depth as single int vector */
3659    if (dims <= 1) {
3660       bld.int_size = tex_width;
3661       bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0);
3662       bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0);
3663       bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0);
3664    } else {
3665       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3666                                             tex_width,
3667                                             LLVMConstInt(i32t, 0, 0), "");
3668       bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3669                                                      LLVMConstInt(i32t, res_bw, 0),
3670                                                      LLVMConstInt(i32t, 0, 0), "");
3671       bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3672                                                           LLVMConstInt(i32t, util_logbase2(res_bw), 0),
3673                                                           LLVMConstInt(i32t, 0, 0), "");
3674       bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3675                                                       LLVMConstInt(i32t, bw, 0),
3676                                                       LLVMConstInt(i32t, 0, 0), "");
3677       if (dims >= 2) {
3678          LLVMValueRef tex_height =
3679             dynamic_state->height(gallivm, resources_type,
3680                                   resources_ptr, texture_index, NULL);
3681          tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
3682                                     bld.int_bld.elem_type, "");
3683          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3684                                                tex_height,
3685                                                LLVMConstInt(i32t, 1, 0), "");
3686          bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
3687                                                         LLVMConstInt(i32t, res_bh, 0),
3688                                                         LLVMConstInt(i32t, 1, 0), "");
3689          bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
3690                                                              LLVMConstInt(i32t, util_logbase2(res_bh), 0),
3691                                                         LLVMConstInt(i32t, 1, 0), "");
3692          bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
3693                                                          LLVMConstInt(i32t, bh, 0),
3694                                                          LLVMConstInt(i32t, 1, 0), "");
3695          if (dims >= 3) {
3696             LLVMValueRef tex_depth =
3697                dynamic_state->depth(gallivm, resources_type, resources_ptr,
3698                                     texture_index, NULL);
3699             tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
3700                                       bld.int_bld.elem_type, "");
3701             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3702                                                   tex_depth,
3703                                                   LLVMConstInt(i32t, 2, 0), "");
3704             bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
3705                                                            LLVMConstInt(i32t, 1, 0),
3706                                                            LLVMConstInt(i32t, 2, 0), "");
3707             bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
3708                                                            LLVMConstInt(i32t, 0, 0),
3709                                                            LLVMConstInt(i32t, 2, 0), "");
3710             bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
3711                                                             LLVMConstInt(i32t, 1, 0),
3712                                                             LLVMConstInt(i32t, 2, 0), "");
3713          }
3714       }
3715    }
3716 
3717    LLVMValueRef newcoords[5];
3718    for (unsigned i = 0; i < 5; i++) {
3719       newcoords[i] = coords[i];
3720    }
3721 
3722    if (util_format_is_pure_integer(static_texture_state->format) &&
3723        !util_format_has_depth(bld.format_desc) && op_is_tex &&
3724        (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
3725         static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3726         static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3727       /*
3728        * Bail if impossible filtering is specified (the awkard additional
3729        * depth check is because it is legal in gallium to have things like
3730        * S8Z24 here which would say it's pure int despite such formats should
3731        * sample the depth component).
3732        * In GL such filters make the texture incomplete, this makes it robust
3733        * against gallium frontends which set this up regardless (we'd crash in
3734        * the lerp later otherwise).
3735        * At least in some apis it may be legal to use such filters with lod
3736        * queries and/or gather (at least for gather d3d10 says only the wrap
3737        * bits are really used hence filter bits are likely simply ignored).
3738        * For fetch, we don't get valid samplers either way here.
3739        */
3740       LLVMValueRef zero = lp_build_zero(gallivm, type);
3741       for (unsigned chan = 0; chan < 4; chan++) {
3742          texel_out[chan] = zero;
3743       }
3744       return;
3745    }
3746 
3747    if (0) {
3748       /* For debug: no-op texture sampling */
3749       lp_build_sample_nop(gallivm,
3750                           bld.texel_type,
3751                           newcoords,
3752                           texel_out);
3753    } else if (op_type == LP_SAMPLER_OP_FETCH) {
3754       lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
3755                            lod, offsets, texel_out);
3756    } else {
3757       LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
3758       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
3759       bool use_aos = util_format_fits_8unorm(bld.format_desc) &&
3760                 op_is_tex &&
3761                 /* not sure this is strictly needed or simply impossible */
3762                 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
3763                 derived_sampler_state.aniso == 0 &&
3764                 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
3765 
3766       use_aos &= bld.num_lods <= num_quads ||
3767                  derived_sampler_state.min_img_filter ==
3768                     derived_sampler_state.mag_img_filter;
3769 
3770       if (gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
3771          use_aos = 0;
3772       }
3773 
3774       if (dims > 1) {
3775          use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
3776          if (dims > 2) {
3777             use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
3778          }
3779       }
3780       if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
3781            static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3782           derived_sampler_state.seamless_cube_map &&
3783           (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3784            derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3785          /* theoretically possible with AoS filtering but not implemented (complex!) */
3786          use_aos = 0;
3787       }
3788 
3789       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
3790           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
3791          debug_printf("%s: using floating point linear filtering for %s\n",
3792                       __func__, bld.format_desc->short_name);
3793          debug_printf("  min_img %d  mag_img %d  mip %d  target %d  seamless %d"
3794                       "  wraps %d  wrapt %d  wrapr %d\n",
3795                       derived_sampler_state.min_img_filter,
3796                       derived_sampler_state.mag_img_filter,
3797                       derived_sampler_state.min_mip_filter,
3798                       static_texture_state->target,
3799                       derived_sampler_state.seamless_cube_map,
3800                       derived_sampler_state.wrap_s,
3801                       derived_sampler_state.wrap_t,
3802                       derived_sampler_state.wrap_r);
3803       }
3804 
3805       lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
3806                              newcoords, derivs, lod_bias, explicit_lod,
3807                              &lod_positive, &lod, &lod_fpart,
3808                              &ilevel0, &ilevel1);
3809 
3810       if (op_is_lodq) {
3811          texel_out[0] = lod_fpart;
3812          texel_out[1] = lod;
3813          texel_out[2] = texel_out[3] = bld.coord_bld.zero;
3814          return;
3815       }
3816 
3817       if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3818          /* The aos path doesn't do seamless filtering so simply add cube layer
3819           * to face now.
3820           */
3821          newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
3822       }
3823 
3824       /*
3825        * we only try 8-wide sampling with soa or if we have AVX2
3826        * as it appears to be a loss with just AVX)
3827        */
3828       if (num_quads == 1 || !use_aos ||
3829           (util_get_cpu_caps()->has_avx2 &&
3830            (bld.num_lods == 1 ||
3831             derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
3832          if (use_aos) {
3833             /* do sampling/filtering with fixed pt arithmetic */
3834             lp_build_sample_aos(&bld,
3835                                 newcoords[0], newcoords[1],
3836                                 newcoords[2],
3837                                 offsets, lod_positive, lod_fpart,
3838                                 ilevel0, ilevel1,
3839                                 texel_out);
3840          } else {
3841             lp_build_sample_general(&bld, sampler_index,
3842                                     op_type == LP_SAMPLER_OP_GATHER,
3843                                     newcoords, offsets,
3844                                     lod_positive, lod_fpart,
3845                                     ilevel0, ilevel1,
3846                                     texel_out);
3847          }
3848       } else {
3849          struct lp_build_sample_context bld4;
3850          struct lp_type type4 = type;
3851          LLVMValueRef texelout4[4];
3852          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
3853 
3854          type4.length = 4;
3855 
3856          /* Setup our build context */
3857          memset(&bld4, 0, sizeof bld4);
3858          bld4.no_quad_lod = bld.no_quad_lod;
3859          bld4.no_rho_approx = bld.no_rho_approx;
3860          bld4.no_brilinear = bld.no_brilinear;
3861          bld4.gallivm = bld.gallivm;
3862          bld4.resources_type = bld.resources_type;
3863          bld4.resources_ptr = bld.resources_ptr;
3864          bld4.aniso_filter_table = aniso_filter_table;
3865          bld4.static_texture_state = bld.static_texture_state;
3866          bld4.static_sampler_state = bld.static_sampler_state;
3867          bld4.dynamic_state = bld.dynamic_state;
3868          bld4.format_desc = bld.format_desc;
3869          bld4.dims = bld.dims;
3870          bld4.row_stride_type = bld.row_stride_type;
3871          bld4.row_stride_array = bld.row_stride_array;
3872          bld4.img_stride_type = bld.img_stride_type;
3873          bld4.img_stride_array = bld.img_stride_array;
3874          bld4.base_ptr = bld.base_ptr;
3875          bld4.mip_offsets_type = bld.mip_offsets_type;
3876          bld4.mip_offsets = bld.mip_offsets;
3877          bld4.int_size = bld.int_size;
3878          bld4.int_tex_blocksize = bld.int_tex_blocksize;
3879          bld4.int_tex_blocksize_log2 = bld.int_tex_blocksize_log2;
3880          bld4.int_view_blocksize = bld.int_view_blocksize;
3881          bld4.cache = bld.cache;
3882 
3883          bld4.vector_width = lp_type_width(type4);
3884 
3885          bld4.float_type = lp_type_float(32);
3886          bld4.int_type = lp_type_int(32);
3887          bld4.coord_type = type4;
3888          bld4.int_coord_type = lp_int_type(type4);
3889          bld4.float_size_in_type = lp_type_float(32);
3890          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
3891          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
3892          bld4.texel_type = bld.texel_type;
3893          bld4.texel_type.length = 4;
3894 
3895          bld4.num_mips = bld4.num_lods = 1;
3896          if (bld4.no_quad_lod && bld4.no_rho_approx &&
3897              (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3898               static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3899              (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3900             bld4.num_mips = type4.length;
3901             bld4.num_lods = type4.length;
3902          }
3903          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
3904              (explicit_lod || lod_bias || derivs)) {
3905             if ((!op_is_tex && target != PIPE_BUFFER) ||
3906                 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3907                bld4.num_mips = type4.length;
3908                bld4.num_lods = type4.length;
3909             } else if (op_is_tex && min_img_filter != mag_img_filter) {
3910                bld4.num_mips = 1;
3911                bld4.num_lods = type4.length;
3912             }
3913          }
3914 
3915          /* we want native vector size to be able to use our intrinsics */
3916          bld4.lodf_type = type4;
3917          if (bld4.num_lods != type4.length) {
3918             bld4.lodf_type.length = 1;
3919          }
3920          bld4.lodi_type = lp_int_type(bld4.lodf_type);
3921          bld4.levelf_type = type4;
3922          if (bld4.num_mips != type4.length) {
3923             bld4.levelf_type.length = 1;
3924          }
3925          bld4.leveli_type = lp_int_type(bld4.levelf_type);
3926          bld4.float_size_type = bld4.float_size_in_type;
3927          if (bld4.num_mips > 1) {
3928             bld4.float_size_type.length = bld4.num_mips == type4.length ?
3929                                             bld4.num_mips * bld4.float_size_in_type.length :
3930                                             type4.length;
3931          }
3932          bld4.int_size_type = lp_int_type(bld4.float_size_type);
3933 
3934          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
3935          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
3936          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
3937          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
3938          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
3939          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
3940          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
3941          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
3942          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
3943          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
3944          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
3945          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
3946          lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
3947          lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
3948 
3949          for (unsigned i = 0; i < num_quads; i++) {
3950             LLVMValueRef s4, t4, r4;
3951             LLVMValueRef lod_positive4, lod_fpart4 = NULL;
3952             LLVMValueRef ilevel04, ilevel14 = NULL;
3953             LLVMValueRef offsets4[4] = { NULL };
3954             unsigned num_lods = bld4.num_lods;
3955 
3956             s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
3957             t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
3958             r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
3959 
3960             if (offsets[0]) {
3961                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
3962                if (dims > 1) {
3963                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
3964                   if (dims > 2) {
3965                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
3966                   }
3967                }
3968             }
3969             lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
3970             ilevel04 = bld.num_mips == 1 ? ilevel0 :
3971                           lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
3972             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
3973                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
3974                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
3975             }
3976 
3977             if (use_aos) {
3978                /* do sampling/filtering with fixed pt arithmetic */
3979                lp_build_sample_aos(&bld4,
3980                                    s4, t4, r4, offsets4,
3981                                    lod_positive4, lod_fpart4,
3982                                    ilevel04, ilevel14,
3983                                    texelout4);
3984             } else {
3985                /* this path is currently unreachable and hence might break easily... */
3986                LLVMValueRef newcoords4[5];
3987                newcoords4[0] = s4;
3988                newcoords4[1] = t4;
3989                newcoords4[2] = r4;
3990                newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
3991                newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
3992 
3993                lp_build_sample_general(&bld4, sampler_index,
3994                                        op_type == LP_SAMPLER_OP_GATHER,
3995                                        newcoords4, offsets4,
3996                                        lod_positive4, lod_fpart4,
3997                                        ilevel04, ilevel14,
3998                                        texelout4);
3999             }
4000             for (unsigned j = 0; j < 4; j++) {
4001                texelouttmp[j][i] = texelout4[j];
4002             }
4003          }
4004 
4005          for (unsigned j = 0; j < 4; j++) {
4006             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
4007          }
4008       }
4009    }
4010 
4011    if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
4012       apply_sampler_swizzle(&bld, texel_out);
4013    }
4014 
4015    /*
4016     * texel type can be a (32bit) int/uint (for pure int formats only),
4017     * however we are expected to always return floats (storage is untyped).
4018     */
4019    if (!bld.texel_type.floating) {
4020       unsigned chan;
4021       for (chan = 0; chan < 4; chan++) {
4022          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
4023                                             lp_build_vec_type(gallivm, type), "");
4024       }
4025    }
4026 }
4027 
4028 
4029 #define USE_TEX_FUNC_CALL 1
4030 
4031 static inline void
get_target_info(enum pipe_texture_target target,unsigned * num_coords,unsigned * num_derivs,unsigned * num_offsets,unsigned * layer)4032 get_target_info(enum pipe_texture_target target,
4033                 unsigned *num_coords, unsigned *num_derivs,
4034                 unsigned *num_offsets, unsigned *layer)
4035 {
4036    unsigned dims = texture_dims(target);
4037    *num_coords = dims;
4038    *num_offsets = dims;
4039    *num_derivs = (target == PIPE_TEXTURE_CUBE ||
4040                   target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
4041    *layer = has_layer_coord(target) ? 2: 0;
4042    if (target == PIPE_TEXTURE_CUBE_ARRAY) {
4043       /*
4044        * dims doesn't include r coord for cubes - this is handled
4045        * by layer instead, but need to fix up for cube arrays...
4046        */
4047       *layer = 3;
4048       *num_coords = 3;
4049    }
4050 }
4051 
4052 
4053 /**
4054  * Generate the function body for a texture sampling function.
4055  */
4056 static void
lp_build_sample_gen_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,LLVMTypeRef resources_type,LLVMTypeRef thread_data_type,unsigned texture_index,unsigned sampler_index,LLVMValueRef function,unsigned num_args,unsigned sample_key,bool has_aniso_filter_table)4057 lp_build_sample_gen_func(struct gallivm_state *gallivm,
4058                          const struct lp_static_texture_state *static_texture_state,
4059                          const struct lp_static_sampler_state *static_sampler_state,
4060                          struct lp_sampler_dynamic_state *dynamic_state,
4061                          struct lp_type type,
4062                          LLVMTypeRef resources_type,
4063                          LLVMTypeRef thread_data_type,
4064                          unsigned texture_index,
4065                          unsigned sampler_index,
4066                          LLVMValueRef function,
4067                          unsigned num_args,
4068                          unsigned sample_key,
4069                          bool has_aniso_filter_table)
4070 {
4071    LLVMBuilderRef old_builder;
4072    LLVMBasicBlockRef block;
4073    LLVMValueRef coords[5];
4074    LLVMValueRef offsets[3] = { NULL };
4075    LLVMValueRef lod = NULL;
4076    LLVMValueRef ms_index = NULL;
4077    LLVMValueRef resources_ptr;
4078    LLVMValueRef thread_data_ptr = NULL;
4079    LLVMValueRef aniso_filter_table = NULL;
4080    LLVMValueRef texel_out[4];
4081    struct lp_derivatives derivs;
4082    struct lp_derivatives *deriv_ptr = NULL;
4083    unsigned num_param = 0;
4084    unsigned num_coords, num_derivs, num_offsets, layer;
4085    bool need_cache = false;
4086 
4087    const enum lp_sampler_lod_control lod_control =
4088        (sample_key & LP_SAMPLER_LOD_CONTROL_MASK)
4089        >> LP_SAMPLER_LOD_CONTROL_SHIFT;
4090 
4091    const enum lp_sampler_op_type op_type =
4092       (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4093 
4094    get_target_info(static_texture_state->target,
4095                    &num_coords, &num_derivs, &num_offsets, &layer);
4096 
4097    /* lod query doesn't take a layer */
4098    if (layer && op_type == LP_SAMPLER_OP_LODQ)
4099       layer = 0;
4100 
4101    if (dynamic_state->cache_ptr) {
4102       const struct util_format_description *format_desc;
4103       format_desc = util_format_description(static_texture_state->format);
4104       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4105          need_cache = true;
4106       }
4107    }
4108 
4109    /* "unpack" arguments */
4110    resources_ptr = LLVMGetParam(function, num_param++);
4111    if (has_aniso_filter_table)
4112       aniso_filter_table = LLVMGetParam(function, num_param++);
4113    if (need_cache) {
4114       thread_data_ptr = LLVMGetParam(function, num_param++);
4115    }
4116    for (unsigned i = 0; i < num_coords; i++) {
4117       coords[i] = LLVMGetParam(function, num_param++);
4118    }
4119    for (unsigned i = num_coords; i < 5; i++) {
4120       /* This is rather unfortunate... */
4121       coords[i] = lp_build_undef(gallivm, type);
4122    }
4123    if (layer) {
4124       coords[layer] = LLVMGetParam(function, num_param++);
4125    }
4126    if (sample_key & LP_SAMPLER_SHADOW) {
4127       coords[4] = LLVMGetParam(function, num_param++);
4128    }
4129    if (sample_key & LP_SAMPLER_FETCH_MS) {
4130       ms_index = LLVMGetParam(function, num_param++);
4131    }
4132    if (sample_key & LP_SAMPLER_OFFSETS) {
4133       for (unsigned i = 0; i < num_offsets; i++) {
4134          offsets[i] = LLVMGetParam(function, num_param++);
4135       }
4136    }
4137    if (lod_control == LP_SAMPLER_LOD_BIAS ||
4138        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4139       lod = LLVMGetParam(function, num_param++);
4140    } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4141       for (unsigned i = 0; i < num_derivs; i++) {
4142          derivs.ddx[i] = LLVMGetParam(function, num_param++);
4143          derivs.ddy[i] = LLVMGetParam(function, num_param++);
4144       }
4145       deriv_ptr = &derivs;
4146    }
4147 
4148    assert(num_args == num_param);
4149 
4150    /*
4151     * Function body
4152     */
4153 
4154    old_builder = gallivm->builder;
4155    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
4156    gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
4157    LLVMPositionBuilderAtEnd(gallivm->builder, block);
4158 
4159    lp_build_sample_soa_code(gallivm,
4160                             static_texture_state,
4161                             static_sampler_state,
4162                             dynamic_state,
4163                             type,
4164                             sample_key,
4165                             texture_index,
4166                             sampler_index,
4167                             resources_type,
4168                             resources_ptr,
4169                             thread_data_type,
4170                             thread_data_ptr,
4171                             coords,
4172                             offsets,
4173                             deriv_ptr,
4174                             lod,
4175                             ms_index,
4176                             aniso_filter_table,
4177                             texel_out);
4178 
4179    LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
4180 
4181    LLVMDisposeBuilder(gallivm->builder);
4182    gallivm->builder = old_builder;
4183 
4184    gallivm_verify_function(gallivm, function);
4185 }
4186 
4187 
4188 /**
4189  * Call the matching function for texture sampling.
4190  * If there's no match, generate a new one.
4191  */
4192 static void
lp_build_sample_soa_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,const struct lp_sampler_params * params,unsigned texture_index,unsigned sampler_index,LLVMValueRef * tex_ret)4193 lp_build_sample_soa_func(struct gallivm_state *gallivm,
4194                          const struct lp_static_texture_state *static_texture_state,
4195                          const struct lp_static_sampler_state *static_sampler_state,
4196                          struct lp_sampler_dynamic_state *dynamic_state,
4197                          const struct lp_sampler_params *params,
4198                          unsigned texture_index, unsigned sampler_index,
4199                          LLVMValueRef *tex_ret)
4200 {
4201    LLVMBuilderRef builder = gallivm->builder;
4202    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
4203                              LLVMGetInsertBlock(builder)));
4204    LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
4205    unsigned sample_key = params->sample_key;
4206    const LLVMValueRef *coords = params->coords;
4207    const LLVMValueRef *offsets = params->offsets;
4208    const struct lp_derivatives *derivs = params->derivs;
4209 
4210    const enum lp_sampler_lod_control lod_control =
4211       (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
4212       LP_SAMPLER_LOD_CONTROL_SHIFT;
4213 
4214    const enum lp_sampler_op_type op_type =
4215       (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4216 
4217    unsigned num_coords, num_derivs, num_offsets, layer;
4218    get_target_info(static_texture_state->target,
4219                    &num_coords, &num_derivs, &num_offsets, &layer);
4220 
4221    /* lod query doesn't take a layer */
4222    if (layer && op_type == LP_SAMPLER_OP_LODQ)
4223       layer = 0;
4224 
4225    bool need_cache = false;
4226    if (dynamic_state->cache_ptr) {
4227       const struct util_format_description *format_desc;
4228       format_desc = util_format_description(static_texture_state->format);
4229       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4230          need_cache = true;
4231       }
4232    }
4233 
4234    /*
4235     * texture function matches are found by name.
4236     * Thus the name has to include both the texture and sampler unit
4237     * (which covers all static state) plus the actual texture function
4238     * (including things like offsets, shadow coord, lod control).
4239     * Additionally lod_property has to be included too.
4240     */
4241    char func_name[64];
4242    snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
4243             texture_index, sampler_index, sample_key);
4244 
4245    LLVMValueRef function = LLVMGetNamedFunction(module, func_name);
4246    LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
4247    LLVMTypeRef ret_type;
4248    LLVMTypeRef val_type[4];
4249    unsigned num_param = 0;
4250 
4251    /*
4252     * Generate the function prototype.
4253     */
4254 
4255    arg_types[num_param++] = LLVMTypeOf(params->resources_ptr);
4256    if (params->aniso_filter_table)
4257       arg_types[num_param++] = LLVMTypeOf(params->aniso_filter_table);
4258    if (need_cache) {
4259       arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
4260    }
4261    for (unsigned i = 0; i < num_coords; i++) {
4262       arg_types[num_param++] = LLVMTypeOf(coords[0]);
4263       assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
4264    }
4265    if (layer) {
4266       arg_types[num_param++] = LLVMTypeOf(coords[layer]);
4267       assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
4268    }
4269    if (sample_key & LP_SAMPLER_SHADOW) {
4270       arg_types[num_param++] = LLVMTypeOf(coords[0]);
4271    }
4272    if (sample_key & LP_SAMPLER_FETCH_MS) {
4273       arg_types[num_param++] = LLVMTypeOf(params->ms_index);
4274    }
4275    if (sample_key & LP_SAMPLER_OFFSETS) {
4276       for (unsigned i = 0; i < num_offsets; i++) {
4277          arg_types[num_param++] = LLVMTypeOf(offsets[0]);
4278          assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
4279       }
4280    }
4281    if (lod_control == LP_SAMPLER_LOD_BIAS ||
4282        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4283       arg_types[num_param++] = LLVMTypeOf(params->lod);
4284    } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4285       for (unsigned i = 0; i < num_derivs; i++) {
4286          arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
4287          arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
4288          assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
4289          assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
4290       }
4291    }
4292 
4293    val_type[0] = val_type[1] = val_type[2] = val_type[3] =
4294          lp_build_vec_type(gallivm, params->type);
4295    ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
4296    LLVMTypeRef function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
4297 
4298    if (!function) {
4299       function = LLVMAddFunction(module, func_name, function_type);
4300 
4301       for (unsigned i = 0; i < num_param; ++i) {
4302          if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
4303 
4304             lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
4305          }
4306       }
4307 
4308       LLVMSetFunctionCallConv(function, LLVMFastCallConv);
4309       LLVMSetLinkage(function, LLVMInternalLinkage);
4310 
4311       lp_build_sample_gen_func(gallivm,
4312                                static_texture_state,
4313                                static_sampler_state,
4314                                dynamic_state,
4315                                params->type,
4316                                params->resources_type,
4317                                params->thread_data_type,
4318                                texture_index,
4319                                sampler_index,
4320                                function,
4321                                num_param,
4322                                sample_key,
4323                                params->aniso_filter_table ? true : false);
4324    }
4325 
4326    unsigned num_args = 0;
4327    args[num_args++] = params->resources_ptr;
4328    if (params->aniso_filter_table)
4329       args[num_args++] = params->aniso_filter_table;
4330    if (need_cache) {
4331       args[num_args++] = params->thread_data_ptr;
4332    }
4333    for (unsigned i = 0; i < num_coords; i++) {
4334       args[num_args++] = coords[i];
4335    }
4336    if (layer) {
4337       args[num_args++] = coords[layer];
4338    }
4339    if (sample_key & LP_SAMPLER_SHADOW) {
4340       args[num_args++] = coords[4];
4341    }
4342    if (sample_key & LP_SAMPLER_FETCH_MS) {
4343       args[num_args++] = params->ms_index;
4344    }
4345    if (sample_key & LP_SAMPLER_OFFSETS) {
4346       for (unsigned i = 0; i < num_offsets; i++) {
4347          args[num_args++] = offsets[i];
4348       }
4349    }
4350    if (lod_control == LP_SAMPLER_LOD_BIAS ||
4351        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4352       args[num_args++] = params->lod;
4353    } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4354       for (unsigned i = 0; i < num_derivs; i++) {
4355          args[num_args++] = derivs->ddx[i];
4356          args[num_args++] = derivs->ddy[i];
4357       }
4358    }
4359 
4360    assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
4361 
4362    *tex_ret = LLVMBuildCall2(builder, function_type, function, args, num_args, "");
4363    LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder);
4364    LLVMValueRef inst = LLVMGetLastInstruction(bb);
4365    LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
4366 }
4367 
4368 
4369 /**
4370  * Build texture sampling code.
4371  * Either via a function call or inline it directly.
4372  */
4373 void
lp_build_sample_soa(const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_sampler_params * params)4374 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
4375                     const struct lp_static_sampler_state *static_sampler_state,
4376                     struct lp_sampler_dynamic_state *dynamic_state,
4377                     struct gallivm_state *gallivm,
4378                     const struct lp_sampler_params *params)
4379 {
4380    bool use_tex_func = false;
4381 
4382    /*
4383     * Do not use a function call if the sampling is "simple enough".
4384     * We define this by
4385     * a) format
4386     * b) no mips (either one level only or no mip filter)
4387     * No mips will definitely make the code smaller, though
4388     * the format requirement is a bit iffy - there's some (SoA) formats
4389     * which definitely generate less code. This does happen to catch
4390     * some important cases though which are hurt quite a bit by using
4391     * a call (though not really because of the call overhead but because
4392     * they are reusing the same texture unit with some of the same
4393     * parameters).
4394     * Ideally we'd let llvm recognize this stuff by doing IPO passes.
4395     */
4396 
4397    if (USE_TEX_FUNC_CALL) {
4398       const struct util_format_description *format_desc =
4399          util_format_description(static_texture_state->format);
4400       const bool simple_format =
4401          (util_format_is_rgba8_variant(format_desc) &&
4402          format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
4403       const enum lp_sampler_op_type op_type =
4404          (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
4405          LP_SAMPLER_OP_TYPE_SHIFT;
4406       const bool simple_tex =
4407          op_type != LP_SAMPLER_OP_TEXTURE ||
4408            ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
4409              static_texture_state->level_zero_only == true) &&
4410             static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
4411 
4412       use_tex_func = !(simple_format && simple_tex);
4413    }
4414 
4415    if (use_tex_func) {
4416       LLVMValueRef tex_ret;
4417       lp_build_sample_soa_func(gallivm,
4418                                static_texture_state,
4419                                static_sampler_state,
4420                                dynamic_state,
4421                                params, params->texture_index,
4422                                params->sampler_index, &tex_ret);
4423 
4424       for (unsigned i = 0; i < 4; i++) {
4425          params->texel[i] =
4426             LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
4427       }
4428    } else {
4429       lp_build_sample_soa_code(gallivm,
4430                                static_texture_state,
4431                                static_sampler_state,
4432                                dynamic_state,
4433                                params->type,
4434                                params->sample_key,
4435                                params->texture_index,
4436                                params->sampler_index,
4437                                params->resources_type,
4438                                params->resources_ptr,
4439                                params->thread_data_type,
4440                                params->thread_data_ptr,
4441                                params->coords,
4442                                params->offsets,
4443                                params->derivs,
4444                                params->lod,
4445                                params->ms_index,
4446                                params->aniso_filter_table,
4447                                params->texel);
4448    }
4449 }
4450 
4451 
4452 void
lp_build_size_query_soa(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state,const struct lp_sampler_size_query_params * params)4453 lp_build_size_query_soa(struct gallivm_state *gallivm,
4454                         const struct lp_static_texture_state *static_state,
4455                         struct lp_sampler_dynamic_state *dynamic_state,
4456                         const struct lp_sampler_size_query_params *params)
4457 {
4458    LLVMValueRef first_level = NULL;
4459    const unsigned num_lods = 1;
4460    LLVMTypeRef resources_type = params->resources_type;
4461    LLVMValueRef resources_ptr = params->resources_ptr;
4462    const unsigned texture_unit = params->texture_unit;
4463    const enum pipe_texture_target target = params->target;
4464    LLVMValueRef texture_unit_offset = params->texture_unit_offset;
4465    const struct util_format_description *format_desc =
4466       util_format_description(static_state->format);
4467    const struct util_format_description *res_format_desc =
4468       util_format_description(static_state->res_format);
4469 
4470    if (static_state->format == PIPE_FORMAT_NONE) {
4471       /*
4472        * If there's nothing bound, format is NONE, and we must return
4473        * all zero as mandated by d3d10 in this case.
4474        */
4475       LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
4476       for (unsigned chan = 0; chan < 4; chan++) {
4477          params->sizes_out[chan] = zero;
4478       }
4479       return;
4480    }
4481 
4482    /*
4483     * Do some sanity verification about bound texture and shader dcl target.
4484     * Not entirely sure what's possible but assume array/non-array
4485     * always compatible (probably not ok for OpenGL but d3d10 has no
4486     * distinction of arrays at the resource level).
4487     * Everything else looks bogus (though not entirely sure about rect/2d).
4488     * Currently disabled because it causes assertion failures if there's
4489     * nothing bound (or rather a dummy texture, not that this case would
4490     * return the right values).
4491     */
4492    if (0 && static_state->target != target) {
4493       if (static_state->target == PIPE_TEXTURE_1D)
4494          assert(target == PIPE_TEXTURE_1D_ARRAY);
4495       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
4496          assert(target == PIPE_TEXTURE_1D);
4497       else if (static_state->target == PIPE_TEXTURE_2D)
4498          assert(target == PIPE_TEXTURE_2D_ARRAY);
4499       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
4500          assert(target == PIPE_TEXTURE_2D);
4501       else if (static_state->target == PIPE_TEXTURE_CUBE)
4502          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
4503       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
4504          assert(target == PIPE_TEXTURE_CUBE);
4505       else
4506          assert(0);
4507    }
4508 
4509    const unsigned dims = texture_dims(target);
4510 
4511    const bool has_array = has_layer_coord(target);
4512 
4513    assert(!params->int_type.floating);
4514 
4515    struct lp_build_context bld_int_vec4;
4516    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
4517 
4518    if (params->samples_only) {
4519       LLVMValueRef num_samples;
4520       if (params->ms && static_state->level_zero_only) {
4521          /* multisample never has levels. */
4522          num_samples = dynamic_state->last_level(gallivm,
4523                                                  resources_type,
4524                                                  resources_ptr,
4525                                                  texture_unit,
4526                                                  texture_unit_offset);
4527          num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
4528                                      bld_int_vec4.elem_type, "");
4529       } else {
4530          num_samples = lp_build_const_int32(gallivm, 0);
4531       }
4532       params->sizes_out[0] =
4533          lp_build_broadcast(gallivm,
4534                             lp_build_vec_type(gallivm, params->int_type),
4535                             num_samples);
4536       return;
4537    }
4538 
4539    LLVMValueRef lod;
4540    LLVMValueRef level = 0;
4541    if (params->explicit_lod) {
4542       /* FIXME: this needs to honor per-element lod */
4543       lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
4544                                     lp_build_const_int32(gallivm, 0), "");
4545       first_level = get_first_level(gallivm, resources_type, resources_ptr,
4546                                     texture_unit, texture_unit_offset,
4547                                     static_state, dynamic_state);
4548       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
4549       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
4550    } else {
4551       lod = bld_int_vec4.zero;
4552    }
4553 
4554    LLVMValueRef size = bld_int_vec4.undef;
4555    LLVMValueRef tex_blocksize = bld_int_vec4.undef;
4556    LLVMValueRef tex_blocksize_log2 = bld_int_vec4.undef;
4557    LLVMValueRef view_blocksize = bld_int_vec4.undef;
4558 
4559    uint32_t res_bw = res_format_desc->block.width;
4560    uint32_t res_bh = res_format_desc->block.height;
4561    uint32_t bw = format_desc->block.width;
4562    uint32_t bh = format_desc->block.height;
4563 
4564    /* only scale if the blocksizes are different. */
4565    if (res_bw == bw)
4566       res_bw = bw = 1;
4567    if (res_bh == bh)
4568       res_bh = bh = 1;
4569 
4570    LLVMValueRef tex_width = dynamic_state->width(gallivm,
4571                                                  resources_type,
4572                                                  resources_ptr,
4573                                                  texture_unit,
4574                                                  texture_unit_offset);
4575    size = LLVMBuildInsertElement(gallivm->builder, size,
4576                                  tex_width,
4577                                  lp_build_const_int32(gallivm, 0), "");
4578    tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
4579                                           lp_build_const_int32(gallivm, res_bw),
4580                                           lp_build_const_int32(gallivm, 0), "");
4581    tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
4582                                                lp_build_const_int32(gallivm, util_logbase2(res_bw)),
4583                                                lp_build_const_int32(gallivm, 0), "");
4584    view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
4585                                            lp_build_const_int32(gallivm, bw),
4586                                            lp_build_const_int32(gallivm, 0), "");
4587    if (dims >= 2) {
4588       LLVMValueRef tex_height =
4589          dynamic_state->height(gallivm, resources_type,
4590                                resources_ptr, texture_unit, texture_unit_offset);
4591       tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
4592                                  bld_int_vec4.elem_type, "");
4593       size = LLVMBuildInsertElement(gallivm->builder, size, tex_height,
4594                                     lp_build_const_int32(gallivm, 1), "");
4595       tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
4596                                              lp_build_const_int32(gallivm, res_bh),
4597                                              lp_build_const_int32(gallivm, 1), "");
4598       tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
4599                                                   lp_build_const_int32(gallivm, util_logbase2(res_bh)),
4600                                                   lp_build_const_int32(gallivm, 1), "");
4601       view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
4602                                               lp_build_const_int32(gallivm, bh),
4603                                               lp_build_const_int32(gallivm, 1), "");
4604    }
4605 
4606    if (dims >= 3) {
4607       LLVMValueRef tex_depth  =
4608          dynamic_state->depth(gallivm, resources_type,
4609                               resources_ptr, texture_unit, texture_unit_offset);
4610       tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
4611                                 bld_int_vec4.elem_type, "");
4612       size = LLVMBuildInsertElement(gallivm->builder, size, tex_depth,
4613                                     lp_build_const_int32(gallivm, 2), "");
4614       tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
4615                                              lp_build_const_int32(gallivm, 1),
4616                                              lp_build_const_int32(gallivm, 2), "");
4617       tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
4618                                                   lp_build_const_int32(gallivm, 0),
4619                                                   lp_build_const_int32(gallivm, 2), "");
4620       view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
4621                                               lp_build_const_int32(gallivm, 1),
4622                                               lp_build_const_int32(gallivm, 2), "");
4623    }
4624 
4625    size = lp_build_minify(&bld_int_vec4, size, lod, true);
4626    size = lp_build_scale_view_dims(&bld_int_vec4, size, tex_blocksize,
4627                                    tex_blocksize_log2, view_blocksize);
4628 
4629    if (has_array) {
4630       LLVMValueRef layers = dynamic_state->depth(gallivm, resources_type,
4631                                                  resources_ptr, texture_unit,
4632                                                  texture_unit_offset);
4633       layers = LLVMBuildZExt(gallivm->builder, layers,
4634                              bld_int_vec4.elem_type, "");
4635       if (target == PIPE_TEXTURE_CUBE_ARRAY) {
4636          /*
4637           * It looks like GL wants number of cubes, d3d10.1 has it undefined?
4638           * Could avoid this by passing in number of cubes instead of total
4639           * number of layers (might make things easier elsewhere too).
4640           */
4641          LLVMValueRef six = lp_build_const_int32(gallivm, 6);
4642          layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
4643       }
4644       size = LLVMBuildInsertElement(gallivm->builder, size, layers,
4645                                     lp_build_const_int32(gallivm, dims), "");
4646    }
4647 
4648    /*
4649     * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
4650     * if level is out of bounds (note this can't cover unbound texture
4651     * here, which also requires returning zero).
4652     */
4653    if (params->explicit_lod && params->is_sviewinfo) {
4654       LLVMValueRef last_level, out, out1;
4655       struct lp_build_context leveli_bld;
4656 
4657       /* everything is scalar for now */
4658       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
4659       last_level = get_last_level(gallivm, resources_type, resources_ptr,
4660                                   texture_unit, texture_unit_offset,
4661                                   static_state, dynamic_state);
4662 
4663       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
4664       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
4665       out = lp_build_or(&leveli_bld, out, out1);
4666       if (num_lods == 1) {
4667          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
4668       } else {
4669          /* TODO */
4670          assert(0);
4671       }
4672       size = lp_build_andnot(&bld_int_vec4, size, out);
4673    }
4674 
4675    unsigned i;
4676    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
4677       params->sizes_out[i] =
4678          lp_build_extract_broadcast(gallivm, bld_int_vec4.type,
4679                                     params->int_type,
4680                                     size,
4681                                     lp_build_const_int32(gallivm, i));
4682    }
4683    if (params->is_sviewinfo) {
4684       for (; i < 4; i++) {
4685          params->sizes_out[i] = lp_build_const_vec(gallivm,
4686                                                    params->int_type, 0.0);
4687       }
4688    }
4689 
4690    /*
4691     * if there's no explicit_lod (buffers, rects) queries requiring nr of
4692     * mips would be illegal.
4693     */
4694    if (params->is_sviewinfo && params->explicit_lod) {
4695       struct lp_build_context bld_int_scalar;
4696       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
4697 
4698       LLVMValueRef num_levels;
4699       if (static_state->level_zero_only) {
4700          num_levels = bld_int_scalar.one;
4701       } else {
4702          LLVMValueRef last_level;
4703          last_level = get_last_level(gallivm, resources_type, resources_ptr,
4704                                      texture_unit, texture_unit_offset,
4705                                      static_state, dynamic_state);
4706          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
4707          num_levels = lp_build_add(&bld_int_scalar, num_levels,
4708                                    bld_int_scalar.one);
4709       }
4710       params->sizes_out[3] =
4711          lp_build_broadcast(gallivm,
4712                             lp_build_vec_type(gallivm, params->int_type),
4713                             num_levels);
4714    }
4715 
4716    if (target == PIPE_BUFFER) {
4717       struct lp_build_context bld_int;
4718       lp_build_context_init(&bld_int, gallivm, params->int_type);
4719 
4720       params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0],
4721          lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS));
4722    }
4723 }
4724 
4725 
4726 static void
lp_build_do_atomic_soa(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type type,LLVMValueRef exec_mask,LLVMValueRef base_ptr,LLVMValueRef offset,LLVMValueRef out_of_bounds,unsigned img_op,LLVMAtomicRMWBinOp op,const LLVMValueRef rgba_in[4],const LLVMValueRef rgba2_in[4],LLVMValueRef atomic_result[4])4727 lp_build_do_atomic_soa(struct gallivm_state *gallivm,
4728                        const struct util_format_description *format_desc,
4729                        struct lp_type type,
4730                        LLVMValueRef exec_mask,
4731                        LLVMValueRef base_ptr,
4732                        LLVMValueRef offset,
4733                        LLVMValueRef out_of_bounds,
4734                        unsigned img_op,
4735                        LLVMAtomicRMWBinOp op,
4736                        const LLVMValueRef rgba_in[4],
4737                        const LLVMValueRef rgba2_in[4],
4738                        LLVMValueRef atomic_result[4])
4739 {
4740    const enum pipe_format format = format_desc->format;
4741 
4742    bool valid = format == PIPE_FORMAT_R32_UINT ||
4743                 format == PIPE_FORMAT_R32_SINT ||
4744                 format == PIPE_FORMAT_R32_FLOAT;
4745 
4746    bool integer = format != PIPE_FORMAT_R32_FLOAT;
4747    if (img_op == LP_IMG_ATOMIC) {
4748       switch (op) {
4749       case LLVMAtomicRMWBinOpAdd:
4750       case LLVMAtomicRMWBinOpSub:
4751       case LLVMAtomicRMWBinOpAnd:
4752       case LLVMAtomicRMWBinOpNand:
4753       case LLVMAtomicRMWBinOpOr:
4754       case LLVMAtomicRMWBinOpXor:
4755       case LLVMAtomicRMWBinOpMax:
4756       case LLVMAtomicRMWBinOpMin:
4757       case LLVMAtomicRMWBinOpUMax:
4758       case LLVMAtomicRMWBinOpUMin:
4759          valid &= integer;
4760          break;
4761       case LLVMAtomicRMWBinOpFAdd:
4762       case LLVMAtomicRMWBinOpFSub:
4763 #if LLVM_VERSION_MAJOR >= 15
4764          case LLVMAtomicRMWBinOpFMax:
4765          case LLVMAtomicRMWBinOpFMin:
4766 #endif
4767          valid &= !integer;
4768          break;
4769       default:
4770          break;
4771       }
4772    } else {
4773       valid &= integer;
4774    }
4775 
4776    if (!valid) {
4777       atomic_result[0] = lp_build_zero(gallivm, type);
4778       return;
4779    }
4780 
4781    LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ?
4782       LLVMFloatTypeInContext(gallivm->context) :
4783       LLVMInt32TypeInContext(gallivm->context);
4784 
4785    LLVMTypeRef atom_res_elem_type =
4786       LLVMVectorType(ref_type, type.length);
4787    LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");
4788 
4789    offset = LLVMBuildGEP2(gallivm->builder,
4790                           LLVMInt8TypeInContext(gallivm->context),
4791                           base_ptr, &offset, 1, "");
4792 
4793    struct lp_build_loop_state loop_state;
4794    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
4795    struct lp_build_if_state ifthen;
4796    LLVMValueRef cond;
4797    LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];
4798 
4799    LLVMValueRef should_store_mask =
4800       LLVMBuildAnd(gallivm->builder, exec_mask,
4801                    LLVMBuildNot(gallivm->builder, out_of_bounds, ""),
4802                    "store_mask");
4803    assert(exec_mask);
4804 
4805    cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask,
4806                         lp_build_const_int_vec(gallivm, type, 0), "");
4807    cond = LLVMBuildExtractElement(gallivm->builder, cond,
4808                                   loop_state.counter, "");
4809    lp_build_if(&ifthen, gallivm, cond);
4810 
4811    LLVMValueRef data =
4812       LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
4813    LLVMValueRef cast_base_ptr =
4814       LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
4815    cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
4816               LLVMPointerType(ref_type, 0), "");
4817    data = LLVMBuildBitCast(gallivm->builder, data,
4818                            ref_type, "");
4819 
4820    if (img_op == LP_IMG_ATOMIC_CAS) {
4821       LLVMValueRef cas_src_ptr =
4822          LLVMBuildExtractElement(gallivm->builder, packed2,
4823                                  loop_state.counter, "");
4824       LLVMValueRef cas_src =
4825          LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
4826                           ref_type, "");
4827       data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
4828                                     cas_src,
4829                                     LLVMAtomicOrderingSequentiallyConsistent,
4830                                     LLVMAtomicOrderingSequentiallyConsistent,
4831                                     false);
4832       data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
4833    } else {
4834       data = LLVMBuildAtomicRMW(gallivm->builder, op,
4835                                 cast_base_ptr, data,
4836                                 LLVMAtomicOrderingSequentiallyConsistent,
4837                                 false);
4838    }
4839 
4840    LLVMValueRef temp_res =
4841       LLVMBuildLoad2(gallivm->builder, atom_res_elem_type, atom_res, "");
4842    temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data,
4843                                      loop_state.counter, "");
4844    LLVMBuildStore(gallivm->builder, temp_res, atom_res);
4845 
4846    lp_build_endif(&ifthen);
4847    lp_build_loop_end_cond(&loop_state,
4848                           lp_build_const_int32(gallivm, type.length),
4849                           NULL, LLVMIntUGE);
4850    atomic_result[0] = LLVMBuildLoad2(gallivm->builder, atom_res_elem_type,
4851                                      atom_res, "");
4852 }
4853 
4854 
4855 static void
lp_build_img_op_no_format(struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef outdata[4])4856 lp_build_img_op_no_format(struct gallivm_state *gallivm,
4857                           const struct lp_img_params *params,
4858                           LLVMValueRef outdata[4])
4859 {
4860    /*
4861     * If there's nothing bound, format is NONE, and we must return
4862     * all zero as mandated by d3d10 in this case.
4863     */
4864    if (params->img_op != LP_IMG_STORE) {
4865       LLVMValueRef zero = lp_build_zero(gallivm, params->type);
4866       for (unsigned chan = 0; chan < (params->img_op == LP_IMG_LOAD ? 4 : 1);
4867            chan++) {
4868          outdata[chan] = zero;
4869       }
4870    }
4871 }
4872 
4873 
4874 void
lp_build_img_op_soa(const struct lp_static_texture_state * static_texture_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef outdata[4])4875 lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
4876                     struct lp_sampler_dynamic_state *dynamic_state,
4877                     struct gallivm_state *gallivm,
4878                     const struct lp_img_params *params,
4879                     LLVMValueRef outdata[4])
4880 {
4881    const enum pipe_texture_target target = params->target;
4882    const unsigned dims = texture_dims(target);
4883    const struct util_format_description *format_desc =
4884       util_format_description(static_texture_state->format);
4885    const struct util_format_description *res_format_desc =
4886       util_format_description(static_texture_state->res_format);
4887    LLVMValueRef x = params->coords[0], y = params->coords[1],
4888       z = params->coords[2];
4889    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
4890 
4891    /** regular scalar int type */
4892    struct lp_type int_coord_type = lp_uint_type(params->type);
4893    struct lp_build_context int_coord_bld;
4894    lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);
4895 
4896    if (static_texture_state->format == PIPE_FORMAT_NONE) {
4897       lp_build_img_op_no_format(gallivm, params, outdata);
4898       return;
4899 
4900    }
4901 
4902    LLVMValueRef row_stride = dynamic_state->row_stride(gallivm,
4903                                                        params->resources_type,
4904                                                        params->resources_ptr,
4905                                                        params->image_index, NULL, NULL);
4906    LLVMValueRef img_stride = dynamic_state->img_stride(gallivm,
4907                                                        params->resources_type,
4908                                                        params->resources_ptr,
4909                                                        params->image_index, NULL, NULL);
4910    LLVMValueRef base_ptr = dynamic_state->base_ptr(gallivm,
4911                                                    params->resources_type,
4912                                                    params->resources_ptr,
4913                                                    params->image_index, NULL);
4914    LLVMValueRef width = dynamic_state->width(gallivm,
4915                                              params->resources_type,
4916                                              params->resources_ptr,
4917                                              params->image_index, NULL);
4918    LLVMValueRef height = dynamic_state->height(gallivm,
4919                                                params->resources_type,
4920                                                params->resources_ptr,
4921                                                params->image_index, NULL);
4922    height = LLVMBuildZExt(gallivm->builder, height,
4923                           int_coord_bld.elem_type, "");
4924    LLVMValueRef depth = dynamic_state->depth(gallivm,
4925                                              params->resources_type,
4926                                              params->resources_ptr,
4927                                              params->image_index, NULL);
4928    depth = LLVMBuildZExt(gallivm->builder, depth,
4929                          int_coord_bld.elem_type, "");
4930    bool layer_coord = has_layer_coord(target);
4931 
4932    width = lp_build_scale_view_dim(gallivm, width, res_format_desc->block.width,
4933                                    format_desc->block.width);
4934    width = lp_build_broadcast_scalar(&int_coord_bld, width);
4935    if (dims >= 2) {
4936       height = lp_build_scale_view_dim(gallivm, height, res_format_desc->block.height,
4937                                        format_desc->block.height);
4938       height = lp_build_broadcast_scalar(&int_coord_bld, height);
4939       row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
4940    }
4941    if (dims >= 3 || layer_coord) {
4942       depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
4943       img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
4944    }
4945 
4946    LLVMValueRef out_of_bounds = int_coord_bld.zero;
4947    LLVMValueRef out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
4948    out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
4949 
4950    if (dims >= 2) {
4951       out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
4952       out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
4953    }
4954    if (dims >= 3 || layer_coord) {
4955       out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
4956       out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
4957    }
4958 
4959    LLVMValueRef offset, i, j;
4960    lp_build_sample_offset(&int_coord_bld,
4961                           format_desc,
4962                           x, y, z, row_stride_vec, img_stride_vec,
4963                           &offset, &i, &j);
4964 
4965    if (params->ms_index && static_texture_state->level_zero_only) {
4966       LLVMValueRef num_samples = dynamic_state->last_level(gallivm,
4967                                                            params->resources_type,
4968                                                            params->resources_ptr,
4969                                                            params->image_index, NULL);
4970       num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
4971                                   int_coord_bld.elem_type, "");
4972       LLVMValueRef sample_stride = dynamic_state->sample_stride(gallivm,
4973                                                                 params->resources_type,
4974                                                                 params->resources_ptr,
4975                                                                 params->image_index, NULL);
4976       lp_build_sample_ms_offset(&int_coord_bld,
4977                                 params->ms_index, num_samples,
4978                                 sample_stride, &offset,
4979                                 &out_of_bounds);
4980    }
4981    if (params->img_op == LP_IMG_LOAD) {
4982       struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);
4983 
4984       offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
4985       struct lp_build_context texel_bld;
4986       lp_build_context_init(&texel_bld, gallivm, texel_type);
4987       lp_build_fetch_rgba_soa(gallivm,
4988                               format_desc,
4989                               texel_type, true,
4990                               base_ptr, offset,
4991                               i, j,
4992                               NULL,
4993                               outdata);
4994 
4995       for (unsigned chan = 0; chan < 3; chan++) {
4996          outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
4997                                          texel_bld.zero, outdata[chan]);
4998       }
4999       if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) {
5000          outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
5001                                       texel_bld.one, outdata[3]);
5002       } else {
5003          outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
5004                                       texel_bld.zero, outdata[3]);
5005       }
5006    } else if (params->img_op == LP_IMG_STORE) {
5007       lp_build_store_rgba_soa(gallivm, format_desc, params->type,
5008                               params->exec_mask, base_ptr, offset,
5009                               out_of_bounds, params->indata);
5010    } else {
5011       lp_build_do_atomic_soa(gallivm, format_desc, params->type,
5012                              params->exec_mask, base_ptr, offset,
5013                              out_of_bounds, params->img_op, params->op,
5014                              params->indata, params->indata2, outdata);
5015    }
5016 }
5017 
5018 
5019 /*
5020  * These functions are for indirect texture access support.
5021  *
5022  * Indirect textures are implemented using a switch statement, that
5023  * takes the texture index and jumps to the sampler functions for
5024  * that texture unit.
5025  */
5026 
5027 /*
5028  * Initialise an indexed sampler switch block.
5029  *
5030  * This sets up the switch_info state and adds the LLVM flow control pieces.
5031  */
5032 void
lp_build_sample_array_init_soa(struct lp_build_sample_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_sampler_params * params,LLVMValueRef idx,unsigned base,unsigned range)5033 lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
5034                            struct gallivm_state *gallivm,
5035                            const struct lp_sampler_params *params,
5036                            LLVMValueRef idx,
5037                            unsigned base, unsigned range)
5038 {
5039    switch_info->gallivm = gallivm;
5040    switch_info->params = *params;
5041    switch_info->base = base;
5042    switch_info->range = range;
5043 
5044    /* for generating the switch functions we don't want the texture index
5045     * offset
5046     */
5047    switch_info->params.texture_index_offset = 0;
5048 
5049    LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
5050    switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");
5051 
5052    switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
5053                                              switch_info->merge_ref,
5054                                              range - base);
5055 
5056    LLVMTypeRef val_type[4];
5057    val_type[0] = val_type[1] = val_type[2] = val_type[3] =
5058       lp_build_vec_type(gallivm, params->type);
5059 
5060    LLVMTypeRef ret_type =
5061       LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
5062 
5063    LLVMValueRef undef_val = LLVMGetUndef(ret_type);
5064 
5065    LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5066 
5067    switch_info->phi = LLVMBuildPhi(gallivm->builder, ret_type, "");
5068    LLVMAddIncoming(switch_info->phi, &undef_val, &initial_block, 1);
5069 }
5070 
5071 
5072 /*
5073  * Add an individual entry to the indirect texture switch.
5074  *
5075  * This builds the sample function and links a case for it into the switch
5076  * statement.
5077  */
5078 void
lp_build_sample_array_case_soa(struct lp_build_sample_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_texture_state)5079 lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
5080                            int idx,
5081                            const struct lp_static_texture_state *static_texture_state,
5082                            const struct lp_static_sampler_state *static_sampler_state,
5083                            struct lp_sampler_dynamic_state *dynamic_texture_state)
5084 {
5085    struct gallivm_state *gallivm = switch_info->gallivm;
5086    LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "texblock");
5087 
5088    LLVMAddCase(switch_info->switch_ref,
5089                LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0),
5090                this_block);
5091    LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
5092 
5093    LLVMValueRef tex_ret;
5094    lp_build_sample_soa_func(gallivm, static_texture_state,
5095                             static_sampler_state, dynamic_texture_state,
5096                             &switch_info->params, idx, idx, &tex_ret);
5097 
5098    LLVMAddIncoming(switch_info->phi, &tex_ret, &this_block, 1);
5099    LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
5100 }
5101 
5102 
5103 /*
5104  * Finish a switch statement.
5105  *
5106  * This handles extract the results from the switch.
5107  */
5108 void
lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch * switch_info)5109 lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
5110 {
5111    struct gallivm_state *gallivm = switch_info->gallivm;
5112 
5113    LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5114    for (unsigned i = 0; i < 4; i++) {
5115       switch_info->params.texel[i] =
5116          LLVMBuildExtractValue(gallivm->builder, switch_info->phi, i, "");
5117    }
5118 }
5119 
5120 
5121 void
lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef idx,unsigned base,unsigned range)5122 lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch *switch_info,
5123                              struct gallivm_state *gallivm,
5124                              const struct lp_img_params *params,
5125                              LLVMValueRef idx,
5126                              unsigned base, unsigned range)
5127 {
5128    switch_info->gallivm = gallivm;
5129    switch_info->params = *params;
5130    switch_info->base = base;
5131    switch_info->range = range;
5132 
5133    /* for generating the switch functions we don't want the texture index
5134     * offset
5135     */
5136    switch_info->params.image_index_offset = 0;
5137 
5138    LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
5139    switch_info->merge_ref = lp_build_insert_new_block(gallivm, "imgmerge");
5140 
5141    switch_info->switch_ref =
5142       LLVMBuildSwitch(gallivm->builder, idx,
5143                       switch_info->merge_ref, range - base);
5144 
5145    if (params->img_op != LP_IMG_STORE) {
5146       LLVMTypeRef ret_type = lp_build_vec_type(gallivm, params->type);
5147       LLVMValueRef undef_val = LLVMGetUndef(ret_type);
5148 
5149       LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5150 
5151       for (unsigned i = 0; i < ((params->img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5152          switch_info->phi[i] = LLVMBuildPhi(gallivm->builder, ret_type, "");
5153          LLVMAddIncoming(switch_info->phi[i], &undef_val, &initial_block, 1);
5154       }
5155    }
5156 }
5157 
5158 
5159 void
lp_build_image_op_array_case(struct lp_build_img_op_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,struct lp_sampler_dynamic_state * dynamic_state)5160 lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
5161                             int idx,
5162                             const struct lp_static_texture_state *static_texture_state,
5163                             struct lp_sampler_dynamic_state *dynamic_state)
5164 {
5165    struct gallivm_state *gallivm = switch_info->gallivm;
5166    LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "img");
5167    LLVMValueRef tex_ret[4];
5168 
5169    LLVMAddCase(switch_info->switch_ref,
5170                lp_build_const_int32(gallivm, idx), this_block);
5171    LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
5172 
5173    switch_info->params.image_index = idx;
5174 
5175    lp_build_img_op_soa(static_texture_state, dynamic_state,
5176                        switch_info->gallivm, &switch_info->params, tex_ret);
5177 
5178    if (switch_info->params.img_op != LP_IMG_STORE) {
5179       for (unsigned i = 0;
5180            i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5181          tex_ret[i] =
5182             LLVMBuildBitCast(gallivm->builder, tex_ret[i],
5183                              lp_build_vec_type(gallivm,
5184                                                switch_info->params.type), "");
5185       }
5186 
5187       this_block = LLVMGetInsertBlock(gallivm->builder);
5188       for (unsigned i = 0;
5189            i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5190          LLVMAddIncoming(switch_info->phi[i], &tex_ret[i], &this_block, 1);
5191       }
5192    }
5193    LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
5194 }
5195 
5196 
5197 void
lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch * switch_info)5198 lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info)
5199 {
5200    struct gallivm_state *gallivm = switch_info->gallivm;
5201 
5202    LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5203 
5204    if (switch_info->params.img_op != LP_IMG_STORE) {
5205       for (unsigned i = 0;
5206            i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5207          switch_info->params.outdata[i] = switch_info->phi[i];
5208       }
5209    }
5210 }
5211