/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27 
/**
 * @file
 * Texture sampling -- AoS.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */
35 
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60 
61 
62 /**
63  * Build LLVM code for texture coord wrapping, for nearest filtering,
64  * for scaled integer texcoords.
65  * \param block_length  is the length of the pixel block along the
66  *                      coordinate axis
67  * \param coord  the incoming texcoord (s,t or r) scaled to the texture size
68  * \param coord_f  the incoming texcoord (s,t or r) as float vec
69  * \param length  the texture size along one dimension
70  * \param stride  pixel stride along the coordinate axis (in bytes)
71  * \param offset  the texel offset along the coord axis
72  * \param is_pot  if TRUE, length is a power of two
73  * \param wrap_mode  one of PIPE_TEX_WRAP_x
74  * \param out_offset  byte offset for the wrapped coordinate
75  * \param out_i  resulting sub-block pixel coordinate for coord0
76  */
77 static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * out_offset,LLVMValueRef * out_i)78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
79                                  unsigned block_length,
80                                  LLVMValueRef coord,
81                                  LLVMValueRef coord_f,
82                                  LLVMValueRef length,
83                                  LLVMValueRef stride,
84                                  LLVMValueRef offset,
85                                  boolean is_pot,
86                                  unsigned wrap_mode,
87                                  LLVMValueRef *out_offset,
88                                  LLVMValueRef *out_i)
89 {
90    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91    LLVMBuilderRef builder = bld->gallivm->builder;
92    LLVMValueRef length_minus_one;
93 
94    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
95 
96    switch(wrap_mode) {
97    case PIPE_TEX_WRAP_REPEAT:
98       if(is_pot)
99          coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
100       else {
101          struct lp_build_context *coord_bld = &bld->coord_bld;
102          LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
103          if (offset) {
104             offset = lp_build_int_to_float(coord_bld, offset);
105             offset = lp_build_div(coord_bld, offset, length_f);
106             coord_f = lp_build_add(coord_bld, coord_f, offset);
107          }
108          coord = lp_build_fract_safe(coord_bld, coord_f);
109          coord = lp_build_mul(coord_bld, coord, length_f);
110          coord = lp_build_itrunc(coord_bld, coord);
111       }
112       break;
113 
114    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
115       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
116       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
117       break;
118 
119    case PIPE_TEX_WRAP_CLAMP:
120    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
121    case PIPE_TEX_WRAP_MIRROR_REPEAT:
122    case PIPE_TEX_WRAP_MIRROR_CLAMP:
123    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
124    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
125    default:
126       assert(0);
127    }
128 
129    lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
130                                   out_offset, out_i);
131 }
132 
133 
134 /**
135  * Build LLVM code for texture coord wrapping, for nearest filtering,
136  * for float texcoords.
137  * \param coord  the incoming texcoord (s,t or r)
138  * \param length  the texture size along one dimension
139  * \param offset  the texel offset along the coord axis
140  * \param is_pot  if TRUE, length is a power of two
141  * \param wrap_mode  one of PIPE_TEX_WRAP_x
142  * \param icoord  the texcoord after wrapping, as int
143  */
144 static void
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context * bld,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * icoord)145 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
146                                    LLVMValueRef coord,
147                                    LLVMValueRef length,
148                                    LLVMValueRef offset,
149                                    boolean is_pot,
150                                    unsigned wrap_mode,
151                                    LLVMValueRef *icoord)
152 {
153    struct lp_build_context *coord_bld = &bld->coord_bld;
154    LLVMValueRef length_minus_one;
155 
156    switch(wrap_mode) {
157    case PIPE_TEX_WRAP_REPEAT:
158       if (offset) {
159          /* this is definitely not ideal for POT case */
160          offset = lp_build_int_to_float(coord_bld, offset);
161          offset = lp_build_div(coord_bld, offset, length);
162          coord = lp_build_add(coord_bld, coord, offset);
163       }
164       /* take fraction, unnormalize */
165       coord = lp_build_fract_safe(coord_bld, coord);
166       coord = lp_build_mul(coord_bld, coord, length);
167       *icoord = lp_build_itrunc(coord_bld, coord);
168       break;
169    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
170       length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
171       if (bld->static_sampler_state->normalized_coords) {
172          /* scale coord to length */
173          coord = lp_build_mul(coord_bld, coord, length);
174       }
175       if (offset) {
176          offset = lp_build_int_to_float(coord_bld, offset);
177          coord = lp_build_add(coord_bld, coord, offset);
178       }
179       coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
180                              length_minus_one);
181       *icoord = lp_build_itrunc(coord_bld, coord);
182       break;
183 
184    case PIPE_TEX_WRAP_CLAMP:
185    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
186    case PIPE_TEX_WRAP_MIRROR_REPEAT:
187    case PIPE_TEX_WRAP_MIRROR_CLAMP:
188    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
189    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
190    default:
191       assert(0);
192    }
193 }
194 
195 
196 /**
197  * Helper to compute the first coord and the weight for
198  * linear wrap repeat npot textures
199  */
200 static void
lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_i)201 lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
202                                       LLVMValueRef coord_f,
203                                       LLVMValueRef length_i,
204                                       LLVMValueRef length_f,
205                                       LLVMValueRef *coord0_i,
206                                       LLVMValueRef *weight_i)
207 {
208    struct lp_build_context *coord_bld = &bld->coord_bld;
209    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
210    struct lp_build_context abs_coord_bld;
211    struct lp_type abs_type;
212    LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
213                                                 int_coord_bld->one);
214    LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
215 
216    /* wrap with normalized floats is just fract */
217    coord_f = lp_build_fract(coord_bld, coord_f);
218    /* mul by size */
219    coord_f = lp_build_mul(coord_bld, coord_f, length_f);
220    /* convert to int, compute lerp weight */
221    coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
222 
223    /* At this point we don't have any negative numbers so use non-signed
224     * build context which might help on some archs.
225     */
226    abs_type = coord_bld->type;
227    abs_type.sign = 0;
228    lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
229    *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
230 
231    /* subtract 0.5 (add -128) */
232    i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
233    *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
234 
235    /* compute fractional part (AND with 0xff) */
236    i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
237    *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
238 
239    /* compute floor (shift right 8) */
240    i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
241    *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
242    /*
243     * we avoided the 0.5/length division before the repeat wrap,
244     * now need to fix up edge cases with selects
245     */
246    mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
247                            PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
248    *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
249    /*
250     * We should never get values too large - except if coord was nan or inf,
251     * in which case things go terribly wrong...
252     * Alternatively, could use fract_safe above...
253     */
254    *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
255 }
256 
257 
258 /**
259  * Build LLVM code for texture coord wrapping, for linear filtering,
260  * for scaled integer texcoords.
261  * \param block_length  is the length of the pixel block along the
262  *                      coordinate axis
263  * \param coord0  the incoming texcoord (s,t or r) scaled to the texture size
264  * \param coord_f  the incoming texcoord (s,t or r) as float vec
265  * \param length  the texture size along one dimension
266  * \param stride  pixel stride along the coordinate axis (in bytes)
267  * \param offset  the texel offset along the coord axis
268  * \param is_pot  if TRUE, length is a power of two
269  * \param wrap_mode  one of PIPE_TEX_WRAP_x
270  * \param offset0  resulting relative offset for coord0
271  * \param offset1  resulting relative offset for coord0 + 1
272  * \param i0  resulting sub-block pixel coordinate for coord0
273  * \param i1  resulting sub-block pixel coordinate for coord0 + 1
274  */
275 static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord0,LLVMValueRef * weight_i,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * offset0,LLVMValueRef * offset1,LLVMValueRef * i0,LLVMValueRef * i1)276 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
277                                 unsigned block_length,
278                                 LLVMValueRef coord0,
279                                 LLVMValueRef *weight_i,
280                                 LLVMValueRef coord_f,
281                                 LLVMValueRef length,
282                                 LLVMValueRef stride,
283                                 LLVMValueRef offset,
284                                 boolean is_pot,
285                                 unsigned wrap_mode,
286                                 LLVMValueRef *offset0,
287                                 LLVMValueRef *offset1,
288                                 LLVMValueRef *i0,
289                                 LLVMValueRef *i1)
290 {
291    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
292    LLVMBuilderRef builder = bld->gallivm->builder;
293    LLVMValueRef length_minus_one;
294    LLVMValueRef lmask, umask, mask;
295 
296    /*
297     * If the pixel block covers more than one pixel then there is no easy
298     * way to calculate offset1 relative to offset0. Instead, compute them
299     * independently. Otherwise, try to compute offset0 and offset1 with
300     * a single stride multiplication.
301     */
302 
303    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
304 
305    if (block_length != 1) {
306       LLVMValueRef coord1;
307       switch(wrap_mode) {
308       case PIPE_TEX_WRAP_REPEAT:
309          if (is_pot) {
310             coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
311             coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
312             coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
313          }
314          else {
315             LLVMValueRef mask;
316             LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
317             if (offset) {
318                offset = lp_build_int_to_float(&bld->coord_bld, offset);
319                offset = lp_build_div(&bld->coord_bld, offset, length_f);
320                coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
321             }
322             lp_build_coord_repeat_npot_linear_int(bld, coord_f,
323                                                   length, length_f,
324                                                   &coord0, weight_i);
325             mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
326                                     PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
327             coord1 = LLVMBuildAnd(builder,
328                                   lp_build_add(int_coord_bld, coord0,
329                                                int_coord_bld->one),
330                                   mask, "");
331          }
332          break;
333 
334       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
335          coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
336          coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
337                                 length_minus_one);
338          coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
339                                 length_minus_one);
340          break;
341 
342       case PIPE_TEX_WRAP_CLAMP:
343       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
344       case PIPE_TEX_WRAP_MIRROR_REPEAT:
345       case PIPE_TEX_WRAP_MIRROR_CLAMP:
346       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
347       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
348       default:
349          assert(0);
350          coord0 = int_coord_bld->zero;
351          coord1 = int_coord_bld->zero;
352          break;
353       }
354       lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
355                                      offset0, i0);
356       lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
357                                      offset1, i1);
358       return;
359    }
360 
361    *i0 = int_coord_bld->zero;
362    *i1 = int_coord_bld->zero;
363 
364    switch(wrap_mode) {
365    case PIPE_TEX_WRAP_REPEAT:
366       if (is_pot) {
367          coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
368       }
369       else {
370          LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
371          if (offset) {
372             offset = lp_build_int_to_float(&bld->coord_bld, offset);
373             offset = lp_build_div(&bld->coord_bld, offset, length_f);
374             coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
375          }
376          lp_build_coord_repeat_npot_linear_int(bld, coord_f,
377                                                length, length_f,
378                                                &coord0, weight_i);
379       }
380 
381       mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
382                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
383 
384       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
385       *offset1 = LLVMBuildAnd(builder,
386                               lp_build_add(int_coord_bld, *offset0, stride),
387                               mask, "");
388       break;
389 
390    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
391       /* XXX this might be slower than the separate path
392        * on some newer cpus. With sse41 this is 8 instructions vs. 7
393        * - at least on SNB this is almost certainly slower since
394        * min/max are cheaper than selects, and the muls aren't bad.
395        */
396       lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
397                                PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
398       umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
399                                PIPE_FUNC_LESS, coord0, length_minus_one);
400 
401       coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
402       coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
403 
404       mask = LLVMBuildAnd(builder, lmask, umask, "");
405 
406       *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
407       *offset1 = lp_build_add(int_coord_bld,
408                               *offset0,
409                               LLVMBuildAnd(builder, stride, mask, ""));
410       break;
411 
412    case PIPE_TEX_WRAP_CLAMP:
413    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
414    case PIPE_TEX_WRAP_MIRROR_REPEAT:
415    case PIPE_TEX_WRAP_MIRROR_CLAMP:
416    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
417    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
418    default:
419       assert(0);
420       *offset0 = int_coord_bld->zero;
421       *offset1 = int_coord_bld->zero;
422       break;
423    }
424 }
425 
426 
427 /**
428  * Build LLVM code for texture coord wrapping, for linear filtering,
429  * for float texcoords.
430  * \param block_length  is the length of the pixel block along the
431  *                      coordinate axis
432  * \param coord  the incoming texcoord (s,t or r)
433  * \param length  the texture size along one dimension
434  * \param offset  the texel offset along the coord axis
435  * \param is_pot  if TRUE, length is a power of two
436  * \param wrap_mode  one of PIPE_TEX_WRAP_x
437  * \param coord0  the first texcoord after wrapping, as int
438  * \param coord1  the second texcoord after wrapping, as int
439  * \param weight  the filter weight as int (0-255)
440  * \param force_nearest  if this coord actually uses nearest filtering
441  */
442 static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * coord0,LLVMValueRef * coord1,LLVMValueRef * weight,unsigned force_nearest)443 lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
444                                   unsigned block_length,
445                                   LLVMValueRef coord,
446                                   LLVMValueRef length,
447                                   LLVMValueRef offset,
448                                   boolean is_pot,
449                                   unsigned wrap_mode,
450                                   LLVMValueRef *coord0,
451                                   LLVMValueRef *coord1,
452                                   LLVMValueRef *weight,
453                                   unsigned force_nearest)
454 {
455    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
456    struct lp_build_context *coord_bld = &bld->coord_bld;
457    LLVMBuilderRef builder = bld->gallivm->builder;
458    LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
459    LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
460 
461    switch(wrap_mode) {
462    case PIPE_TEX_WRAP_REPEAT:
463       if (is_pot) {
464          /* mul by size and subtract 0.5 */
465          coord = lp_build_mul(coord_bld, coord, length);
466          if (offset) {
467             offset = lp_build_int_to_float(coord_bld, offset);
468             coord = lp_build_add(coord_bld, coord, offset);
469          }
470          if (!force_nearest)
471             coord = lp_build_sub(coord_bld, coord, half);
472          *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
473          /* convert to int, compute lerp weight */
474          lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
475          *coord1 = lp_build_ifloor(coord_bld, *coord1);
476          /* repeat wrap */
477          length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
478          *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
479          *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
480       }
481       else {
482          LLVMValueRef mask;
483          if (offset) {
484             offset = lp_build_int_to_float(coord_bld, offset);
485             offset = lp_build_div(coord_bld, offset, length);
486             coord = lp_build_add(coord_bld, coord, offset);
487          }
488          /* wrap with normalized floats is just fract */
489          coord = lp_build_fract(coord_bld, coord);
490          /* unnormalize */
491          coord = lp_build_mul(coord_bld, coord, length);
492          /*
493           * we avoided the 0.5/length division, have to fix up wrong
494           * edge cases with selects
495           */
496          *coord1 = lp_build_add(coord_bld, coord, half);
497          coord = lp_build_sub(coord_bld, coord, half);
498          *weight = lp_build_fract(coord_bld, coord);
499          /*
500           * It is important for this comparison to be unordered
501           * (or need fract_safe above).
502           */
503          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
504                                  PIPE_FUNC_LESS, coord, coord_bld->zero);
505          *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
506          *coord0 = lp_build_itrunc(coord_bld, *coord0);
507          mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
508                                  PIPE_FUNC_LESS, *coord1, length);
509          *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
510          *coord1 = lp_build_itrunc(coord_bld, *coord1);
511       }
512       break;
513    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
514       if (bld->static_sampler_state->normalized_coords) {
515          /* mul by tex size */
516          coord = lp_build_mul(coord_bld, coord, length);
517       }
518       if (offset) {
519          offset = lp_build_int_to_float(coord_bld, offset);
520          coord = lp_build_add(coord_bld, coord, offset);
521       }
522       /* subtract 0.5 */
523       if (!force_nearest) {
524          coord = lp_build_sub(coord_bld, coord, half);
525       }
526       /* clamp to [0, length - 1] */
527       coord = lp_build_min_ext(coord_bld, coord, length_minus_one,
528                                GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
529       coord = lp_build_max(coord_bld, coord, coord_bld->zero);
530       *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
531       /* convert to int, compute lerp weight */
532       lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
533       /* coord1 = min(coord1, length-1) */
534       *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
535       *coord1 = lp_build_itrunc(coord_bld, *coord1);
536       break;
537    default:
538       assert(0);
539       *coord0 = int_coord_bld->zero;
540       *coord1 = int_coord_bld->zero;
541       *weight = coord_bld->zero;
542       break;
543    }
544    *weight = lp_build_mul_imm(coord_bld, *weight, 256);
545    *weight = lp_build_itrunc(coord_bld, *weight);
546    return;
547 }
548 
549 
550 /**
551  * Fetch texels for image with nearest sampling.
552  * Return filtered color as two vectors of 16-bit fixed point values.
553  */
554 static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset,LLVMValueRef x_subcoord,LLVMValueRef y_subcoord,LLVMValueRef * colors)555 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
556                                     LLVMValueRef data_ptr,
557                                     LLVMValueRef offset,
558                                     LLVMValueRef x_subcoord,
559                                     LLVMValueRef y_subcoord,
560                                     LLVMValueRef *colors)
561 {
562    /*
563     * Fetch the pixels as 4 x 32bit (rgba order might differ):
564     *
565     *   rgba0 rgba1 rgba2 rgba3
566     *
567     * bit cast them into 16 x u8
568     *
569     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
570     *
571     * unpack them into two 8 x i16:
572     *
573     *   r0 g0 b0 a0 r1 g1 b1 a1
574     *   r2 g2 b2 a2 r3 g3 b3 a3
575     *
576     * The higher 8 bits of the resulting elements will be zero.
577     */
578    LLVMBuilderRef builder = bld->gallivm->builder;
579    LLVMValueRef rgba8;
580    struct lp_build_context u8n;
581    LLVMTypeRef u8n_vec_type;
582    struct lp_type fetch_type;
583 
584    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
585    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
586 
587    fetch_type = lp_type_uint(bld->texel_type.width);
588    if (util_format_is_rgba8_variant(bld->format_desc)) {
589       /*
590        * Given the format is a rgba8, just read the pixels as is,
591        * without any swizzling. Swizzling will be done later.
592        */
593       rgba8 = lp_build_gather(bld->gallivm,
594                               bld->texel_type.length,
595                               bld->format_desc->block.bits,
596                               fetch_type,
597                               TRUE,
598                               data_ptr, offset, TRUE);
599 
600       rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
601    }
602    else {
603       rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
604                                       bld->format_desc,
605                                       u8n.type,
606                                       TRUE,
607                                       data_ptr, offset,
608                                       x_subcoord,
609                                       y_subcoord,
610                                       bld->cache);
611    }
612 
613    *colors = rgba8;
614 }
615 
616 
617 /**
618  * Sample a single texture image with nearest sampling.
619  * If sampling a cube texture, r = cube face in [0,5].
620  * Return filtered color as two vectors of 16-bit fixed point values.
621  */
622 static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)623 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
624                               LLVMValueRef int_size,
625                               LLVMValueRef row_stride_vec,
626                               LLVMValueRef img_stride_vec,
627                               LLVMValueRef data_ptr,
628                               LLVMValueRef mipoffsets,
629                               LLVMValueRef s,
630                               LLVMValueRef t,
631                               LLVMValueRef r,
632                               const LLVMValueRef *offsets,
633                               LLVMValueRef *colors)
634 {
635    const unsigned dims = bld->dims;
636    struct lp_build_context i32;
637    LLVMValueRef width_vec, height_vec, depth_vec;
638    LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
639    LLVMValueRef s_float, t_float = NULL, r_float = NULL;
640    LLVMValueRef x_stride;
641    LLVMValueRef x_offset, offset;
642    LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
643 
644    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
645 
646    lp_build_extract_image_sizes(bld,
647                                 &bld->int_size_bld,
648                                 bld->int_coord_type,
649                                 int_size,
650                                 &width_vec,
651                                 &height_vec,
652                                 &depth_vec);
653 
654    s_float = s; t_float = t; r_float = r;
655 
656    if (bld->static_sampler_state->normalized_coords) {
657       LLVMValueRef flt_size;
658 
659       flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
660 
661       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
662    }
663 
664    /* convert float to int */
665    /* For correct rounding, need floor, not truncation here.
666     * Note that in some cases (clamp to edge, no texel offsets) we
667     * could use a non-signed build context which would help archs
668     * greatly which don't have arch rounding.
669     */
670    s_ipart = lp_build_ifloor(&bld->coord_bld, s);
671    if (dims >= 2)
672       t_ipart = lp_build_ifloor(&bld->coord_bld, t);
673    if (dims >= 3)
674       r_ipart = lp_build_ifloor(&bld->coord_bld, r);
675 
676    /* add texel offsets */
677    if (offsets[0]) {
678       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
679       if (dims >= 2) {
680          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
681          if (dims >= 3) {
682             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
683          }
684       }
685    }
686 
687    /* get pixel, row, image strides */
688    x_stride = lp_build_const_vec(bld->gallivm,
689                                  bld->int_coord_bld.type,
690                                  bld->format_desc->block.bits/8);
691 
692    /* Do texcoord wrapping, compute texel offset */
693    lp_build_sample_wrap_nearest_int(bld,
694                                     bld->format_desc->block.width,
695                                     s_ipart, s_float,
696                                     width_vec, x_stride, offsets[0],
697                                     bld->static_texture_state->pot_width,
698                                     bld->static_sampler_state->wrap_s,
699                                     &x_offset, &x_subcoord);
700    offset = x_offset;
701    if (dims >= 2) {
702       LLVMValueRef y_offset;
703       lp_build_sample_wrap_nearest_int(bld,
704                                        bld->format_desc->block.height,
705                                        t_ipart, t_float,
706                                        height_vec, row_stride_vec, offsets[1],
707                                        bld->static_texture_state->pot_height,
708                                        bld->static_sampler_state->wrap_t,
709                                        &y_offset, &y_subcoord);
710       offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
711       if (dims >= 3) {
712          LLVMValueRef z_offset;
713          lp_build_sample_wrap_nearest_int(bld,
714                                           1, /* block length (depth) */
715                                           r_ipart, r_float,
716                                           depth_vec, img_stride_vec, offsets[2],
717                                           bld->static_texture_state->pot_depth,
718                                           bld->static_sampler_state->wrap_r,
719                                           &z_offset, &z_subcoord);
720          offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
721       }
722    }
723    if (has_layer_coord(bld->static_texture_state->target)) {
724       LLVMValueRef z_offset;
725       /* The r coord is the cube face in [0,5] or array layer */
726       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
727       offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
728    }
729    if (mipoffsets) {
730       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
731    }
732 
733    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
734                                        x_subcoord, y_subcoord,
735                                        colors);
736 }
737 
738 
739 /**
740  * Sample a single texture image with nearest sampling.
741  * If sampling a cube texture, r = cube face in [0,5].
742  * Return filtered color as two vectors of 16-bit fixed point values.
743  * Does address calcs (except offsets) with floats.
744  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
745  */
746 static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)747 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
748                                      LLVMValueRef int_size,
749                                      LLVMValueRef row_stride_vec,
750                                      LLVMValueRef img_stride_vec,
751                                      LLVMValueRef data_ptr,
752                                      LLVMValueRef mipoffsets,
753                                      LLVMValueRef s,
754                                      LLVMValueRef t,
755                                      LLVMValueRef r,
756                                      const LLVMValueRef *offsets,
757                                      LLVMValueRef *colors)
758    {
759    const unsigned dims = bld->dims;
760    LLVMValueRef width_vec, height_vec, depth_vec;
761    LLVMValueRef offset;
762    LLVMValueRef x_subcoord, y_subcoord;
763    LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
764    LLVMValueRef flt_size;
765 
766    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
767 
768    lp_build_extract_image_sizes(bld,
769                                 &bld->float_size_bld,
770                                 bld->coord_type,
771                                 flt_size,
772                                 &width_vec,
773                                 &height_vec,
774                                 &depth_vec);
775 
776    /* Do texcoord wrapping */
777    lp_build_sample_wrap_nearest_float(bld,
778                                       s, width_vec, offsets[0],
779                                       bld->static_texture_state->pot_width,
780                                       bld->static_sampler_state->wrap_s,
781                                       &x_icoord);
782 
783    if (dims >= 2) {
784       lp_build_sample_wrap_nearest_float(bld,
785                                          t, height_vec, offsets[1],
786                                          bld->static_texture_state->pot_height,
787                                          bld->static_sampler_state->wrap_t,
788                                          &y_icoord);
789 
790       if (dims >= 3) {
791          lp_build_sample_wrap_nearest_float(bld,
792                                             r, depth_vec, offsets[2],
793                                             bld->static_texture_state->pot_depth,
794                                             bld->static_sampler_state->wrap_r,
795                                             &z_icoord);
796       }
797    }
798    if (has_layer_coord(bld->static_texture_state->target)) {
799       z_icoord = r;
800    }
801 
802    /*
803     * From here on we deal with ints, and we should split up the 256bit
804     * vectors manually for better generated code.
805     */
806 
807    /*
808     * compute texel offsets -
809     * cannot do offset calc with floats, difficult for block-based formats,
810     * and not enough precision anyway.
811     */
812    lp_build_sample_offset(&bld->int_coord_bld,
813                           bld->format_desc,
814                           x_icoord, y_icoord,
815                           z_icoord,
816                           row_stride_vec, img_stride_vec,
817                           &offset,
818                           &x_subcoord, &y_subcoord);
819    if (mipoffsets) {
820       offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
821    }
822 
823    lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
824                                        x_subcoord, y_subcoord,
825                                        colors);
826 }
827 
828 
829 /**
830  * Fetch texels for image with linear sampling.
831  * Return filtered color as two vectors of 16-bit fixed point values.
832  */
833 static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset[2][2][2],LLVMValueRef x_subcoord[2],LLVMValueRef y_subcoord[2],LLVMValueRef s_fpart,LLVMValueRef t_fpart,LLVMValueRef r_fpart,LLVMValueRef * colors)834 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
835                                    LLVMValueRef data_ptr,
836                                    LLVMValueRef offset[2][2][2],
837                                    LLVMValueRef x_subcoord[2],
838                                    LLVMValueRef y_subcoord[2],
839                                    LLVMValueRef s_fpart,
840                                    LLVMValueRef t_fpart,
841                                    LLVMValueRef r_fpart,
842                                    LLVMValueRef *colors)
843 {
844    const unsigned dims = bld->dims;
845    LLVMBuilderRef builder = bld->gallivm->builder;
846    struct lp_build_context u8n;
847    LLVMTypeRef u8n_vec_type;
848    LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
849    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
850    LLVMValueRef shuffle;
851    LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
852    LLVMValueRef packed;
853    unsigned i, j, k;
854    unsigned numj, numk;
855 
856    lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
857    u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
858 
859    /*
860     * Transform 4 x i32 in
861     *
862     *   s_fpart = {s0, s1, s2, s3}
863     *
864     * where each value is between 0 and 0xff,
865     *
866     * into one 16 x i20
867     *
868     *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
869     *
870     * and likewise for t_fpart. There is no risk of loosing precision here
871     * since the fractional parts only use the lower 8bits.
872     */
873    s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
874    if (dims >= 2)
875       t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
876    if (dims >= 3)
877       r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
878 
879    for (j = 0; j < u8n.type.length; j += 4) {
880 #ifdef PIPE_ARCH_LITTLE_ENDIAN
881       unsigned subindex = 0;
882 #else
883       unsigned subindex = 3;
884 #endif
885       LLVMValueRef index;
886 
887       index = LLVMConstInt(elem_type, j + subindex, 0);
888       for (i = 0; i < 4; ++i)
889          shuffles[j + i] = index;
890    }
891 
892    shuffle = LLVMConstVector(shuffles, u8n.type.length);
893 
894    s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
895                                     shuffle, "");
896    if (dims >= 2) {
897       t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
898                                        shuffle, "");
899    }
900    if (dims >= 3) {
901       r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
902                                        shuffle, "");
903    }
904 
905    /*
906     * Fetch the pixels as 4 x 32bit (rgba order might differ):
907     *
908     *   rgba0 rgba1 rgba2 rgba3
909     *
910     * bit cast them into 16 x u8
911     *
912     *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
913     *
914     * unpack them into two 8 x i16:
915     *
916     *   r0 g0 b0 a0 r1 g1 b1 a1
917     *   r2 g2 b2 a2 r3 g3 b3 a3
918     *
919     * The higher 8 bits of the resulting elements will be zero.
920     */
921    numj = 1 + (dims >= 2);
922    numk = 1 + (dims >= 3);
923 
924    for (k = 0; k < numk; k++) {
925       for (j = 0; j < numj; j++) {
926          for (i = 0; i < 2; i++) {
927             LLVMValueRef rgba8;
928 
929             if (util_format_is_rgba8_variant(bld->format_desc)) {
930                struct lp_type fetch_type;
931                /*
932                 * Given the format is a rgba8, just read the pixels as is,
933                 * without any swizzling. Swizzling will be done later.
934                 */
935                fetch_type = lp_type_uint(bld->texel_type.width);
936                rgba8 = lp_build_gather(bld->gallivm,
937                                        bld->texel_type.length,
938                                        bld->format_desc->block.bits,
939                                        fetch_type,
940                                        TRUE,
941                                        data_ptr, offset[k][j][i], TRUE);
942 
943                rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
944             }
945             else {
946                rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
947                                                bld->format_desc,
948                                                u8n.type,
949                                                TRUE,
950                                                data_ptr, offset[k][j][i],
951                                                x_subcoord[i],
952                                                y_subcoord[j],
953                                                bld->cache);
954             }
955 
956             neighbors[k][j][i] = rgba8;
957          }
958       }
959    }
960 
961    /*
962     * Linear interpolation with 8.8 fixed point.
963     */
964    if (bld->static_sampler_state->force_nearest_s) {
965       /* special case 1-D lerp */
966       packed = lp_build_lerp(&u8n,
967                              t_fpart,
968                              neighbors[0][0][0],
969                              neighbors[0][0][1],
970                              LP_BLD_LERP_PRESCALED_WEIGHTS);
971    }
972    else if (bld->static_sampler_state->force_nearest_t) {
973       /* special case 1-D lerp */
974       packed = lp_build_lerp(&u8n,
975                              s_fpart,
976                              neighbors[0][0][0],
977                              neighbors[0][0][1],
978                              LP_BLD_LERP_PRESCALED_WEIGHTS);
979    }
980    else {
981       /* general 1/2/3-D lerping */
982       if (dims == 1) {
983          packed = lp_build_lerp(&u8n,
984                                 s_fpart,
985                                 neighbors[0][0][0],
986                                 neighbors[0][0][1],
987                                 LP_BLD_LERP_PRESCALED_WEIGHTS);
988       } else if (dims == 2) {
989          /* 2-D lerp */
990          packed = lp_build_lerp_2d(&u8n,
991                                    s_fpart, t_fpart,
992                                    neighbors[0][0][0],
993                                    neighbors[0][0][1],
994                                    neighbors[0][1][0],
995                                    neighbors[0][1][1],
996                                    LP_BLD_LERP_PRESCALED_WEIGHTS);
997       } else {
998          /* 3-D lerp */
999          assert(dims == 3);
1000          packed = lp_build_lerp_3d(&u8n,
1001                                    s_fpart, t_fpart, r_fpart,
1002                                    neighbors[0][0][0],
1003                                    neighbors[0][0][1],
1004                                    neighbors[0][1][0],
1005                                    neighbors[0][1][1],
1006                                    neighbors[1][0][0],
1007                                    neighbors[1][0][1],
1008                                    neighbors[1][1][0],
1009                                    neighbors[1][1][1],
1010                                    LP_BLD_LERP_PRESCALED_WEIGHTS);
1011       }
1012    }
1013 
1014    *colors = packed;
1015 }
1016 
1017 /**
1018  * Sample a single texture image with (bi-)(tri-)linear sampling.
1019  * Return filtered color as two vectors of 16-bit fixed point values.
1020  */
1021 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)1022 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1023                              LLVMValueRef int_size,
1024                              LLVMValueRef row_stride_vec,
1025                              LLVMValueRef img_stride_vec,
1026                              LLVMValueRef data_ptr,
1027                              LLVMValueRef mipoffsets,
1028                              LLVMValueRef s,
1029                              LLVMValueRef t,
1030                              LLVMValueRef r,
1031                              const LLVMValueRef *offsets,
1032                              LLVMValueRef *colors)
1033 {
1034    const unsigned dims = bld->dims;
1035    LLVMBuilderRef builder = bld->gallivm->builder;
1036    struct lp_build_context i32;
1037    LLVMValueRef i32_c8, i32_c128, i32_c255;
1038    LLVMValueRef width_vec, height_vec, depth_vec;
1039    LLVMValueRef s_ipart, s_fpart, s_float;
1040    LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
1041    LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
1042    LLVMValueRef x_stride, y_stride, z_stride;
1043    LLVMValueRef x_offset0, x_offset1;
1044    LLVMValueRef y_offset0, y_offset1;
1045    LLVMValueRef z_offset0, z_offset1;
1046    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
1047    LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
1048    unsigned x, y, z;
1049 
1050    lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
1051 
1052    lp_build_extract_image_sizes(bld,
1053                                 &bld->int_size_bld,
1054                                 bld->int_coord_type,
1055                                 int_size,
1056                                 &width_vec,
1057                                 &height_vec,
1058                                 &depth_vec);
1059 
1060    s_float = s; t_float = t; r_float = r;
1061 
1062    if (bld->static_sampler_state->normalized_coords) {
1063       LLVMValueRef scaled_size;
1064       LLVMValueRef flt_size;
1065 
1066       /* scale size by 256 (8 fractional bits) */
1067       scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
1068 
1069       flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
1070 
1071       lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
1072    }
1073    else {
1074       /* scale coords by 256 (8 fractional bits) */
1075       s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1076       if (dims >= 2)
1077          t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1078       if (dims >= 3)
1079          r = lp_build_mul_imm(&bld->coord_bld, r, 256);
1080    }
1081 
1082    /* convert float to int */
1083    /* For correct rounding, need round to nearest, not truncation here.
1084     * Note that in some cases (clamp to edge, no texel offsets) we
1085     * could use a non-signed build context which would help archs which
1086     * don't have fptosi intrinsic with nearest rounding implemented.
1087     */
1088    s = lp_build_iround(&bld->coord_bld, s);
1089    if (dims >= 2)
1090       t = lp_build_iround(&bld->coord_bld, t);
1091    if (dims >= 3)
1092       r = lp_build_iround(&bld->coord_bld, r);
1093 
1094    /* subtract 0.5 (add -128) */
1095    i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
1096    if (!bld->static_sampler_state->force_nearest_s) {
1097       s = LLVMBuildAdd(builder, s, i32_c128, "");
1098    }
1099    if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
1100       t = LLVMBuildAdd(builder, t, i32_c128, "");
1101    }
1102    if (dims >= 3) {
1103       r = LLVMBuildAdd(builder, r, i32_c128, "");
1104    }
1105 
1106    /* compute floor (shift right 8) */
1107    i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
1108    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1109    if (dims >= 2)
1110       t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1111    if (dims >= 3)
1112       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
1113 
1114    /* add texel offsets */
1115    if (offsets[0]) {
1116       s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
1117       if (dims >= 2) {
1118          t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
1119          if (dims >= 3) {
1120             r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
1121          }
1122       }
1123    }
1124 
1125    /* compute fractional part (AND with 0xff) */
1126    i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
1127    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1128    if (dims >= 2)
1129       t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1130    if (dims >= 3)
1131       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
1132 
1133    /* get pixel, row and image strides */
1134    x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
1135                                  bld->format_desc->block.bits/8);
1136    y_stride = row_stride_vec;
1137    z_stride = img_stride_vec;
1138 
1139    /* do texcoord wrapping and compute texel offsets */
1140    lp_build_sample_wrap_linear_int(bld,
1141                                    bld->format_desc->block.width,
1142                                    s_ipart, &s_fpart, s_float,
1143                                    width_vec, x_stride, offsets[0],
1144                                    bld->static_texture_state->pot_width,
1145                                    bld->static_sampler_state->wrap_s,
1146                                    &x_offset0, &x_offset1,
1147                                    &x_subcoord[0], &x_subcoord[1]);
1148 
1149    /* add potential cube/array/mip offsets now as they are constant per pixel */
1150    if (has_layer_coord(bld->static_texture_state->target)) {
1151       LLVMValueRef z_offset;
1152       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1153       /* The r coord is the cube face in [0,5] or array layer */
1154       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
1155       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
1156    }
1157    if (mipoffsets) {
1158       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
1159       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
1160    }
1161 
1162    for (z = 0; z < 2; z++) {
1163       for (y = 0; y < 2; y++) {
1164          offset[z][y][0] = x_offset0;
1165          offset[z][y][1] = x_offset1;
1166       }
1167    }
1168 
1169    if (dims >= 2) {
1170       lp_build_sample_wrap_linear_int(bld,
1171                                       bld->format_desc->block.height,
1172                                       t_ipart, &t_fpart, t_float,
1173                                       height_vec, y_stride, offsets[1],
1174                                       bld->static_texture_state->pot_height,
1175                                       bld->static_sampler_state->wrap_t,
1176                                       &y_offset0, &y_offset1,
1177                                       &y_subcoord[0], &y_subcoord[1]);
1178 
1179       for (z = 0; z < 2; z++) {
1180          for (x = 0; x < 2; x++) {
1181             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1182                                            offset[z][0][x], y_offset0);
1183             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1184                                            offset[z][1][x], y_offset1);
1185          }
1186       }
1187    }
1188 
1189    if (dims >= 3) {
1190       lp_build_sample_wrap_linear_int(bld,
1191                                       1, /* block length (depth) */
1192                                       r_ipart, &r_fpart, r_float,
1193                                       depth_vec, z_stride, offsets[2],
1194                                       bld->static_texture_state->pot_depth,
1195                                       bld->static_sampler_state->wrap_r,
1196                                       &z_offset0, &z_offset1,
1197                                       &z_subcoord[0], &z_subcoord[1]);
1198       for (y = 0; y < 2; y++) {
1199          for (x = 0; x < 2; x++) {
1200             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1201                                            offset[0][y][x], z_offset0);
1202             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1203                                            offset[1][y][x], z_offset1);
1204          }
1205       }
1206    }
1207 
1208    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1209                                       x_subcoord, y_subcoord,
1210                                       s_fpart, t_fpart, r_fpart,
1211                                       colors);
1212 }
1213 
1214 
1215 /**
1216  * Sample a single texture image with (bi-)(tri-)linear sampling.
1217  * Return filtered color as two vectors of 16-bit fixed point values.
1218  * Does address calcs (except offsets) with floats.
1219  * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1220  */
1221 static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)1222 lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
1223                                     LLVMValueRef int_size,
1224                                     LLVMValueRef row_stride_vec,
1225                                     LLVMValueRef img_stride_vec,
1226                                     LLVMValueRef data_ptr,
1227                                     LLVMValueRef mipoffsets,
1228                                     LLVMValueRef s,
1229                                     LLVMValueRef t,
1230                                     LLVMValueRef r,
1231                                     const LLVMValueRef *offsets,
1232                                     LLVMValueRef *colors)
1233 {
1234    const unsigned dims = bld->dims;
1235    LLVMValueRef width_vec, height_vec, depth_vec;
1236    LLVMValueRef s_fpart;
1237    LLVMValueRef t_fpart = NULL;
1238    LLVMValueRef r_fpart = NULL;
1239    LLVMValueRef x_stride, y_stride, z_stride;
1240    LLVMValueRef x_offset0, x_offset1;
1241    LLVMValueRef y_offset0, y_offset1;
1242    LLVMValueRef z_offset0, z_offset1;
1243    LLVMValueRef offset[2][2][2]; /* [z][y][x] */
1244    LLVMValueRef x_subcoord[2], y_subcoord[2];
1245    LLVMValueRef flt_size;
1246    LLVMValueRef x_icoord0, x_icoord1;
1247    LLVMValueRef y_icoord0, y_icoord1;
1248    LLVMValueRef z_icoord0, z_icoord1;
1249    unsigned x, y, z;
1250 
1251    flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
1252 
1253    lp_build_extract_image_sizes(bld,
1254                                 &bld->float_size_bld,
1255                                 bld->coord_type,
1256                                 flt_size,
1257                                 &width_vec,
1258                                 &height_vec,
1259                                 &depth_vec);
1260 
1261    /* do texcoord wrapping and compute texel offsets */
1262    lp_build_sample_wrap_linear_float(bld,
1263                                      bld->format_desc->block.width,
1264                                      s, width_vec, offsets[0],
1265                                      bld->static_texture_state->pot_width,
1266                                      bld->static_sampler_state->wrap_s,
1267                                      &x_icoord0, &x_icoord1,
1268                                      &s_fpart,
1269                                      bld->static_sampler_state->force_nearest_s);
1270 
1271    if (dims >= 2) {
1272       lp_build_sample_wrap_linear_float(bld,
1273                                         bld->format_desc->block.height,
1274                                         t, height_vec, offsets[1],
1275                                         bld->static_texture_state->pot_height,
1276                                         bld->static_sampler_state->wrap_t,
1277                                         &y_icoord0, &y_icoord1,
1278                                         &t_fpart,
1279                                         bld->static_sampler_state->force_nearest_t);
1280 
1281       if (dims >= 3) {
1282          lp_build_sample_wrap_linear_float(bld,
1283                                            1, /* block length (depth) */
1284                                            r, depth_vec, offsets[2],
1285                                            bld->static_texture_state->pot_depth,
1286                                            bld->static_sampler_state->wrap_r,
1287                                            &z_icoord0, &z_icoord1,
1288                                            &r_fpart, 0);
1289       }
1290    }
1291 
1292    /*
1293     * From here on we deal with ints, and we should split up the 256bit
1294     * vectors manually for better generated code.
1295     */
1296 
1297    /* get pixel, row and image strides */
1298    x_stride = lp_build_const_vec(bld->gallivm,
1299                                  bld->int_coord_bld.type,
1300                                  bld->format_desc->block.bits/8);
1301    y_stride = row_stride_vec;
1302    z_stride = img_stride_vec;
1303 
1304    /*
1305     * compute texel offset -
1306     * cannot do offset calc with floats, difficult for block-based formats,
1307     * and not enough precision anyway.
1308     */
1309    lp_build_sample_partial_offset(&bld->int_coord_bld,
1310                                   bld->format_desc->block.width,
1311                                   x_icoord0, x_stride,
1312                                   &x_offset0, &x_subcoord[0]);
1313    lp_build_sample_partial_offset(&bld->int_coord_bld,
1314                                   bld->format_desc->block.width,
1315                                   x_icoord1, x_stride,
1316                                   &x_offset1, &x_subcoord[1]);
1317 
1318    /* add potential cube/array/mip offsets now as they are constant per pixel */
1319    if (has_layer_coord(bld->static_texture_state->target)) {
1320       LLVMValueRef z_offset;
1321       z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
1322       /* The r coord is the cube face in [0,5] or array layer */
1323       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
1324       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
1325    }
1326    if (mipoffsets) {
1327       x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
1328       x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
1329    }
1330 
1331    for (z = 0; z < 2; z++) {
1332       for (y = 0; y < 2; y++) {
1333          offset[z][y][0] = x_offset0;
1334          offset[z][y][1] = x_offset1;
1335       }
1336    }
1337 
1338    if (dims >= 2) {
1339       lp_build_sample_partial_offset(&bld->int_coord_bld,
1340                                      bld->format_desc->block.height,
1341                                      y_icoord0, y_stride,
1342                                      &y_offset0, &y_subcoord[0]);
1343       lp_build_sample_partial_offset(&bld->int_coord_bld,
1344                                      bld->format_desc->block.height,
1345                                      y_icoord1, y_stride,
1346                                      &y_offset1, &y_subcoord[1]);
1347       for (z = 0; z < 2; z++) {
1348          for (x = 0; x < 2; x++) {
1349             offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
1350                                            offset[z][0][x], y_offset0);
1351             offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
1352                                            offset[z][1][x], y_offset1);
1353          }
1354       }
1355    }
1356 
1357    if (dims >= 3) {
1358       LLVMValueRef z_subcoord[2];
1359       lp_build_sample_partial_offset(&bld->int_coord_bld,
1360                                      1,
1361                                      z_icoord0, z_stride,
1362                                      &z_offset0, &z_subcoord[0]);
1363       lp_build_sample_partial_offset(&bld->int_coord_bld,
1364                                      1,
1365                                      z_icoord1, z_stride,
1366                                      &z_offset1, &z_subcoord[1]);
1367       for (y = 0; y < 2; y++) {
1368          for (x = 0; x < 2; x++) {
1369             offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
1370                                            offset[0][y][x], z_offset0);
1371             offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
1372                                            offset[1][y][x], z_offset1);
1373          }
1374       }
1375    }
1376 
1377    lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
1378                                       x_subcoord, y_subcoord,
1379                                       s_fpart, t_fpart, r_fpart,
1380                                       colors);
1381 }
1382 
1383 
1384 /**
1385  * Sample the texture/mipmap using given image filter and mip filter.
1386  * data0_ptr and data1_ptr point to the two mipmap levels to sample
1387  * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1388  * If we're using nearest miplevel sampling the '1' values will be null/unused.
1389  */
1390 static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef colors_var)1391 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1392                        unsigned img_filter,
1393                        unsigned mip_filter,
1394                        LLVMValueRef s,
1395                        LLVMValueRef t,
1396                        LLVMValueRef r,
1397                        const LLVMValueRef *offsets,
1398                        LLVMValueRef ilevel0,
1399                        LLVMValueRef ilevel1,
1400                        LLVMValueRef lod_fpart,
1401                        LLVMValueRef colors_var)
1402 {
1403    LLVMBuilderRef builder = bld->gallivm->builder;
1404    LLVMValueRef size0;
1405    LLVMValueRef size1;
1406    LLVMValueRef row_stride0_vec = NULL;
1407    LLVMValueRef row_stride1_vec = NULL;
1408    LLVMValueRef img_stride0_vec = NULL;
1409    LLVMValueRef img_stride1_vec = NULL;
1410    LLVMValueRef data_ptr0;
1411    LLVMValueRef data_ptr1;
1412    LLVMValueRef mipoff0 = NULL;
1413    LLVMValueRef mipoff1 = NULL;
1414    LLVMValueRef colors0;
1415    LLVMValueRef colors1;
1416    boolean use_floats = util_cpu_caps.has_avx &&
1417                         !util_cpu_caps.has_avx2 &&
1418                         bld->coord_type.length > 4;
1419 
1420    /* sample the first mipmap level */
1421    lp_build_mipmap_level_sizes(bld, ilevel0,
1422                                &size0,
1423                                &row_stride0_vec, &img_stride0_vec);
1424    if (bld->num_mips == 1) {
1425       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1426    }
1427    else {
1428       /* This path should work for num_lods 1 too but slightly less efficient */
1429       data_ptr0 = bld->base_ptr;
1430       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1431    }
1432 
1433    if (use_floats) {
1434       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1435          lp_build_sample_image_nearest_afloat(bld,
1436                                               size0,
1437                                               row_stride0_vec, img_stride0_vec,
1438                                               data_ptr0, mipoff0, s, t, r, offsets,
1439                                               &colors0);
1440       }
1441       else {
1442          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1443          lp_build_sample_image_linear_afloat(bld,
1444                                              size0,
1445                                              row_stride0_vec, img_stride0_vec,
1446                                              data_ptr0, mipoff0, s, t, r, offsets,
1447                                              &colors0);
1448       }
1449    }
1450    else {
1451       if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1452          lp_build_sample_image_nearest(bld,
1453                                        size0,
1454                                        row_stride0_vec, img_stride0_vec,
1455                                        data_ptr0, mipoff0, s, t, r, offsets,
1456                                        &colors0);
1457       }
1458       else {
1459          assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1460          lp_build_sample_image_linear(bld,
1461                                       size0,
1462                                       row_stride0_vec, img_stride0_vec,
1463                                       data_ptr0, mipoff0, s, t, r, offsets,
1464                                       &colors0);
1465       }
1466    }
1467 
1468    /* Store the first level's colors in the output variables */
1469    LLVMBuildStore(builder, colors0, colors_var);
1470 
1471    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1472       LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
1473                                                      bld->lodf_bld.type, 256.0);
1474       LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
1475       struct lp_build_if_state if_ctx;
1476       LLVMValueRef need_lerp;
1477       unsigned num_quads = bld->coord_bld.type.length / 4;
1478       unsigned i;
1479 
1480       lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1481       lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1482 
1483       /* need_lerp = lod_fpart > 0 */
1484       if (bld->num_lods == 1) {
1485          need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1486                                    lod_fpart, bld->lodi_bld.zero,
1487                                    "need_lerp");
1488       }
1489       else {
1490          /*
1491           * We'll do mip filtering if any of the quads need it.
1492           * It might be better to split the vectors here and only fetch/filter
1493           * quads which need it.
1494           */
1495          /*
1496           * We need to clamp lod_fpart here since we can get negative
1497           * values which would screw up filtering if not all
1498           * lod_fpart values have same sign.
1499           * We can however then skip the greater than comparison.
1500           */
1501          lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
1502                                   bld->lodi_bld.zero);
1503          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
1504       }
1505 
1506       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1507       {
1508          struct lp_build_context u8n_bld;
1509 
1510          lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1511 
1512          /* sample the second mipmap level */
1513          lp_build_mipmap_level_sizes(bld, ilevel1,
1514                                      &size1,
1515                                      &row_stride1_vec, &img_stride1_vec);
1516          if (bld->num_mips == 1) {
1517             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1518          }
1519          else {
1520             data_ptr1 = bld->base_ptr;
1521             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1522          }
1523 
1524          if (use_floats) {
1525             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1526                lp_build_sample_image_nearest_afloat(bld,
1527                                                     size1,
1528                                                     row_stride1_vec, img_stride1_vec,
1529                                                     data_ptr1, mipoff1, s, t, r, offsets,
1530                                                     &colors1);
1531             }
1532             else {
1533                lp_build_sample_image_linear_afloat(bld,
1534                                                    size1,
1535                                                    row_stride1_vec, img_stride1_vec,
1536                                                    data_ptr1, mipoff1, s, t, r, offsets,
1537                                                    &colors1);
1538             }
1539          }
1540          else {
1541             if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1542                lp_build_sample_image_nearest(bld,
1543                                              size1,
1544                                              row_stride1_vec, img_stride1_vec,
1545                                              data_ptr1, mipoff1, s, t, r, offsets,
1546                                              &colors1);
1547             }
1548             else {
1549                lp_build_sample_image_linear(bld,
1550                                             size1,
1551                                             row_stride1_vec, img_stride1_vec,
1552                                             data_ptr1, mipoff1, s, t, r, offsets,
1553                                             &colors1);
1554             }
1555          }
1556 
1557          /* interpolate samples from the two mipmap levels */
1558 
1559          if (num_quads == 1 && bld->num_lods == 1) {
1560             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
1561             lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
1562          }
1563          else {
1564             unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
1565             LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
1566             LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
1567 
1568             /* Take the LSB of lod_fpart */
1569             lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
1570 
1571             /* Broadcast each lod weight into their respective channels */
1572             for (i = 0; i < u8n_bld.type.length; ++i) {
1573                shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
1574             }
1575             lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
1576                                                LLVMConstVector(shuffle, u8n_bld.type.length), "");
1577          }
1578 
1579          colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
1580                                  colors0, colors1,
1581                                  LP_BLD_LERP_PRESCALED_WEIGHTS);
1582 
1583          LLVMBuildStore(builder, colors0, colors_var);
1584       }
1585       lp_build_endif(&if_ctx);
1586    }
1587 }
1588 
1589 
1590 
1591 /**
1592  * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
1593  * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
1594  * but only limited texture coord wrap modes.
1595  */
1596 void
lp_build_sample_aos(struct lp_build_sample_context * bld,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef texel_out[4])1597 lp_build_sample_aos(struct lp_build_sample_context *bld,
1598                     unsigned sampler_unit,
1599                     LLVMValueRef s,
1600                     LLVMValueRef t,
1601                     LLVMValueRef r,
1602                     const LLVMValueRef *offsets,
1603                     LLVMValueRef lod_positive,
1604                     LLVMValueRef lod_fpart,
1605                     LLVMValueRef ilevel0,
1606                     LLVMValueRef ilevel1,
1607                     LLVMValueRef texel_out[4])
1608 {
1609    LLVMBuilderRef builder = bld->gallivm->builder;
1610    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1611    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1612    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1613    const unsigned dims = bld->dims;
1614    LLVMValueRef packed_var, packed;
1615    LLVMValueRef unswizzled[4];
1616    struct lp_build_context u8n_bld;
1617 
1618    /* we only support the common/simple wrap modes at this time */
1619    assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
1620    if (dims >= 2)
1621       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
1622    if (dims >= 3)
1623       assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
1624 
1625 
1626    /* make 8-bit unorm builder context */
1627    lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1628 
1629    /*
1630     * Get/interpolate texture colors.
1631     */
1632 
1633    packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
1634 
1635    if (min_filter == mag_filter) {
1636       /* no need to distinguish between minification and magnification */
1637       lp_build_sample_mipmap(bld,
1638                              min_filter, mip_filter,
1639                              s, t, r, offsets,
1640                              ilevel0, ilevel1, lod_fpart,
1641                              packed_var);
1642    }
1643    else {
1644       /* Emit conditional to choose min image filter or mag image filter
1645        * depending on the lod being > 0 or <= 0, respectively.
1646        */
1647       struct lp_build_if_state if_ctx;
1648 
1649       /*
1650        * FIXME this should take all lods into account, if some are min
1651        * some max probably could hack up the weights in the linear
1652        * path with selects to work for nearest.
1653        */
1654       if (bld->num_lods > 1)
1655          lod_positive = LLVMBuildExtractElement(builder, lod_positive,
1656                                                 lp_build_const_int32(bld->gallivm, 0), "");
1657 
1658       lod_positive = LLVMBuildTrunc(builder, lod_positive,
1659                                     LLVMInt1TypeInContext(bld->gallivm->context), "");
1660 
1661       lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1662       {
1663          /* Use the minification filter */
1664          lp_build_sample_mipmap(bld,
1665                                 min_filter, mip_filter,
1666                                 s, t, r, offsets,
1667                                 ilevel0, ilevel1, lod_fpart,
1668                                 packed_var);
1669       }
1670       lp_build_else(&if_ctx);
1671       {
1672          /* Use the magnification filter */
1673          lp_build_sample_mipmap(bld,
1674                                 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1675                                 s, t, r, offsets,
1676                                 ilevel0, NULL, NULL,
1677                                 packed_var);
1678       }
1679       lp_build_endif(&if_ctx);
1680    }
1681 
1682    packed = LLVMBuildLoad(builder, packed_var, "");
1683 
1684    /*
1685     * Convert to SoA and swizzle.
1686     */
1687    lp_build_rgba8_to_fi32_soa(bld->gallivm,
1688                              bld->texel_type,
1689                              packed, unswizzled);
1690 
1691    if (util_format_is_rgba8_variant(bld->format_desc)) {
1692       lp_build_format_swizzle_soa(bld->format_desc,
1693                                   &bld->texel_bld,
1694                                   unswizzled, texel_out);
1695    }
1696    else {
1697       texel_out[0] = unswizzled[0];
1698       texel_out[1] = unswizzled[1];
1699       texel_out[2] = unswizzled[2];
1700       texel_out[3] = unswizzled[3];
1701    }
1702 }
1703