1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/format/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60
61
62 /**
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length is the length of the pixel block along the
66 * coordinate axis
67 * \param coord the incoming texcoord (s,t or r) scaled to the texture size
68 * \param coord_f the incoming texcoord (s,t or r) as float vec
69 * \param length the texture size along one dimension
70 * \param stride pixel stride along the coordinate axis (in bytes)
71 * \param offset the texel offset along the coord axis
72 * \param is_pot if TRUE, length is a power of two
73 * \param wrap_mode one of PIPE_TEX_WRAP_x
74 * \param out_offset byte offset for the wrapped coordinate
75 * \param out_i resulting sub-block pixel coordinate for coord0
76 */
77 static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * out_offset,LLVMValueRef * out_i)78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
79 unsigned block_length,
80 LLVMValueRef coord,
81 LLVMValueRef coord_f,
82 LLVMValueRef length,
83 LLVMValueRef stride,
84 LLVMValueRef offset,
85 boolean is_pot,
86 unsigned wrap_mode,
87 LLVMValueRef *out_offset,
88 LLVMValueRef *out_i)
89 {
90 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91 LLVMBuilderRef builder = bld->gallivm->builder;
92 LLVMValueRef length_minus_one;
93
94 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
95
96 switch(wrap_mode) {
97 case PIPE_TEX_WRAP_REPEAT:
98 if(is_pot)
99 coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
100 else {
101 struct lp_build_context *coord_bld = &bld->coord_bld;
102 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
103 if (offset) {
104 offset = lp_build_int_to_float(coord_bld, offset);
105 offset = lp_build_div(coord_bld, offset, length_f);
106 coord_f = lp_build_add(coord_bld, coord_f, offset);
107 }
108 coord = lp_build_fract_safe(coord_bld, coord_f);
109 coord = lp_build_mul(coord_bld, coord, length_f);
110 coord = lp_build_itrunc(coord_bld, coord);
111 }
112 break;
113
114 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
115 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
116 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
117 break;
118
119 case PIPE_TEX_WRAP_CLAMP:
120 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
121 case PIPE_TEX_WRAP_MIRROR_REPEAT:
122 case PIPE_TEX_WRAP_MIRROR_CLAMP:
123 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
124 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
125 default:
126 assert(0);
127 }
128
129 lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
130 out_offset, out_i);
131 }
132
133
134 /**
135 * Helper to compute the first coord and the weight for
136 * linear wrap repeat npot textures
137 */
138 static void
lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_i)139 lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
140 LLVMValueRef coord_f,
141 LLVMValueRef length_i,
142 LLVMValueRef length_f,
143 LLVMValueRef *coord0_i,
144 LLVMValueRef *weight_i)
145 {
146 struct lp_build_context *coord_bld = &bld->coord_bld;
147 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
148 struct lp_build_context abs_coord_bld;
149 struct lp_type abs_type;
150 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
151 int_coord_bld->one);
152 LLVMValueRef mask, i32_c8, i32_c128, i32_c255;
153
154 /* wrap with normalized floats is just fract */
155 coord_f = lp_build_fract(coord_bld, coord_f);
156 /* mul by size */
157 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
158 /* convert to int, compute lerp weight */
159 coord_f = lp_build_mul_imm(&bld->coord_bld, coord_f, 256);
160
161 /* At this point we don't have any negative numbers so use non-signed
162 * build context which might help on some archs.
163 */
164 abs_type = coord_bld->type;
165 abs_type.sign = 0;
166 lp_build_context_init(&abs_coord_bld, bld->gallivm, abs_type);
167 *coord0_i = lp_build_iround(&abs_coord_bld, coord_f);
168
169 /* subtract 0.5 (add -128) */
170 i32_c128 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, -128);
171 *coord0_i = LLVMBuildAdd(bld->gallivm->builder, *coord0_i, i32_c128, "");
172
173 /* compute fractional part (AND with 0xff) */
174 i32_c255 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 255);
175 *weight_i = LLVMBuildAnd(bld->gallivm->builder, *coord0_i, i32_c255, "");
176
177 /* compute floor (shift right 8) */
178 i32_c8 = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 8);
179 *coord0_i = LLVMBuildAShr(bld->gallivm->builder, *coord0_i, i32_c8, "");
180 /*
181 * we avoided the 0.5/length division before the repeat wrap,
182 * now need to fix up edge cases with selects
183 */
184 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
185 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
186 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
187 /*
188 * We should never get values too large - except if coord was nan or inf,
189 * in which case things go terribly wrong...
190 * Alternatively, could use fract_safe above...
191 */
192 *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
193 }
194
195
196 /**
197 * Build LLVM code for texture coord wrapping, for linear filtering,
198 * for scaled integer texcoords.
199 * \param block_length is the length of the pixel block along the
200 * coordinate axis
201 * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size
202 * \param coord_f the incoming texcoord (s,t or r) as float vec
203 * \param length the texture size along one dimension
204 * \param stride pixel stride along the coordinate axis (in bytes)
205 * \param offset the texel offset along the coord axis
206 * \param is_pot if TRUE, length is a power of two
207 * \param wrap_mode one of PIPE_TEX_WRAP_x
208 * \param offset0 resulting relative offset for coord0
209 * \param offset1 resulting relative offset for coord0 + 1
210 * \param i0 resulting sub-block pixel coordinate for coord0
211 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
212 */
213 static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context * bld,unsigned block_length,LLVMValueRef coord0,LLVMValueRef * weight_i,LLVMValueRef coord_f,LLVMValueRef length,LLVMValueRef stride,LLVMValueRef offset,boolean is_pot,unsigned wrap_mode,LLVMValueRef * offset0,LLVMValueRef * offset1,LLVMValueRef * i0,LLVMValueRef * i1)214 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
215 unsigned block_length,
216 LLVMValueRef coord0,
217 LLVMValueRef *weight_i,
218 LLVMValueRef coord_f,
219 LLVMValueRef length,
220 LLVMValueRef stride,
221 LLVMValueRef offset,
222 boolean is_pot,
223 unsigned wrap_mode,
224 LLVMValueRef *offset0,
225 LLVMValueRef *offset1,
226 LLVMValueRef *i0,
227 LLVMValueRef *i1)
228 {
229 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
230 LLVMBuilderRef builder = bld->gallivm->builder;
231 LLVMValueRef length_minus_one;
232 LLVMValueRef lmask, umask, mask;
233
234 /*
235 * If the pixel block covers more than one pixel then there is no easy
236 * way to calculate offset1 relative to offset0. Instead, compute them
237 * independently. Otherwise, try to compute offset0 and offset1 with
238 * a single stride multiplication.
239 */
240
241 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
242
243 if (block_length != 1) {
244 LLVMValueRef coord1;
245 switch(wrap_mode) {
246 case PIPE_TEX_WRAP_REPEAT:
247 if (is_pot) {
248 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
249 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
250 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
251 }
252 else {
253 LLVMValueRef mask;
254 LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
255 if (offset) {
256 offset = lp_build_int_to_float(&bld->coord_bld, offset);
257 offset = lp_build_div(&bld->coord_bld, offset, length_f);
258 coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
259 }
260 lp_build_coord_repeat_npot_linear_int(bld, coord_f,
261 length, length_f,
262 &coord0, weight_i);
263 mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
264 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
265 coord1 = LLVMBuildAnd(builder,
266 lp_build_add(int_coord_bld, coord0,
267 int_coord_bld->one),
268 mask, "");
269 }
270 break;
271
272 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
273 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
274 coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
275 length_minus_one);
276 coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
277 length_minus_one);
278 break;
279
280 case PIPE_TEX_WRAP_CLAMP:
281 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
282 case PIPE_TEX_WRAP_MIRROR_REPEAT:
283 case PIPE_TEX_WRAP_MIRROR_CLAMP:
284 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
285 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
286 default:
287 assert(0);
288 coord0 = int_coord_bld->zero;
289 coord1 = int_coord_bld->zero;
290 break;
291 }
292 lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
293 offset0, i0);
294 lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
295 offset1, i1);
296 return;
297 }
298
299 *i0 = int_coord_bld->zero;
300 *i1 = int_coord_bld->zero;
301
302 switch(wrap_mode) {
303 case PIPE_TEX_WRAP_REPEAT:
304 if (is_pot) {
305 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
306 }
307 else {
308 LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
309 if (offset) {
310 offset = lp_build_int_to_float(&bld->coord_bld, offset);
311 offset = lp_build_div(&bld->coord_bld, offset, length_f);
312 coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
313 }
314 lp_build_coord_repeat_npot_linear_int(bld, coord_f,
315 length, length_f,
316 &coord0, weight_i);
317 }
318
319 mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
320 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
321
322 *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
323 *offset1 = LLVMBuildAnd(builder,
324 lp_build_add(int_coord_bld, *offset0, stride),
325 mask, "");
326 break;
327
328 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
329 /* XXX this might be slower than the separate path
330 * on some newer cpus. With sse41 this is 8 instructions vs. 7
331 * - at least on SNB this is almost certainly slower since
332 * min/max are cheaper than selects, and the muls aren't bad.
333 */
334 lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
335 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
336 umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
337 PIPE_FUNC_LESS, coord0, length_minus_one);
338
339 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
340 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
341
342 mask = LLVMBuildAnd(builder, lmask, umask, "");
343
344 *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
345 *offset1 = lp_build_add(int_coord_bld,
346 *offset0,
347 LLVMBuildAnd(builder, stride, mask, ""));
348 break;
349
350 case PIPE_TEX_WRAP_CLAMP:
351 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
352 case PIPE_TEX_WRAP_MIRROR_REPEAT:
353 case PIPE_TEX_WRAP_MIRROR_CLAMP:
354 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
355 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
356 default:
357 assert(0);
358 *offset0 = int_coord_bld->zero;
359 *offset1 = int_coord_bld->zero;
360 break;
361 }
362 }
363
364
365 /**
366 * Fetch texels for image with nearest sampling.
367 * Return filtered color as two vectors of 16-bit fixed point values.
368 */
369 static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset,LLVMValueRef x_subcoord,LLVMValueRef y_subcoord,LLVMValueRef * colors)370 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
371 LLVMValueRef data_ptr,
372 LLVMValueRef offset,
373 LLVMValueRef x_subcoord,
374 LLVMValueRef y_subcoord,
375 LLVMValueRef *colors)
376 {
377 /*
378 * Fetch the pixels as 4 x 32bit (rgba order might differ):
379 *
380 * rgba0 rgba1 rgba2 rgba3
381 *
382 * bit cast them into 16 x u8
383 *
384 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
385 *
386 * unpack them into two 8 x i16:
387 *
388 * r0 g0 b0 a0 r1 g1 b1 a1
389 * r2 g2 b2 a2 r3 g3 b3 a3
390 *
391 * The higher 8 bits of the resulting elements will be zero.
392 */
393 LLVMBuilderRef builder = bld->gallivm->builder;
394 LLVMValueRef rgba8;
395 struct lp_build_context u8n;
396 LLVMTypeRef u8n_vec_type;
397 struct lp_type fetch_type;
398
399 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
400 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
401
402 fetch_type = lp_type_uint(bld->texel_type.width);
403 if (util_format_is_rgba8_variant(bld->format_desc)) {
404 /*
405 * Given the format is a rgba8, just read the pixels as is,
406 * without any swizzling. Swizzling will be done later.
407 */
408 rgba8 = lp_build_gather(bld->gallivm,
409 bld->texel_type.length,
410 bld->format_desc->block.bits,
411 fetch_type,
412 TRUE,
413 data_ptr, offset, TRUE);
414
415 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
416 }
417 else {
418 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
419 bld->format_desc,
420 u8n.type,
421 TRUE,
422 data_ptr, offset,
423 x_subcoord,
424 y_subcoord,
425 bld->cache);
426 }
427
428 *colors = rgba8;
429 }
430
431
432 /**
433 * Sample a single texture image with nearest sampling.
434 * If sampling a cube texture, r = cube face in [0,5].
435 * Return filtered color as two vectors of 16-bit fixed point values.
436 */
437 static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)438 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
439 LLVMValueRef int_size,
440 LLVMValueRef row_stride_vec,
441 LLVMValueRef img_stride_vec,
442 LLVMValueRef data_ptr,
443 LLVMValueRef mipoffsets,
444 LLVMValueRef s,
445 LLVMValueRef t,
446 LLVMValueRef r,
447 const LLVMValueRef *offsets,
448 LLVMValueRef *colors)
449 {
450 const unsigned dims = bld->dims;
451 struct lp_build_context i32;
452 LLVMValueRef width_vec, height_vec, depth_vec;
453 LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
454 LLVMValueRef s_float, t_float = NULL, r_float = NULL;
455 LLVMValueRef x_stride;
456 LLVMValueRef x_offset, offset;
457 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
458
459 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
460
461 lp_build_extract_image_sizes(bld,
462 &bld->int_size_bld,
463 bld->int_coord_type,
464 int_size,
465 &width_vec,
466 &height_vec,
467 &depth_vec);
468
469 s_float = s; t_float = t; r_float = r;
470
471 if (bld->static_sampler_state->normalized_coords) {
472 LLVMValueRef flt_size;
473
474 flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
475
476 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
477 }
478
479 /* convert float to int */
480 /* For correct rounding, need floor, not truncation here.
481 * Note that in some cases (clamp to edge, no texel offsets) we
482 * could use a non-signed build context which would help archs
483 * greatly which don't have arch rounding.
484 */
485 s_ipart = lp_build_ifloor(&bld->coord_bld, s);
486 if (dims >= 2)
487 t_ipart = lp_build_ifloor(&bld->coord_bld, t);
488 if (dims >= 3)
489 r_ipart = lp_build_ifloor(&bld->coord_bld, r);
490
491 /* add texel offsets */
492 if (offsets[0]) {
493 s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
494 if (dims >= 2) {
495 t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
496 if (dims >= 3) {
497 r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
498 }
499 }
500 }
501
502 /* get pixel, row, image strides */
503 x_stride = lp_build_const_vec(bld->gallivm,
504 bld->int_coord_bld.type,
505 bld->format_desc->block.bits/8);
506
507 /* Do texcoord wrapping, compute texel offset */
508 lp_build_sample_wrap_nearest_int(bld,
509 bld->format_desc->block.width,
510 s_ipart, s_float,
511 width_vec, x_stride, offsets[0],
512 bld->static_texture_state->pot_width,
513 bld->static_sampler_state->wrap_s,
514 &x_offset, &x_subcoord);
515 offset = x_offset;
516 if (dims >= 2) {
517 LLVMValueRef y_offset;
518 lp_build_sample_wrap_nearest_int(bld,
519 bld->format_desc->block.height,
520 t_ipart, t_float,
521 height_vec, row_stride_vec, offsets[1],
522 bld->static_texture_state->pot_height,
523 bld->static_sampler_state->wrap_t,
524 &y_offset, &y_subcoord);
525 offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
526 if (dims >= 3) {
527 LLVMValueRef z_offset;
528 lp_build_sample_wrap_nearest_int(bld,
529 1, /* block length (depth) */
530 r_ipart, r_float,
531 depth_vec, img_stride_vec, offsets[2],
532 bld->static_texture_state->pot_depth,
533 bld->static_sampler_state->wrap_r,
534 &z_offset, &z_subcoord);
535 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
536 }
537 }
538 if (has_layer_coord(bld->static_texture_state->target)) {
539 LLVMValueRef z_offset;
540 /* The r coord is the cube face in [0,5] or array layer */
541 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
542 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
543 }
544 if (mipoffsets) {
545 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
546 }
547
548 lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
549 x_subcoord, y_subcoord,
550 colors);
551 }
552
553
554 /**
555 * Fetch texels for image with linear sampling.
556 * Return filtered color as two vectors of 16-bit fixed point values.
557 */
558 static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context * bld,LLVMValueRef data_ptr,LLVMValueRef offset[2][2][2],LLVMValueRef x_subcoord[2],LLVMValueRef y_subcoord[2],LLVMValueRef s_fpart,LLVMValueRef t_fpart,LLVMValueRef r_fpart,LLVMValueRef * colors)559 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
560 LLVMValueRef data_ptr,
561 LLVMValueRef offset[2][2][2],
562 LLVMValueRef x_subcoord[2],
563 LLVMValueRef y_subcoord[2],
564 LLVMValueRef s_fpart,
565 LLVMValueRef t_fpart,
566 LLVMValueRef r_fpart,
567 LLVMValueRef *colors)
568 {
569 const unsigned dims = bld->dims;
570 LLVMBuilderRef builder = bld->gallivm->builder;
571 struct lp_build_context u8n;
572 LLVMTypeRef u8n_vec_type;
573 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
574 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
575 LLVMValueRef shuffle;
576 LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
577 LLVMValueRef packed;
578 unsigned i, j, k;
579 unsigned numj, numk;
580
581 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
582 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
583
584 /*
585 * Transform 4 x i32 in
586 *
587 * s_fpart = {s0, s1, s2, s3}
588 *
589 * where each value is between 0 and 0xff,
590 *
591 * into one 16 x i20
592 *
593 * s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
594 *
595 * and likewise for t_fpart. There is no risk of loosing precision here
596 * since the fractional parts only use the lower 8bits.
597 */
598 s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
599 if (dims >= 2)
600 t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
601 if (dims >= 3)
602 r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");
603
604 for (j = 0; j < u8n.type.length; j += 4) {
605 #if UTIL_ARCH_LITTLE_ENDIAN
606 unsigned subindex = 0;
607 #else
608 unsigned subindex = 3;
609 #endif
610 LLVMValueRef index;
611
612 index = LLVMConstInt(elem_type, j + subindex, 0);
613 for (i = 0; i < 4; ++i)
614 shuffles[j + i] = index;
615 }
616
617 shuffle = LLVMConstVector(shuffles, u8n.type.length);
618
619 s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
620 shuffle, "");
621 if (dims >= 2) {
622 t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
623 shuffle, "");
624 }
625 if (dims >= 3) {
626 r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
627 shuffle, "");
628 }
629
630 /*
631 * Fetch the pixels as 4 x 32bit (rgba order might differ):
632 *
633 * rgba0 rgba1 rgba2 rgba3
634 *
635 * bit cast them into 16 x u8
636 *
637 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
638 *
639 * unpack them into two 8 x i16:
640 *
641 * r0 g0 b0 a0 r1 g1 b1 a1
642 * r2 g2 b2 a2 r3 g3 b3 a3
643 *
644 * The higher 8 bits of the resulting elements will be zero.
645 */
646 numj = 1 + (dims >= 2);
647 numk = 1 + (dims >= 3);
648
649 for (k = 0; k < numk; k++) {
650 for (j = 0; j < numj; j++) {
651 for (i = 0; i < 2; i++) {
652 LLVMValueRef rgba8;
653
654 if (util_format_is_rgba8_variant(bld->format_desc)) {
655 struct lp_type fetch_type;
656 /*
657 * Given the format is a rgba8, just read the pixels as is,
658 * without any swizzling. Swizzling will be done later.
659 */
660 fetch_type = lp_type_uint(bld->texel_type.width);
661 rgba8 = lp_build_gather(bld->gallivm,
662 bld->texel_type.length,
663 bld->format_desc->block.bits,
664 fetch_type,
665 TRUE,
666 data_ptr, offset[k][j][i], TRUE);
667
668 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
669 }
670 else {
671 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
672 bld->format_desc,
673 u8n.type,
674 TRUE,
675 data_ptr, offset[k][j][i],
676 x_subcoord[i],
677 y_subcoord[j],
678 bld->cache);
679 }
680
681 neighbors[k][j][i] = rgba8;
682 }
683 }
684 }
685
686 /*
687 * Linear interpolation with 8.8 fixed point.
688 */
689 if (bld->static_sampler_state->force_nearest_s) {
690 /* special case 1-D lerp */
691 packed = lp_build_lerp(&u8n,
692 t_fpart,
693 neighbors[0][0][0],
694 neighbors[0][0][1],
695 LP_BLD_LERP_PRESCALED_WEIGHTS);
696 }
697 else if (bld->static_sampler_state->force_nearest_t) {
698 /* special case 1-D lerp */
699 packed = lp_build_lerp(&u8n,
700 s_fpart,
701 neighbors[0][0][0],
702 neighbors[0][0][1],
703 LP_BLD_LERP_PRESCALED_WEIGHTS);
704 }
705 else {
706 /* general 1/2/3-D lerping */
707 if (dims == 1) {
708 packed = lp_build_lerp(&u8n,
709 s_fpart,
710 neighbors[0][0][0],
711 neighbors[0][0][1],
712 LP_BLD_LERP_PRESCALED_WEIGHTS);
713 } else if (dims == 2) {
714 /* 2-D lerp */
715 packed = lp_build_lerp_2d(&u8n,
716 s_fpart, t_fpart,
717 neighbors[0][0][0],
718 neighbors[0][0][1],
719 neighbors[0][1][0],
720 neighbors[0][1][1],
721 LP_BLD_LERP_PRESCALED_WEIGHTS);
722 } else {
723 /* 3-D lerp */
724 assert(dims == 3);
725 packed = lp_build_lerp_3d(&u8n,
726 s_fpart, t_fpart, r_fpart,
727 neighbors[0][0][0],
728 neighbors[0][0][1],
729 neighbors[0][1][0],
730 neighbors[0][1][1],
731 neighbors[1][0][0],
732 neighbors[1][0][1],
733 neighbors[1][1][0],
734 neighbors[1][1][1],
735 LP_BLD_LERP_PRESCALED_WEIGHTS);
736 }
737 }
738
739 *colors = packed;
740 }
741
742 /**
743 * Sample a single texture image with (bi-)(tri-)linear sampling.
744 * Return filtered color as two vectors of 16-bit fixed point values.
745 */
746 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,LLVMValueRef int_size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef * colors)747 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
748 LLVMValueRef int_size,
749 LLVMValueRef row_stride_vec,
750 LLVMValueRef img_stride_vec,
751 LLVMValueRef data_ptr,
752 LLVMValueRef mipoffsets,
753 LLVMValueRef s,
754 LLVMValueRef t,
755 LLVMValueRef r,
756 const LLVMValueRef *offsets,
757 LLVMValueRef *colors)
758 {
759 const unsigned dims = bld->dims;
760 LLVMBuilderRef builder = bld->gallivm->builder;
761 struct lp_build_context i32;
762 LLVMValueRef i32_c8, i32_c128, i32_c255;
763 LLVMValueRef width_vec, height_vec, depth_vec;
764 LLVMValueRef s_ipart, s_fpart, s_float;
765 LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
766 LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
767 LLVMValueRef x_stride, y_stride, z_stride;
768 LLVMValueRef x_offset0, x_offset1;
769 LLVMValueRef y_offset0, y_offset1;
770 LLVMValueRef z_offset0, z_offset1;
771 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
772 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
773 unsigned x, y, z;
774
775 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));
776
777 lp_build_extract_image_sizes(bld,
778 &bld->int_size_bld,
779 bld->int_coord_type,
780 int_size,
781 &width_vec,
782 &height_vec,
783 &depth_vec);
784
785 s_float = s; t_float = t; r_float = r;
786
787 if (bld->static_sampler_state->normalized_coords) {
788 LLVMValueRef scaled_size;
789 LLVMValueRef flt_size;
790
791 /* scale size by 256 (8 fractional bits) */
792 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
793
794 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
795
796 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
797 }
798 else {
799 /* scale coords by 256 (8 fractional bits) */
800 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
801 if (dims >= 2)
802 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
803 if (dims >= 3)
804 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
805 }
806
807 /* convert float to int */
808 /* For correct rounding, need round to nearest, not truncation here.
809 * Note that in some cases (clamp to edge, no texel offsets) we
810 * could use a non-signed build context which would help archs which
811 * don't have fptosi intrinsic with nearest rounding implemented.
812 */
813 s = lp_build_iround(&bld->coord_bld, s);
814 if (dims >= 2)
815 t = lp_build_iround(&bld->coord_bld, t);
816 if (dims >= 3)
817 r = lp_build_iround(&bld->coord_bld, r);
818
819 /* subtract 0.5 (add -128) */
820 i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
821 if (!bld->static_sampler_state->force_nearest_s) {
822 s = LLVMBuildAdd(builder, s, i32_c128, "");
823 }
824 if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
825 t = LLVMBuildAdd(builder, t, i32_c128, "");
826 }
827 if (dims >= 3) {
828 r = LLVMBuildAdd(builder, r, i32_c128, "");
829 }
830
831 /* compute floor (shift right 8) */
832 i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
833 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
834 if (dims >= 2)
835 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
836 if (dims >= 3)
837 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
838
839 /* add texel offsets */
840 if (offsets[0]) {
841 s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
842 if (dims >= 2) {
843 t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
844 if (dims >= 3) {
845 r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
846 }
847 }
848 }
849
850 /* compute fractional part (AND with 0xff) */
851 i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
852 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
853 if (dims >= 2)
854 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
855 if (dims >= 3)
856 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
857
858 /* get pixel, row and image strides */
859 x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
860 bld->format_desc->block.bits/8);
861 y_stride = row_stride_vec;
862 z_stride = img_stride_vec;
863
864 /* do texcoord wrapping and compute texel offsets */
865 lp_build_sample_wrap_linear_int(bld,
866 bld->format_desc->block.width,
867 s_ipart, &s_fpart, s_float,
868 width_vec, x_stride, offsets[0],
869 bld->static_texture_state->pot_width,
870 bld->static_sampler_state->wrap_s,
871 &x_offset0, &x_offset1,
872 &x_subcoord[0], &x_subcoord[1]);
873
874 /* add potential cube/array/mip offsets now as they are constant per pixel */
875 if (has_layer_coord(bld->static_texture_state->target)) {
876 LLVMValueRef z_offset;
877 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
878 /* The r coord is the cube face in [0,5] or array layer */
879 x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
880 x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
881 }
882 if (mipoffsets) {
883 x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
884 x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
885 }
886
887 for (z = 0; z < 2; z++) {
888 for (y = 0; y < 2; y++) {
889 offset[z][y][0] = x_offset0;
890 offset[z][y][1] = x_offset1;
891 }
892 }
893
894 if (dims >= 2) {
895 lp_build_sample_wrap_linear_int(bld,
896 bld->format_desc->block.height,
897 t_ipart, &t_fpart, t_float,
898 height_vec, y_stride, offsets[1],
899 bld->static_texture_state->pot_height,
900 bld->static_sampler_state->wrap_t,
901 &y_offset0, &y_offset1,
902 &y_subcoord[0], &y_subcoord[1]);
903
904 for (z = 0; z < 2; z++) {
905 for (x = 0; x < 2; x++) {
906 offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
907 offset[z][0][x], y_offset0);
908 offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
909 offset[z][1][x], y_offset1);
910 }
911 }
912 }
913
914 if (dims >= 3) {
915 lp_build_sample_wrap_linear_int(bld,
916 1, /* block length (depth) */
917 r_ipart, &r_fpart, r_float,
918 depth_vec, z_stride, offsets[2],
919 bld->static_texture_state->pot_depth,
920 bld->static_sampler_state->wrap_r,
921 &z_offset0, &z_offset1,
922 &z_subcoord[0], &z_subcoord[1]);
923 for (y = 0; y < 2; y++) {
924 for (x = 0; x < 2; x++) {
925 offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
926 offset[0][y][x], z_offset0);
927 offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
928 offset[1][y][x], z_offset1);
929 }
930 }
931 }
932
933 lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
934 x_subcoord, y_subcoord,
935 s_fpart, t_fpart, r_fpart,
936 colors);
937 }
938
939
940 /**
941 * Sample the texture/mipmap using given image filter and mip filter.
942 * data0_ptr and data1_ptr point to the two mipmap levels to sample
943 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
944 * If we're using nearest miplevel sampling the '1' values will be null/unused.
945 */
946 static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef colors_var)947 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
948 unsigned img_filter,
949 unsigned mip_filter,
950 LLVMValueRef s,
951 LLVMValueRef t,
952 LLVMValueRef r,
953 const LLVMValueRef *offsets,
954 LLVMValueRef ilevel0,
955 LLVMValueRef ilevel1,
956 LLVMValueRef lod_fpart,
957 LLVMValueRef colors_var)
958 {
959 LLVMBuilderRef builder = bld->gallivm->builder;
960 LLVMValueRef size0;
961 LLVMValueRef size1;
962 LLVMValueRef row_stride0_vec = NULL;
963 LLVMValueRef row_stride1_vec = NULL;
964 LLVMValueRef img_stride0_vec = NULL;
965 LLVMValueRef img_stride1_vec = NULL;
966 LLVMValueRef data_ptr0;
967 LLVMValueRef data_ptr1;
968 LLVMValueRef mipoff0 = NULL;
969 LLVMValueRef mipoff1 = NULL;
970 LLVMValueRef colors0;
971 LLVMValueRef colors1;
972
973 /* sample the first mipmap level */
974 lp_build_mipmap_level_sizes(bld, ilevel0,
975 &size0,
976 &row_stride0_vec, &img_stride0_vec);
977 if (bld->num_mips == 1) {
978 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
979 }
980 else {
981 /* This path should work for num_lods 1 too but slightly less efficient */
982 data_ptr0 = bld->base_ptr;
983 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
984 }
985
986 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
987 lp_build_sample_image_nearest(bld,
988 size0,
989 row_stride0_vec, img_stride0_vec,
990 data_ptr0, mipoff0, s, t, r, offsets,
991 &colors0);
992 }
993 else {
994 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
995 lp_build_sample_image_linear(bld,
996 size0,
997 row_stride0_vec, img_stride0_vec,
998 data_ptr0, mipoff0, s, t, r, offsets,
999 &colors0);
1000 }
1001
1002 /* Store the first level's colors in the output variables */
1003 LLVMBuildStore(builder, colors0, colors_var);
1004
1005 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1006 LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
1007 bld->lodf_bld.type, 256.0);
1008 LLVMTypeRef i32vec_type = bld->lodi_bld.vec_type;
1009 struct lp_build_if_state if_ctx;
1010 LLVMValueRef need_lerp;
1011 unsigned num_quads = bld->coord_bld.type.length / 4;
1012 unsigned i;
1013
1014 lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
1015 lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");
1016
1017 /* need_lerp = lod_fpart > 0 */
1018 if (bld->num_lods == 1) {
1019 need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
1020 lod_fpart, bld->lodi_bld.zero,
1021 "need_lerp");
1022 }
1023 else {
1024 /*
1025 * We'll do mip filtering if any of the quads need it.
1026 * It might be better to split the vectors here and only fetch/filter
1027 * quads which need it.
1028 */
1029 /*
1030 * We need to clamp lod_fpart here since we can get negative
1031 * values which would screw up filtering if not all
1032 * lod_fpart values have same sign.
1033 * We can however then skip the greater than comparison.
1034 */
1035 lod_fpart = lp_build_max(&bld->lodi_bld, lod_fpart,
1036 bld->lodi_bld.zero);
1037 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_fpart);
1038 }
1039
1040 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1041 {
1042 struct lp_build_context u8n_bld;
1043
1044 lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1045
1046 /* sample the second mipmap level */
1047 lp_build_mipmap_level_sizes(bld, ilevel1,
1048 &size1,
1049 &row_stride1_vec, &img_stride1_vec);
1050 if (bld->num_mips == 1) {
1051 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1052 }
1053 else {
1054 data_ptr1 = bld->base_ptr;
1055 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1056 }
1057
1058 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1059 lp_build_sample_image_nearest(bld,
1060 size1,
1061 row_stride1_vec, img_stride1_vec,
1062 data_ptr1, mipoff1, s, t, r, offsets,
1063 &colors1);
1064 }
1065 else {
1066 lp_build_sample_image_linear(bld,
1067 size1,
1068 row_stride1_vec, img_stride1_vec,
1069 data_ptr1, mipoff1, s, t, r, offsets,
1070 &colors1);
1071 }
1072
1073 /* interpolate samples from the two mipmap levels */
1074
1075 if (num_quads == 1 && bld->num_lods == 1) {
1076 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
1077 lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);
1078 }
1079 else {
1080 unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
1081 LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->lodi_bld.type.length);
1082 LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];
1083
1084 /* Take the LSB of lod_fpart */
1085 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");
1086
1087 /* Broadcast each lod weight into their respective channels */
1088 for (i = 0; i < u8n_bld.type.length; ++i) {
1089 shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
1090 }
1091 lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
1092 LLVMConstVector(shuffle, u8n_bld.type.length), "");
1093 }
1094
1095 colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
1096 colors0, colors1,
1097 LP_BLD_LERP_PRESCALED_WEIGHTS);
1098
1099 LLVMBuildStore(builder, colors0, colors_var);
1100 }
1101 lp_build_endif(&if_ctx);
1102 }
1103 }
1104
1105
1106
1107 /**
1108 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
1109 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
1110 * but only limited texture coord wrap modes.
1111 */
1112 void
lp_build_sample_aos(struct lp_build_sample_context * bld,unsigned sampler_unit,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef texel_out[4])1113 lp_build_sample_aos(struct lp_build_sample_context *bld,
1114 unsigned sampler_unit,
1115 LLVMValueRef s,
1116 LLVMValueRef t,
1117 LLVMValueRef r,
1118 const LLVMValueRef *offsets,
1119 LLVMValueRef lod_positive,
1120 LLVMValueRef lod_fpart,
1121 LLVMValueRef ilevel0,
1122 LLVMValueRef ilevel1,
1123 LLVMValueRef texel_out[4])
1124 {
1125 LLVMBuilderRef builder = bld->gallivm->builder;
1126 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1127 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1128 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1129 const unsigned dims = bld->dims;
1130 LLVMValueRef packed_var, packed;
1131 LLVMValueRef unswizzled[4];
1132 struct lp_build_context u8n_bld;
1133
1134 /* we only support the common/simple wrap modes at this time */
1135 assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
1136 if (dims >= 2)
1137 assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
1138 if (dims >= 3)
1139 assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));
1140
1141
1142 /* make 8-bit unorm builder context */
1143 lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));
1144
1145 /*
1146 * Get/interpolate texture colors.
1147 */
1148
1149 packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");
1150
1151 if (min_filter == mag_filter) {
1152 /* no need to distinguish between minification and magnification */
1153 lp_build_sample_mipmap(bld,
1154 min_filter, mip_filter,
1155 s, t, r, offsets,
1156 ilevel0, ilevel1, lod_fpart,
1157 packed_var);
1158 }
1159 else {
1160 /* Emit conditional to choose min image filter or mag image filter
1161 * depending on the lod being > 0 or <= 0, respectively.
1162 */
1163 struct lp_build_if_state if_ctx;
1164
1165 /*
1166 * FIXME this should take all lods into account, if some are min
1167 * some max probably could hack up the weights in the linear
1168 * path with selects to work for nearest.
1169 */
1170 if (bld->num_lods > 1)
1171 lod_positive = LLVMBuildExtractElement(builder, lod_positive,
1172 lp_build_const_int32(bld->gallivm, 0), "");
1173
1174 lod_positive = LLVMBuildTrunc(builder, lod_positive,
1175 LLVMInt1TypeInContext(bld->gallivm->context), "");
1176
1177 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
1178 {
1179 /* Use the minification filter */
1180 lp_build_sample_mipmap(bld,
1181 min_filter, mip_filter,
1182 s, t, r, offsets,
1183 ilevel0, ilevel1, lod_fpart,
1184 packed_var);
1185 }
1186 lp_build_else(&if_ctx);
1187 {
1188 /* Use the magnification filter */
1189 lp_build_sample_mipmap(bld,
1190 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1191 s, t, r, offsets,
1192 ilevel0, NULL, NULL,
1193 packed_var);
1194 }
1195 lp_build_endif(&if_ctx);
1196 }
1197
1198 packed = LLVMBuildLoad(builder, packed_var, "");
1199
1200 /*
1201 * Convert to SoA and swizzle.
1202 */
1203 lp_build_rgba8_to_fi32_soa(bld->gallivm,
1204 bld->texel_type,
1205 packed, unswizzled);
1206
1207 if (util_format_is_rgba8_variant(bld->format_desc)) {
1208 lp_build_format_swizzle_soa(bld->format_desc,
1209 &bld->texel_bld,
1210 unswizzled, texel_out);
1211 }
1212 else {
1213 texel_out[0] = unswizzled[0];
1214 texel_out[1] = unswizzled[1];
1215 texel_out[2] = unswizzled[2];
1216 texel_out[3] = unswizzled[3];
1217 }
1218 }
1219