1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63 #include "lp_bld_intr.h"
64
65
/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 *
 * \param width,height,depth  per-dimension texture size, as int vectors
 * \param x,y,z       integer texel coordinates (vectors); y/z unused for
 *                    lower-dimensional textures
 * \param y_stride    row stride in bytes (int vector)
 * \param z_stride    image/slice stride in bytes (int vector)
 * \param data_ptr    pointer to the texture data to fetch from
 * \param mipoffsets  optional extra per-element byte offsets (added to the
 *                    computed texel offset), may be NULL
 * \param texel_out   returns the four texel channel vectors
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   /* OR in the out-of-bounds condition for the t coordinate */
   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* OR in the out-of-bounds condition for the r coordinate */
   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image. We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border. This will cause
       * coords which are out of bounds to become zero. Zero's guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, TRUE,
                           data_ptr, offset,
                           i, j,
                           bld->cache,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   }
    *   else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      int chan;
      /* border color is a single 4-channel vector, not SoA */
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by sampler_view swizzle anyway but it's
       * easier too.
       */
      for (chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         for (chan_s = 0; chan_s < 4; chan_s++) {
            if (chan_s == format_desc->swizzle[chan]) {
               break;
            }
         }
         /* chan_s == 4 means the channel isn't present (swizzle 0/1/NONE) */
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            /* broadcast one border channel across the SoA vector */
            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}
218
219
220 /**
221 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
222 */
223 static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context * bld,LLVMValueRef coord)224 lp_build_coord_mirror(struct lp_build_sample_context *bld,
225 LLVMValueRef coord)
226 {
227 struct lp_build_context *coord_bld = &bld->coord_bld;
228 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
229 LLVMValueRef fract, flr, isOdd;
230
231 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
232 /* kill off NaNs */
233 /* XXX: not safe without arch rounding, fract can be anything. */
234 fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero,
235 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
236
237 /* isOdd = flr & 1 */
238 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
239
240 /* make coord positive or negative depending on isOdd */
241 /* XXX slight overkill masking out sign bit is unnecessary */
242 coord = lp_build_set_sign(coord_bld, fract, isOdd);
243
244 /* convert isOdd to float */
245 isOdd = lp_build_int_to_float(coord_bld, isOdd);
246
247 /* add isOdd to coord */
248 coord = lp_build_add(coord_bld, coord, isOdd);
249
250 return coord;
251 }
252
253
254 /**
255 * Helper to compute the first coord and the weight for
256 * linear wrap repeat npot textures
257 */
258 void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_f)259 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
260 LLVMValueRef coord_f,
261 LLVMValueRef length_i,
262 LLVMValueRef length_f,
263 LLVMValueRef *coord0_i,
264 LLVMValueRef *weight_f)
265 {
266 struct lp_build_context *coord_bld = &bld->coord_bld;
267 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
268 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
269 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
270 int_coord_bld->one);
271 LLVMValueRef mask;
272 /* wrap with normalized floats is just fract */
273 coord_f = lp_build_fract(coord_bld, coord_f);
274 /* mul by size and subtract 0.5 */
275 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
276 coord_f = lp_build_sub(coord_bld, coord_f, half);
277 /*
278 * we avoided the 0.5/length division before the repeat wrap,
279 * now need to fix up edge cases with selects
280 */
281 /*
282 * Note we do a float (unordered) compare so we can eliminate NaNs.
283 * (Otherwise would need fract_safe above).
284 */
285 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
286 PIPE_FUNC_LESS, coord_f, coord_bld->zero);
287
288 /* convert to int, compute lerp weight */
289 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
290 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
291 }
292
293
/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param coord      the incoming texcoord (float vector, nominally in [0,1]
 *                   when normalized_coords is set)
 * \param length     the texture size along one dimension, as int vector
 * \param length_f   the texture size along one dimension, as float vector
 * \param offset     optional texel offset along one dimension (int vector),
 *                   may be NULL
 * \param is_pot     if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param x0_out returns first integer texcoord
 * \param x1_out returns second integer texcoord
 * \param weight_out returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap: pot size lets us use a simple bitwise AND */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            /* offset is in texels; scale to normalized coord space */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         /* mask is ~0 unless coord0 is the last texel ... */
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         /* ... so coord1 = coord0 + 1 wraps to 0 at the last texel */
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* clamp to [0, length] */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         /* use an unsigned float context: coord is non-negative after the
          * max() below, which lets ifloor_fract take a cheaper path */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
      /* can skip clamp (though might not work for very large coord values)
       * -- out-of-range texels will be masked to border color later */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         /* offset is in texels; scale to normalized coord space */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* coord0 = max(coord0, 0) */
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
      /* coord1 = min(coord1, length-1) */
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: reflect about zero */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min(coord_bld, coord, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         /* unsigned float context: coord is non-negative after abs/max */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* mirror: reflect about zero */
         coord = lp_build_abs(coord_bld, coord);

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* mirror: reflect about zero */
         coord = lp_build_abs(coord_bld, coord);

         /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
         /* skip clamp - always positive, and other side
            only potentially matters for very large coords */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}
529
530
/**
 * Build LLVM code for texture wrap mode for nearest filtering.
 * \param coord the incoming texcoord (nominally in [0,1])
 * \param length the texture size along one dimension, as int vector
 * \param length_f the texture size along one dimension, as float vector
 * \param offset texel offset along one dimension (as int vector), may be NULL
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \return the wrapped/clamped integer texel coordinate (int vector)
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         /* repeat wrap: pot size lets us use a simple bitwise AND */
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
         if (offset) {
            /* offset is in texels; scale to normalized coord space */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         /* offset is in texels; scale to normalized coord space */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here (coord is non-negative after mirror) */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: reflect about zero */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here (coord is non-negative after abs) */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = FALSE;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: reflect about zero; border masking handles the clamp */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here (coord is non-negative after abs) */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}
677
678
679 /**
680 * Do shadow test/comparison.
681 * \param p shadow ref value
682 * \param texel the texel to compare against
683 */
684 static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context * bld,LLVMValueRef p,LLVMValueRef texel)685 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
686 LLVMValueRef p,
687 LLVMValueRef texel)
688 {
689 struct lp_build_context *texel_bld = &bld->texel_bld;
690 LLVMValueRef res;
691
692 if (0) {
693 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
694 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
695 }
696
697 /* result = (p FUNC texel) ? 1 : 0 */
698 /*
699 * honor d3d10 floating point rules here, which state that comparisons
700 * are ordered except NOT_EQUAL which is unordered.
701 */
702 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
703 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
704 p, texel);
705 }
706 else {
707 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
708 p, texel);
709 }
710 return res;
711 }
712
713
/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 *
 * \param size            int vector holding width/height/depth of the level
 * \param row_stride_vec  row stride in bytes (int vector)
 * \param img_stride_vec  image/slice stride in bytes (int vector)
 * \param data_ptr        pointer to the level's texture data
 * \param mipoffsets      optional per-element byte offsets to the level,
 *                        may be NULL
 * \param coords          texcoords: s, t, r, layer/face, shadow-compare ref
 * \param offsets         per-dimension texel offsets (entries may be NULL)
 * \param colors_out      returns the four sampled channel vectors
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   /* extract the level's sizes both as int and float vectors */
   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   /* for array/cube-array textures the layer rides in the z slot */
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      }
      else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   /* shadow comparison: coords[4] holds the reference value */
   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}
807
808
809 /**
810 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
811 */
812 static LLVMValueRef
lp_build_masklerp(struct lp_build_context * bld,LLVMValueRef weight,LLVMValueRef mask0,LLVMValueRef mask1)813 lp_build_masklerp(struct lp_build_context *bld,
814 LLVMValueRef weight,
815 LLVMValueRef mask0,
816 LLVMValueRef mask1)
817 {
818 struct gallivm_state *gallivm = bld->gallivm;
819 LLVMBuilderRef builder = gallivm->builder;
820 LLVMValueRef weight2;
821
822 weight2 = lp_build_sub(bld, bld->one, weight);
823 weight = LLVMBuildBitCast(builder, weight,
824 lp_build_int_vec_type(gallivm, bld->type), "");
825 weight2 = LLVMBuildBitCast(builder, weight2,
826 lp_build_int_vec_type(gallivm, bld->type), "");
827 weight = LLVMBuildAnd(builder, weight, mask1, "");
828 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
829 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
830 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
831 return lp_build_add(bld, weight, weight2);
832 }
833
834 /**
835 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
836 */
837 static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context * bld,LLVMValueRef weight0,LLVMValueRef weight1,LLVMValueRef mask00,LLVMValueRef mask01,LLVMValueRef mask10,LLVMValueRef mask11)838 lp_build_masklerp2d(struct lp_build_context *bld,
839 LLVMValueRef weight0,
840 LLVMValueRef weight1,
841 LLVMValueRef mask00,
842 LLVMValueRef mask01,
843 LLVMValueRef mask10,
844 LLVMValueRef mask11)
845 {
846 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
847 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
848 return lp_build_lerp(bld, weight1, val0, val1, 0);
849 }
850
851 /*
852 * this is a bit excessive code for something OpenGL just recommends
853 * but does not require.
854 */
855 #define ACCURATE_CUBE_CORNERS 1
856
857 /**
858 * Generate code to sample a mipmap level with linear filtering.
859 * If sampling a cube texture, r = cube face in [0,5].
860 * If linear_mask is present, only pixels having their mask set
861 * will receive linear filtering, the rest will use nearest.
862 */
863 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,boolean is_gather,LLVMValueRef size,LLVMValueRef linear_mask,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])864 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
865 boolean is_gather,
866 LLVMValueRef size,
867 LLVMValueRef linear_mask,
868 LLVMValueRef row_stride_vec,
869 LLVMValueRef img_stride_vec,
870 LLVMValueRef data_ptr,
871 LLVMValueRef mipoffsets,
872 LLVMValueRef *coords,
873 const LLVMValueRef *offsets,
874 LLVMValueRef colors_out[4])
875 {
876 LLVMBuilderRef builder = bld->gallivm->builder;
877 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
878 struct lp_build_context *coord_bld = &bld->coord_bld;
879 struct lp_build_context *texel_bld = &bld->texel_bld;
880 const unsigned dims = bld->dims;
881 LLVMValueRef width_vec;
882 LLVMValueRef height_vec;
883 LLVMValueRef depth_vec;
884 LLVMValueRef flt_size;
885 LLVMValueRef flt_width_vec;
886 LLVMValueRef flt_height_vec;
887 LLVMValueRef flt_depth_vec;
888 LLVMValueRef fall_off[4], have_corners;
889 LLVMValueRef z1 = NULL;
890 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
891 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
892 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
893 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
894 LLVMValueRef xs[4], ys[4], zs[4];
895 LLVMValueRef neighbors[2][2][4];
896 int chan, texel_index;
897 boolean seamless_cube_filter, accurate_cube_corners;
898
899 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
900 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
901 bld->static_sampler_state->seamless_cube_map;
902 /*
903 * XXX I don't know how this is really supposed to work with gather. From GL
904 * spec wording (not gather specific) it sounds like the 4th missing texel
905 * should be an average of the other 3, hence for gather could return this.
906 * This is however NOT how the code here works, which just fixes up the
907 * weights used for filtering instead. And of course for gather there is
908 * no filter to tweak...
909 */
910 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
911 !is_gather;
912
913 lp_build_extract_image_sizes(bld,
914 &bld->int_size_bld,
915 bld->int_coord_type,
916 size,
917 &width_vec, &height_vec, &depth_vec);
918
919 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
920
921 lp_build_extract_image_sizes(bld,
922 &bld->float_size_bld,
923 bld->coord_type,
924 flt_size,
925 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
926
927 /*
928 * Compute integer texcoords.
929 */
930
931 if (!seamless_cube_filter) {
932 lp_build_sample_wrap_linear(bld, coords[0], width_vec,
933 flt_width_vec, offsets[0],
934 bld->static_texture_state->pot_width,
935 bld->static_sampler_state->wrap_s,
936 &x00, &x01, &s_fpart);
937 lp_build_name(x00, "tex.x0.wrapped");
938 lp_build_name(x01, "tex.x1.wrapped");
939 x10 = x00;
940 x11 = x01;
941
942 if (dims >= 2) {
943 lp_build_sample_wrap_linear(bld, coords[1], height_vec,
944 flt_height_vec, offsets[1],
945 bld->static_texture_state->pot_height,
946 bld->static_sampler_state->wrap_t,
947 &y00, &y10, &t_fpart);
948 lp_build_name(y00, "tex.y0.wrapped");
949 lp_build_name(y10, "tex.y1.wrapped");
950 y01 = y00;
951 y11 = y10;
952
953 if (dims == 3) {
954 lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
955 flt_depth_vec, offsets[2],
956 bld->static_texture_state->pot_depth,
957 bld->static_sampler_state->wrap_r,
958 &z00, &z1, &r_fpart);
959 z01 = z10 = z11 = z00;
960 lp_build_name(z00, "tex.z0.wrapped");
961 lp_build_name(z1, "tex.z1.wrapped");
962 }
963 }
964 if (has_layer_coord(bld->static_texture_state->target)) {
965 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
966 /* add cube layer to face */
967 z00 = z01 = z10 = z11 = z1 =
968 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
969 }
970 else {
971 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
972 }
973 lp_build_name(z00, "tex.z0.layer");
974 lp_build_name(z1, "tex.z1.layer");
975 }
976 }
977 else {
978 struct lp_build_if_state edge_if;
979 LLVMTypeRef int1t;
980 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
981 LLVMValueRef coord, have_edge, have_corner;
982 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
983 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
984 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
985 LLVMValueRef face = coords[2];
986 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
987 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
988 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
989 height_vec = width_vec;
990 flt_height_vec = flt_width_vec;
991
992 /* XXX the overflow logic is actually sort of duplicated with trilinear,
993 * since an overflow in one mip should also have a corresponding overflow
994 * in another.
995 */
996 /* should always have normalized coords, and offsets are undefined */
997 assert(bld->static_sampler_state->normalized_coords);
998 coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
999 /* instead of clamp, build mask if overflowed */
1000 coord = lp_build_sub(coord_bld, coord, half);
1001 /* convert to int, compute lerp weight */
1002 /* not ideal with AVX (and no AVX2) */
1003 lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
1004 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
1005 coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
1006 coord = lp_build_sub(coord_bld, coord, half);
1007 lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
1008 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
1009
1010 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
1011 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
1012 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
1013 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
1014
1015 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
1016 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
1017 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
1018 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
1019
1020 /* needed for accurate corner filtering branch later, rely on 0 init */
1021 int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1022 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1023
1024 for (texel_index = 0; texel_index < 4; texel_index++) {
1025 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1026 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1027 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1028 }
1029
1030 lp_build_if(&edge_if, bld->gallivm, have_edge);
1031
1032 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1033 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1034 LLVMBuildStore(builder, have_corner, have_corners);
1035
1036 /*
1037 * Need to feed clamped values here for cheap corner handling,
1038 * but only for y coord (as when falling off both edges we only
1039 * fall off the x one) - this should be sufficient.
1040 */
1041 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1042 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1043
1044 /*
1045 * Get all possible new coords.
1046 */
1047 lp_build_cube_new_coords(ivec_bld, face,
1048 x0, x1, y0_clamped, y1_clamped,
1049 length_minus_one,
1050 new_faces, new_xcoords, new_ycoords);
1051
1052 /* handle fall off x-, x+ direction */
1053 /* determine new coords, face (not both fall_off vars can be true at same time) */
1054 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1055 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1056 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1057 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1058 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1059 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1060 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1061 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1062
1063 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1064 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1065
1066 /* handle fall off y-, y+ direction */
1067 /*
1068 * Cheap corner logic: just hack up things so a texel doesn't fall
1069 * off both sides (which means filter weights will be wrong but we'll only
1070 * use valid texels in the filter).
1071 * This means however (y) coords must additionally be clamped (see above).
1072 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1073 */
1074 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1075 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1076 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1077 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1078
1079 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1080 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1081 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1082 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1083 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1084 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1085 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1086 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1087
1088 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1089 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1090 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1091 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1092
1093 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1094 /* now can add cube layer to face (per sample) */
1095 z00 = lp_build_add(ivec_bld, z00, coords[3]);
1096 z01 = lp_build_add(ivec_bld, z01, coords[3]);
1097 z10 = lp_build_add(ivec_bld, z10, coords[3]);
1098 z11 = lp_build_add(ivec_bld, z11, coords[3]);
1099 }
1100
1101 LLVMBuildStore(builder, x00, xs[0]);
1102 LLVMBuildStore(builder, x01, xs[1]);
1103 LLVMBuildStore(builder, x10, xs[2]);
1104 LLVMBuildStore(builder, x11, xs[3]);
1105 LLVMBuildStore(builder, y00, ys[0]);
1106 LLVMBuildStore(builder, y01, ys[1]);
1107 LLVMBuildStore(builder, y10, ys[2]);
1108 LLVMBuildStore(builder, y11, ys[3]);
1109 LLVMBuildStore(builder, z00, zs[0]);
1110 LLVMBuildStore(builder, z01, zs[1]);
1111 LLVMBuildStore(builder, z10, zs[2]);
1112 LLVMBuildStore(builder, z11, zs[3]);
1113
1114 lp_build_else(&edge_if);
1115
1116 LLVMBuildStore(builder, x0, xs[0]);
1117 LLVMBuildStore(builder, x1, xs[1]);
1118 LLVMBuildStore(builder, x0, xs[2]);
1119 LLVMBuildStore(builder, x1, xs[3]);
1120 LLVMBuildStore(builder, y0, ys[0]);
1121 LLVMBuildStore(builder, y0, ys[1]);
1122 LLVMBuildStore(builder, y1, ys[2]);
1123 LLVMBuildStore(builder, y1, ys[3]);
1124 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1125 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1126 LLVMBuildStore(builder, cube_layer, zs[0]);
1127 LLVMBuildStore(builder, cube_layer, zs[1]);
1128 LLVMBuildStore(builder, cube_layer, zs[2]);
1129 LLVMBuildStore(builder, cube_layer, zs[3]);
1130 }
1131 else {
1132 LLVMBuildStore(builder, face, zs[0]);
1133 LLVMBuildStore(builder, face, zs[1]);
1134 LLVMBuildStore(builder, face, zs[2]);
1135 LLVMBuildStore(builder, face, zs[3]);
1136 }
1137
1138 lp_build_endif(&edge_if);
1139
1140 x00 = LLVMBuildLoad(builder, xs[0], "");
1141 x01 = LLVMBuildLoad(builder, xs[1], "");
1142 x10 = LLVMBuildLoad(builder, xs[2], "");
1143 x11 = LLVMBuildLoad(builder, xs[3], "");
1144 y00 = LLVMBuildLoad(builder, ys[0], "");
1145 y01 = LLVMBuildLoad(builder, ys[1], "");
1146 y10 = LLVMBuildLoad(builder, ys[2], "");
1147 y11 = LLVMBuildLoad(builder, ys[3], "");
1148 z00 = LLVMBuildLoad(builder, zs[0], "");
1149 z01 = LLVMBuildLoad(builder, zs[1], "");
1150 z10 = LLVMBuildLoad(builder, zs[2], "");
1151 z11 = LLVMBuildLoad(builder, zs[3], "");
1152 }
1153
1154 if (linear_mask) {
1155 /*
1156 * Whack filter weights into place. Whatever texel had more weight is
1157 * the one which should have been selected by nearest filtering hence
1158 * just use 100% weight for it.
1159 */
1160 struct lp_build_context *c_bld = &bld->coord_bld;
1161 LLVMValueRef w1_mask, w1_weight;
1162 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1163
1164 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1165 /* this select is really just a "and" */
1166 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1167 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1168 if (dims >= 2) {
1169 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1170 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1171 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1172 if (dims == 3) {
1173 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1174 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1175 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1176 }
1177 }
1178 }
1179
1180 /*
1181 * Get texture colors.
1182 */
1183 /* get x0/x1 texels */
1184 lp_build_sample_texel_soa(bld,
1185 width_vec, height_vec, depth_vec,
1186 x00, y00, z00,
1187 row_stride_vec, img_stride_vec,
1188 data_ptr, mipoffsets, neighbors[0][0]);
1189 lp_build_sample_texel_soa(bld,
1190 width_vec, height_vec, depth_vec,
1191 x01, y01, z01,
1192 row_stride_vec, img_stride_vec,
1193 data_ptr, mipoffsets, neighbors[0][1]);
1194
1195 if (dims == 1) {
1196 assert(!is_gather);
1197 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1198 /* Interpolate two samples from 1D image to produce one color */
1199 for (chan = 0; chan < 4; chan++) {
1200 colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
1201 neighbors[0][0][chan],
1202 neighbors[0][1][chan],
1203 0);
1204 }
1205 }
1206 else {
1207 LLVMValueRef cmpval0, cmpval1;
1208 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1209 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1210 /* simplified lerp, AND mask with weight and add */
1211 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1212 cmpval0, cmpval1);
1213 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1214 }
1215 }
1216 else {
1217 /* 2D/3D texture */
1218 struct lp_build_if_state corner_if;
1219 LLVMValueRef colors0[4], colorss[4];
1220
1221 /* get x0/x1 texels at y1 */
1222 lp_build_sample_texel_soa(bld,
1223 width_vec, height_vec, depth_vec,
1224 x10, y10, z10,
1225 row_stride_vec, img_stride_vec,
1226 data_ptr, mipoffsets, neighbors[1][0]);
1227 lp_build_sample_texel_soa(bld,
1228 width_vec, height_vec, depth_vec,
1229 x11, y11, z11,
1230 row_stride_vec, img_stride_vec,
1231 data_ptr, mipoffsets, neighbors[1][1]);
1232
1233 /*
1234 * To avoid having to duplicate linear_mask / fetch code use
1235 * another branch (with corner condition though edge would work
1236 * as well) here.
1237 */
1238 if (accurate_cube_corners) {
1239 LLVMValueRef w00, w01, w10, w11, wx0, wy0;
1240 LLVMValueRef c_weight, c00, c01, c10, c11;
1241 LLVMValueRef have_corner, one_third, tmp;
1242
1243 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1244 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1245 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1246 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1247
1248 have_corner = LLVMBuildLoad(builder, have_corners, "");
1249
1250 lp_build_if(&corner_if, bld->gallivm, have_corner);
1251
1252 /*
1253 * we can't use standard 2d lerp as we need per-element weight
1254 * in case of corners, so just calculate bilinear result as
1255 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1256 * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
1257 * however calculating the weights needs another 6, so actually probably
1258 * not slower than 2d lerp only for 4 channels as weights only need
1259 * to be calculated once - of course fixing the weights has additional cost.)
1260 */
1261 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1262 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1263 w00 = lp_build_mul(coord_bld, wx0, wy0);
1264 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1265 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1266 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1267
1268 /* find corner weight */
1269 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1270 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1271 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1272 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1273 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1274 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1275 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1276 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1277
1278 /*
1279 * add 1/3 of the corner weight to each of the 3 other samples
1280 * and null out corner weight
1281 */
1282 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
1283 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1284 w00 = lp_build_add(coord_bld, w00, c_weight);
1285 c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1286 w00 = lp_build_andnot(coord_bld, w00, c00);
1287 w01 = lp_build_add(coord_bld, w01, c_weight);
1288 c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1289 w01 = lp_build_andnot(coord_bld, w01, c01);
1290 w10 = lp_build_add(coord_bld, w10, c_weight);
1291 c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1292 w10 = lp_build_andnot(coord_bld, w10, c10);
1293 w11 = lp_build_add(coord_bld, w11, c_weight);
1294 c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1295 w11 = lp_build_andnot(coord_bld, w11, c11);
1296
1297 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1298 for (chan = 0; chan < 4; chan++) {
1299 colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
1300 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1301 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1302 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1303 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1304 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1305 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1306 }
1307 }
1308 else {
1309 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1310 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1311 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1312 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1313 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1314 /* inputs to interpolation are just masks so just add masked weights together */
1315 cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
1316 cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
1317 cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
1318 cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
1319 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1320 tmp = lp_build_and(coord_bld, w01, cmpval01);
1321 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1322 tmp = lp_build_and(coord_bld, w10, cmpval10);
1323 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1324 tmp = lp_build_and(coord_bld, w11, cmpval11);
1325 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1326 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1327 }
1328
1329 LLVMBuildStore(builder, colors0[0], colorss[0]);
1330 LLVMBuildStore(builder, colors0[1], colorss[1]);
1331 LLVMBuildStore(builder, colors0[2], colorss[2]);
1332 LLVMBuildStore(builder, colors0[3], colorss[3]);
1333
1334 lp_build_else(&corner_if);
1335 }
1336
1337 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1338 if (is_gather) {
1339 /*
1340 * Just assign the red channel (no component selection yet).
1341 * This is a bit hackish, we usually do the swizzle at the
1342 * end of sampling (much less values to swizzle), but this
1343 * obviously cannot work when using gather.
1344 */
1345 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1346 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1347 neighbors[1][0],
1348 chan_swiz);
1349 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1350 neighbors[1][1],
1351 chan_swiz);
1352 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1353 neighbors[0][1],
1354 chan_swiz);
1355 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1356 neighbors[0][0],
1357 chan_swiz);
1358 }
1359 else {
1360 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1361 for (chan = 0; chan < 4; chan++) {
1362 colors0[chan] = lp_build_lerp_2d(texel_bld,
1363 s_fpart, t_fpart,
1364 neighbors[0][0][chan],
1365 neighbors[0][1][chan],
1366 neighbors[1][0][chan],
1367 neighbors[1][1][chan],
1368 0);
1369 }
1370 }
1371 }
1372 else {
1373 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1374 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1375 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1376 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1377 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1378
1379 if (is_gather) {
1380 /* more hacks for swizzling, should be X, ONE or ZERO... */
1381 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1382 if (chan_swiz <= PIPE_SWIZZLE_W) {
1383 colors0[0] = lp_build_select(texel_bld, cmpval10,
1384 texel_bld->one, texel_bld->zero);
1385 colors0[1] = lp_build_select(texel_bld, cmpval11,
1386 texel_bld->one, texel_bld->zero);
1387 colors0[2] = lp_build_select(texel_bld, cmpval01,
1388 texel_bld->one, texel_bld->zero);
1389 colors0[3] = lp_build_select(texel_bld, cmpval00,
1390 texel_bld->one, texel_bld->zero);
1391 }
1392 else if (chan_swiz == PIPE_SWIZZLE_0) {
1393 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1394 texel_bld->zero;
1395 }
1396 else {
1397 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1398 texel_bld->one;
1399 }
1400 }
1401 else {
1402 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1403 cmpval00, cmpval01, cmpval10, cmpval11);
1404 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1405 }
1406 }
1407
1408 if (accurate_cube_corners) {
1409 LLVMBuildStore(builder, colors0[0], colorss[0]);
1410 LLVMBuildStore(builder, colors0[1], colorss[1]);
1411 LLVMBuildStore(builder, colors0[2], colorss[2]);
1412 LLVMBuildStore(builder, colors0[3], colorss[3]);
1413
1414 lp_build_endif(&corner_if);
1415
1416 colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1417 colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1418 colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1419 colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1420 }
1421
1422 if (dims == 3) {
1423 LLVMValueRef neighbors1[2][2][4];
1424 LLVMValueRef colors1[4];
1425
1426 assert(!is_gather);
1427
1428 /* get x0/x1/y0/y1 texels at z1 */
1429 lp_build_sample_texel_soa(bld,
1430 width_vec, height_vec, depth_vec,
1431 x00, y00, z1,
1432 row_stride_vec, img_stride_vec,
1433 data_ptr, mipoffsets, neighbors1[0][0]);
1434 lp_build_sample_texel_soa(bld,
1435 width_vec, height_vec, depth_vec,
1436 x01, y01, z1,
1437 row_stride_vec, img_stride_vec,
1438 data_ptr, mipoffsets, neighbors1[0][1]);
1439 lp_build_sample_texel_soa(bld,
1440 width_vec, height_vec, depth_vec,
1441 x10, y10, z1,
1442 row_stride_vec, img_stride_vec,
1443 data_ptr, mipoffsets, neighbors1[1][0]);
1444 lp_build_sample_texel_soa(bld,
1445 width_vec, height_vec, depth_vec,
1446 x11, y11, z1,
1447 row_stride_vec, img_stride_vec,
1448 data_ptr, mipoffsets, neighbors1[1][1]);
1449
1450 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1451 /* Bilinear interpolate the four samples from the second Z slice */
1452 for (chan = 0; chan < 4; chan++) {
1453 colors1[chan] = lp_build_lerp_2d(texel_bld,
1454 s_fpart, t_fpart,
1455 neighbors1[0][0][chan],
1456 neighbors1[0][1][chan],
1457 neighbors1[1][0][chan],
1458 neighbors1[1][1][chan],
1459 0);
1460 }
1461 /* Linearly interpolate the two samples from the two 3D slices */
1462 for (chan = 0; chan < 4; chan++) {
1463 colors_out[chan] = lp_build_lerp(texel_bld,
1464 r_fpart,
1465 colors0[chan], colors1[chan],
1466 0);
1467 }
1468 }
1469 else {
1470 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1471 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1472 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1473 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1474 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1475 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1476 cmpval00, cmpval01, cmpval10, cmpval11);
1477 /* Linearly interpolate the two samples from the two 3D slices */
1478 colors_out[0] = lp_build_lerp(texel_bld,
1479 r_fpart,
1480 colors0[0], colors1[0],
1481 0);
1482 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1483 }
1484 }
1485 else {
1486 /* 2D tex */
1487 for (chan = 0; chan < 4; chan++) {
1488 colors_out[chan] = colors0[chan];
1489 }
1490 }
1491 }
1492 }
1493
1494
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
 * from (vectors or scalars).
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 *
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR.
 * \param mip_filter  only PIPE_TEX_MIPFILTER_LINEAR triggers the second
 *                    level fetch and the lerp between levels below.
 * \param is_gather   forwarded to the linear image sampler (first level).
 * \param lod_fpart   fractional lod, the between-levels lerp weight(s).
 * \param colors_out  four variables (allocas); results are stored into them.
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       boolean is_gather,
                       LLVMValueRef *coords,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0[4], colors1[4];
   unsigned chan;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      /* single lod: can address the mip level's data directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      /* per-element lods: use base pointer plus per-element mip offsets */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld, size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, mipoff0, coords, offsets,
                                    colors0);
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld, is_gather, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, coords, offsets,
                                   colors0);
   }

   /* Store the first level's colors in the output variables */
   for (chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         /* scalar lod: a single float compare suffices */
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
                                   lod_fpart, bld->lodf_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads (or individual
          * pixel in case of per-pixel lod) need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it (if there's one lod per quad).
          */
         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                      PIPE_FUNC_GREATER,
                                      lod_fpart, bld->lodf_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
      }

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld, size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, mipoff1, coords, offsets,
                                          colors1);
         }
         else {
            /* NOTE(review): FALSE for is_gather here - presumably gather
             * never takes the mip-lerp path; confirm against callers. */
            lp_build_sample_image_linear(bld, FALSE, size1, NULL,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, mipoff1, coords, offsets,
                                         colors1);
         }

         /* interpolate samples from the two mipmap levels */

         /* broadcast per-quad lod weights out to per-pixel texel width */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
1632
1633
1634 /**
1635 * Sample the texture/mipmap using given mip filter, and using
1636 * both nearest and linear filtering at the same time depending
1637 * on linear_mask.
1638 * lod can be per quad but linear_mask is always per pixel.
1639 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1640 * from (vectors or scalars).
1641 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1642 */
static void
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
                            LLVMValueRef linear_mask,
                            unsigned mip_filter,
                            LLVMValueRef *coords,
                            const LLVMValueRef *offsets,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef lod_fpart,
                            LLVMValueRef lod_positive,
                            LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0[4], colors1[4];
   unsigned chan;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      /* single mip level: address the level's data directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* always sample with the linear path; linear_mask (per pixel) encodes
    * which pixels actually want linear vs. nearest weighting */
   lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
                                row_stride0_vec, img_stride0_vec,
                                data_ptr0, mipoff0, coords, offsets,
                                colors0);

   /* Store the first level's colors in the output variables */
   for (chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /*
       * We'll do mip filtering if any of the quads (or individual
       * pixel in case of per-pixel lod) need it.
       * Note using lod_positive here not lod_fpart since it may be the same
       * condition as that used in the outer "if" in the caller hence llvm
       * should be able to merge the branches in this case.
       */
      need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, mipoff1, coords, offsets,
                                      colors1);

         /* interpolate samples from the two mipmap levels */

         /* lod_fpart may be per-quad while texels are per-pixel:
          * broadcast it up to texel vector width before lerping */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            /* overwrite the level-0 result stored above with the lerped value */
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
1749
1750
1751 /**
1752 * Build (per-coord) layer value.
1753 * Either clamp layer to valid values or fill in optional out_of_bounds
1754 * value and just return value unclamped.
1755 */
1756 static LLVMValueRef
lp_build_layer_coord(struct lp_build_sample_context * bld,unsigned texture_unit,boolean is_cube_array,LLVMValueRef layer,LLVMValueRef * out_of_bounds)1757 lp_build_layer_coord(struct lp_build_sample_context *bld,
1758 unsigned texture_unit,
1759 boolean is_cube_array,
1760 LLVMValueRef layer,
1761 LLVMValueRef *out_of_bounds)
1762 {
1763 LLVMValueRef num_layers;
1764 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1765
1766 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
1767 bld->context_ptr, texture_unit);
1768
1769 if (out_of_bounds) {
1770 LLVMValueRef out1, out;
1771 assert(!is_cube_array);
1772 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1773 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1774 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1775 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1776 return layer;
1777 }
1778 else {
1779 LLVMValueRef maxlayer;
1780 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
1781 bld->int_bld.one;
1782 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
1783 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1784 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1785 }
1786 }
1787
1788
/**
 * Calculate cube face, lod, mip levels.
 * Shared front-end for the sampling paths: fixes up coords for cube /
 * array targets, clamps the compare ref for fixed-point depth formats,
 * computes the float lod and derives the integer mip level(s) to fetch
 * from (ilevel1/lod_fpart only filled in for MIPFILTER_LINEAR).
 */
static void
lp_build_sample_common(struct lp_build_sample_context *bld,
                       unsigned texture_index,
                       unsigned sampler_index,
                       LLVMValueRef *coords,
                       const struct lp_derivatives *derivs, /* optional */
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef *lod_pos_or_zero,
                       LLVMValueRef *lod_fpart,
                       LLVMValueRef *ilevel0,
                       LLVMValueRef *ilevel1)
{
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned target = bld->static_texture_state->target;
   LLVMValueRef first_level, cube_rho = NULL;
   LLVMValueRef lod_ipart = NULL;
   struct lp_derivatives cube_derivs;

   /*
   printf("%s mip %d min %d mag %d\n", __FUNCTION__,
          mip_filter, min_filter, mag_filter);
   */

   /*
    * Choose cube face, recompute texcoords for the chosen face and
    * compute rho here too (as it requires transform of derivatives).
    */
   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
      boolean need_derivs;
      /* derivatives only needed when lod must actually be computed */
      need_derivs = ((min_filter != mag_filter ||
                      mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
                      !bld->static_sampler_state->min_max_lod_equal &&
                      !explicit_lod);
      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
      derivs = &cube_derivs;
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* calculate cube layer coord now */
         LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
         LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
         /* each cube in the array spans 6 layers */
         layer = lp_build_mul(&bld->int_coord_bld, layer, six);
         coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
         /* because of seamless filtering can't add it to face (coords[2]) here. */
      }
   }
   else if (target == PIPE_TEXTURE_1D_ARRAY ||
            target == PIPE_TEXTURE_2D_ARRAY) {
      /* round and clamp the array layer coord */
      coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
      coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
   }

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /*
       * Clamp p coords to [0,1] for fixed function depth texture format here.
       * Technically this is not entirely correct for unorm depth as the ref value
       * should be converted to the depth format (quantization!) and comparison
       * then done in texture format. This would actually help performance (since
       * only need to do it once and could save the per-sample conversion of texels
       * to floats instead), but it would need more messy code (would need to push
       * at least some bits down to actual fetch so conversion could be skipped,
       * and would have ugly interaction with border color, would need to convert
       * border color to that format too or do some other tricks to make it work).
       */
      const struct util_format_description *format_desc = bld->format_desc;
      unsigned chan_type;
      /* not entirely sure we couldn't end up with non-valid swizzle here */
      chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
                     format_desc->channel[format_desc->swizzle[0]].type :
                     UTIL_FORMAT_TYPE_FLOAT;
      if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
         coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
                                    bld->coord_bld.zero, bld->coord_bld.one);
      }
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, texture_index, sampler_index,
                            coords[0], coords[1], coords[2], cube_rho,
                            derivs, lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, lod_fpart, lod_pos_or_zero);
   } else {
      /* no lod needed: treat it as constant zero */
      lod_ipart = bld->lodi_bld.zero;
      *lod_pos_or_zero = bld->lodi_bld.zero;
   }

   if (bld->num_lods != bld->num_mips) {
      /* only makes sense if there's just a single mip level */
      assert(bld->num_mips == 1);
      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_soa()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                    bld->gallivm, bld->context_ptr,
                                                    texture_index);
      first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
      *ilevel0 = first_level;
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      assert(lod_ipart);
      assert(*lod_fpart);
      lp_build_linear_mip_levels(bld, texture_index,
                                 lod_ipart, lod_fpart,
                                 ilevel0, ilevel1);
      break;
   }
}
1921
/**
 * Load the sampler's border color and clamp it to the representable
 * range of the bound texture format, storing the result in
 * bld->border_color_clamped.  Clamping bounds are derived from the
 * format description (first non-void channel), with special cases for
 * mixed-sign, compressed, subsampled and small-float formats.
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
                                       bld->context_ptr, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL expects also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
                                             lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad(builder, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   LLVMSetAlignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure out
    * clamping necessary for each channel, simply use the first channel, and treat
    * mixed signed/unsigned normalized formats specially.
    * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
    * good reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      }
      else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm: [-1, 1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            }
            else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         }
         else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm: [0, 1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            }
            /*
             * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
             * we use Z32_FLOAT_S8X24 to imply sampling depth component
             * and ignoring stencil, which will blow up here if we try to
             * do a uint clamp in a float texel build...
             * And even if we had that format, mesa st also thinks using z24s8
             * means depth sampling ignoring stencil.
             */
            else if (chan_pure) {
               /*
                * Border color was stored as uint, hence never need min
                * clamp, and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         }
         else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
      {
         /* 10-bit rgb channels, 2-bit alpha */
         unsigned max10 = (1 << 10) - 1;
         max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                        max10, (1 << 2) - 1, NULL);
      }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   }
   else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      }
      else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         default:
            assert(0);
            break;
         }
      }
      /*
       * all others from subsampled/other group, though we don't care
       * about yuv (and should not have any from zs here)
       */
      else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* apply whichever bounds were determined above */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}
2139
2140
/**
 * General texture sampling codegen.
 * This function handles texture sampling for all texture targets (1D,
 * 2D, 3D, cube) and all filtering modes.
 * Results are written through colors_out (one alloca per channel).
 */
static void
lp_build_sample_general(struct lp_build_sample_context *bld,
                        unsigned sampler_unit,
                        boolean is_gather,
                        LLVMValueRef *coords,
                        const LLVMValueRef *offsets,
                        LLVMValueRef lod_positive,
                        LLVMValueRef lod_fpart,
                        LLVMValueRef ilevel0,
                        LLVMValueRef ilevel1,
                        LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
   const unsigned mip_filter = sampler_state->min_mip_filter;
   const unsigned min_filter = sampler_state->min_img_filter;
   const unsigned mag_filter = sampler_state->mag_img_filter;
   LLVMValueRef texels[4];
   unsigned chan;

   /* if we need border color, (potentially) clamp it now */
   if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
                                              min_filter,
                                              mag_filter) ||
       (bld->dims > 1 &&
           lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
                                                  min_filter,
                                                  mag_filter)) ||
       (bld->dims > 2 &&
           lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
                                                  min_filter,
                                                  mag_filter))) {
      lp_build_clamp_border_color(bld, sampler_unit);
   }


   /*
    * Get/interpolate texture colors.
    */

   /* per-channel output variables the branches below store into */
   for (chan = 0; chan < 4; ++chan) {
     texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
     lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
   }

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             is_gather,
                             coords, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             texels);
   }
   else {
      /*
       * Could also get rid of the if-logic and always use mipmap_both, both
       * for the single lod and multi-lod case if nothing really uses this.
       */
      if (bld->num_lods == 1) {
         /* Emit conditional to choose min image filter or mag image filter
          * depending on the lod being > 0 or <= 0, respectively.
          */
         struct lp_build_if_state if_ctx;

         /* branch condition must be i1 */
         lod_positive = LLVMBuildTrunc(builder, lod_positive,
                                       LLVMInt1TypeInContext(bld->gallivm->context), "");

         lp_build_if(&if_ctx, bld->gallivm, lod_positive);
         {
            /* Use the minification filter */
            lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_else(&if_ctx);
         {
            /* Use the magnification filter */
            lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                   FALSE,
                                   coords, offsets,
                                   ilevel0, NULL, NULL,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
      else {
         LLVMValueRef need_linear, linear_mask;
         unsigned mip_filter_for_nearest;
         struct lp_build_if_state if_ctx;

         /* derive which lanes need the linear filter from lod sign */
         if (min_filter == PIPE_TEX_FILTER_LINEAR) {
            linear_mask = lod_positive;
            mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
         }
         else {
            linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
            mip_filter_for_nearest = mip_filter;
         }
         need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
                                               linear_mask);

         /* widen per-quad mask to per-pixel width if necessary */
         if (bld->num_lods != bld->coord_type.length) {
            linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                bld->lodi_type,
                                                                bld->int_coord_type,
                                                                linear_mask);
         }

         lp_build_if(&if_ctx, bld->gallivm, need_linear);
         {
            /*
             * Do sampling with both filters simultaneously. This means using
             * a linear filter and doing some tricks (with weights) for the pixels
             * which need nearest filter.
             * Note that it's probably rare some pixels need nearest and some
             * linear filter but the fixups required for the nearest pixels
             * aren't all that complicated so just always run a combined path
             * if at least some pixels require linear.
             */
            lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
                                        coords, offsets,
                                        ilevel0, ilevel1,
                                        lod_fpart, lod_positive,
                                        texels);
         }
         lp_build_else(&if_ctx);
         {
            /*
             * All pixels require just nearest filtering, which is way
             * cheaper than linear, hence do a separate path for that.
             */
            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
                                   mip_filter_for_nearest, FALSE,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
   }

   /* load the final per-channel results back out of the allocas */
   for (chan = 0; chan < 4; ++chan) {
     colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
     lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
   }
}
2293
2294
/**
 * Texel fetch function.
 * In contrast to general sampling there is no filtering, no coord minification,
 * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
 * directly to be applied to the selected mip level (after adding texel offsets).
 * This function handles texel fetch for all targets where texel fetch is supported
 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
 * Out-of-bounds texels (coords, layer or level) return zero.
 */
static void
lp_build_fetch_texel(struct lp_build_sample_context *bld,
                     unsigned texture_unit,
                     const LLVMValueRef *coords,
                     LLVMValueRef explicit_lod,
                     const LLVMValueRef *offsets,
                     LLVMValueRef *colors_out)
{
   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   unsigned dims = bld->dims, chan;
   unsigned target = bld->static_texture_state->target;
   boolean out_of_bound_ret_zero = TRUE;
   LLVMValueRef size, ilevel;
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
   LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
   LLVMValueRef width, height, depth, i, j;
   LLVMValueRef offset, out_of_bounds, out1;

   /* accumulated per-lane out-of-bounds mask; starts all-in-bounds */
   out_of_bounds = int_coord_bld->zero;

   if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
      /* pack per-pixel lod down to per-quad/mip width if they differ */
      if (bld->num_mips != int_coord_bld->type.length) {
         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                            perquadi_bld->type, explicit_lod, 0);
      }
      else {
         ilevel = explicit_lod;
      }
      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
                                 out_of_bound_ret_zero ? &out_of_bounds : NULL);
   }
   else {
      assert(bld->num_mips == 1);
      if (bld->static_texture_state->target != PIPE_BUFFER) {
         ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                  bld->context_ptr, texture_unit);
      }
      else {
         /* buffers have no mip levels */
         ilevel = lp_build_const_int32(bld->gallivm, 0);
      }
   }
   lp_build_mipmap_level_sizes(bld, ilevel,
                               &size,
                               &row_stride_vec, &img_stride_vec);
   lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
                                size, &width, &height, &depth);

   if (target == PIPE_TEXTURE_1D_ARRAY ||
       target == PIPE_TEXTURE_2D_ARRAY) {
      /* z holds the array layer for array targets */
      if (out_of_bound_ret_zero) {
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
      else {
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
      }
   }

   /* This is a lot like border sampling */
   if (offsets[0]) {
      /*
       * coords are really unsigned, offsets are signed, but I don't think
       * exceeding 31 bits is possible
       */
      x = lp_build_add(int_coord_bld, x, offsets[0]);
   }
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      if (offsets[1]) {
         y = lp_build_add(int_coord_bld, y, offsets[1]);
      }
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

      if (dims >= 3) {
         if (offsets[2]) {
            z = lp_build_add(int_coord_bld, z, offsets[2]);
         }
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
   }

   /* compute byte offset (and subtexel i/j) from texel coords and strides */
   lp_build_sample_offset(int_coord_bld,
                          bld->format_desc,
                          x, y, z, row_stride_vec, img_stride_vec,
                          &offset, &i, &j);

   if (bld->static_texture_state->target != PIPE_BUFFER) {
      offset = lp_build_add(int_coord_bld, offset,
                            lp_build_get_mip_offsets(bld, ilevel));
   }

   /* force out-of-bounds lanes to offset 0 so the fetch stays in memory */
   offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, TRUE,
                           bld->base_ptr, offset,
                           i, j,
                           bld->cache,
                           colors_out);

   if (out_of_bound_ret_zero) {
      /*
       * Only needed for ARB_robust_buffer_access_behavior and d3d10.
       * Could use min/max above instead of out-of-bounds comparisons
       * if we don't care about the result returned for out-of-bounds.
       */
      for (chan = 0; chan < 4; chan++) {
         colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
                                            bld->texel_bld.zero, colors_out[chan]);
      }
   }
}
2427
2428
2429 /**
2430 * Just set texels to white instead of actually sampling the texture.
2431 * For debugging.
2432 */
2433 void
lp_build_sample_nop(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * coords,LLVMValueRef texel_out[4])2434 lp_build_sample_nop(struct gallivm_state *gallivm,
2435 struct lp_type type,
2436 const LLVMValueRef *coords,
2437 LLVMValueRef texel_out[4])
2438 {
2439 LLVMValueRef one = lp_build_one(gallivm, type);
2440 unsigned chan;
2441
2442 for (chan = 0; chan < 4; chan++) {
2443 texel_out[chan] = one;
2444 }
2445 }
2446
2447
2448 /**
2449 * Build the actual texture sampling code.
2450 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2451 * R, G, B, A.
2452 * \param type vector float type to use for coords, etc.
2453 * \param sample_key
2454 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
2455 */
2456 static void
lp_build_sample_soa_code(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,unsigned sample_key,unsigned texture_index,unsigned sampler_index,LLVMValueRef context_ptr,LLVMValueRef thread_data_ptr,const LLVMValueRef * coords,const LLVMValueRef * offsets,const struct lp_derivatives * derivs,LLVMValueRef lod,LLVMValueRef texel_out[4])2457 lp_build_sample_soa_code(struct gallivm_state *gallivm,
2458 const struct lp_static_texture_state *static_texture_state,
2459 const struct lp_static_sampler_state *static_sampler_state,
2460 struct lp_sampler_dynamic_state *dynamic_state,
2461 struct lp_type type,
2462 unsigned sample_key,
2463 unsigned texture_index,
2464 unsigned sampler_index,
2465 LLVMValueRef context_ptr,
2466 LLVMValueRef thread_data_ptr,
2467 const LLVMValueRef *coords,
2468 const LLVMValueRef *offsets,
2469 const struct lp_derivatives *derivs, /* optional */
2470 LLVMValueRef lod, /* optional */
2471 LLVMValueRef texel_out[4])
2472 {
2473 unsigned target = static_texture_state->target;
2474 unsigned dims = texture_dims(target);
2475 unsigned num_quads = type.length / 4;
2476 unsigned mip_filter, min_img_filter, mag_img_filter, i;
2477 struct lp_build_sample_context bld;
2478 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2479 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2480 LLVMBuilderRef builder = gallivm->builder;
2481 LLVMValueRef tex_width, newcoords[5];
2482 enum lp_sampler_lod_property lod_property;
2483 enum lp_sampler_lod_control lod_control;
2484 enum lp_sampler_op_type op_type;
2485 LLVMValueRef lod_bias = NULL;
2486 LLVMValueRef explicit_lod = NULL;
2487 boolean op_is_tex;
2488
2489 if (0) {
2490 enum pipe_format fmt = static_texture_state->format;
2491 debug_printf("Sample from %s\n", util_format_name(fmt));
2492 }
2493
2494 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
2495 LP_SAMPLER_LOD_PROPERTY_SHIFT;
2496 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
2497 LP_SAMPLER_LOD_CONTROL_SHIFT;
2498 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
2499 LP_SAMPLER_OP_TYPE_SHIFT;
2500
2501 op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
2502
2503 if (lod_control == LP_SAMPLER_LOD_BIAS) {
2504 lod_bias = lod;
2505 assert(lod);
2506 assert(derivs == NULL);
2507 }
2508 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
2509 explicit_lod = lod;
2510 assert(lod);
2511 assert(derivs == NULL);
2512 }
2513 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
2514 assert(derivs);
2515 assert(lod == NULL);
2516 }
2517 else {
2518 assert(derivs == NULL);
2519 assert(lod == NULL);
2520 }
2521
2522 if (static_texture_state->format == PIPE_FORMAT_NONE) {
2523 /*
2524 * If there's nothing bound, format is NONE, and we must return
2525 * all zero as mandated by d3d10 in this case.
2526 */
2527 unsigned chan;
2528 LLVMValueRef zero = lp_build_zero(gallivm, type);
2529 for (chan = 0; chan < 4; chan++) {
2530 texel_out[chan] = zero;
2531 }
2532 return;
2533 }
2534
2535 assert(type.floating);
2536
2537 /* Setup our build context */
2538 memset(&bld, 0, sizeof bld);
2539 bld.gallivm = gallivm;
2540 bld.context_ptr = context_ptr;
2541 bld.static_sampler_state = &derived_sampler_state;
2542 bld.static_texture_state = static_texture_state;
2543 bld.dynamic_state = dynamic_state;
2544 bld.format_desc = util_format_description(static_texture_state->format);
2545 bld.dims = dims;
2546
2547 bld.vector_width = lp_type_width(type);
2548
2549 bld.float_type = lp_type_float(32);
2550 bld.int_type = lp_type_int(32);
2551 bld.coord_type = type;
2552 bld.int_coord_type = lp_int_type(type);
2553 bld.float_size_in_type = lp_type_float(32);
2554 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2555 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2556 bld.texel_type = type;
2557
2558 /* always using the first channel hopefully should be safe,
2559 * if not things WILL break in other places anyway.
2560 */
2561 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2562 bld.format_desc->channel[0].pure_integer) {
2563 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2564 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2565 }
2566 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2567 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2568 }
2569 }
2570 else if (util_format_has_stencil(bld.format_desc) &&
2571 !util_format_has_depth(bld.format_desc)) {
2572 /* for stencil only formats, sample stencil (uint) */
2573 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2574 }
2575
2576 if (!static_texture_state->level_zero_only) {
2577 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2578 } else {
2579 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2580 }
2581 if (op_type == LP_SAMPLER_OP_GATHER) {
2582 /*
2583 * gather4 is exactly like GL_LINEAR filtering but in the end skipping
2584 * the actual filtering. Using mostly the same paths, so cube face
2585 * selection, coord wrapping etc. all naturally uses the same code.
2586 */
2587 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2588 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
2589 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
2590 }
2591 mip_filter = derived_sampler_state.min_mip_filter;
2592
2593 if (0) {
2594 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2595 }
2596
2597 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2598 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2599 {
2600 /*
2601 * Seamless filtering ignores wrap modes.
2602 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
2603 * bilinear it's not correct but way better than using for instance repeat.
2604 * Note we even set this for non-seamless. Technically GL allows any wrap
2605 * mode, which made sense when supporting true borders (can get seamless
2606 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2607 * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
2608 * up the sampler state (as it makes it texture dependent).
2609 */
2610 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2611 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2612 }
2613 /*
2614 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
2615 * so AoS path could be used. Not sure it's worth the trouble...
2616 */
2617
2618 min_img_filter = derived_sampler_state.min_img_filter;
2619 mag_img_filter = derived_sampler_state.mag_img_filter;
2620
2621
2622 /*
2623 * This is all a bit complicated different paths are chosen for performance
2624 * reasons.
2625 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
2626 * everything (the last two options are equivalent for 4-wide case).
2627 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
2628 * lod is calculated then the lod value extracted afterwards so making this
2629 * case basically the same as far as lod handling is concerned for the
2630 * further sample/filter code as the 1 lod for everything case.
2631 * Different lod handling mostly shows up when building mipmap sizes
2632 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2633 * (getting the fractional part of the lod to the right texels).
2634 */
2635
2636 /*
2637 * There are other situations where at least the multiple int lods could be
2638 * avoided like min and max lod being equal.
2639 */
2640 bld.num_mips = bld.num_lods = 1;
2641
2642 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2643 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2644 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2645 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2646 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2647 /*
2648 * special case for using per-pixel lod even for implicit lod,
2649 * which is generally never required (ok by APIs) except to please
2650 * some (somewhat broken imho) tests (because per-pixel face selection
2651 * can cause derivatives to be different for pixels outside the primitive
2652 * due to the major axis division even if pre-project derivatives are
2653 * looking normal).
2654 */
2655 bld.num_mips = type.length;
2656 bld.num_lods = type.length;
2657 }
2658 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
2659 (explicit_lod || lod_bias || derivs)) {
2660 if ((!op_is_tex && target != PIPE_BUFFER) ||
2661 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2662 bld.num_mips = type.length;
2663 bld.num_lods = type.length;
2664 }
2665 else if (op_is_tex && min_img_filter != mag_img_filter) {
2666 bld.num_mips = 1;
2667 bld.num_lods = type.length;
2668 }
2669 }
2670 /* TODO: for true scalar_lod should only use 1 lod value */
2671 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
2672 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2673 bld.num_mips = num_quads;
2674 bld.num_lods = num_quads;
2675 }
2676 else if (op_is_tex && min_img_filter != mag_img_filter) {
2677 bld.num_mips = 1;
2678 bld.num_lods = num_quads;
2679 }
2680
2681
2682 bld.lodf_type = type;
2683 /* we want native vector size to be able to use our intrinsics */
2684 if (bld.num_lods != type.length) {
2685 /* TODO: this currently always has to be per-quad or per-element */
2686 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2687 }
2688 bld.lodi_type = lp_int_type(bld.lodf_type);
2689 bld.levelf_type = bld.lodf_type;
2690 if (bld.num_mips == 1) {
2691 bld.levelf_type.length = 1;
2692 }
2693 bld.leveli_type = lp_int_type(bld.levelf_type);
2694 bld.float_size_type = bld.float_size_in_type;
2695 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
    * with per-element lod that is w0/h0/d0/_/w1/h1/d1/_/... so up to 8x4f32 */
2697 if (bld.num_mips > 1) {
2698 bld.float_size_type.length = bld.num_mips == type.length ?
2699 bld.num_mips * bld.float_size_in_type.length :
2700 type.length;
2701 }
2702 bld.int_size_type = lp_int_type(bld.float_size_type);
2703
2704 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2705 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2706 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2707 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2708 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2709 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2710 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2711 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2712 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2713 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2714 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2715 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2716 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2717 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2718
2719 /* Get the dynamic state */
2720 tex_width = dynamic_state->width(dynamic_state, gallivm,
2721 context_ptr, texture_index);
2722 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
2723 context_ptr, texture_index);
2724 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
2725 context_ptr, texture_index);
2726 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
2727 context_ptr, texture_index);
2728 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
2729 context_ptr, texture_index);
2730 /* Note that mip_offsets is an array[level] of offsets to texture images */
2731
2732 if (dynamic_state->cache_ptr && thread_data_ptr) {
2733 bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
2734 thread_data_ptr, texture_index);
2735 }
2736
2737 /* width, height, depth as single int vector */
2738 if (dims <= 1) {
2739 bld.int_size = tex_width;
2740 }
2741 else {
2742 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2743 tex_width,
2744 LLVMConstInt(i32t, 0, 0), "");
2745 if (dims >= 2) {
2746 LLVMValueRef tex_height =
2747 dynamic_state->height(dynamic_state, gallivm,
2748 context_ptr, texture_index);
2749 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2750 tex_height,
2751 LLVMConstInt(i32t, 1, 0), "");
2752 if (dims >= 3) {
2753 LLVMValueRef tex_depth =
2754 dynamic_state->depth(dynamic_state, gallivm, context_ptr,
2755 texture_index);
2756 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2757 tex_depth,
2758 LLVMConstInt(i32t, 2, 0), "");
2759 }
2760 }
2761 }
2762
2763 for (i = 0; i < 5; i++) {
2764 newcoords[i] = coords[i];
2765 }
2766
2767 if (0) {
2768 /* For debug: no-op texture sampling */
2769 lp_build_sample_nop(gallivm,
2770 bld.texel_type,
2771 newcoords,
2772 texel_out);
2773 }
2774
2775 else if (op_type == LP_SAMPLER_OP_FETCH) {
2776 lp_build_fetch_texel(&bld, texture_index, newcoords,
2777 lod, offsets,
2778 texel_out);
2779 }
2780
2781 else {
2782 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
2783 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2784 boolean use_aos;
2785
2786 if (util_format_is_pure_integer(static_texture_state->format) &&
2787 !util_format_has_depth(bld.format_desc) &&
2788 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
2789 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2790 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2791 /*
          * Bail if impossible filtering is specified (the awkward additional
2793 * depth check is because it is legal in gallium to have things like S8Z24
2794 * here which would say it's pure int despite such formats should sample
2795 * the depth component).
2796 * In GL such filters make the texture incomplete, this makes it robust
2797 * against state trackers which set this up regardless (we'd crash in the
2798 * lerp later (except for gather)).
2799 * Must do this after fetch_texel code since with GL state tracker we'll
2800 * get some junk sampler for buffer textures.
2801 */
2802 unsigned chan;
2803 LLVMValueRef zero = lp_build_zero(gallivm, type);
2804 for (chan = 0; chan < 4; chan++) {
2805 texel_out[chan] = zero;
2806 }
2807 return;
2808 }
2809
2810 use_aos = util_format_fits_8unorm(bld.format_desc) &&
2811 op_is_tex &&
2812 /* not sure this is strictly needed or simply impossible */
2813 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
2814 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
2815
2816 use_aos &= bld.num_lods <= num_quads ||
2817 derived_sampler_state.min_img_filter ==
2818 derived_sampler_state.mag_img_filter;
2819 if (dims > 1) {
2820 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
2821 if (dims > 2) {
2822 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
2823 }
2824 }
2825 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
2826 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2827 derived_sampler_state.seamless_cube_map &&
2828 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2829 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2830 /* theoretically possible with AoS filtering but not implemented (complex!) */
2831 use_aos = 0;
2832 }
2833
2834 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2835 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2836 debug_printf("%s: using floating point linear filtering for %s\n",
2837 __FUNCTION__, bld.format_desc->short_name);
2838 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
2839 " wraps %d wrapt %d wrapr %d\n",
2840 derived_sampler_state.min_img_filter,
2841 derived_sampler_state.mag_img_filter,
2842 derived_sampler_state.min_mip_filter,
2843 static_texture_state->target,
2844 derived_sampler_state.seamless_cube_map,
2845 derived_sampler_state.wrap_s,
2846 derived_sampler_state.wrap_t,
2847 derived_sampler_state.wrap_r);
2848 }
2849
2850 lp_build_sample_common(&bld, texture_index, sampler_index,
2851 newcoords,
2852 derivs, lod_bias, explicit_lod,
2853 &lod_positive, &lod_fpart,
2854 &ilevel0, &ilevel1);
2855
2856 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
2857 /* The aos path doesn't do seamless filtering so simply add cube layer
2858 * to face now.
2859 */
2860 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
2861 }
2862
2863 /*
       * We only try 8-wide sampling with SoA, or with AoS if we have AVX2
       * (it appears to be a loss with just AVX).
2866 */
2867 if (num_quads == 1 || !use_aos ||
2868 (util_cpu_caps.has_avx2 &&
2869 (bld.num_lods == 1 ||
2870 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
2871 if (use_aos) {
2872 /* do sampling/filtering with fixed pt arithmetic */
2873 lp_build_sample_aos(&bld, sampler_index,
2874 newcoords[0], newcoords[1],
2875 newcoords[2],
2876 offsets, lod_positive, lod_fpart,
2877 ilevel0, ilevel1,
2878 texel_out);
2879 }
2880
2881 else {
2882 lp_build_sample_general(&bld, sampler_index,
2883 op_type == LP_SAMPLER_OP_GATHER,
2884 newcoords, offsets,
2885 lod_positive, lod_fpart,
2886 ilevel0, ilevel1,
2887 texel_out);
2888 }
2889 }
2890 else {
2891 unsigned j;
2892 struct lp_build_sample_context bld4;
2893 struct lp_type type4 = type;
2894 unsigned i;
2895 LLVMValueRef texelout4[4];
2896 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2897
2898 type4.length = 4;
2899
2900 /* Setup our build context */
2901 memset(&bld4, 0, sizeof bld4);
2902 bld4.gallivm = bld.gallivm;
2903 bld4.context_ptr = bld.context_ptr;
2904 bld4.static_texture_state = bld.static_texture_state;
2905 bld4.static_sampler_state = bld.static_sampler_state;
2906 bld4.dynamic_state = bld.dynamic_state;
2907 bld4.format_desc = bld.format_desc;
2908 bld4.dims = bld.dims;
2909 bld4.row_stride_array = bld.row_stride_array;
2910 bld4.img_stride_array = bld.img_stride_array;
2911 bld4.base_ptr = bld.base_ptr;
2912 bld4.mip_offsets = bld.mip_offsets;
2913 bld4.int_size = bld.int_size;
2914 bld4.cache = bld.cache;
2915
2916 bld4.vector_width = lp_type_width(type4);
2917
2918 bld4.float_type = lp_type_float(32);
2919 bld4.int_type = lp_type_int(32);
2920 bld4.coord_type = type4;
2921 bld4.int_coord_type = lp_int_type(type4);
2922 bld4.float_size_in_type = lp_type_float(32);
2923 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2924 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2925 bld4.texel_type = bld.texel_type;
2926 bld4.texel_type.length = 4;
2927
2928 bld4.num_mips = bld4.num_lods = 1;
2929 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2930 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2931 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2932 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2933 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2934 bld4.num_mips = type4.length;
2935 bld4.num_lods = type4.length;
2936 }
2937 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2938 (explicit_lod || lod_bias || derivs)) {
2939 if ((!op_is_tex && target != PIPE_BUFFER) ||
2940 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2941 bld4.num_mips = type4.length;
2942 bld4.num_lods = type4.length;
2943 }
2944 else if (op_is_tex && min_img_filter != mag_img_filter) {
2945 bld4.num_mips = 1;
2946 bld4.num_lods = type4.length;
2947 }
2948 }
2949
2950 /* we want native vector size to be able to use our intrinsics */
2951 bld4.lodf_type = type4;
2952 if (bld4.num_lods != type4.length) {
2953 bld4.lodf_type.length = 1;
2954 }
2955 bld4.lodi_type = lp_int_type(bld4.lodf_type);
2956 bld4.levelf_type = type4;
2957 if (bld4.num_mips != type4.length) {
2958 bld4.levelf_type.length = 1;
2959 }
2960 bld4.leveli_type = lp_int_type(bld4.levelf_type);
2961 bld4.float_size_type = bld4.float_size_in_type;
2962 if (bld4.num_mips > 1) {
2963 bld4.float_size_type.length = bld4.num_mips == type4.length ?
2964 bld4.num_mips * bld4.float_size_in_type.length :
2965 type4.length;
2966 }
2967 bld4.int_size_type = lp_int_type(bld4.float_size_type);
2968
2969 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2970 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2971 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2972 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2973 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2974 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2975 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2976 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2977 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2978 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2979 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2980 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2981 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
2982 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
2983
2984 for (i = 0; i < num_quads; i++) {
2985 LLVMValueRef s4, t4, r4;
2986 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
2987 LLVMValueRef ilevel04, ilevel14 = NULL;
2988 LLVMValueRef offsets4[4] = { NULL };
2989 unsigned num_lods = bld4.num_lods;
2990
2991 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2992 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2993 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2994
2995 if (offsets[0]) {
2996 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2997 if (dims > 1) {
2998 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2999 if (dims > 2) {
3000 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
3001 }
3002 }
3003 }
3004 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
3005 ilevel04 = bld.num_mips == 1 ? ilevel0 :
3006 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
3007 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
3008 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
3009 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
3010 }
3011
3012 if (use_aos) {
3013 /* do sampling/filtering with fixed pt arithmetic */
3014 lp_build_sample_aos(&bld4, sampler_index,
3015 s4, t4, r4, offsets4,
3016 lod_positive4, lod_fpart4,
3017 ilevel04, ilevel14,
3018 texelout4);
3019 }
3020
3021 else {
3022 /* this path is currently unreachable and hence might break easily... */
3023 LLVMValueRef newcoords4[5];
3024 newcoords4[0] = s4;
3025 newcoords4[1] = t4;
3026 newcoords4[2] = r4;
3027 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
3028 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
3029
3030 lp_build_sample_general(&bld4, sampler_index,
3031 op_type == LP_SAMPLER_OP_GATHER,
3032 newcoords4, offsets4,
3033 lod_positive4, lod_fpart4,
3034 ilevel04, ilevel14,
3035 texelout4);
3036 }
3037 for (j = 0; j < 4; j++) {
3038 texelouttmp[j][i] = texelout4[j];
3039 }
3040 }
3041
3042 for (j = 0; j < 4; j++) {
3043 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
3044 }
3045 }
3046 }
3047
3048 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
3049 apply_sampler_swizzle(&bld, texel_out);
3050 }
3051
3052 /*
3053 * texel type can be a (32bit) int/uint (for pure int formats only),
3054 * however we are expected to always return floats (storage is untyped).
3055 */
3056 if (!bld.texel_type.floating) {
3057 unsigned chan;
3058 for (chan = 0; chan < 4; chan++) {
3059 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
3060 lp_build_vec_type(gallivm, type), "");
3061 }
3062 }
3063 }
3064
3065
3066 #define USE_TEX_FUNC_CALL 1
3067
3068 #define LP_MAX_TEX_FUNC_ARGS 32
3069
3070 static inline void
get_target_info(enum pipe_texture_target target,unsigned * num_coords,unsigned * num_derivs,unsigned * num_offsets,unsigned * layer)3071 get_target_info(enum pipe_texture_target target,
3072 unsigned *num_coords, unsigned *num_derivs,
3073 unsigned *num_offsets, unsigned *layer)
3074 {
3075 unsigned dims = texture_dims(target);
3076 *num_coords = dims;
3077 *num_offsets = dims;
3078 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
3079 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
3080 *layer = has_layer_coord(target) ? 2: 0;
3081 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3082 /*
3083 * dims doesn't include r coord for cubes - this is handled
3084 * by layer instead, but need to fix up for cube arrays...
3085 */
3086 *layer = 3;
3087 *num_coords = 3;
3088 }
3089 }
3090
3091
/**
 * Generate the function body for a texture sampling function.
 *
 * The parameter order unpacked here must exactly mirror the argument
 * packing done by lp_build_sample_soa_func(): context ptr, optional
 * thread data ptr (texture cache), coords, optional layer coord,
 * optional shadow coord, optional offsets, then lod or derivatives.
 * The body itself is just lp_build_sample_soa_code() inlined into the
 * given \p function, which returns the four texel channels as an
 * aggregate.
 */
static void
lp_build_sample_gen_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         struct lp_type type,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMValueRef function,
                         unsigned num_args,
                         unsigned sample_key)
{
   LLVMBuilderRef old_builder;
   LLVMBasicBlockRef block;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef lod = NULL;
   LLVMValueRef context_ptr;
   LLVMValueRef thread_data_ptr = NULL;
   LLVMValueRef texel_out[4];
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   unsigned num_param = 0;
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   enum lp_sampler_lod_control lod_control;
   boolean need_cache = FALSE;

   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /* Only S3TC-layout formats take the extra thread_data_ptr parameter
    * (used for the per-thread decompression cache); this must match the
    * prototype built by the caller.
    */
   if (dynamic_state->cache_ptr) {
      const struct util_format_description *format_desc;
      format_desc = util_format_description(static_texture_state->format);
      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         need_cache = TRUE;
      }
   }

   /* "unpack" arguments */
   context_ptr = LLVMGetParam(function, num_param++);
   if (need_cache) {
      thread_data_ptr = LLVMGetParam(function, num_param++);
   }
   for (i = 0; i < num_coords; i++) {
      coords[i] = LLVMGetParam(function, num_param++);
   }
   /* Coords not covered by the target's dimensionality are not passed as
    * parameters; fill them with undef so all 5 slots are defined.
    */
   for (i = num_coords; i < 5; i++) {
      /* This is rather unfortunate... */
      coords[i] = lp_build_undef(gallivm, type);
   }
   if (layer) {
      coords[layer] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      /* shadow comparison value always travels in coord slot 4 */
      coords[4] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (i = 0; i < num_offsets; i++) {
         offsets[i] = LLVMGetParam(function, num_param++);
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      lod = LLVMGetParam(function, num_param++);
   }
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      for (i = 0; i < num_derivs; i++) {
         derivs.ddx[i] = LLVMGetParam(function, num_param++);
         derivs.ddy[i] = LLVMGetParam(function, num_param++);
      }
      deriv_ptr = &derivs;
   }

   /* sanity check: caller's prototype and our unpacking must agree */
   assert(num_args == num_param);

   /*
    * Function body
    */

   /* Emit into a fresh builder so the caller's insertion point is left
    * untouched; restored below.
    */
   old_builder = gallivm->builder;
   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
   LLVMPositionBuilderAtEnd(gallivm->builder, block);

   lp_build_sample_soa_code(gallivm,
                            static_texture_state,
                            static_sampler_state,
                            dynamic_state,
                            type,
                            sample_key,
                            texture_index,
                            sampler_index,
                            context_ptr,
                            thread_data_ptr,
                            coords,
                            offsets,
                            deriv_ptr,
                            lod,
                            texel_out);

   /* Return the four texel channels as an aggregate (struct) value. */
   LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);

   LLVMDisposeBuilder(gallivm->builder);
   gallivm->builder = old_builder;

   gallivm_verify_function(gallivm, function);
}
3205
3206
/**
 * Call the matching function for texture sampling.
 * If there's no match, generate a new one.
 *
 * Texture functions are cached in the module by name; the name encodes
 * the texture unit, sampler unit and sample_key, which together cover
 * all static state relevant to code generation.  The argument packing
 * here must exactly match the unpacking in lp_build_sample_gen_func().
 */
static void
lp_build_sample_soa_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         const struct lp_sampler_params *params)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* find the module via the function containing the current insert block */
   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
                              LLVMGetInsertBlock(builder)));
   LLVMValueRef function, inst;
   LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
   LLVMBasicBlockRef bb;
   LLVMValueRef tex_ret;
   unsigned num_args = 0;
   char func_name[64];
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   unsigned texture_index = params->texture_index;
   unsigned sampler_index = params->sampler_index;
   unsigned sample_key = params->sample_key;
   const LLVMValueRef *coords = params->coords;
   const LLVMValueRef *offsets = params->offsets;
   const struct lp_derivatives *derivs = params->derivs;
   enum lp_sampler_lod_control lod_control;
   boolean need_cache = FALSE;

   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /* Thread data (texture cache) is only passed for S3TC formats;
    * must stay in sync with lp_build_sample_gen_func().
    */
   if (dynamic_state->cache_ptr) {
      const struct util_format_description *format_desc;
      format_desc = util_format_description(static_texture_state->format);
      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /*
          * This is not 100% correct, if we have cache but the
          * util_format_s3tc_prefer is true the cache won't get used
          * regardless (could hook up the block decode there...) */
         need_cache = TRUE;
      }
   }
   /*
    * texture function matches are found by name.
    * Thus the name has to include both the texture and sampler unit
    * (which covers all static state) plus the actual texture function
    * (including things like offsets, shadow coord, lod control).
    * Additionally lod_property has to be included too.
    */

   util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
                 texture_index, sampler_index, sample_key);

   function = LLVMGetNamedFunction(module, func_name);

   if(!function) {
      LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
      LLVMTypeRef ret_type;
      LLVMTypeRef function_type;
      LLVMTypeRef val_type[4];
      unsigned num_param = 0;

      /*
       * Generate the function prototype.
       */

      arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
      if (need_cache) {
         arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
      }
      for (i = 0; i < num_coords; i++) {
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
      }
      if (layer) {
         arg_types[num_param++] = LLVMTypeOf(coords[layer]);
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
      }
      if (sample_key & LP_SAMPLER_SHADOW) {
         /* shadow comparison value has the same type as a coord */
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
      }
      if (sample_key & LP_SAMPLER_OFFSETS) {
         for (i = 0; i < num_offsets; i++) {
            arg_types[num_param++] = LLVMTypeOf(offsets[0]);
            assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
         }
      }
      if (lod_control == LP_SAMPLER_LOD_BIAS ||
          lod_control == LP_SAMPLER_LOD_EXPLICIT) {
         arg_types[num_param++] = LLVMTypeOf(params->lod);
      }
      else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
         for (i = 0; i < num_derivs; i++) {
            arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
            arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
            assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
            assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
         }
      }

      /* return type: struct of the four texel channel vectors */
      val_type[0] = val_type[1] = val_type[2] = val_type[3] =
         lp_build_vec_type(gallivm, params->type);
      ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
      function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
      function = LLVMAddFunction(module, func_name, function_type);

      /* all pointer args are distinct state pointers; mark noalias */
      for (i = 0; i < num_param; ++i) {
         if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {

            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
         }
      }

      LLVMSetFunctionCallConv(function, LLVMFastCallConv);
      LLVMSetLinkage(function, LLVMInternalLinkage);

      lp_build_sample_gen_func(gallivm,
                               static_texture_state,
                               static_sampler_state,
                               dynamic_state,
                               params->type,
                               texture_index,
                               sampler_index,
                               function,
                               num_param,
                               sample_key);
   }

   /* pack the call arguments; order must match the prototype above */
   num_args = 0;
   args[num_args++] = params->context_ptr;
   if (need_cache) {
      args[num_args++] = params->thread_data_ptr;
   }
   for (i = 0; i < num_coords; i++) {
      args[num_args++] = coords[i];
   }
   if (layer) {
      args[num_args++] = coords[layer];
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      args[num_args++] = coords[4];
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (i = 0; i < num_offsets; i++) {
         args[num_args++] = offsets[i];
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      args[num_args++] = params->lod;
   }
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      for (i = 0; i < num_derivs; i++) {
         args[num_args++] = derivs->ddx[i];
         args[num_args++] = derivs->ddy[i];
      }
   }

   assert(num_args <= LP_MAX_TEX_FUNC_ARGS);

   tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
   bb = LLVMGetInsertBlock(builder);
   inst = LLVMGetLastInstruction(bb);
   /* call conv of the call site must match the function's (fastcc) */
   LLVMSetInstructionCallConv(inst, LLVMFastCallConv);

   /* unpack the aggregate return value into the four texel channels */
   for (i = 0; i < 4; i++) {
      params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
   }
}
3381
3382
3383 /**
3384 * Build texture sampling code.
3385 * Either via a function call or inline it directly.
3386 */
3387 void
lp_build_sample_soa(const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_sampler_params * params)3388 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
3389 const struct lp_static_sampler_state *static_sampler_state,
3390 struct lp_sampler_dynamic_state *dynamic_state,
3391 struct gallivm_state *gallivm,
3392 const struct lp_sampler_params *params)
3393 {
3394 boolean use_tex_func = FALSE;
3395
3396 /*
3397 * Do not use a function call if the sampling is "simple enough".
3398 * We define this by
3399 * a) format
3400 * b) no mips (either one level only or no mip filter)
3401 * No mips will definitely make the code smaller, though
3402 * the format requirement is a bit iffy - there's some (SoA) formats
3403 * which definitely generate less code. This does happen to catch
3404 * some important cases though which are hurt quite a bit by using
3405 * a call (though not really because of the call overhead but because
3406 * they are reusing the same texture unit with some of the same
3407 * parameters).
3408 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
3409 */
3410
3411 if (USE_TEX_FUNC_CALL) {
3412 const struct util_format_description *format_desc;
3413 boolean simple_format;
3414 boolean simple_tex;
3415 enum lp_sampler_op_type op_type;
3416 format_desc = util_format_description(static_texture_state->format);
3417 simple_format = !format_desc ||
3418 (util_format_is_rgba8_variant(format_desc) &&
3419 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
3420
3421 op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3422 LP_SAMPLER_OP_TYPE_SHIFT;
3423 simple_tex =
3424 op_type != LP_SAMPLER_OP_TEXTURE ||
3425 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
3426 static_texture_state->level_zero_only == TRUE) &&
3427 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
3428
3429 use_tex_func = format_desc && !(simple_format && simple_tex);
3430 }
3431
3432 if (use_tex_func) {
3433 lp_build_sample_soa_func(gallivm,
3434 static_texture_state,
3435 static_sampler_state,
3436 dynamic_state,
3437 params);
3438 }
3439 else {
3440 lp_build_sample_soa_code(gallivm,
3441 static_texture_state,
3442 static_sampler_state,
3443 dynamic_state,
3444 params->type,
3445 params->sample_key,
3446 params->texture_index,
3447 params->sampler_index,
3448 params->context_ptr,
3449 params->thread_data_ptr,
3450 params->coords,
3451 params->offsets,
3452 params->derivs,
3453 params->lod,
3454 params->texel);
3455 }
3456 }
3457
3458
/**
 * Build code for a texture size / resource-info query (TXQ / resinfo).
 *
 * Writes up to four per-channel result vectors into params->sizes_out:
 * width, height, depth/layers (as applicable to the target) and - for
 * sviewinfo queries with an explicit lod - the number of mip levels in
 * the w channel.  Sizes are minified according to the requested lod.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        const struct lp_sampler_size_query_params *params)
{
   LLVMValueRef lod, level, size;
   LLVMValueRef first_level = NULL;
   int dims, i;
   boolean has_array;
   /* only scalar (per-quad) lod handled so far, see FIXME below */
   unsigned num_lods = 1;
   struct lp_build_context bld_int_vec4;
   LLVMValueRef context_ptr = params->context_ptr;
   unsigned texture_unit = params->texture_unit;
   unsigned target = params->target;

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      unsigned chan;
      LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
      for (chan = 0; chan < 4; chan++) {
         params->sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   /* number of size dimensions (1, 2 or 3) for this target */
   dims = texture_dims(target);

   /* array targets report the layer count in the channel after the dims */
   switch (target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      has_array = TRUE;
      break;
   default:
      has_array = FALSE;
      break;
   }

   /* size queries return integer results */
   assert(!params->int_type.floating);

   /* i32x4 context: one lane per potential result channel */
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (params->explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      /* shader lod is relative to the view's first level */
      first_level = dynamic_state->first_level(dynamic_state, gallivm,
                                               context_ptr, texture_unit);
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      /* no explicit lod: query the base level */
      lod = bld_int_vec4.zero;
   }

   /* assemble <width, height, depth, x> into one i32x4 vector */
   size = bld_int_vec4.undef;

   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 dynamic_state->width(dynamic_state, gallivm,
                                                      context_ptr, texture_unit),
                                 lp_build_const_int32(gallivm, 0), "");

   if (dims >= 2) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->height(dynamic_state, gallivm,
                                                          context_ptr, texture_unit),
                                    lp_build_const_int32(gallivm, 1), "");
   }

   if (dims >= 3) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->depth(dynamic_state, gallivm,
                                                         context_ptr, texture_unit),
                                    lp_build_const_int32(gallivm, 2), "");
   }

   /* shrink base-level sizes to the requested mip level (all lanes at once) */
   size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);

   if (has_array) {
      /* layer count is not minified; it comes from the depth callback */
      LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
                                                 context_ptr, texture_unit);
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (params->explicit_lod && params->is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = dynamic_state->last_level(dynamic_state, gallivm,
                                             context_ptr, texture_unit);

      /* out of bounds if level < first_level or level > last_level */
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      }
      else {
         /* TODO */
         assert(0);
      }
      /* mask the sizes to zero where the level is out of bounds */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }
   /* splat each valid channel of the i32x4 out to a full result vector */
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type,
                                                        size,
                                                        lp_build_const_int32(gallivm, i));
   }
   if (params->is_sviewinfo) {
      /* sviewinfo always writes all four channels; pad unused ones with 0 */
      for (; i < 4; i++) {
         params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (params->is_sviewinfo && params->explicit_lod) {
      struct lp_build_context bld_int_scalar;
      LLVMValueRef num_levels;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      }
      else {
         LLVMValueRef last_level;

         last_level = dynamic_state->last_level(dynamic_state, gallivm,
                                                context_ptr, texture_unit);
         /* mip count = last_level - first_level + 1 */
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
      }
      /* number of mip levels goes in the w channel */
      params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
                                                num_levels);
   }
}
3644