1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/compiler.h"
40 #include "util/u_debug.h"
41 #include "util/u_dump.h"
42 #include "util/u_memory.h"
43 #include "util/u_math.h"
44 #include "util/format/u_format.h"
45 #include "util/u_cpu_detect.h"
46 #include "util/format_rgb9e5.h"
47 #include "lp_bld_debug.h"
48 #include "lp_bld_type.h"
49 #include "lp_bld_const.h"
50 #include "lp_bld_conv.h"
51 #include "lp_bld_arit.h"
52 #include "lp_bld_bitarit.h"
53 #include "lp_bld_logic.h"
54 #include "lp_bld_printf.h"
55 #include "lp_bld_swizzle.h"
56 #include "lp_bld_flow.h"
57 #include "lp_bld_gather.h"
58 #include "lp_bld_format.h"
59 #include "lp_bld_sample.h"
60 #include "lp_bld_sample_aos.h"
61 #include "lp_bld_struct.h"
62 #include "lp_bld_quad.h"
63 #include "lp_bld_pack.h"
64 #include "lp_bld_intr.h"
65 #include "lp_bld_misc.h"
66 #include "lp_bld_jit_types.h"
67
68
69 /**
70 * Generate code to fetch a texel from a texture at int coords (x, y, z).
71 * The computation depends on whether the texture is 1D, 2D or 3D.
72 * The result, texel, will be float vectors:
73 * texel[0] = red values
74 * texel[1] = green values
75 * texel[2] = blue values
76 * texel[3] = alpha values
77 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   /* Per-lane mask: non-zero where the texel should take the border color. */
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   /* Fold the t coord's out-of-bounds test into the mask (2D/3D only). */
   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      } else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* Fold the r coord's out-of-bounds test into the mask (3D only). */
   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      } else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image.  We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border.  This will cause
       * coords which are out of bounds to become zero.  Zero's guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   /* Fetch the texel (always performed, even for border lanes — see note
    * below); results land in texel_out[0..3] as r/g/b/a float vectors.
    */
   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, true,
                           data_ptr, offset,
                           i, j,
                           bld->cache,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   } else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by sampler_view swizzle anyway but it's
       * easier too.
       */
      for (unsigned chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         if (util_format_has_stencil(format_desc)) {
            /* Stencil formats: only channel 0 carries data. */
            if (chan == 0)
               chan_s = 0;
            else
               break;
         } else {
            /* Find which source channel feeds output channel 'chan'. */
            for (chan_s = 0; chan_s < 4; chan_s++) {
               if (chan_s == format_desc->swizzle[chan]) {
                  break;
               }
            }
         }
         /* chan_s > 3 means the channel isn't present in the format. */
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}
224
225 static LLVMValueRef
get_first_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)226 get_first_level(struct gallivm_state *gallivm,
227 LLVMTypeRef resources_type,
228 LLVMValueRef resources_ptr,
229 unsigned texture_unit,
230 LLVMValueRef texture_unit_offset,
231 const struct lp_static_texture_state *static_state,
232 struct lp_sampler_dynamic_state *dynamic_state)
233 {
234 if (static_state->level_zero_only)
235 return lp_build_const_int32(gallivm, 0);
236 else {
237 LLVMValueRef first_level;
238
239 first_level = dynamic_state->first_level(gallivm, resources_type,
240 resources_ptr, texture_unit,
241 texture_unit_offset);
242 first_level = LLVMBuildZExt(gallivm->builder, first_level,
243 LLVMInt32TypeInContext(gallivm->context), "");
244 return first_level;
245 }
246 }
247
248
249 static LLVMValueRef
get_last_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)250 get_last_level(struct gallivm_state *gallivm,
251 LLVMTypeRef resources_type,
252 LLVMValueRef resources_ptr,
253 unsigned texture_unit,
254 LLVMValueRef texture_unit_offset,
255 const struct lp_static_texture_state *static_state,
256 struct lp_sampler_dynamic_state *dynamic_state)
257 {
258 if (static_state->level_zero_only)
259 return lp_build_const_int32(gallivm, 0);
260 else {
261 LLVMValueRef last_level;
262
263 last_level = dynamic_state->last_level(gallivm, resources_type,
264 resources_ptr, texture_unit,
265 texture_unit_offset);
266 last_level = LLVMBuildZExt(gallivm->builder, last_level,
267 LLVMInt32TypeInContext(gallivm->context), "");
268 return last_level;
269 }
270 }
271
272 /**
273 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
274 * (Note that with pot sizes could do this much more easily post-scale
275 * with some bit arithmetic.)
276 */
277 static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context * bld,LLVMValueRef coord,bool posOnly)278 lp_build_coord_mirror(struct lp_build_sample_context *bld,
279 LLVMValueRef coord, bool posOnly)
280 {
281 struct lp_build_context *coord_bld = &bld->coord_bld;
282 LLVMValueRef fract;
283 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
284
285 /*
286 * We can just use 2*(x - round(0.5*x)) to do all the mirroring,
287 * it all works out. (The result is in range [-1, 1.0], negative if
288 * the coord is in the "odd" section, otherwise positive.)
289 */
290
291 coord = lp_build_mul(coord_bld, coord, half);
292 fract = lp_build_round(coord_bld, coord);
293 fract = lp_build_sub(coord_bld, coord, fract);
294 coord = lp_build_add(coord_bld, fract, fract);
295
296 if (posOnly) {
297 /*
298 * Theoretically it's not quite 100% accurate because the spec says
299 * that ultimately a scaled coord of -x.0 should map to int coord
300 * -x + 1 with mirroring, not -x (this does not matter for bilinear
301 * filtering).
302 */
303 coord = lp_build_abs(coord_bld, coord);
304 /* kill off NaNs */
305 /* XXX: not safe without arch rounding, fract can be anything. */
306 coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
307 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
308 }
309
310 return coord;
311 }
312
313
314 /**
315 * Helper to compute the first coord and the weight for
316 * linear wrap repeat npot textures
317 */
318 void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_f)319 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
320 LLVMValueRef coord_f,
321 LLVMValueRef length_i,
322 LLVMValueRef length_f,
323 LLVMValueRef *coord0_i,
324 LLVMValueRef *weight_f)
325 {
326 struct lp_build_context *coord_bld = &bld->coord_bld;
327 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
328 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
329 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
330 int_coord_bld->one);
331 LLVMValueRef mask;
332 /* wrap with normalized floats is just fract */
333 coord_f = lp_build_fract(coord_bld, coord_f);
334 /* mul by size and subtract 0.5 */
335 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
336 coord_f = lp_build_sub(coord_bld, coord_f, half);
337 /*
338 * we avoided the 0.5/length division before the repeat wrap,
339 * now need to fix up edge cases with selects
340 */
341 /*
342 * Note we do a float (unordered) compare so we can eliminate NaNs.
343 * (Otherwise would need fract_safe above).
344 */
345 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
346 PIPE_FUNC_LESS, coord_f, coord_bld->zero);
347
348 /* convert to int, compute lerp weight */
349 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
350 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
351 }
352
353
354 /**
355 * Build LLVM code for texture wrap mode for linear filtering.
356 * \param x0_out returns first integer texcoord
357 * \param x1_out returns second integer texcoord
358 * \param weight_out returns linear interpolation weight
359 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            bool is_gather,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            bool is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap (pot size: wrap via bitwise AND with length-1) */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      } else {
         LLVMValueRef mask;
         if (offset) {
            /* npot: offset must be applied pre-wrap, in normalized space */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         /* coord1 wraps to 0 when coord0 is the last texel */
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /*
       * clamp to [0, length]
       *
       * Unlike some other wrap modes, this should be correct for gather
       * too. GL_CLAMP explicitly does this clamp on the coord prior to
       * actual wrapping (which is per sample).
       */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         /* unsigned variant of the coord context for the ifloor_fract below */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = false;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         if (!is_gather) {
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         } else {
            /*
             * The non-gather path will end up with coords 0, 1 if coord was
             * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
             * really matter what the second coord is). But for gather, we
             * really need to end up with coords 0, 0.
             */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            coord0 = lp_build_sub(coord_bld, coord, half);
            coord1 = lp_build_add(coord_bld, coord, half);
            /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */
            coord0 = lp_build_itrunc(coord_bld, coord0);
            coord1 = lp_build_itrunc(coord_bld, coord1);
            /* weight is unused by gather */
            weight = coord_bld->undef;
         }
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * We don't need any clamp. Technically, for very large (pos or neg)
       * (or infinite) values, clamp against [-length, length] would be
       * correct, but we don't need to guarantee any specific
       * result for such coords (the ifloor will be undefined, but for modes
       * requiring border all resulting coords are safe).
       */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         /* offset is applied pre-mirror, in normalized space */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      if (!is_gather) {
         /* compute mirror function */
         coord = lp_build_coord_mirror(bld, coord, true);

         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

         /* coord0 = max(coord0, 0) */
         coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      } else {
         /*
          * This is pretty reasonable in the end, all what the tests care
          * about is nasty edge cases (scaled coords x.5, so the individual
          * coords are actually integers, which is REALLY tricky to get right
          * due to this working differently both for negative numbers as well
          * as for even/odd cases). But with enough magic it's not too complex
          * after all.
          * Maybe should try a bit arithmetic one though for POT textures...
          */
         LLVMValueRef isNeg;
         /*
          * Wrapping just once still works, even though it means we can
          * get "wrong" sign due to performing mirror in the middle of the
          * two coords (because this can only happen very near the odd/even
          * edges, so both coords will actually end up as 0 or length - 1
          * in the end).
          * For GL4 gather with per-sample offsets we'd need to the mirroring
          * per coord too.
          */
         coord = lp_build_coord_mirror(bld, coord, false);
         coord = lp_build_mul(coord_bld, coord, length_f);

         /*
          * NaNs should be safe here, we'll do away with them with
          * the ones' complement plus min.
          */
         coord0 = lp_build_sub(coord_bld, coord, half);
         coord0 = lp_build_ifloor(coord_bld, coord0);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* ones complement for neg numbers (mirror(negX) = X - 1) */
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord0, int_coord_bld->zero);
         coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord1, int_coord_bld->zero);
         coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
         coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

         /* weight is unused by gather */
         weight = coord_bld->undef;
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * XXX: probably not correct for gather, albeit I'm not
       * entirely sure as it's poorly specified. The wrapping looks
       * correct according to the spec which is against gl 1.2.1,
       * however negative values will be swapped - gl re-specified
       * wrapping with newer versions (no more pre-clamp except with
       * GL_CLAMP).
       */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min_ext(coord_bld, coord, length_f,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         /* unsigned variant of the coord context for the ifloor_fract below */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = false;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!is_gather) {
            coord = lp_build_abs(coord_bld, coord);

            /* clamp to length max */
            coord = lp_build_min_ext(coord_bld, coord, length_f,
                                     GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);

            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            /* coord1 = min(coord1, length-1) */
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         } else {
            /*
             * The non-gather path will swap coord0/1 if coord was negative,
             * which is ok for filtering since the filter weight matches
             * accordingly. Also, if coord is close to zero, coord0/1 will
             * be 0 and 1, instead of 0 and 0 (again ok due to filter
             * weight being 0.0). Both issues need to be fixed for gather.
             */
            LLVMValueRef isNeg;

            /*
             * Actually wanted to cheat here and use:
             * coord1 = lp_build_iround(coord_bld, coord);
             * but it's not good enough for some tests (even piglit
             * textureGather is set up in a way so the coords area always
             * .5, that is right at the crossover points).
             * So do ordinary sub/floor, then do ones' complement
             * for negative numbers.
             * (Note can't just do sub|add/abs/itrunc per coord neither -
             * because the spec demands that mirror(3.0) = 3 but
             * mirror(-3.0) = 2.)
             */
            coord = lp_build_sub(coord_bld, coord, half);
            coord0 = lp_build_ifloor(coord_bld, coord);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            /* ones' complement mirrors negative coords */
            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
                                 int_coord_bld->zero);
            coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
            coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);

            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
                                 int_coord_bld->zero);
            coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

            /* weight is unused by gather */
            weight = coord_bld->undef;
         }
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /*
          * XXX: probably not correct for gather due to swapped
          * order if coord is negative (same rationale as for
          * MIRROR_CLAMP).
          */
         coord = lp_build_abs(coord_bld, coord);

         /*
          * We don't need any clamp. Technically, for very large
          * (or infinite) values, clamp against length would be
          * correct, but we don't need to guarantee any specific
          * result for such coords (the ifloor will be undefined, but
          * for modes requiring border all resulting coords are safe).
          */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}
715
716
717 /**
718 * Build LLVM code for texture wrap mode for nearest filtering.
719 * \param coord the incoming texcoord (nominally in [0,1])
720 * \param length the texture size along one dimension, as int vector
721 * \param length_f the texture size along one dimension, as float vector
722 * \param offset texel offset along one dimension (as int vector)
723 * \param is_pot if TRUE, length is a power of two
724 * \param wrap_mode one of PIPE_TEX_WRAP_x
725 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             bool is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   /* resulting integer texel coordinate */
   LLVMValueRef icoord;

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* pot size: wrap via bitwise AND with length-1 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      } else {
         if (offset) {
            /* npot: offset must be applied pre-wrap, in normalized space */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         /* offset is applied pre-mirror, in normalized space */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord, true);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror by taking the absolute value */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = false;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror; out-of-range values are handled by border masking */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}
862
863
864 /**
865 * Do shadow test/comparison.
866 * \param p shadow ref value
867 * \param texel the texel to compare against
868 */
869 static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context * bld,LLVMValueRef p,LLVMValueRef texel)870 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
871 LLVMValueRef p,
872 LLVMValueRef texel)
873 {
874 struct lp_build_context *texel_bld = &bld->texel_bld;
875 LLVMValueRef res;
876
877 if (0) {
878 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
879 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
880 }
881
882 /* result = (p FUNC texel) ? 1 : 0 */
883 /*
884 * honor d3d10 floating point rules here, which state that comparisons
885 * are ordered except NOT_EQUAL which is unordered.
886 */
887 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
888 res = lp_build_cmp_ordered(texel_bld,
889 bld->static_sampler_state->compare_func,
890 p, texel);
891 } else {
892 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
893 p, texel);
894 }
895 return res;
896 }
897
898
899 /**
900 * Generate code to sample a mipmap level with nearest filtering.
901 * If sampling a cube texture, r = cube face in [0,5].
902 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              const LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   /* Split the packed mip-level size into per-dimension integer vectors. */
   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   /* Float copies of the sizes, needed by the coord wrap computations. */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    * Each coordinate is wrapped per the sampler's per-axis wrap mode,
    * with the (optional) texel offset applied.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   /* For array/cube-array targets the z coord is a layer index, not wrapped. */
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      } else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /* Shadow compare: replace texel with the 0.0/1.0 comparison result,
       * broadcast to all four channels.  coords[4] holds the shadow ref.
       */
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}
991
992
993 /**
994 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
995 */
996 static LLVMValueRef
lp_build_masklerp(struct lp_build_context * bld,LLVMValueRef weight,LLVMValueRef mask0,LLVMValueRef mask1)997 lp_build_masklerp(struct lp_build_context *bld,
998 LLVMValueRef weight,
999 LLVMValueRef mask0,
1000 LLVMValueRef mask1)
1001 {
1002 struct gallivm_state *gallivm = bld->gallivm;
1003 LLVMBuilderRef builder = gallivm->builder;
1004 LLVMValueRef weight2;
1005
1006 weight2 = lp_build_sub(bld, bld->one, weight);
1007 weight = LLVMBuildBitCast(builder, weight,
1008 lp_build_int_vec_type(gallivm, bld->type), "");
1009 weight2 = LLVMBuildBitCast(builder, weight2,
1010 lp_build_int_vec_type(gallivm, bld->type), "");
1011 weight = LLVMBuildAnd(builder, weight, mask1, "");
1012 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
1013 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
1014 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
1015 return lp_build_add(bld, weight, weight2);
1016 }
1017
1018 /**
1019 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
1020 */
1021 static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context * bld,LLVMValueRef weight0,LLVMValueRef weight1,LLVMValueRef mask00,LLVMValueRef mask01,LLVMValueRef mask10,LLVMValueRef mask11)1022 lp_build_masklerp2d(struct lp_build_context *bld,
1023 LLVMValueRef weight0,
1024 LLVMValueRef weight1,
1025 LLVMValueRef mask00,
1026 LLVMValueRef mask01,
1027 LLVMValueRef mask10,
1028 LLVMValueRef mask11)
1029 {
1030 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
1031 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
1032 return lp_build_lerp(bld, weight1, val0, val1, 0);
1033 }
1034
1035 /*
1036 * this is a bit excessive code for something OpenGL just recommends
1037 * but does not require.
1038 */
1039 #define ACCURATE_CUBE_CORNERS 1
1040
1041 /**
1042 * Generate code to sample a mipmap level with linear filtering.
1043 * If sampling a cube texture, r = cube face in [0,5].
1044 * If linear_mask is present, only pixels having their mask set
1045 * will receive linear filtering, the rest will use nearest.
1046 */
1047 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef size,LLVMValueRef linear_mask,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])1048 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1049 bool is_gather,
1050 LLVMValueRef size,
1051 LLVMValueRef linear_mask,
1052 LLVMValueRef row_stride_vec,
1053 LLVMValueRef img_stride_vec,
1054 LLVMValueRef data_ptr,
1055 LLVMValueRef mipoffsets,
1056 const LLVMValueRef *coords,
1057 const LLVMValueRef *offsets,
1058 LLVMValueRef colors_out[4])
1059 {
1060 LLVMBuilderRef builder = bld->gallivm->builder;
1061 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
1062 struct lp_build_context *coord_bld = &bld->coord_bld;
1063 struct lp_build_context *texel_bld = &bld->texel_bld;
1064 const unsigned dims = bld->dims;
1065 LLVMValueRef width_vec;
1066 LLVMValueRef height_vec;
1067 LLVMValueRef depth_vec;
1068 LLVMValueRef flt_size;
1069 LLVMValueRef flt_width_vec;
1070 LLVMValueRef flt_height_vec;
1071 LLVMValueRef flt_depth_vec;
1072 LLVMValueRef fall_off[4] = { 0 }, have_corners = NULL;
1073 LLVMValueRef z1 = NULL;
1074 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
1075 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
1076 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
1077 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
1078 LLVMValueRef xs[4], ys[4], zs[4];
1079 LLVMValueRef neighbors[2][2][4];
1080 bool seamless_cube_filter, accurate_cube_corners;
1081 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1082
1083 if (is_gather) {
1084 switch (bld->gather_comp) {
1085 case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
1086 case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
1087 case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
1088 case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
1089 default:
1090 break;
1091 }
1092 }
1093
1094 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
1095 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
1096 bld->static_sampler_state->seamless_cube_map;
1097
1098 /*
1099 * Disable accurate cube corners for integer textures, which should only
1100 * get here in the gather path.
1101 */
1102 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
1103 !util_format_is_pure_integer(bld->static_texture_state->format);
1104
1105 lp_build_extract_image_sizes(bld,
1106 &bld->int_size_bld,
1107 bld->int_coord_type,
1108 size,
1109 &width_vec, &height_vec, &depth_vec);
1110
1111 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
1112
1113 lp_build_extract_image_sizes(bld,
1114 &bld->float_size_bld,
1115 bld->coord_type,
1116 flt_size,
1117 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
1118
1119 LLVMTypeRef int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1120
1121 /*
1122 * Compute integer texcoords.
1123 */
1124
1125 if (!seamless_cube_filter) {
1126 lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
1127 flt_width_vec, offsets[0],
1128 bld->static_texture_state->pot_width,
1129 bld->static_sampler_state->wrap_s,
1130 &x00, &x01, &s_fpart);
1131 lp_build_name(x00, "tex.x0.wrapped");
1132 lp_build_name(x01, "tex.x1.wrapped");
1133 x10 = x00;
1134 x11 = x01;
1135
1136 if (dims >= 2) {
1137 lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
1138 flt_height_vec, offsets[1],
1139 bld->static_texture_state->pot_height,
1140 bld->static_sampler_state->wrap_t,
1141 &y00, &y10, &t_fpart);
1142 lp_build_name(y00, "tex.y0.wrapped");
1143 lp_build_name(y10, "tex.y1.wrapped");
1144 y01 = y00;
1145 y11 = y10;
1146
1147 if (dims == 3) {
1148 lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
1149 flt_depth_vec, offsets[2],
1150 bld->static_texture_state->pot_depth,
1151 bld->static_sampler_state->wrap_r,
1152 &z00, &z1, &r_fpart);
1153 z01 = z10 = z11 = z00;
1154 lp_build_name(z00, "tex.z0.wrapped");
1155 lp_build_name(z1, "tex.z1.wrapped");
1156 }
1157 }
1158 if (has_layer_coord(bld->static_texture_state->target)) {
1159 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1160 /* add cube layer to face */
1161 z00 = z01 = z10 = z11 = z1 =
1162 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
1163 } else {
1164 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
1165 }
1166 lp_build_name(z00, "tex.z0.layer");
1167 lp_build_name(z1, "tex.z1.layer");
1168 }
1169 } else {
1170 struct lp_build_if_state edge_if;
1171 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
1172 LLVMValueRef coord0, coord1, have_edge, have_corner;
1173 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
1174 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
1175 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
1176 LLVMValueRef face = coords[2];
1177 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
1178 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
1179 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
1180 height_vec = width_vec;
1181 flt_height_vec = flt_width_vec;
1182
1183 /* XXX the overflow logic is actually sort of duplicated with trilinear,
1184 * since an overflow in one mip should also have a corresponding overflow
1185 * in another.
1186 */
1187 /* should always have normalized coords, and offsets are undefined */
1188 assert(bld->static_sampler_state->normalized_coords);
1189 /*
1190 * The coords should all be between [0,1] however we can have NaNs,
1191 * which will wreak havoc. In particular the y1_clamped value below
1192 * can be -INT_MAX (on x86) and be propagated right through (probably
1193 * other values might be bogus in the end too).
1194 * So kill off the NaNs here.
1195 */
1196 coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
1197 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1198 coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
1199 /* instead of clamp, build mask if overflowed */
1200 coord0 = lp_build_sub(coord_bld, coord0, half);
1201 /* convert to int, compute lerp weight */
1202 /* not ideal with AVX (and no AVX2) */
1203 lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
1204 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
1205 coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
1206 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1207 coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
1208 coord1 = lp_build_sub(coord_bld, coord1, half);
1209 lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
1210 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
1211
1212 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
1213 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
1214 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
1215 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
1216
1217 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
1218 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
1219 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
1220 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
1221
1222 /* needed for accurate corner filtering branch later, rely on 0 init */
1223 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1224
1225 for (unsigned texel_index = 0; texel_index < 4; texel_index++) {
1226 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1227 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1228 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1229 }
1230
1231 lp_build_if(&edge_if, bld->gallivm, have_edge);
1232
1233 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1234 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1235 LLVMBuildStore(builder, have_corner, have_corners);
1236
1237 /*
1238 * Need to feed clamped values here for cheap corner handling,
1239 * but only for y coord (as when falling off both edges we only
1240 * fall off the x one) - this should be sufficient.
1241 */
1242 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1243 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1244
1245 /*
1246 * Get all possible new coords.
1247 */
1248 lp_build_cube_new_coords(ivec_bld, face,
1249 x0, x1, y0_clamped, y1_clamped,
1250 length_minus_one,
1251 new_faces, new_xcoords, new_ycoords);
1252
1253 /* handle fall off x-, x+ direction */
1254 /* determine new coords, face (not both fall_off vars can be true at same time) */
1255 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1256 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1257 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1258 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1259 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1260 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1261 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1262 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1263
1264 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1265 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1266
1267 /* handle fall off y-, y+ direction */
1268 /*
1269 * Cheap corner logic: just hack up things so a texel doesn't fall
1270 * off both sides (which means filter weights will be wrong but we'll only
1271 * use valid texels in the filter).
1272 * This means however (y) coords must additionally be clamped (see above).
1273 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1274 */
1275 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1276 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1277 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1278 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1279
1280 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1281 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1282 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1283 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1284 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1285 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1286 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1287 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1288
1289 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1290 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1291 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1292 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1293
1294 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1295 /* now can add cube layer to face (per sample) */
1296 z00 = lp_build_add(ivec_bld, z00, coords[3]);
1297 z01 = lp_build_add(ivec_bld, z01, coords[3]);
1298 z10 = lp_build_add(ivec_bld, z10, coords[3]);
1299 z11 = lp_build_add(ivec_bld, z11, coords[3]);
1300 }
1301
1302 LLVMBuildStore(builder, x00, xs[0]);
1303 LLVMBuildStore(builder, x01, xs[1]);
1304 LLVMBuildStore(builder, x10, xs[2]);
1305 LLVMBuildStore(builder, x11, xs[3]);
1306 LLVMBuildStore(builder, y00, ys[0]);
1307 LLVMBuildStore(builder, y01, ys[1]);
1308 LLVMBuildStore(builder, y10, ys[2]);
1309 LLVMBuildStore(builder, y11, ys[3]);
1310 LLVMBuildStore(builder, z00, zs[0]);
1311 LLVMBuildStore(builder, z01, zs[1]);
1312 LLVMBuildStore(builder, z10, zs[2]);
1313 LLVMBuildStore(builder, z11, zs[3]);
1314
1315 lp_build_else(&edge_if);
1316
1317 LLVMBuildStore(builder, x0, xs[0]);
1318 LLVMBuildStore(builder, x1, xs[1]);
1319 LLVMBuildStore(builder, x0, xs[2]);
1320 LLVMBuildStore(builder, x1, xs[3]);
1321 LLVMBuildStore(builder, y0, ys[0]);
1322 LLVMBuildStore(builder, y0, ys[1]);
1323 LLVMBuildStore(builder, y1, ys[2]);
1324 LLVMBuildStore(builder, y1, ys[3]);
1325 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1326 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1327 LLVMBuildStore(builder, cube_layer, zs[0]);
1328 LLVMBuildStore(builder, cube_layer, zs[1]);
1329 LLVMBuildStore(builder, cube_layer, zs[2]);
1330 LLVMBuildStore(builder, cube_layer, zs[3]);
1331 } else {
1332 LLVMBuildStore(builder, face, zs[0]);
1333 LLVMBuildStore(builder, face, zs[1]);
1334 LLVMBuildStore(builder, face, zs[2]);
1335 LLVMBuildStore(builder, face, zs[3]);
1336 }
1337
1338 lp_build_endif(&edge_if);
1339
1340 LLVMTypeRef type = ivec_bld->vec_type;
1341 x00 = LLVMBuildLoad2(builder, type, xs[0], "");
1342 x01 = LLVMBuildLoad2(builder, type, xs[1], "");
1343 x10 = LLVMBuildLoad2(builder, type, xs[2], "");
1344 x11 = LLVMBuildLoad2(builder, type, xs[3], "");
1345 y00 = LLVMBuildLoad2(builder, type, ys[0], "");
1346 y01 = LLVMBuildLoad2(builder, type, ys[1], "");
1347 y10 = LLVMBuildLoad2(builder, type, ys[2], "");
1348 y11 = LLVMBuildLoad2(builder, type, ys[3], "");
1349 z00 = LLVMBuildLoad2(builder, type, zs[0], "");
1350 z01 = LLVMBuildLoad2(builder, type, zs[1], "");
1351 z10 = LLVMBuildLoad2(builder, type, zs[2], "");
1352 z11 = LLVMBuildLoad2(builder, type, zs[3], "");
1353 }
1354
1355 if (linear_mask) {
1356 /*
1357 * Whack filter weights into place. Whatever texel had more weight is
1358 * the one which should have been selected by nearest filtering hence
1359 * just use 100% weight for it.
1360 */
1361 struct lp_build_context *c_bld = &bld->coord_bld;
1362 LLVMValueRef w1_mask, w1_weight;
1363 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1364
1365 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1366 /* this select is really just a "and" */
1367 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1368 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1369 if (dims >= 2) {
1370 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1371 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1372 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1373 if (dims == 3) {
1374 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1375 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1376 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1377 }
1378 }
1379 }
1380
1381 /*
1382 * Get texture colors.
1383 */
1384 /* get x0/x1 texels */
1385 lp_build_sample_texel_soa(bld,
1386 width_vec, height_vec, depth_vec,
1387 x00, y00, z00,
1388 row_stride_vec, img_stride_vec,
1389 data_ptr, mipoffsets, neighbors[0][0]);
1390 lp_build_sample_texel_soa(bld,
1391 width_vec, height_vec, depth_vec,
1392 x01, y01, z01,
1393 row_stride_vec, img_stride_vec,
1394 data_ptr, mipoffsets, neighbors[0][1]);
1395
1396 if (dims == 1) {
1397 assert(!is_gather);
1398 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1399 lp_build_reduce_filter(texel_bld,
1400 bld->static_sampler_state->reduction_mode,
1401 0,
1402 4,
1403 s_fpart,
1404 neighbors[0][0],
1405 neighbors[0][1],
1406 colors_out);
1407 } else {
1408 LLVMValueRef cmpval0, cmpval1;
1409 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1410 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1411 /* simplified lerp, AND mask with weight and add */
1412 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1413 cmpval0, cmpval1);
1414 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1415 }
1416 } else {
1417 /* 2D/3D texture */
1418 struct lp_build_if_state corner_if;
1419 LLVMValueRef colors0[4], colorss[4] = { 0 };
1420
1421 /* get x0/x1 texels at y1 */
1422 lp_build_sample_texel_soa(bld,
1423 width_vec, height_vec, depth_vec,
1424 x10, y10, z10,
1425 row_stride_vec, img_stride_vec,
1426 data_ptr, mipoffsets, neighbors[1][0]);
1427 lp_build_sample_texel_soa(bld,
1428 width_vec, height_vec, depth_vec,
1429 x11, y11, z11,
1430 row_stride_vec, img_stride_vec,
1431 data_ptr, mipoffsets, neighbors[1][1]);
1432
1433 /*
1434 * To avoid having to duplicate linear_mask / fetch code use
1435 * another branch (with corner condition though edge would work
1436 * as well) here.
1437 */
1438 if (have_corners && accurate_cube_corners &&
1439 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1440 LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
1441 LLVMValueRef have_corner, one_third;
1442
1443 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
1444 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
1445 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
1446 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
1447
1448 have_corner = LLVMBuildLoad2(builder, int1t, have_corners, "");
1449
1450 lp_build_if(&corner_if, bld->gallivm, have_corner);
1451
1452 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
1453 1.0f/3.0f);
1454
1455 /* find corner */
1456 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1457 c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1458 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1459 c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1460 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1461 c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1462 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1463 c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1464
1465 if (!is_gather) {
1466 /*
1467 * we can't use standard 2d lerp as we need per-element weight
1468 * in case of corners, so just calculate bilinear result as
1469 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1470 * (This is actually less work than using 2d lerp, 7 vs. 9
1471 * instructions, however calculating the weights needs another 6,
1472 * so actually probably not slower than 2d lerp only for 4 channels
1473 * as weights only need to be calculated once - of course fixing
1474 * the weights has additional cost.)
1475 */
1476 LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
1477 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1478 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1479 w00 = lp_build_mul(coord_bld, wx0, wy0);
1480 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1481 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1482 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1483
1484 /* find corner weight */
1485 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1486 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1487 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1488 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1489
1490 /*
1491 * add 1/3 of the corner weight to the weight of the 3 other
1492 * samples and null out corner weight.
1493 */
1494 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1495 w00 = lp_build_add(coord_bld, w00, c_weight);
1496 w00 = lp_build_andnot(coord_bld, w00, c00f);
1497 w01 = lp_build_add(coord_bld, w01, c_weight);
1498 w01 = lp_build_andnot(coord_bld, w01, c01f);
1499 w10 = lp_build_add(coord_bld, w10, c_weight);
1500 w10 = lp_build_andnot(coord_bld, w10, c10f);
1501 w11 = lp_build_add(coord_bld, w11, c_weight);
1502 w11 = lp_build_andnot(coord_bld, w11, c11f);
1503
1504 if (bld->static_sampler_state->compare_mode ==
1505 PIPE_TEX_COMPARE_NONE) {
1506 for (unsigned chan = 0; chan < 4; chan++) {
1507 colors0[chan] = lp_build_mul(coord_bld, w00,
1508 neighbors[0][0][chan]);
1509 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1510 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1511 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1512 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1513 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1514 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1515 }
1516 } else {
1517 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1518 cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
1519 neighbors[0][0][0]);
1520 cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
1521 neighbors[0][1][0]);
1522 cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
1523 neighbors[1][0][0]);
1524 cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
1525 neighbors[1][1][0]);
1526 /*
1527 * inputs to interpolation are just masks so just add
1528 * masked weights together
1529 */
1530 cmpval00 = LLVMBuildBitCast(builder, cmpval00,
1531 coord_bld->vec_type, "");
1532 cmpval01 = LLVMBuildBitCast(builder, cmpval01,
1533 coord_bld->vec_type, "");
1534 cmpval10 = LLVMBuildBitCast(builder, cmpval10,
1535 coord_bld->vec_type, "");
1536 cmpval11 = LLVMBuildBitCast(builder, cmpval11,
1537 coord_bld->vec_type, "");
1538 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1539 tmp = lp_build_and(coord_bld, w01, cmpval01);
1540 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1541 tmp = lp_build_and(coord_bld, w10, cmpval10);
1542 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1543 tmp = lp_build_and(coord_bld, w11, cmpval11);
1544 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1545 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1546 }
1547 } else {
1548 /*
1549 * We don't have any weights to adjust, so instead calculate
1550 * the fourth texel as simply the average of the other 3.
1551 * (This would work for non-gather too, however we'd have
1552 * a boatload more of the select stuff due to there being
1553 * 4 times as many colors as weights.)
1554 */
1555 LLVMValueRef col00, col01, col10, col11;
1556 LLVMValueRef colc, colc0, colc1;
1557 col10 = lp_build_swizzle_soa_channel(texel_bld,
1558 neighbors[1][0], chan_swiz);
1559 col11 = lp_build_swizzle_soa_channel(texel_bld,
1560 neighbors[1][1], chan_swiz);
1561 col01 = lp_build_swizzle_soa_channel(texel_bld,
1562 neighbors[0][1], chan_swiz);
1563 col00 = lp_build_swizzle_soa_channel(texel_bld,
1564 neighbors[0][0], chan_swiz);
1565
1566 /*
1567 * The spec says for comparison filtering, the comparison
1568 * must happen before synthesizing the new value.
1569 * This means all gathered values are always 0 or 1,
1570 * except for the non-existing texel, which can be 0,1/3,2/3,1...
1571 * Seems like we'd be allowed to just return 0 or 1 too, so we
1572 * could simplify and pass down the compare mask values to the
1573 * end (using int arithmetic/compare on the mask values to
1574 * construct the fourth texel) and only there convert to floats
1575 * but it's probably not worth it (it might be easier for the cpu
1576 * but not for the code)...
1577 */
1578 if (bld->static_sampler_state->compare_mode !=
1579 PIPE_TEX_COMPARE_NONE) {
1580 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1581 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
1582 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
1583 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
1584 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
1585 col00 = lp_build_select(texel_bld, cmpval00,
1586 texel_bld->one, texel_bld->zero);
1587 col01 = lp_build_select(texel_bld, cmpval01,
1588 texel_bld->one, texel_bld->zero);
1589 col10 = lp_build_select(texel_bld, cmpval10,
1590 texel_bld->one, texel_bld->zero);
1591 col11 = lp_build_select(texel_bld, cmpval11,
1592 texel_bld->one, texel_bld->zero);
1593 }
1594
1595 /*
1596 * Null out corner color.
1597 */
1598 col00 = lp_build_andnot(coord_bld, col00, c00f);
1599 col01 = lp_build_andnot(coord_bld, col01, c01f);
1600 col10 = lp_build_andnot(coord_bld, col10, c10f);
1601 col11 = lp_build_andnot(coord_bld, col11, c11f);
1602
1603 /*
1604 * New corner texel color is all colors added / 3.
1605 */
1606 colc0 = lp_build_add(coord_bld, col00, col01);
1607 colc1 = lp_build_add(coord_bld, col10, col11);
1608 colc = lp_build_add(coord_bld, colc0, colc1);
1609 colc = lp_build_mul(coord_bld, one_third, colc);
1610
1611 /*
1612 * Replace the corner texel color with the new value.
1613 */
1614 col00 = lp_build_select(coord_bld, c00, colc, col00);
1615 col01 = lp_build_select(coord_bld, c01, colc, col01);
1616 col10 = lp_build_select(coord_bld, c10, colc, col10);
1617 col11 = lp_build_select(coord_bld, c11, colc, col11);
1618
1619 colors0[0] = col10;
1620 colors0[1] = col11;
1621 colors0[2] = col01;
1622 colors0[3] = col00;
1623 }
1624
1625 LLVMBuildStore(builder, colors0[0], colorss[0]);
1626 LLVMBuildStore(builder, colors0[1], colorss[1]);
1627 LLVMBuildStore(builder, colors0[2], colorss[2]);
1628 LLVMBuildStore(builder, colors0[3], colorss[3]);
1629
1630 lp_build_else(&corner_if);
1631 }
1632
1633 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1634 if (is_gather) {
1635 /*
1636 * Just assign the red channel (no component selection yet).
1637 * This is a bit hackish, we usually do the swizzle at the
1638 * end of sampling (much less values to swizzle), but this
1639 * obviously cannot work when using gather.
1640 */
1641 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1642 neighbors[1][0],
1643 chan_swiz);
1644 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1645 neighbors[1][1],
1646 chan_swiz);
1647 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1648 neighbors[0][1],
1649 chan_swiz);
1650 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1651 neighbors[0][0],
1652 chan_swiz);
1653 } else {
1654 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1655 lp_build_reduce_filter_2d(texel_bld,
1656 bld->static_sampler_state->reduction_mode,
1657 0,
1658 4,
1659 s_fpart,
1660 t_fpart,
1661 neighbors[0][0],
1662 neighbors[0][1],
1663 neighbors[1][0],
1664 neighbors[1][1],
1665 colors0);
1666 }
1667 } else {
1668 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1669 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1670 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1671 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1672 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1673
1674 if (is_gather) {
1675 /* more hacks for swizzling, should be X, ONE or ZERO... */
1676 colors0[0] = lp_build_select(texel_bld, cmpval10,
1677 texel_bld->one, texel_bld->zero);
1678 colors0[1] = lp_build_select(texel_bld, cmpval11,
1679 texel_bld->one, texel_bld->zero);
1680 colors0[2] = lp_build_select(texel_bld, cmpval01,
1681 texel_bld->one, texel_bld->zero);
1682 colors0[3] = lp_build_select(texel_bld, cmpval00,
1683 texel_bld->one, texel_bld->zero);
1684 } else {
1685 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1686 cmpval00, cmpval01, cmpval10, cmpval11);
1687 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1688 }
1689 }
1690
1691 if (have_corners && accurate_cube_corners &&
1692 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1693 LLVMBuildStore(builder, colors0[0], colorss[0]);
1694 LLVMBuildStore(builder, colors0[1], colorss[1]);
1695 LLVMBuildStore(builder, colors0[2], colorss[2]);
1696 LLVMBuildStore(builder, colors0[3], colorss[3]);
1697
1698 lp_build_endif(&corner_if);
1699
1700 colors0[0] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[0], "");
1701 colors0[1] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[1], "");
1702 colors0[2] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[2], "");
1703 colors0[3] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[3], "");
1704 }
1705
1706 if (dims == 3) {
1707 LLVMValueRef neighbors1[2][2][4];
1708 LLVMValueRef colors1[4];
1709
1710 assert(!is_gather);
1711
1712 /* get x0/x1/y0/y1 texels at z1 */
1713 lp_build_sample_texel_soa(bld,
1714 width_vec, height_vec, depth_vec,
1715 x00, y00, z1,
1716 row_stride_vec, img_stride_vec,
1717 data_ptr, mipoffsets, neighbors1[0][0]);
1718 lp_build_sample_texel_soa(bld,
1719 width_vec, height_vec, depth_vec,
1720 x01, y01, z1,
1721 row_stride_vec, img_stride_vec,
1722 data_ptr, mipoffsets, neighbors1[0][1]);
1723 lp_build_sample_texel_soa(bld,
1724 width_vec, height_vec, depth_vec,
1725 x10, y10, z1,
1726 row_stride_vec, img_stride_vec,
1727 data_ptr, mipoffsets, neighbors1[1][0]);
1728 lp_build_sample_texel_soa(bld,
1729 width_vec, height_vec, depth_vec,
1730 x11, y11, z1,
1731 row_stride_vec, img_stride_vec,
1732 data_ptr, mipoffsets, neighbors1[1][1]);
1733
1734 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1735 /* Bilinear interpolate the four samples from the second Z slice */
1736 lp_build_reduce_filter_2d(texel_bld,
1737 bld->static_sampler_state->reduction_mode,
1738 0,
1739 4,
1740 s_fpart,
1741 t_fpart,
1742 neighbors1[0][0],
1743 neighbors1[0][1],
1744 neighbors1[1][0],
1745 neighbors1[1][1],
1746 colors1);
1747
1748 /* Linearly interpolate the two samples from the two 3D slices */
1749 lp_build_reduce_filter(texel_bld,
1750 bld->static_sampler_state->reduction_mode,
1751 0,
1752 4,
1753 r_fpart,
1754 colors0,
1755 colors1,
1756 colors_out);
1757 } else {
1758 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1759 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1760 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1761 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1762 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1763 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1764 cmpval00, cmpval01, cmpval10, cmpval11);
1765 /* Linearly interpolate the two samples from the two 3D slices */
1766 colors_out[0] = lp_build_lerp(texel_bld,
1767 r_fpart,
1768 colors0[0], colors1[0],
1769 0);
1770 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1771 }
1772 } else {
1773 /* 2D tex */
1774 for (unsigned chan = 0; chan < 4; chan++) {
1775 colors_out[chan] = colors0[chan];
1776 }
1777 }
1778 }
1779 if (is_gather) {
1780 /*
1781 * For gather, we can't do our usual channel swizzling done later,
1782 * so do it here. It only really matters for 0/1 swizzles in case
1783 * of comparison filtering, since in this case the results would be
1784 * wrong, without comparison it should all work out alright but it
1785 * can't hurt to do that here, since it will instantly drop all
1786 * calculations above, though it's a rather stupid idea to do
1787 * gather on a channel which will always return 0 or 1 in any case...
1788 */
1789 if (chan_swiz == PIPE_SWIZZLE_1) {
1790 for (unsigned chan = 0; chan < 4; chan++) {
1791 colors_out[chan] = texel_bld->one;
1792 }
1793 } else if (chan_swiz == PIPE_SWIZZLE_0) {
1794 for (unsigned chan = 0; chan < 4; chan++) {
1795 colors_out[chan] = texel_bld->zero;
1796 }
1797 }
1798 }
1799 }
1800
1801
1802 /**
1803 * Sample the texture/mipmap using given image filter and mip filter.
1804 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1805 * from (vectors or scalars).
1806 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1807 */
1808 static void
lp_build_sample_mipmap(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef * colors_out)1809 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1810 unsigned img_filter,
1811 unsigned mip_filter,
1812 bool is_gather,
1813 const LLVMValueRef *coords,
1814 const LLVMValueRef *offsets,
1815 LLVMValueRef ilevel0,
1816 LLVMValueRef ilevel1,
1817 LLVMValueRef lod_fpart,
1818 LLVMValueRef *colors_out)
1819 {
1820 LLVMBuilderRef builder = bld->gallivm->builder;
1821 LLVMValueRef size0 = NULL;
1822 LLVMValueRef size1 = NULL;
1823 LLVMValueRef row_stride0_vec = NULL;
1824 LLVMValueRef row_stride1_vec = NULL;
1825 LLVMValueRef img_stride0_vec = NULL;
1826 LLVMValueRef img_stride1_vec = NULL;
1827 LLVMValueRef data_ptr0 = NULL;
1828 LLVMValueRef data_ptr1 = NULL;
1829 LLVMValueRef mipoff0 = NULL;
1830 LLVMValueRef mipoff1 = NULL;
1831 LLVMValueRef colors0[4], colors1[4];
1832
1833 /* sample the first mipmap level */
1834 lp_build_mipmap_level_sizes(bld, ilevel0,
1835 &size0,
1836 &row_stride0_vec, &img_stride0_vec);
1837 if (bld->num_mips == 1) {
1838 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1839 } else {
1840 /* This path should work for num_lods 1 too but slightly less efficient */
1841 data_ptr0 = bld->base_ptr;
1842 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1843 }
1844
1845 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1846 lp_build_sample_image_nearest(bld, size0,
1847 row_stride0_vec, img_stride0_vec,
1848 data_ptr0, mipoff0, coords, offsets,
1849 colors0);
1850 } else {
1851 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1852 lp_build_sample_image_linear(bld, is_gather, size0, NULL,
1853 row_stride0_vec, img_stride0_vec,
1854 data_ptr0, mipoff0, coords, offsets,
1855 colors0);
1856 }
1857
1858 /* Store the first level's colors in the output variables */
1859 for (unsigned chan = 0; chan < 4; chan++) {
1860 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1861 }
1862
1863 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1864 struct lp_build_if_state if_ctx;
1865 LLVMValueRef need_lerp;
1866
1867 /* need_lerp = lod_fpart > 0 */
1868 if (bld->num_lods == 1) {
1869 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1870 lod_fpart, bld->lodf_bld.zero,
1871 "need_lerp");
1872 } else {
1873 /*
1874 * We'll do mip filtering if any of the quads (or individual
1875 * pixel in case of per-pixel lod) need it.
1876 * It might be better to split the vectors here and only fetch/filter
1877 * quads which need it (if there's one lod per quad).
1878 */
1879 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1880 PIPE_FUNC_GREATER,
1881 lod_fpart, bld->lodf_bld.zero);
1882 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1883 lp_build_name(need_lerp, "need_lerp");
1884 }
1885
1886 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1887 {
1888 /*
1889 * We unfortunately need to clamp lod_fpart here since we can get
1890 * negative values which would screw up filtering if not all
1891 * lod_fpart values have same sign.
1892 */
1893 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1894 bld->lodf_bld.zero);
1895 /* sample the second mipmap level */
1896 lp_build_mipmap_level_sizes(bld, ilevel1,
1897 &size1,
1898 &row_stride1_vec, &img_stride1_vec);
1899 if (bld->num_mips == 1) {
1900 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1901 } else {
1902 data_ptr1 = bld->base_ptr;
1903 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1904 }
1905 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1906 lp_build_sample_image_nearest(bld, size1,
1907 row_stride1_vec, img_stride1_vec,
1908 data_ptr1, mipoff1, coords, offsets,
1909 colors1);
1910 } else {
1911 lp_build_sample_image_linear(bld, false, size1, NULL,
1912 row_stride1_vec, img_stride1_vec,
1913 data_ptr1, mipoff1, coords, offsets,
1914 colors1);
1915 }
1916
1917 /* interpolate samples from the two mipmap levels */
1918
1919 if (bld->num_lods != bld->coord_type.length)
1920 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1921 bld->lodf_bld.type,
1922 bld->texel_bld.type,
1923 lod_fpart);
1924
1925 for (unsigned chan = 0; chan < 4; chan++) {
1926 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1927 colors0[chan], colors1[chan],
1928 0);
1929 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1930 }
1931 }
1932 lp_build_endif(&if_ctx);
1933 }
1934 }
1935
1936
1937 /**
1938 * Sample the texture/mipmap using given mip filter, and using
1939 * both nearest and linear filtering at the same time depending
1940 * on linear_mask.
1941 * lod can be per quad but linear_mask is always per pixel.
1942 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1943 * from (vectors or scalars).
1944 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1945 */
1946 static void
lp_build_sample_mipmap_both(struct lp_build_sample_context * bld,LLVMValueRef linear_mask,unsigned mip_filter,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef lod_positive,LLVMValueRef * colors_out)1947 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1948 LLVMValueRef linear_mask,
1949 unsigned mip_filter,
1950 const LLVMValueRef *coords,
1951 const LLVMValueRef *offsets,
1952 LLVMValueRef ilevel0,
1953 LLVMValueRef ilevel1,
1954 LLVMValueRef lod_fpart,
1955 LLVMValueRef lod_positive,
1956 LLVMValueRef *colors_out)
1957 {
1958 LLVMBuilderRef builder = bld->gallivm->builder;
1959 LLVMValueRef size0 = NULL;
1960 LLVMValueRef size1 = NULL;
1961 LLVMValueRef row_stride0_vec = NULL;
1962 LLVMValueRef row_stride1_vec = NULL;
1963 LLVMValueRef img_stride0_vec = NULL;
1964 LLVMValueRef img_stride1_vec = NULL;
1965 LLVMValueRef data_ptr0 = NULL;
1966 LLVMValueRef data_ptr1 = NULL;
1967 LLVMValueRef mipoff0 = NULL;
1968 LLVMValueRef mipoff1 = NULL;
1969 LLVMValueRef colors0[4], colors1[4];
1970
1971 /* sample the first mipmap level */
1972 lp_build_mipmap_level_sizes(bld, ilevel0,
1973 &size0,
1974 &row_stride0_vec, &img_stride0_vec);
1975 if (bld->num_mips == 1) {
1976 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1977 } else {
1978 /* This path should work for num_lods 1 too but slightly less efficient */
1979 data_ptr0 = bld->base_ptr;
1980 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1981 }
1982
1983 lp_build_sample_image_linear(bld, false, size0, linear_mask,
1984 row_stride0_vec, img_stride0_vec,
1985 data_ptr0, mipoff0, coords, offsets,
1986 colors0);
1987
1988 /* Store the first level's colors in the output variables */
1989 for (unsigned chan = 0; chan < 4; chan++) {
1990 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1991 }
1992
1993 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1994 struct lp_build_if_state if_ctx;
1995 LLVMValueRef need_lerp;
1996
1997 /*
1998 * We'll do mip filtering if any of the quads (or individual
1999 * pixel in case of per-pixel lod) need it.
2000 * Note using lod_positive here not lod_fpart since it may be the same
2001 * condition as that used in the outer "if" in the caller hence llvm
2002 * should be able to merge the branches in this case.
2003 */
2004 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
2005 lp_build_name(need_lerp, "need_lerp");
2006
2007 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
2008 {
2009 /*
2010 * We unfortunately need to clamp lod_fpart here since we can get
2011 * negative values which would screw up filtering if not all
2012 * lod_fpart values have same sign.
2013 */
2014 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
2015 bld->lodf_bld.zero);
2016 /* sample the second mipmap level */
2017 lp_build_mipmap_level_sizes(bld, ilevel1,
2018 &size1,
2019 &row_stride1_vec, &img_stride1_vec);
2020 if (bld->num_mips == 1) {
2021 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
2022 } else {
2023 data_ptr1 = bld->base_ptr;
2024 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
2025 }
2026
2027 lp_build_sample_image_linear(bld, false, size1, linear_mask,
2028 row_stride1_vec, img_stride1_vec,
2029 data_ptr1, mipoff1, coords, offsets,
2030 colors1);
2031
2032 /* interpolate samples from the two mipmap levels */
2033
2034 if (bld->num_lods != bld->coord_type.length)
2035 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2036 bld->lodf_bld.type,
2037 bld->texel_bld.type,
2038 lod_fpart);
2039
2040 for (unsigned chan = 0; chan < 4; chan++) {
2041 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
2042 colors0[chan], colors1[chan],
2043 0);
2044 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
2045 }
2046 }
2047 lp_build_endif(&if_ctx);
2048 }
2049 }
2050
2051
2052 /**
2053 * Build (per-coord) layer value.
2054 * Either clamp layer to valid values or fill in optional out_of_bounds
2055 * value and just return value unclamped.
2056 */
2057 static LLVMValueRef
lp_build_layer_coord(struct lp_build_sample_context * bld,unsigned texture_unit,bool is_cube_array,LLVMValueRef layer,LLVMValueRef * out_of_bounds)2058 lp_build_layer_coord(struct lp_build_sample_context *bld,
2059 unsigned texture_unit,
2060 bool is_cube_array,
2061 LLVMValueRef layer,
2062 LLVMValueRef *out_of_bounds)
2063 {
2064 LLVMValueRef num_layers;
2065 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2066
2067 num_layers = bld->dynamic_state->depth(bld->gallivm, bld->resources_type,
2068 bld->resources_ptr, texture_unit, NULL);
2069 num_layers = LLVMBuildZExt(bld->gallivm->builder, num_layers,
2070 bld->int_bld.elem_type, "");
2071 if (out_of_bounds) {
2072 LLVMValueRef out1, out;
2073 assert(!is_cube_array);
2074 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
2075 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
2076 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
2077 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
2078 return layer;
2079 } else {
2080 LLVMValueRef maxlayer;
2081 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
2082 bld->int_bld.one;
2083 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
2084 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
2085 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
2086 }
2087 }
2088
2089 static void
lp_build_sample_ms_offset(struct lp_build_context * int_coord_bld,LLVMValueRef ms_index,LLVMValueRef num_samples,LLVMValueRef sample_stride,LLVMValueRef * offset,LLVMValueRef * out_of_bounds)2090 lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld,
2091 LLVMValueRef ms_index,
2092 LLVMValueRef num_samples,
2093 LLVMValueRef sample_stride,
2094 LLVMValueRef *offset,
2095 LLVMValueRef *out_of_bounds)
2096 {
2097 LLVMValueRef out1;
2098 num_samples = lp_build_broadcast_scalar(int_coord_bld, num_samples);
2099 sample_stride = lp_build_broadcast_scalar(int_coord_bld, sample_stride);
2100 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
2101 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2102 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, num_samples);
2103 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2104 LLVMValueRef sample_offset = lp_build_mul(int_coord_bld,
2105 sample_stride, ms_index);
2106 *offset = lp_build_add(int_coord_bld, *offset, sample_offset);
2107 }
2108
2109
2110 #define WEIGHT_LUT_SIZE 1024
2111
2112
2113 static void
lp_build_sample_aniso(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef * colors_out)2114 lp_build_sample_aniso(struct lp_build_sample_context *bld,
2115 unsigned img_filter,
2116 unsigned mip_filter,
2117 bool is_gather,
2118 const LLVMValueRef *coords,
2119 const LLVMValueRef *offsets,
2120 LLVMValueRef ilevel0,
2121 LLVMValueRef ilevel1,
2122 LLVMValueRef lod_fpart,
2123 LLVMValueRef *colors_out)
2124 {
2125 struct gallivm_state *gallivm = bld->gallivm;
2126 LLVMBuilderRef builder = gallivm->builder;
2127 struct lp_build_context *coord_bld = &bld->coord_bld;
2128 struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
2129 LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, coords[0], coords[1]);
2130 LLVMValueRef float_size;
2131 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2132 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
2133 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
2134 const unsigned length = bld->coord_bld.type.length;
2135 const unsigned num_quads = length / 4;
2136 LLVMValueRef filter_table = bld->aniso_filter_table;
2137 LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
2138 LLVMValueRef data_ptr0, mipoff0 = NULL;
2139
2140 lp_build_mipmap_level_sizes(bld, ilevel0,
2141 &size0,
2142 &row_stride0_vec, &img_stride0_vec);
2143 if (bld->num_mips == 1) {
2144 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
2145 } else {
2146 /* This path should work for num_lods 1 too but slightly less efficient */
2147 data_ptr0 = bld->base_ptr;
2148 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
2149 }
2150
2151 float_size = lp_build_int_to_float(&bld->float_size_in_bld, bld->int_size);
2152
2153 LLVMValueRef float_size_lvl = lp_build_int_to_float(&bld->float_size_bld, size0);
2154 /* extract width and height into vectors for use later */
2155 static const unsigned char swizzle15[] = { /* no-op swizzle */
2156 1, 1, 1, 1, 5, 5, 5, 5
2157 };
2158 static const unsigned char swizzle04[] = { /* no-op swizzle */
2159 0, 0, 0, 0, 4, 4, 4, 4
2160 };
2161 LLVMValueRef width_dim, height_dim;
2162
2163 width_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle04,
2164 bld->float_size_bld.type.length,
2165 bld->coord_bld.type.length);
2166 height_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle15,
2167 bld->float_size_bld.type.length,
2168 bld->coord_bld.type.length);
2169
2170
2171 /* shuffle width/height for ddx/ddy calculations. */
2172 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
2173
2174 for (unsigned i = 0; i < num_quads; i++) {
2175 shuffles[i*4+0] = shuffles[i*4+1] = index0;
2176 shuffles[i*4+2] = shuffles[i*4+3] = index1;
2177 }
2178
2179 LLVMValueRef floatdim =
2180 LLVMBuildShuffleVector(builder, float_size, float_size,
2181 LLVMConstVector(shuffles, length), "");
2182
2183 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
2184
2185 LLVMValueRef scaling =
2186 lp_build_shl(&bld->leveli_bld, bld->leveli_bld.one, ilevel0);
2187 scaling = lp_build_int_to_float(&bld->levelf_bld, scaling);
2188 scaling = lp_build_rcp(&bld->levelf_bld, scaling);
2189
2190 if (bld->levelf_bld.type.length != length) {
2191 if (bld->levelf_bld.type.length == 1) {
2192 scaling = lp_build_broadcast_scalar(coord_bld,
2193 scaling);
2194 } else {
2195 scaling = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2196 bld->levelf_bld.type,
2197 coord_bld->type,
2198 scaling);
2199 }
2200 }
2201
2202 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, scaling);
2203
2204 static const unsigned char swizzle01[] = { /* no-op swizzle */
2205 0, 1, 0, 1,
2206 };
2207 static const unsigned char swizzle23[] = {
2208 2, 3, 2, 3,
2209 };
2210
2211 LLVMValueRef ddx_ddys, ddx_ddyt;
2212 ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
2213 ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
2214
2215 /* compute ellipse coefficients */
2216 /* * A*x*x + B*x*y + C*y*y = F.*/
2217 /* float A = vx*vx+vy*vy+1; */
2218 LLVMValueRef A = lp_build_mul(coord_bld, ddx_ddyt, ddx_ddyt);
2219
2220 LLVMValueRef Ay = lp_build_swizzle_aos(coord_bld, A, swizzle15);
2221 A = lp_build_add(coord_bld, A, Ay);
2222 A = lp_build_add(coord_bld, A, coord_bld->one);
2223 A = lp_build_swizzle_aos(coord_bld, A, swizzle04);
2224
2225 /* float B = -2*(ux*vx+uy*vy); */
2226 LLVMValueRef B = lp_build_mul(coord_bld, ddx_ddys, ddx_ddyt);
2227 LLVMValueRef By = lp_build_swizzle_aos(coord_bld, B, swizzle15);
2228 B = lp_build_add(coord_bld, B, By);
2229 B = lp_build_mul_imm(coord_bld, B, -2);
2230 B = lp_build_swizzle_aos(coord_bld, B, swizzle04);
2231
2232 /* float C = ux*ux+uy*uy+1; */
2233 LLVMValueRef C = lp_build_mul(coord_bld, ddx_ddys, ddx_ddys);
2234 LLVMValueRef Cy = lp_build_swizzle_aos(coord_bld, C, swizzle15);
2235 C = lp_build_add(coord_bld, C, Cy);
2236 C = lp_build_add(coord_bld, C, coord_bld->one);
2237 C = lp_build_swizzle_aos(coord_bld, C, swizzle04);
2238
2239 /* float F = A*C-B*B/4.0f; */
2240 LLVMValueRef F = lp_build_mul(coord_bld, B, B);
2241 F = lp_build_div(coord_bld, F, lp_build_const_vec(gallivm, coord_bld->type, 4.0));
2242 LLVMValueRef F_p2 = lp_build_mul(coord_bld, A, C);
2243 F = lp_build_sub(coord_bld, F_p2, F);
2244
2245 /* compute ellipse bounding box in texture space */
2246 /* const float d = -B*B+4.0f*C*A; */
2247 LLVMValueRef d = lp_build_sub(coord_bld, coord_bld->zero, lp_build_mul(coord_bld, B, B));
2248 LLVMValueRef d_p2 = lp_build_mul(coord_bld, A, C);
2249 d_p2 = lp_build_mul_imm(coord_bld, d_p2, 4);
2250 d = lp_build_add(coord_bld, d, d_p2);
2251
2252 /* const float box_u = 2.0f / d * sqrtf(d*C*F); */
2253 /* box_u -> half of bbox with */
2254 LLVMValueRef temp;
2255 temp = lp_build_mul(coord_bld, d, C);
2256 temp = lp_build_mul(coord_bld, temp, F);
2257 temp = lp_build_sqrt(coord_bld, temp);
2258
2259 LLVMValueRef box_u = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2260 box_u = lp_build_mul(coord_bld, box_u, temp);
2261
2262 /* const float box_v = 2.0f / d * sqrtf(A*d*F); */
2263 /* box_v -> half of bbox height */
2264 temp = lp_build_mul(coord_bld, A, d);
2265 temp = lp_build_mul(coord_bld, temp, F);
2266 temp = lp_build_sqrt(coord_bld, temp);
2267
2268 LLVMValueRef box_v = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2269 box_v = lp_build_mul(coord_bld, box_v, temp);
2270
2271 /* Scale ellipse formula to directly index the Filter Lookup Table.
2272 * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2273 */
2274 LLVMValueRef formScale = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, WEIGHT_LUT_SIZE - 1), F);
2275
2276 A = lp_build_mul(coord_bld, A, formScale);
2277 B = lp_build_mul(coord_bld, B, formScale);
2278 C = lp_build_mul(coord_bld, C, formScale);
2279 /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2280
2281 LLVMValueRef ddq = lp_build_mul_imm(coord_bld, A, 2);
2282
2283 /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2284 * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2285 * value, q, is less than F, we're inside the ellipse
2286 */
2287
2288 LLVMValueRef float_size0 = lp_build_int_to_float(float_size_bld, bld->int_size);
2289 LLVMValueRef width0 = lp_build_extract_broadcast(gallivm,
2290 float_size_bld->type,
2291 coord_bld->type,
2292 float_size0, index0);
2293 LLVMValueRef height0 = lp_build_extract_broadcast(gallivm,
2294 float_size_bld->type,
2295 coord_bld->type,
2296 float_size0, index1);
2297
2298 /* texture->width0 * scaling */
2299 width0 = lp_build_mul(coord_bld, width0, scaling);
2300 /* texture->height0 * scaling */
2301 height0 = lp_build_mul(coord_bld, height0, scaling);
2302
2303 /* tex_u = -0.5f * s[j] * texture->width0 * scaling */
2304 LLVMValueRef tex_u = lp_build_mul(coord_bld, coords[0], width0);
2305 tex_u = lp_build_add(coord_bld, tex_u, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2306
2307 /* tex_v = -0.5f * t[j] * texture->height0 * scaling */
2308 LLVMValueRef tex_v = lp_build_mul(coord_bld, coords[1], height0);
2309 tex_v = lp_build_add(coord_bld, tex_v, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2310
2311 /* const int u0 = (int) floorf(tex_u - box_u); */
2312 LLVMValueRef u0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_u, box_u)));
2313 /* const int u1 = (int) ceilf(tex_u + box_u); */
2314 LLVMValueRef u1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_u, box_u)));
2315
2316 /* const int v0 = (int) floorf(tex_v - box_v); */
2317 LLVMValueRef v0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_v, box_v)));
2318 /* const int v1 = (int) ceilf(tex_v + box_v); */
2319 LLVMValueRef v1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_v, box_v)));
2320
2321 /* const float U = u0 - tex_u; */
2322 LLVMValueRef U = lp_build_sub(coord_bld, lp_build_int_to_float(coord_bld, u0), tex_u);
2323
2324 /* A * (2 * U + 1) */
2325 LLVMValueRef dq_base = lp_build_mul_imm(coord_bld, U, 2);
2326 dq_base = lp_build_add(coord_bld, dq_base, coord_bld->one);
2327 dq_base = lp_build_mul(coord_bld, dq_base, A);
2328
2329 /* A * U * U */
2330 LLVMValueRef q_base = lp_build_mul(coord_bld, U, U);
2331 q_base = lp_build_mul(coord_bld, q_base, A);
2332
2333 LLVMValueRef colors0[4];
2334 LLVMValueRef den_store = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "den");
2335
2336 for (unsigned chan = 0; chan < 4; chan++)
2337 colors0[chan] = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "colors");
2338
2339 LLVMValueRef q_store, dq_store;
2340 q_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "q");
2341 dq_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "dq");
2342
2343 LLVMValueRef v_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "v_limiter");
2344 LLVMValueRef u_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "u_limiter");
2345
2346 LLVMBuildStore(builder, v0, v_limiter);
2347
2348 /* create an LLVM loop block for the V iterator */
2349 LLVMBasicBlockRef v_loop_block = lp_build_insert_new_block(gallivm, "vloop");
2350
2351 LLVMBuildBr(builder, v_loop_block);
2352 LLVMPositionBuilderAtEnd(builder, v_loop_block);
2353
2354 LLVMValueRef v_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, v_limiter, "");
2355 LLVMValueRef v_mask = LLVMBuildICmp(builder, LLVMIntSLE, v_val, v1, "");
2356
2357 /* loop over V values. */
2358 {
2359 /* const float V = v - tex_v; */
2360 LLVMValueRef V =
2361 lp_build_sub(coord_bld,
2362 lp_build_int_to_float(coord_bld, v_val), tex_v);
2363
2364 /* float dq = dq_base + B * V; */
2365 LLVMValueRef dq = lp_build_mul(coord_bld, V, B);
2366 dq = lp_build_add(coord_bld, dq, dq_base);
2367
2368 /* float q = (C * V + B * U) * V + q_base */
2369 LLVMValueRef q = lp_build_mul(coord_bld, C, V);
2370 q = lp_build_add(coord_bld, q, lp_build_mul(coord_bld, B, U));
2371 q = lp_build_mul(coord_bld, q, V);
2372 q = lp_build_add(coord_bld, q, q_base);
2373
2374 LLVMBuildStore(builder, q, q_store);
2375 LLVMBuildStore(builder, dq, dq_store);
2376
2377 LLVMBuildStore(builder, u0, u_limiter);
2378
2379 /* create an LLVM loop block for the V iterator */
2380 LLVMBasicBlockRef u_loop_block = lp_build_insert_new_block(gallivm, "uloop");
2381
2382 LLVMBuildBr(builder, u_loop_block);
2383 LLVMPositionBuilderAtEnd(builder, u_loop_block);
2384
2385 LLVMValueRef u_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type,
2386 u_limiter, "");
2387 LLVMValueRef u_mask = LLVMBuildICmp(builder,
2388 LLVMIntSLE,
2389 u_val,
2390 u1, "");
2391
2392 /* loop over U values */
2393 {
2394 /* q = (int)q */
2395 q = lp_build_itrunc(coord_bld,
2396 LLVMBuildLoad2(builder, bld->coord_bld.vec_type,
2397 q_store, ""));
2398
2399 /*
2400 * avoid OOB access to filter table, generate a mask for q > 1024,
2401 * then truncate it.
2402 */
2403 LLVMValueRef q_mask = LLVMBuildICmp(builder,
2404 LLVMIntSLE,
2405 q,
2406 lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff), "");
2407 q_mask = LLVMBuildSExt(builder, q_mask, bld->int_coord_bld.vec_type, "");
2408
2409 q = lp_build_max(&bld->int_coord_bld, q, bld->int_coord_bld.zero);
2410 q = lp_build_and(&bld->int_coord_bld, q, lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff));
2411
2412 /* update the offsets to deal with float size. */
2413 q = lp_build_mul_imm(&bld->int_coord_bld, q, 4);
2414 filter_table = LLVMBuildBitCast(gallivm->builder, filter_table, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
2415
2416 /* Lookup weights in filter table */
2417 LLVMValueRef weights = lp_build_gather(gallivm, coord_bld->type.length,
2418 coord_bld->type.width,
2419 lp_elem_type(coord_bld->type),
2420 true, filter_table, q, true);
2421
2422 /*
2423 * Mask off the weights here which should ensure no-op for loops
2424 * where some of the u/v values are not being calculated.
2425 */
2426 weights = LLVMBuildBitCast(builder, weights, bld->int_coord_bld.vec_type, "");
2427 weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, ""));
2428 weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, v_mask, bld->int_coord_bld.vec_type, ""));
2429 weights = lp_build_and(&bld->int_coord_bld, weights, q_mask);
2430 weights = LLVMBuildBitCast(builder, weights, bld->coord_bld.vec_type, "");
2431
2432 /* if the weights are all 0 avoid doing the sampling at all. */
2433 struct lp_build_if_state noloadw0;
2434
2435 LLVMValueRef wnz = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
2436 weights, bld->coord_bld.zero, "");
2437 wnz = LLVMBuildSExt(builder, wnz, bld->int_coord_bld.vec_type, "");
2438 wnz = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, wnz);
2439 lp_build_if(&noloadw0, gallivm, wnz);
2440 LLVMValueRef new_coords[4];
2441 new_coords[0] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, u_val), width_dim);
2442 new_coords[1] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, v_val), height_dim);
2443 new_coords[2] = coords[2];
2444 new_coords[3] = coords[3];
2445
2446 /* lookup q in filter table */
2447 LLVMValueRef temp_colors[4];
2448 lp_build_sample_image_nearest(bld, size0,
2449 row_stride0_vec, img_stride0_vec,
2450 data_ptr0, mipoff0, new_coords, offsets,
2451 temp_colors);
2452
2453 for (unsigned chan = 0; chan < 4; chan++) {
2454 LLVMValueRef tcolor = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, colors0[chan], "");
2455
2456 tcolor = lp_build_add(&bld->texel_bld, tcolor, lp_build_mul(&bld->texel_bld, temp_colors[chan], weights));
2457 LLVMBuildStore(builder, tcolor, colors0[chan]);
2458 }
2459
2460 /* multiple colors by weight and add in. */
2461 /* den += weight; */
2462 LLVMValueRef den = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, den_store, "");
2463 den = lp_build_add(&bld->texel_bld, den, weights);
2464 LLVMBuildStore(builder, den, den_store);
2465
2466 lp_build_endif(&noloadw0);
2467 /* q += dq; */
2468 /* dq += ddq; */
2469 q = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, q_store, "");
2470 dq = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, dq_store, "");
2471 q = lp_build_add(coord_bld, q, dq);
2472 dq = lp_build_add(coord_bld, dq, ddq);
2473 LLVMBuildStore(builder, q, q_store);
2474 LLVMBuildStore(builder, dq, dq_store);
2475 }
2476 /* u += 1 */
2477 u_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, u_limiter, "");
2478 u_val = lp_build_add(&bld->int_coord_bld, u_val, bld->int_coord_bld.one);
2479 LLVMBuildStore(builder, u_val, u_limiter);
2480
2481 u_mask = LLVMBuildICmp(builder,
2482 LLVMIntSLE,
2483 u_val,
2484 u1, "");
2485 LLVMValueRef u_end_cond = LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, "");
2486 u_end_cond = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, u_end_cond);
2487
2488 LLVMBasicBlockRef u_end_loop = lp_build_insert_new_block(gallivm, "u_end_loop");
2489
2490 LLVMBuildCondBr(builder, u_end_cond,
2491 u_loop_block, u_end_loop);
2492
2493 LLVMPositionBuilderAtEnd(builder, u_end_loop);
2494
2495 }
2496
2497 /* v += 1 */
2498 v_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, v_limiter, "");
2499 v_val = lp_build_add(&bld->int_coord_bld, v_val, bld->int_coord_bld.one);
2500 LLVMBuildStore(builder, v_val, v_limiter);
2501
2502 v_mask = LLVMBuildICmp(builder,
2503 LLVMIntSLE,
2504 v_val,
2505 v1, "");
2506 LLVMValueRef v_end_cond = LLVMBuildSExt(builder, v_mask,
2507 bld->int_coord_bld.vec_type, "");
2508 v_end_cond = lp_build_any_true_range(&bld->coord_bld,
2509 bld->coord_bld.type.length, v_end_cond);
2510
2511 LLVMBasicBlockRef v_end_loop = lp_build_insert_new_block(gallivm, "v_end_loop");
2512
2513 LLVMBuildCondBr(builder, v_end_cond,
2514 v_loop_block, v_end_loop);
2515
2516 LLVMPositionBuilderAtEnd(builder, v_end_loop);
2517
2518 LLVMValueRef den = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, den_store, "");
2519
2520 for (unsigned chan = 0; chan < 4; chan++) {
2521 colors0[chan] =
2522 lp_build_div(&bld->texel_bld,
2523 LLVMBuildLoad2(builder, bld->texel_bld.vec_type,
2524 colors0[chan], ""), den);
2525 }
2526
2527 LLVMValueRef den0 = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_EQUAL,
2528 den, bld->coord_bld.zero);
2529
2530 LLVMValueRef den0_any =
2531 lp_build_any_true_range(&bld->coord_bld,
2532 bld->coord_bld.type.length, den0);
2533
2534 struct lp_build_if_state den0_fallback;
2535 lp_build_if(&den0_fallback, gallivm, den0_any);
2536 {
2537 LLVMValueRef colors_den0[4];
2538 lp_build_sample_image_linear(bld, false, size0, NULL,
2539 row_stride0_vec, img_stride0_vec,
2540 data_ptr0, mipoff0, coords, offsets,
2541 colors_den0);
2542 for (unsigned chan = 0; chan < 4; chan++) {
2543 LLVMValueRef chan_val =
2544 lp_build_select(&bld->texel_bld, den0,
2545 colors_den0[chan], colors0[chan]);
2546 LLVMBuildStore(builder, chan_val, colors_out[chan]);
2547 }
2548 }
2549 lp_build_else(&den0_fallback);
2550 {
2551 for (unsigned chan = 0; chan < 4; chan++) {
2552 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
2553 }
2554 }
2555 lp_build_endif(&den0_fallback);
2556 }
2557
2558
2559 /**
2560 * Calculate cube face, lod, mip levels.
2561 */
2562 static void
lp_build_sample_common(struct lp_build_sample_context * bld,bool is_lodq,unsigned texture_index,unsigned sampler_index,LLVMValueRef * coords,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,LLVMValueRef * lod_pos_or_zero,LLVMValueRef * lod,LLVMValueRef * lod_fpart,LLVMValueRef * ilevel0,LLVMValueRef * ilevel1)2563 lp_build_sample_common(struct lp_build_sample_context *bld,
2564 bool is_lodq,
2565 unsigned texture_index,
2566 unsigned sampler_index,
2567 LLVMValueRef *coords,
2568 const struct lp_derivatives *derivs, /* optional */
2569 LLVMValueRef lod_bias, /* optional */
2570 LLVMValueRef explicit_lod, /* optional */
2571 LLVMValueRef *lod_pos_or_zero,
2572 LLVMValueRef *lod,
2573 LLVMValueRef *lod_fpart,
2574 LLVMValueRef *ilevel0,
2575 LLVMValueRef *ilevel1)
2576 {
2577 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
2578 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
2579 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
2580 const unsigned target = bld->static_texture_state->target;
2581 const bool aniso = bld->static_sampler_state->aniso;
2582 LLVMValueRef first_level, last_level;
2583 LLVMValueRef lod_ipart = NULL;
2584 struct lp_derivatives cube_derivs;
2585
2586 /*
2587 printf("%s mip %d min %d mag %d\n", __func__,
2588 mip_filter, min_filter, mag_filter);
2589 */
2590
2591 first_level = get_first_level(bld->gallivm,
2592 bld->resources_type,
2593 bld->resources_ptr,
2594 texture_index, NULL,
2595 bld->static_texture_state,
2596 bld->dynamic_state);
2597 last_level = get_last_level(bld->gallivm,
2598 bld->resources_type,
2599 bld->resources_ptr,
2600 texture_index, NULL,
2601 bld->static_texture_state,
2602 bld->dynamic_state);
2603
2604 /*
2605 * Choose cube face, recompute texcoords for the chosen face and
2606 * calculate / transform derivatives.
2607 */
2608 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
2609 bool need_derivs = ((min_filter != mag_filter ||
2610 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
2611 !bld->static_sampler_state->min_max_lod_equal &&
2612 !explicit_lod);
2613 lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs);
2614 if (need_derivs)
2615 derivs = &cube_derivs;
2616
2617 if (target == PIPE_TEXTURE_CUBE_ARRAY && !is_lodq) {
2618 /* calculate cube layer coord now */
2619 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
2620 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
2621 layer = lp_build_mul(&bld->int_coord_bld, layer, six);
2622 coords[3] = lp_build_layer_coord(bld, texture_index, true, layer, NULL);
2623 /* because of seamless filtering can't add it to face (coords[2]) here. */
2624 }
2625 } else if ((target == PIPE_TEXTURE_1D_ARRAY ||
2626 target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) {
2627 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
2628 coords[2] = lp_build_layer_coord(bld, texture_index, false, coords[2], NULL);
2629 }
2630
2631 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
2632 /*
2633 * Clamp p coords to [0,1] for fixed function depth texture format here.
2634 * Technically this is not entirely correct for unorm depth as the ref
2635 * value should be converted to the depth format (quantization!) and
2636 * comparison then done in texture format. This would actually help
2637 * performance (since only need to do it once and could save the
2638 * per-sample conversion of texels to floats instead), but it would need
2639 * more messy code (would need to push at least some bits down to actual
2640 * fetch so conversion could be skipped, and would have ugly interaction
2641 * with border color, would need to convert border color to that format
2642 * too or do some other tricks to make it work).
2643 */
2644 const struct util_format_description *format_desc = bld->format_desc;
2645 /* not entirely sure we couldn't end up with non-valid swizzle here */
2646 const enum util_format_type chan_type =
2647 format_desc->swizzle[0] <= PIPE_SWIZZLE_W
2648 ? format_desc->channel[format_desc->swizzle[0]].type
2649 : UTIL_FORMAT_TYPE_FLOAT;
2650 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2651 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2652 bld->coord_bld.zero, bld->coord_bld.one);
2653 }
2654 }
2655
2656 /*
2657 * Compute the level of detail (float).
2658 */
2659 if (min_filter != mag_filter ||
2660 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2661 LLVMValueRef max_aniso = NULL;
2662
2663 if (aniso)
2664 max_aniso = bld->dynamic_state->max_aniso(bld->gallivm,
2665 bld->resources_type,
2666 bld->resources_ptr,
2667 sampler_index);
2668
2669 /* Need to compute lod either to choose mipmap levels or to
2670 * distinguish between minification/magnification with one mipmap level.
2671 */
2672 LLVMValueRef first_level_vec =
2673 lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level);
2674 lp_build_lod_selector(bld, is_lodq, sampler_index,
2675 first_level_vec,
2676 coords[0], coords[1], coords[2],
2677 derivs, lod_bias, explicit_lod,
2678 mip_filter, max_aniso, lod,
2679 &lod_ipart, lod_fpart, lod_pos_or_zero);
2680 if (is_lodq) {
2681 last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2682 last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2683 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2684
2685 switch (mip_filter) {
2686 case PIPE_TEX_MIPFILTER_NONE:
2687 *lod_fpart = bld->lodf_bld.zero;
2688 break;
2689 case PIPE_TEX_MIPFILTER_NEAREST:
2690 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2691 FALLTHROUGH;
2692 case PIPE_TEX_MIPFILTER_LINEAR:
2693 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2694 bld->lodf_bld.zero, last_level);
2695 break;
2696 }
2697 return;
2698 }
2699 } else {
2700 lod_ipart = bld->lodi_bld.zero;
2701 *lod_pos_or_zero = bld->lodi_bld.zero;
2702 }
2703
2704 if ((bld->num_lods != bld->num_mips || bld->num_lods == 1) &&
2705 bld->lodi_bld.type.length != 1) {
2706 /* only makes sense if there's just a single mip level */
2707 assert(bld->num_mips == 1);
2708 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2709 }
2710
2711 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2712 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
2713
2714 /*
2715 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2716 */
2717
2718 if (aniso) {
2719 lp_build_nearest_mip_level(bld,
2720 first_level, last_level,
2721 lod_ipart, ilevel0, NULL);
2722 return;
2723 }
2724
2725 switch (mip_filter) {
2726 default:
2727 unreachable("Bad mip_filter value in lp_build_sample_soa()");
2728 case PIPE_TEX_MIPFILTER_NONE:
2729 /* always use mip level 0 */
2730 *ilevel0 = first_level;
2731 break;
2732 case PIPE_TEX_MIPFILTER_NEAREST:
2733 assert(lod_ipart);
2734 lp_build_nearest_mip_level(bld,
2735 first_level, last_level,
2736 lod_ipart, ilevel0, NULL);
2737 break;
2738 case PIPE_TEX_MIPFILTER_LINEAR:
2739 assert(lod_ipart);
2740 assert(*lod_fpart);
2741
2742 lp_build_linear_mip_levels(bld, texture_index,
2743 first_level, last_level,
2744 lod_ipart, lod_fpart,
2745 ilevel0, ilevel1);
2746 break;
2747 }
2748 }
2749
2750
2751 static void
lp_build_clamp_border_color(struct lp_build_sample_context * bld,unsigned sampler_unit)2752 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
2753 unsigned sampler_unit)
2754 {
2755 struct gallivm_state *gallivm = bld->gallivm;
2756 LLVMBuilderRef builder = gallivm->builder;
2757 LLVMValueRef border_color_ptr =
2758 bld->dynamic_state->border_color(gallivm,
2759 bld->resources_type,
2760 bld->resources_ptr, sampler_unit);
2761 LLVMValueRef border_color;
2762 const struct util_format_description *format_desc = bld->format_desc;
2763 struct lp_type vec4_type = bld->texel_type;
2764 struct lp_build_context vec4_bld;
2765 LLVMValueRef min_clamp = NULL;
2766 LLVMValueRef max_clamp = NULL;
2767
2768 /*
2769 * For normalized format need to clamp border color (technically
2770 * probably should also quantize the data). Really sucks doing this
2771 * here but can't avoid at least for now since this is part of
2772 * sampler state and texture format is part of sampler_view state.
2773 * GL expects also expects clamping for uint/sint formats too so
2774 * do that as well (d3d10 can't end up here with uint/sint since it
2775 * only supports them with ld).
2776 */
2777 vec4_type.length = 4;
2778 lp_build_context_init(&vec4_bld, gallivm, vec4_type);
2779
2780 /*
2781 * Vectorized clamping of border color. Loading is a bit of a hack since
2782 * we just cast the pointer to float array to pointer to vec4
2783 * (int or float).
2784 */
2785 LLVMTypeRef border_color_type = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
2786 border_color_ptr = lp_build_array_get_ptr2(gallivm, border_color_type, border_color_ptr,
2787 lp_build_const_int32(gallivm, 0));
2788 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
2789 LLVMPointerType(vec4_bld.vec_type, 0), "");
2790 border_color = LLVMBuildLoad2(builder, vec4_bld.vec_type, border_color_ptr, "");
2791 /* we don't have aligned type in the dynamic state unfortunately */
2792 LLVMSetAlignment(border_color, 4);
2793
2794 /*
2795 * Instead of having some incredibly complex logic which will try to figure
2796 * out clamping necessary for each channel, simply use the first channel,
2797 * and treat mixed signed/unsigned normalized formats specially. (Mixed
2798 * non-normalized, which wouldn't work at all here, do not exist for a good
2799 * reason.)
2800 */
2801 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2802 int chan;
2803 /* d/s needs special handling because both present means just sampling depth */
2804 if (util_format_is_depth_and_stencil(format_desc->format)) {
2805 chan = format_desc->swizzle[0];
2806 } else {
2807 chan = util_format_get_first_non_void_channel(format_desc->format);
2808 }
2809 if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
2810 unsigned chan_type = format_desc->channel[chan].type;
2811 unsigned chan_norm = format_desc->channel[chan].normalized;
2812 unsigned chan_pure = format_desc->channel[chan].pure_integer;
2813 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
2814 if (chan_norm) {
2815 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2816 max_clamp = vec4_bld.one;
2817 } else if (chan_pure) {
2818 /*
2819 * Border color was stored as int, hence need min/max clamp
2820 * only if chan has less than 32 bits..
2821 */
2822 unsigned chan_size = format_desc->channel[chan].size;
2823 if (chan_size < 32) {
2824 min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2825 0 - (1 << (chan_size - 1)));
2826 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2827 (1 << (chan_size - 1)) - 1);
2828 }
2829 }
2830 /* TODO: no idea about non-pure, non-normalized! */
2831 } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
2832 if (chan_norm) {
2833 min_clamp = vec4_bld.zero;
2834 max_clamp = vec4_bld.one;
2835 } else if (chan_pure) {
2836 /*
2837 * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
2838 * we use Z32_FLOAT_S8X24 to imply sampling depth component and
2839 * ignoring stencil, which will blow up here if we try to do a
2840 * uint clamp in a float texel build... And even if we had
2841 * that format, mesa st also thinks using z24s8 means depth
2842 * sampling ignoring stencil.
2843 */
2844
2845 /*
2846 * Border color was stored as uint, hence never need min clamp,
2847 * and only need max clamp if chan has less than 32 bits.
2848 */
2849 unsigned chan_size = format_desc->channel[chan].size;
2850 if (chan_size < 32) {
2851 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2852 (1 << chan_size) - 1);
2853 }
2854 /* TODO: no idea about non-pure, non-normalized! */
2855 }
2856 } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
2857 /* TODO: I have no idea what clamp this would need if any! */
2858 }
2859 }
2860 /* mixed plain formats (or different pure size) */
2861 switch (format_desc->format) {
2862 case PIPE_FORMAT_B10G10R10A2_UINT:
2863 case PIPE_FORMAT_R10G10B10A2_UINT:
2864 {
2865 unsigned max10 = (1 << 10) - 1;
2866 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
2867 max10, (1 << 2) - 1, NULL);
2868 }
2869 break;
2870 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
2871 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2872 -1.0F, 0.0F, NULL);
2873 max_clamp = vec4_bld.one;
2874 break;
2875 case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
2876 case PIPE_FORMAT_R5SG5SB6U_NORM:
2877 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2878 0.0F, 0.0F, NULL);
2879 max_clamp = vec4_bld.one;
2880 break;
2881 default:
2882 break;
2883 }
2884 } else {
2885 /* cannot figure this out from format description */
2886 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2887 /* s3tc formats are always unorm */
2888 min_clamp = vec4_bld.zero;
2889 max_clamp = vec4_bld.one;
2890 } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
2891 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
2892 format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
2893 switch (format_desc->format) {
2894 case PIPE_FORMAT_RGTC1_UNORM:
2895 case PIPE_FORMAT_RGTC2_UNORM:
2896 case PIPE_FORMAT_LATC1_UNORM:
2897 case PIPE_FORMAT_LATC2_UNORM:
2898 case PIPE_FORMAT_ETC1_RGB8:
2899 case PIPE_FORMAT_BPTC_RGBA_UNORM:
2900 case PIPE_FORMAT_BPTC_SRGBA:
2901 min_clamp = vec4_bld.zero;
2902 max_clamp = vec4_bld.one;
2903 break;
2904 case PIPE_FORMAT_RGTC1_SNORM:
2905 case PIPE_FORMAT_RGTC2_SNORM:
2906 case PIPE_FORMAT_LATC1_SNORM:
2907 case PIPE_FORMAT_LATC2_SNORM:
2908 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2909 max_clamp = vec4_bld.one;
2910 break;
2911 case PIPE_FORMAT_BPTC_RGB_FLOAT:
2912 /* not sure if we should clamp to max half float? */
2913 break;
2914 case PIPE_FORMAT_BPTC_RGB_UFLOAT:
2915 min_clamp = vec4_bld.zero;
2916 break;
2917 default:
2918 assert(0);
2919 break;
2920 }
2921 } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
2922 /*
2923 * all others from subsampled/other group, though we don't care
2924 * about yuv (and should not have any from zs here)
2925 */
2926 switch (format_desc->format) {
2927 case PIPE_FORMAT_R8G8_B8G8_UNORM:
2928 case PIPE_FORMAT_G8R8_G8B8_UNORM:
2929 case PIPE_FORMAT_G8R8_B8R8_UNORM:
2930 case PIPE_FORMAT_R8G8_R8B8_UNORM:
2931 case PIPE_FORMAT_G8B8_G8R8_UNORM:
2932 case PIPE_FORMAT_B8G8_R8G8_UNORM:
2933 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
2934 min_clamp = vec4_bld.zero;
2935 max_clamp = vec4_bld.one;
2936 break;
2937 case PIPE_FORMAT_R8G8Bx_SNORM:
2938 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2939 max_clamp = vec4_bld.one;
2940 break;
2941 /*
2942 * Note smallfloat formats usually don't need clamping
2943 * (they still have infinite range) however this is not
2944 * true for r11g11b10 and r9g9b9e5, which can't represent
2945 * negative numbers (and additionally r9g9b9e5 can't represent
2946 * very large numbers). d3d10 seems happy without clamping in
2947 * this case, but gl spec is pretty clear: "for floating
2948 * point and integer formats, border values are clamped to
2949 * the representable range of the format" so do that here.
2950 */
2951 case PIPE_FORMAT_R11G11B10_FLOAT:
2952 min_clamp = vec4_bld.zero;
2953 break;
2954 case PIPE_FORMAT_R9G9B9E5_FLOAT:
2955 min_clamp = vec4_bld.zero;
2956 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
2957 break;
2958 default:
2959 assert(0);
2960 break;
2961 }
2962 }
2963 }
2964
2965 if (min_clamp) {
2966 border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
2967 }
2968 if (max_clamp) {
2969 border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
2970 }
2971
2972 bld->border_color_clamped = border_color;
2973 }
2974
2975
2976 /**
2977 * General texture sampling codegen.
2978 * This function handles texture sampling for all texture targets (1D,
2979 * 2D, 3D, cube) and all filtering modes.
2980 */
2981 static void
lp_build_sample_general(struct lp_build_sample_context * bld,unsigned sampler_unit,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef * colors_out)2982 lp_build_sample_general(struct lp_build_sample_context *bld,
2983 unsigned sampler_unit,
2984 bool is_gather,
2985 const LLVMValueRef *coords,
2986 const LLVMValueRef *offsets,
2987 LLVMValueRef lod_positive,
2988 LLVMValueRef lod_fpart,
2989 LLVMValueRef ilevel0,
2990 LLVMValueRef ilevel1,
2991 LLVMValueRef *colors_out)
2992 {
2993 LLVMBuilderRef builder = bld->gallivm->builder;
2994 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2995 const unsigned mip_filter = sampler_state->min_mip_filter;
2996 const unsigned min_filter = sampler_state->min_img_filter;
2997 const unsigned mag_filter = sampler_state->mag_img_filter;
2998 LLVMValueRef texels[4];
2999 unsigned chan;
3000
3001 /* if we need border color, (potentially) clamp it now */
3002 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
3003 min_filter,
3004 mag_filter) ||
3005 (bld->dims > 1 &&
3006 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
3007 min_filter,
3008 mag_filter)) ||
3009 (bld->dims > 2 &&
3010 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
3011 min_filter,
3012 mag_filter))) {
3013 lp_build_clamp_border_color(bld, sampler_unit);
3014 }
3015
3016
3017 /*
3018 * Get/interpolate texture colors.
3019 */
3020
3021 for (chan = 0; chan < 4; ++chan) {
3022 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
3023 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
3024 }
3025
3026 if (sampler_state->aniso) {
3027 lp_build_sample_aniso(bld, PIPE_TEX_FILTER_NEAREST, mip_filter,
3028 false, coords, offsets, ilevel0,
3029 ilevel1, lod_fpart, texels);
3030 } else if (min_filter == mag_filter) {
3031 /* no need to distinguish between minification and magnification */
3032 lp_build_sample_mipmap(bld, min_filter, mip_filter,
3033 is_gather,
3034 coords, offsets,
3035 ilevel0, ilevel1, lod_fpart,
3036 texels);
3037 } else {
3038 /*
3039 * Could also get rid of the if-logic and always use mipmap_both, both
3040 * for the single lod and multi-lod case if nothing really uses this.
3041 */
3042 if (bld->num_lods == 1) {
3043 /* Emit conditional to choose min image filter or mag image filter
3044 * depending on the lod being > 0 or <= 0, respectively.
3045 */
3046 struct lp_build_if_state if_ctx;
3047
3048 lod_positive = LLVMBuildTrunc(builder, lod_positive,
3049 LLVMInt1TypeInContext(bld->gallivm->context),
3050 "lod_pos");
3051
3052 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
3053 {
3054 /* Use the minification filter */
3055 lp_build_sample_mipmap(bld, min_filter, mip_filter, false,
3056 coords, offsets,
3057 ilevel0, ilevel1, lod_fpart,
3058 texels);
3059 }
3060 lp_build_else(&if_ctx);
3061 {
3062 /* Use the magnification filter */
3063 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
3064 false,
3065 coords, offsets,
3066 ilevel0, NULL, NULL,
3067 texels);
3068 }
3069 lp_build_endif(&if_ctx);
3070 } else {
3071 LLVMValueRef need_linear, linear_mask;
3072 unsigned mip_filter_for_nearest;
3073 struct lp_build_if_state if_ctx;
3074
3075 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
3076 linear_mask = lod_positive;
3077 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
3078 } else {
3079 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
3080 mip_filter_for_nearest = mip_filter;
3081 }
3082 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
3083 linear_mask);
3084 lp_build_name(need_linear, "need_linear");
3085
3086 if (bld->num_lods != bld->coord_type.length) {
3087 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
3088 bld->lodi_type,
3089 bld->int_coord_type,
3090 linear_mask);
3091 }
3092
3093 lp_build_if(&if_ctx, bld->gallivm, need_linear);
3094 {
3095 /*
3096 * Do sampling with both filters simultaneously. This means using
3097 * a linear filter and doing some tricks (with weights) for the
3098 * pixels which need nearest filter.
3099 * Note that it's probably rare some pixels need nearest and some
3100 * linear filter but the fixups required for the nearest pixels
3101 * aren't all that complicated so just always run a combined path
3102 * if at least some pixels require linear.
3103 */
3104 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
3105 coords, offsets,
3106 ilevel0, ilevel1,
3107 lod_fpart, lod_positive,
3108 texels);
3109 }
3110 lp_build_else(&if_ctx);
3111 {
3112 /*
3113 * All pixels require just nearest filtering, which is way
3114 * cheaper than linear, hence do a separate path for that.
3115 */
3116 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
3117 mip_filter_for_nearest, false,
3118 coords, offsets,
3119 ilevel0, ilevel1, lod_fpart,
3120 texels);
3121 }
3122 lp_build_endif(&if_ctx);
3123 }
3124 }
3125
3126 for (chan = 0; chan < 4; ++chan) {
3127 colors_out[chan] = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, texels[chan], "");
3128 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
3129 }
3130 }
3131
3132
3133 /**
3134 * Texel fetch function. In contrast to general sampling there is no
3135 * filtering, no coord minification, lod (if any) is always explicit uint,
3136 * coords are uints (in terms of texel units) directly to be applied to the
3137 * selected mip level (after adding texel offsets). This function handles
3138 * texel fetch for all targets where texel fetch is supported (no cube maps,
3139 * but 1d, 2d, 3d are supported, arrays and buffers should be too).
3140 */
3141 static void
lp_build_fetch_texel(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef ms_index,const LLVMValueRef * coords,LLVMValueRef explicit_lod,const LLVMValueRef * offsets,LLVMValueRef * colors_out)3142 lp_build_fetch_texel(struct lp_build_sample_context *bld,
3143 unsigned texture_unit,
3144 LLVMValueRef ms_index,
3145 const LLVMValueRef *coords,
3146 LLVMValueRef explicit_lod,
3147 const LLVMValueRef *offsets,
3148 LLVMValueRef *colors_out)
3149 {
3150 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
3151 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
3152 unsigned dims = bld->dims, chan;
3153 unsigned target = bld->static_texture_state->target;
3154 bool out_of_bound_ret_zero = true;
3155 LLVMValueRef size, ilevel;
3156 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
3157 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
3158 LLVMValueRef width, height, depth, i, j;
3159 LLVMValueRef offset, out_of_bounds, out1;
3160
3161 LLVMValueRef first_level;
3162
3163 first_level = get_first_level(bld->gallivm,
3164 bld->resources_type,
3165 bld->resources_ptr,
3166 texture_unit, NULL,
3167 bld->static_texture_state,
3168 bld->dynamic_state);
3169 out_of_bounds = int_coord_bld->zero;
3170
3171 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
3172 if (bld->num_mips != int_coord_bld->type.length) {
3173 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
3174 perquadi_bld->type, explicit_lod, 0);
3175 } else {
3176 ilevel = explicit_lod;
3177 }
3178
3179 LLVMValueRef last_level;
3180
3181 last_level = get_last_level(bld->gallivm,
3182 bld->resources_type,
3183 bld->resources_ptr,
3184 texture_unit, NULL,
3185 bld->static_texture_state,
3186 bld->dynamic_state);
3187
3188 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
3189 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
3190 lp_build_nearest_mip_level(bld,
3191 first_level, last_level,
3192 ilevel, &ilevel,
3193 out_of_bound_ret_zero ? &out_of_bounds : NULL);
3194 } else {
3195 assert(bld->num_mips == 1);
3196 if (bld->static_texture_state->target != PIPE_BUFFER) {
3197 ilevel = first_level;
3198 } else {
3199 ilevel = lp_build_const_int32(bld->gallivm, 0);
3200 }
3201 }
3202 lp_build_mipmap_level_sizes(bld, ilevel,
3203 &size,
3204 &row_stride_vec, &img_stride_vec);
3205 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
3206 size, &width, &height, &depth);
3207
3208 if (target == PIPE_TEXTURE_1D_ARRAY ||
3209 target == PIPE_TEXTURE_2D_ARRAY) {
3210 if (out_of_bound_ret_zero) {
3211 z = lp_build_layer_coord(bld, texture_unit, false, z, &out1);
3212 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3213 } else {
3214 z = lp_build_layer_coord(bld, texture_unit, false, z, NULL);
3215 }
3216 }
3217
3218 /* This is a lot like border sampling */
3219 if (offsets[0]) {
3220 /*
3221 * coords are really unsigned, offsets are signed, but I don't think
3222 * exceeding 31 bits is possible
3223 */
3224 x = lp_build_add(int_coord_bld, x, offsets[0]);
3225 }
3226 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
3227 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3228 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
3229 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3230
3231 if (dims >= 2) {
3232 if (offsets[1]) {
3233 y = lp_build_add(int_coord_bld, y, offsets[1]);
3234 }
3235 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
3236 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3237 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
3238 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3239
3240 if (dims >= 3) {
3241 if (offsets[2]) {
3242 z = lp_build_add(int_coord_bld, z, offsets[2]);
3243 }
3244 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
3245 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3246 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
3247 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3248 }
3249 }
3250
3251 lp_build_sample_offset(int_coord_bld,
3252 bld->format_desc,
3253 x, y, z, row_stride_vec, img_stride_vec,
3254 &offset, &i, &j);
3255
3256 if (bld->static_texture_state->target != PIPE_BUFFER) {
3257 offset = lp_build_add(int_coord_bld, offset,
3258 lp_build_get_mip_offsets(bld, ilevel));
3259 }
3260
3261 if (bld->fetch_ms && bld->static_texture_state->level_zero_only) {
3262 LLVMValueRef num_samples = bld->dynamic_state->last_level(bld->gallivm,
3263 bld->resources_type,
3264 bld->resources_ptr,
3265 texture_unit, NULL);
3266 num_samples = LLVMBuildZExt(bld->gallivm->builder, num_samples,
3267 bld->int_bld.elem_type, "");
3268 LLVMValueRef sample_stride = lp_sample_load_mip_value(bld->gallivm,
3269 bld->mip_offsets_type,
3270 bld->mip_offsets,
3271 lp_build_const_int32(bld->gallivm, LP_JIT_TEXTURE_SAMPLE_STRIDE));
3272 lp_build_sample_ms_offset(int_coord_bld, ms_index, num_samples, sample_stride,
3273 &offset, &out_of_bounds);
3274 }
3275
3276 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
3277
3278 lp_build_fetch_rgba_soa(bld->gallivm,
3279 bld->format_desc,
3280 bld->texel_type, true,
3281 bld->base_ptr, offset,
3282 i, j,
3283 bld->cache,
3284 colors_out);
3285
3286 if (out_of_bound_ret_zero) {
3287 /*
3288 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
3289 * Could use min/max above instead of out-of-bounds comparisons
3290 * if we don't care about the result returned for out-of-bounds.
3291 */
3292 LLVMValueRef oob[4] = {
3293 bld->texel_bld.zero,
3294 bld->texel_bld.zero,
3295 bld->texel_bld.zero,
3296 bld->texel_bld.zero,
3297 };
3298 lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, oob, oob);
3299 for (chan = 0; chan < 4; chan++) {
3300 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
3301 oob[chan], colors_out[chan]);
3302 }
3303 }
3304 }
3305
3306
3307 /**
3308 * Just set texels to white instead of actually sampling the texture.
3309 * For debugging.
3310 */
3311 void
lp_build_sample_nop(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * coords,LLVMValueRef texel_out[4])3312 lp_build_sample_nop(struct gallivm_state *gallivm,
3313 struct lp_type type,
3314 const LLVMValueRef *coords,
3315 LLVMValueRef texel_out[4])
3316 {
3317 LLVMValueRef one = lp_build_one(gallivm, type);
3318 for (unsigned chan = 0; chan < 4; chan++) {
3319 texel_out[chan] = one;
3320 }
3321 }
3322
3323
3324 struct lp_type
lp_build_texel_type(struct lp_type texel_type,const struct util_format_description * format_desc)3325 lp_build_texel_type(struct lp_type texel_type,
3326 const struct util_format_description *format_desc)
3327 {
3328 /* always using the first channel hopefully should be safe,
3329 * if not things WILL break in other places anyway.
3330 */
3331 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
3332 format_desc->channel[0].pure_integer) {
3333 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
3334 texel_type = lp_type_int_vec(texel_type.width, texel_type.width * texel_type.length);
3335 } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
3336 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3337 }
3338 } else if (util_format_has_stencil(format_desc) &&
3339 !util_format_has_depth(format_desc)) {
3340 /* for stencil only formats, sample stencil (uint) */
3341 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3342 }
3343 return texel_type;
3344 }
3345
3346
/**
 * Build the actual texture sampling code.
 * 'texel_out' will return a vector of four LLVMValueRefs corresponding to
 * R, G, B, A.
 * \param type         vector float type to use for coords, etc.
 * \param sample_key   packed lp_sampler_* bits selecting op type, lod
 *                     control/property, gather component, etc.
 * \param derivs       partial derivatives of (s,t,r,q) with respect to x and y
 * \param lod          explicit lod or lod bias, depending on lod_control
 *                     (optional)
 * \param ms_index     per-pixel sample index for multisample fetch (optional)
 */
void
lp_build_sample_soa_code(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         struct lp_type type,
                         unsigned sample_key,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMTypeRef resources_type,
                         LLVMValueRef resources_ptr,
                         LLVMTypeRef thread_data_type,
                         LLVMValueRef thread_data_ptr,
                         const LLVMValueRef *coords,
                         const LLVMValueRef *offsets,
                         const struct lp_derivatives *derivs, /* optional */
                         LLVMValueRef lod, /* optional */
                         LLVMValueRef ms_index, /* optional */
                         LLVMValueRef aniso_filter_table,
                         LLVMValueRef texel_out[4])
{
   assert(static_texture_state);
   assert(static_texture_state->format < PIPE_FORMAT_COUNT);
   assert(static_sampler_state);

   const enum pipe_texture_target target = static_texture_state->target;
   const unsigned dims = texture_dims(target);
   const unsigned num_quads = type.length / 4;
   struct lp_build_sample_context bld;
   /* Local, possibly-overridden copy of the sampler state (filters/wrap
    * modes get rewritten below for gather, cube maps, level-zero-only).
    */
   struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMBuilderRef builder = gallivm->builder;
   const struct util_format_description *res_format_desc;

   if (0) {
      /* Debug aid: flip to 1 to trace which formats get sampled. */
      enum pipe_format fmt = static_texture_state->format;
      debug_printf("Sample from %s\n", util_format_name(fmt));
   }

   /* Unpack the sample_key bitfield. */
   const enum lp_sampler_lod_property lod_property =
      (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
      LP_SAMPLER_LOD_PROPERTY_SHIFT;
   const enum lp_sampler_lod_control lod_control =
      (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
      LP_SAMPLER_LOD_CONTROL_SHIFT;
   const enum lp_sampler_op_type op_type =
      (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
      LP_SAMPLER_OP_TYPE_SHIFT;

   const bool fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
   const bool op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
   const bool op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
   const bool op_is_gather = op_type == LP_SAMPLER_OP_GATHER;

   /* The 'lod' argument means bias or explicit lod depending on lod_control;
    * split it out here and sanity-check the mutually exclusive combinations.
    */
   LLVMValueRef lod_bias = NULL;
   LLVMValueRef explicit_lod = NULL;
   if (lod_control == LP_SAMPLER_LOD_BIAS) {
      lod_bias = lod;
      assert(lod);
      assert(derivs == NULL);
   } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      explicit_lod = lod;
      assert(lod);
      assert(derivs == NULL);
   } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      assert(derivs);
      assert(lod == NULL);
   } else {
      assert(derivs == NULL);
      assert(lod == NULL);
   }

   if (static_texture_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      LLVMValueRef zero = lp_build_zero(gallivm, type);
      for (unsigned chan = 0; chan < 4; chan++) {
         texel_out[chan] = zero;
      }
      return;
   }

   assert(type.floating);

   /* Setup our build context */
   memset(&bld, 0, sizeof bld);
   bld.gallivm = gallivm;
   bld.resources_type = resources_type;
   bld.resources_ptr = resources_ptr;
   bld.aniso_filter_table = aniso_filter_table;
   bld.static_sampler_state = &derived_sampler_state;
   bld.static_texture_state = static_texture_state;
   bld.dynamic_state = dynamic_state;
   bld.format_desc = util_format_description(static_texture_state->format);
   bld.dims = dims;

   res_format_desc = util_format_description(static_texture_state->res_format);

   /* Performance knobs from the GALLIVM_PERF env; lodq always takes the
    * precise paths since its result is directly observable by the shader.
    */
   if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
      bld.no_quad_lod = true;
   }
   if (!(gallivm_perf & GALLIVM_PERF_RHO_APPROX) || op_is_lodq) {
      bld.no_rho_approx = true;
   }
   if (!(gallivm_perf & GALLIVM_PERF_BRILINEAR) || op_is_lodq || lod_bias || explicit_lod) {
      bld.no_brilinear = true;
   }

   bld.vector_width = lp_type_width(type);

   bld.float_type = lp_type_float(32);
   bld.int_type = lp_type_int(32);
   bld.coord_type = type;
   bld.int_coord_type = lp_int_type(type);
   bld.float_size_in_type = lp_type_float(32);
   bld.float_size_in_type.length = dims > 1 ? 4 : 1;
   bld.int_size_in_type = lp_int_type(bld.float_size_in_type);

   bld.texel_type = lp_build_texel_type(type, bld.format_desc);

   /* If the texture only has level zero and max_lod can't go positive,
    * mipmap filtering can never select another level, so drop it.
    */
   if (!static_texture_state->level_zero_only ||
       !static_sampler_state->max_lod_pos || op_is_lodq) {
      derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
   } else {
      derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   }

   if (op_is_gather) {
      /*
       * gather4 is exactly like GL_LINEAR filtering but in the end skipping
       * the actual filtering. Using mostly the same paths, so cube face
       * selection, coord wrapping etc. all naturally uses the same code.
       */
      derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
      derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
   }

   const enum pipe_tex_mipfilter mip_filter =
      derived_sampler_state.min_mip_filter;

   if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
       static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
      /*
       * Seamless filtering ignores wrap modes.
       * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
       * bilinear it's not correct but way better than using for instance
       * repeat. Note we even set this for non-seamless. Technically GL
       * allows any wrap mode, which made sense when supporting true borders
       * (can get seamless effect with border and CLAMP_TO_BORDER), but
       * gallium doesn't support borders and d3d9 requires wrap modes to be
       * ignored and it's a pain to fix up the sampler state (as it makes it
       * texture dependent).
       */
      derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   }

   /*
    * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
    * so AoS path could be used. Not sure it's worth the trouble...
    */
   const enum pipe_tex_filter min_img_filter =
      derived_sampler_state.min_img_filter;
   const enum pipe_tex_filter mag_img_filter =
      derived_sampler_state.mag_img_filter;

   /*
    * This is all a bit complicated different paths are chosen for performance
    * reasons.
    * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
    * everything (the last two options are equivalent for 4-wide case).
    * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
    * lod is calculated then the lod value extracted afterwards so making this
    * case basically the same as far as lod handling is concerned for the
    * further sample/filter code as the 1 lod for everything case.
    * Different lod handling mostly shows up when building mipmap sizes
    * (lp_build_mipmap_level_sizes() and friends) and also in filtering
    * (getting the fractional part of the lod to the right texels).
    */

   /*
    * There are other situations where at least the multiple int lods could be
    * avoided like min and max lod being equal.
    */
   bld.num_mips = bld.num_lods = 1;

   if ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
        (static_texture_state->target == PIPE_TEXTURE_CUBE ||
         static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
       op_is_lodq) {
      /*
       * special case for using per-pixel lod even for implicit lod,
       * which is generally never required (ok by APIs) except to please
       * some (somewhat broken imho) tests (because per-pixel face selection
       * can cause derivatives to be different for pixels outside the primitive
       * due to the major axis division even if pre-project derivatives are
       * looking normal).
       * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
       * cube maps we do indeed get per-pixel lod values).
       */
      bld.num_mips = type.length;
      bld.num_lods = type.length;
   } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
              (explicit_lod || lod_bias || derivs)) {
      if ((!op_is_tex && target != PIPE_BUFFER) ||
          (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
         bld.num_mips = type.length;
         bld.num_lods = type.length;
      } else if (op_is_tex && min_img_filter != mag_img_filter) {
         bld.num_mips = 1;
         bld.num_lods = type.length;
      }
   }
   /* TODO: for true scalar_lod should only use 1 lod value */
   else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
            (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
      bld.num_mips = num_quads;
      bld.num_lods = num_quads;
   } else if (op_is_tex && min_img_filter != mag_img_filter) {
      bld.num_mips = 1;
      bld.num_lods = num_quads;
   }

   bld.fetch_ms = fetch_ms;
   if (op_is_gather)
      bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
   bld.lodf_type = type;
   /* we want native vector size to be able to use our intrinsics */
   if (bld.num_lods != type.length) {
      /* TODO: this currently always has to be per-quad or per-element */
      bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
   }
   bld.lodi_type = lp_int_type(bld.lodf_type);
   bld.levelf_type = bld.lodf_type;
   if (bld.num_mips == 1) {
      bld.levelf_type.length = 1;
   }
   bld.leveli_type = lp_int_type(bld.levelf_type);
   bld.float_size_type = bld.float_size_in_type;

   /* Note: size vectors may not be native. They contain minified w/h/d/_
    * values, with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to
    * 8x4f32
    */
   if (bld.num_mips > 1) {
      bld.float_size_type.length = bld.num_mips == type.length ?
                                      bld.num_mips * bld.float_size_in_type.length :
                                      type.length;
   }
   bld.int_size_type = lp_int_type(bld.float_size_type);

   lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
   lp_build_context_init(&bld.float_vec_bld, gallivm, type);
   lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
   lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
   lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
   lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
   lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
   lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
   lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
   lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
   lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
   lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
   lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
   lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);

   /* Get the dynamic state */
   LLVMValueRef tex_width = dynamic_state->width(gallivm, resources_type,
                                                 resources_ptr, texture_index,
                                                 NULL);
   bld.row_stride_array = dynamic_state->row_stride(gallivm, resources_type,
                                                    resources_ptr, texture_index, NULL,
                                                    &bld.row_stride_type);
   bld.img_stride_array = dynamic_state->img_stride(gallivm, resources_type,
                                                    resources_ptr, texture_index, NULL,
                                                    &bld.img_stride_type);
   bld.base_ptr = dynamic_state->base_ptr(gallivm, resources_type,
                                          resources_ptr, texture_index, NULL);
   bld.mip_offsets = dynamic_state->mip_offsets(gallivm, resources_type,
                                                resources_ptr, texture_index, NULL,
                                                &bld.mip_offsets_type);

   /* Note that mip_offsets is an array[level] of offsets to texture images */

   if (dynamic_state->cache_ptr && thread_data_ptr) {
      bld.cache = dynamic_state->cache_ptr(gallivm, thread_data_type,
                                           thread_data_ptr, texture_index);
   }

   /* Block sizes of the resource format vs. the view format; they can
    * differ e.g. for compressed resources viewed through an uncompressed
    * format.
    */
   uint32_t res_bw = res_format_desc->block.width;
   uint32_t res_bh = res_format_desc->block.height;
   uint32_t bw = bld.format_desc->block.width;
   uint32_t bh = bld.format_desc->block.height;

   /* only scale if the blocksizes are different. */
   if (res_bw == bw)
      res_bw = bw = 1;
   if (res_bh == bh)
      res_bh = bh = 1;

   /* width, height, depth as single int vector */
   if (dims <= 1) {
      bld.int_size = tex_width;
      bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0);
      bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0);
      bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0);
   } else {
      /* Build 4-wide size vectors: element 0 = width, 1 = height,
       * 2 = depth (remaining lanes stay undef).
       */
      bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                            tex_width,
                                            LLVMConstInt(i32t, 0, 0), "");
      bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                                     LLVMConstInt(i32t, res_bw, 0),
                                                     LLVMConstInt(i32t, 0, 0), "");
      bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                                          LLVMConstInt(i32t, util_logbase2(res_bw), 0),
                                                          LLVMConstInt(i32t, 0, 0), "");
      bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                                      LLVMConstInt(i32t, bw, 0),
                                                      LLVMConstInt(i32t, 0, 0), "");
      if (dims >= 2) {
         LLVMValueRef tex_height =
            dynamic_state->height(gallivm, resources_type,
                                  resources_ptr, texture_index, NULL);
         tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
                                    bld.int_bld.elem_type, "");
         bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
                                               tex_height,
                                               LLVMConstInt(i32t, 1, 0), "");
         bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
                                                        LLVMConstInt(i32t, res_bh, 0),
                                                        LLVMConstInt(i32t, 1, 0), "");
         bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
                                                             LLVMConstInt(i32t, util_logbase2(res_bh), 0),
                                                             LLVMConstInt(i32t, 1, 0), "");
         bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
                                                         LLVMConstInt(i32t, bh, 0),
                                                         LLVMConstInt(i32t, 1, 0), "");
         if (dims >= 3) {
            LLVMValueRef tex_depth =
               dynamic_state->depth(gallivm, resources_type, resources_ptr,
                                    texture_index, NULL);
            tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
                                      bld.int_bld.elem_type, "");
            bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
                                                  tex_depth,
                                                  LLVMConstInt(i32t, 2, 0), "");
            /* Block compression is 2D only, hence blocksize 1 (log2 0)
             * in the depth dimension.
             */
            bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
                                                           LLVMConstInt(i32t, 1, 0),
                                                           LLVMConstInt(i32t, 2, 0), "");
            bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
                                                                LLVMConstInt(i32t, 0, 0),
                                                                LLVMConstInt(i32t, 2, 0), "");
            bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
                                                            LLVMConstInt(i32t, 1, 0),
                                                            LLVMConstInt(i32t, 2, 0), "");
         }
      }
   }

   /* Mutable copy of the coords (cube-array layer folding below may
    * rewrite entries).
    */
   LLVMValueRef newcoords[5];
   for (unsigned i = 0; i < 5; i++) {
      newcoords[i] = coords[i];
   }

   if (util_format_is_pure_integer(static_texture_state->format) &&
       !util_format_has_depth(bld.format_desc) && op_is_tex &&
       (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
        static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
        static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
      /*
       * Bail if impossible filtering is specified (the awkard additional
       * depth check is because it is legal in gallium to have things like
       * S8Z24 here which would say it's pure int despite such formats should
       * sample the depth component).
       * In GL such filters make the texture incomplete, this makes it robust
       * against gallium frontends which set this up regardless (we'd crash in
       * the lerp later otherwise).
       * At least in some apis it may be legal to use such filters with lod
       * queries and/or gather (at least for gather d3d10 says only the wrap
       * bits are really used hence filter bits are likely simply ignored).
       * For fetch, we don't get valid samplers either way here.
       */
      LLVMValueRef zero = lp_build_zero(gallivm, type);
      for (unsigned chan = 0; chan < 4; chan++) {
         texel_out[chan] = zero;
      }
      return;
   }

   if (0) {
      /* For debug: no-op texture sampling */
      lp_build_sample_nop(gallivm,
                          bld.texel_type,
                          newcoords,
                          texel_out);
   } else if (op_type == LP_SAMPLER_OP_FETCH) {
      lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
                           lod, offsets, texel_out);
   } else {
      LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
      /* NOTE: this 'lod' intentionally shadows the parameter, which has
       * already been captured as lod_bias/explicit_lod above.
       */
      LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
      /* Decide whether the fixed-point AoS fast path is usable. */
      bool use_aos = util_format_fits_8unorm(bld.format_desc) &&
                     op_is_tex &&
                     /* not sure this is strictly needed or simply impossible */
                     derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
                     derived_sampler_state.aniso == 0 &&
                     lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);

      use_aos &= bld.num_lods <= num_quads ||
                 derived_sampler_state.min_img_filter ==
                    derived_sampler_state.mag_img_filter;

      if (gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
         use_aos = 0;
      }

      if (dims > 1) {
         use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
         if (dims > 2) {
            use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
         }
      }
      if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
           static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
          derived_sampler_state.seamless_cube_map &&
          (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
           derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
         /* theoretically possible with AoS filtering but not implemented (complex!) */
         use_aos = 0;
      }

      if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
          !use_aos && util_format_fits_8unorm(bld.format_desc)) {
         debug_printf("%s: using floating point linear filtering for %s\n",
                      __func__, bld.format_desc->short_name);
         debug_printf("  min_img %d  mag_img %d  mip %d  target %d  seamless %d"
                      "  wraps %d  wrapt %d  wrapr %d\n",
                      derived_sampler_state.min_img_filter,
                      derived_sampler_state.mag_img_filter,
                      derived_sampler_state.min_mip_filter,
                      static_texture_state->target,
                      derived_sampler_state.seamless_cube_map,
                      derived_sampler_state.wrap_s,
                      derived_sampler_state.wrap_t,
                      derived_sampler_state.wrap_r);
      }

      /* Common lod/level computation (also does cube face selection). */
      lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
                             newcoords, derivs, lod_bias, explicit_lod,
                             &lod_positive, &lod, &lod_fpart,
                             &ilevel0, &ilevel1);

      if (op_is_lodq) {
         /* lod query returns (fractional lod, clamped lod, 0, 0) */
         texel_out[0] = lod_fpart;
         texel_out[1] = lod;
         texel_out[2] = texel_out[3] = bld.coord_bld.zero;
         return;
      }

      if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* The aos path doesn't do seamless filtering so simply add cube layer
          * to face now.
          */
         newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
      }

      /*
       * we only try 8-wide sampling with soa or if we have AVX2
       * as it appears to be a loss with just AVX)
       */
      if (num_quads == 1 || !use_aos ||
          (util_get_cpu_caps()->has_avx2 &&
           (bld.num_lods == 1 ||
            derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
         if (use_aos) {
            /* do sampling/filtering with fixed pt arithmetic */
            lp_build_sample_aos(&bld,
                                newcoords[0], newcoords[1],
                                newcoords[2],
                                offsets, lod_positive, lod_fpart,
                                ilevel0, ilevel1,
                                texel_out);
         } else {
            lp_build_sample_general(&bld, sampler_index,
                                    op_type == LP_SAMPLER_OP_GATHER,
                                    newcoords, offsets,
                                    lod_positive, lod_fpart,
                                    ilevel0, ilevel1,
                                    texel_out);
         }
      } else {
         /* Split the wide vector into 4-wide quads and sample each one
          * separately with a 4-wide clone of the build context; results
          * are concatenated back afterwards.
          */
         struct lp_build_sample_context bld4;
         struct lp_type type4 = type;
         LLVMValueRef texelout4[4];
         LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];

         type4.length = 4;

         /* Setup our build context */
         memset(&bld4, 0, sizeof bld4);
         bld4.no_quad_lod = bld.no_quad_lod;
         bld4.no_rho_approx = bld.no_rho_approx;
         bld4.no_brilinear = bld.no_brilinear;
         bld4.gallivm = bld.gallivm;
         bld4.resources_type = bld.resources_type;
         bld4.resources_ptr = bld.resources_ptr;
         bld4.aniso_filter_table = aniso_filter_table;
         bld4.static_texture_state = bld.static_texture_state;
         bld4.static_sampler_state = bld.static_sampler_state;
         bld4.dynamic_state = bld.dynamic_state;
         bld4.format_desc = bld.format_desc;
         bld4.dims = bld.dims;
         bld4.row_stride_type = bld.row_stride_type;
         bld4.row_stride_array = bld.row_stride_array;
         bld4.img_stride_type = bld.img_stride_type;
         bld4.img_stride_array = bld.img_stride_array;
         bld4.base_ptr = bld.base_ptr;
         bld4.mip_offsets_type = bld.mip_offsets_type;
         bld4.mip_offsets = bld.mip_offsets;
         bld4.int_size = bld.int_size;
         bld4.int_tex_blocksize = bld.int_tex_blocksize;
         bld4.int_tex_blocksize_log2 = bld.int_tex_blocksize_log2;
         bld4.int_view_blocksize = bld.int_view_blocksize;
         bld4.cache = bld.cache;

         bld4.vector_width = lp_type_width(type4);

         bld4.float_type = lp_type_float(32);
         bld4.int_type = lp_type_int(32);
         bld4.coord_type = type4;
         bld4.int_coord_type = lp_int_type(type4);
         bld4.float_size_in_type = lp_type_float(32);
         bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
         bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
         bld4.texel_type = bld.texel_type;
         bld4.texel_type.length = 4;

         /* Same num_mips/num_lods selection logic as above, but for the
          * 4-wide sub-context.
          */
         bld4.num_mips = bld4.num_lods = 1;
         if (bld4.no_quad_lod && bld4.no_rho_approx &&
             (static_texture_state->target == PIPE_TEXTURE_CUBE ||
              static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
             (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
            bld4.num_mips = type4.length;
            bld4.num_lods = type4.length;
         }
         if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
             (explicit_lod || lod_bias || derivs)) {
            if ((!op_is_tex && target != PIPE_BUFFER) ||
                (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
               bld4.num_mips = type4.length;
               bld4.num_lods = type4.length;
            } else if (op_is_tex && min_img_filter != mag_img_filter) {
               bld4.num_mips = 1;
               bld4.num_lods = type4.length;
            }
         }

         /* we want native vector size to be able to use our intrinsics */
         bld4.lodf_type = type4;
         if (bld4.num_lods != type4.length) {
            bld4.lodf_type.length = 1;
         }
         bld4.lodi_type = lp_int_type(bld4.lodf_type);
         bld4.levelf_type = type4;
         if (bld4.num_mips != type4.length) {
            bld4.levelf_type.length = 1;
         }
         bld4.leveli_type = lp_int_type(bld4.levelf_type);
         bld4.float_size_type = bld4.float_size_in_type;
         if (bld4.num_mips > 1) {
            bld4.float_size_type.length = bld4.num_mips == type4.length ?
                                             bld4.num_mips * bld4.float_size_in_type.length :
                                             type4.length;
         }
         bld4.int_size_type = lp_int_type(bld4.float_size_type);

         lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
         lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
         lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
         lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
         lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
         lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
         lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
         lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
         lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
         lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
         lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
         lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
         lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
         lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);

         for (unsigned i = 0; i < num_quads; i++) {
            LLVMValueRef s4, t4, r4;
            LLVMValueRef lod_positive4, lod_fpart4 = NULL;
            LLVMValueRef ilevel04, ilevel14 = NULL;
            LLVMValueRef offsets4[4] = { NULL };
            unsigned num_lods = bld4.num_lods;

            /* Extract this quad's slice of the wide coord vectors. */
            s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
            t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
            r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);

            if (offsets[0]) {
               offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
               if (dims > 1) {
                  offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
                  if (dims > 2) {
                     offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
                  }
               }
            }
            lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
            ilevel04 = bld.num_mips == 1 ? ilevel0 :
                          lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
               ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
               lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
            }

            if (use_aos) {
               /* do sampling/filtering with fixed pt arithmetic */
               lp_build_sample_aos(&bld4,
                                   s4, t4, r4, offsets4,
                                   lod_positive4, lod_fpart4,
                                   ilevel04, ilevel14,
                                   texelout4);
            } else {
               /* this path is currently unreachable and hence might break easily... */
               LLVMValueRef newcoords4[5];
               newcoords4[0] = s4;
               newcoords4[1] = t4;
               newcoords4[2] = r4;
               newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
               newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);

               lp_build_sample_general(&bld4, sampler_index,
                                       op_type == LP_SAMPLER_OP_GATHER,
                                       newcoords4, offsets4,
                                       lod_positive4, lod_fpart4,
                                       ilevel04, ilevel14,
                                       texelout4);
            }
            for (unsigned j = 0; j < 4; j++) {
               texelouttmp[j][i] = texelout4[j];
            }
         }

         /* Re-assemble the per-quad results into full-width vectors. */
         for (unsigned j = 0; j < 4; j++) {
            texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
         }
      }
   }

   if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
      apply_sampler_swizzle(&bld, texel_out);
   }

   /*
    * texel type can be a (32bit) int/uint (for pure int formats only),
    * however we are expected to always return floats (storage is untyped).
    */
   if (!bld.texel_type.floating) {
      unsigned chan;
      for (chan = 0; chan < 4; chan++) {
         texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
                                            lp_build_vec_type(gallivm, type), "");
      }
   }
}
4027
4028
4029 #define USE_TEX_FUNC_CALL 1
4030
4031 static inline void
get_target_info(enum pipe_texture_target target,unsigned * num_coords,unsigned * num_derivs,unsigned * num_offsets,unsigned * layer)4032 get_target_info(enum pipe_texture_target target,
4033 unsigned *num_coords, unsigned *num_derivs,
4034 unsigned *num_offsets, unsigned *layer)
4035 {
4036 unsigned dims = texture_dims(target);
4037 *num_coords = dims;
4038 *num_offsets = dims;
4039 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
4040 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
4041 *layer = has_layer_coord(target) ? 2: 0;
4042 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
4043 /*
4044 * dims doesn't include r coord for cubes - this is handled
4045 * by layer instead, but need to fix up for cube arrays...
4046 */
4047 *layer = 3;
4048 *num_coords = 3;
4049 }
4050 }
4051
4052
4053 /**
4054 * Generate the function body for a texture sampling function.
4055 */
4056 static void
lp_build_sample_gen_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,LLVMTypeRef resources_type,LLVMTypeRef thread_data_type,unsigned texture_index,unsigned sampler_index,LLVMValueRef function,unsigned num_args,unsigned sample_key,bool has_aniso_filter_table)4057 lp_build_sample_gen_func(struct gallivm_state *gallivm,
4058 const struct lp_static_texture_state *static_texture_state,
4059 const struct lp_static_sampler_state *static_sampler_state,
4060 struct lp_sampler_dynamic_state *dynamic_state,
4061 struct lp_type type,
4062 LLVMTypeRef resources_type,
4063 LLVMTypeRef thread_data_type,
4064 unsigned texture_index,
4065 unsigned sampler_index,
4066 LLVMValueRef function,
4067 unsigned num_args,
4068 unsigned sample_key,
4069 bool has_aniso_filter_table)
4070 {
4071 LLVMBuilderRef old_builder;
4072 LLVMBasicBlockRef block;
4073 LLVMValueRef coords[5];
4074 LLVMValueRef offsets[3] = { NULL };
4075 LLVMValueRef lod = NULL;
4076 LLVMValueRef ms_index = NULL;
4077 LLVMValueRef resources_ptr;
4078 LLVMValueRef thread_data_ptr = NULL;
4079 LLVMValueRef aniso_filter_table = NULL;
4080 LLVMValueRef texel_out[4];
4081 struct lp_derivatives derivs;
4082 struct lp_derivatives *deriv_ptr = NULL;
4083 unsigned num_param = 0;
4084 unsigned num_coords, num_derivs, num_offsets, layer;
4085 bool need_cache = false;
4086
4087 const enum lp_sampler_lod_control lod_control =
4088 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK)
4089 >> LP_SAMPLER_LOD_CONTROL_SHIFT;
4090
4091 const enum lp_sampler_op_type op_type =
4092 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4093
4094 get_target_info(static_texture_state->target,
4095 &num_coords, &num_derivs, &num_offsets, &layer);
4096
4097 /* lod query doesn't take a layer */
4098 if (layer && op_type == LP_SAMPLER_OP_LODQ)
4099 layer = 0;
4100
4101 if (dynamic_state->cache_ptr) {
4102 const struct util_format_description *format_desc;
4103 format_desc = util_format_description(static_texture_state->format);
4104 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4105 need_cache = true;
4106 }
4107 }
4108
4109 /* "unpack" arguments */
4110 resources_ptr = LLVMGetParam(function, num_param++);
4111 if (has_aniso_filter_table)
4112 aniso_filter_table = LLVMGetParam(function, num_param++);
4113 if (need_cache) {
4114 thread_data_ptr = LLVMGetParam(function, num_param++);
4115 }
4116 for (unsigned i = 0; i < num_coords; i++) {
4117 coords[i] = LLVMGetParam(function, num_param++);
4118 }
4119 for (unsigned i = num_coords; i < 5; i++) {
4120 /* This is rather unfortunate... */
4121 coords[i] = lp_build_undef(gallivm, type);
4122 }
4123 if (layer) {
4124 coords[layer] = LLVMGetParam(function, num_param++);
4125 }
4126 if (sample_key & LP_SAMPLER_SHADOW) {
4127 coords[4] = LLVMGetParam(function, num_param++);
4128 }
4129 if (sample_key & LP_SAMPLER_FETCH_MS) {
4130 ms_index = LLVMGetParam(function, num_param++);
4131 }
4132 if (sample_key & LP_SAMPLER_OFFSETS) {
4133 for (unsigned i = 0; i < num_offsets; i++) {
4134 offsets[i] = LLVMGetParam(function, num_param++);
4135 }
4136 }
4137 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4138 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4139 lod = LLVMGetParam(function, num_param++);
4140 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4141 for (unsigned i = 0; i < num_derivs; i++) {
4142 derivs.ddx[i] = LLVMGetParam(function, num_param++);
4143 derivs.ddy[i] = LLVMGetParam(function, num_param++);
4144 }
4145 deriv_ptr = &derivs;
4146 }
4147
4148 assert(num_args == num_param);
4149
4150 /*
4151 * Function body
4152 */
4153
4154 old_builder = gallivm->builder;
4155 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
4156 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
4157 LLVMPositionBuilderAtEnd(gallivm->builder, block);
4158
4159 lp_build_sample_soa_code(gallivm,
4160 static_texture_state,
4161 static_sampler_state,
4162 dynamic_state,
4163 type,
4164 sample_key,
4165 texture_index,
4166 sampler_index,
4167 resources_type,
4168 resources_ptr,
4169 thread_data_type,
4170 thread_data_ptr,
4171 coords,
4172 offsets,
4173 deriv_ptr,
4174 lod,
4175 ms_index,
4176 aniso_filter_table,
4177 texel_out);
4178
4179 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
4180
4181 LLVMDisposeBuilder(gallivm->builder);
4182 gallivm->builder = old_builder;
4183
4184 gallivm_verify_function(gallivm, function);
4185 }
4186
4187
4188 /**
4189 * Call the matching function for texture sampling.
4190 * If there's no match, generate a new one.
4191 */
4192 static void
lp_build_sample_soa_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,const struct lp_sampler_params * params,unsigned texture_index,unsigned sampler_index,LLVMValueRef * tex_ret)4193 lp_build_sample_soa_func(struct gallivm_state *gallivm,
4194 const struct lp_static_texture_state *static_texture_state,
4195 const struct lp_static_sampler_state *static_sampler_state,
4196 struct lp_sampler_dynamic_state *dynamic_state,
4197 const struct lp_sampler_params *params,
4198 unsigned texture_index, unsigned sampler_index,
4199 LLVMValueRef *tex_ret)
4200 {
4201 LLVMBuilderRef builder = gallivm->builder;
4202 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
4203 LLVMGetInsertBlock(builder)));
4204 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
4205 unsigned sample_key = params->sample_key;
4206 const LLVMValueRef *coords = params->coords;
4207 const LLVMValueRef *offsets = params->offsets;
4208 const struct lp_derivatives *derivs = params->derivs;
4209
4210 const enum lp_sampler_lod_control lod_control =
4211 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
4212 LP_SAMPLER_LOD_CONTROL_SHIFT;
4213
4214 const enum lp_sampler_op_type op_type =
4215 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4216
4217 unsigned num_coords, num_derivs, num_offsets, layer;
4218 get_target_info(static_texture_state->target,
4219 &num_coords, &num_derivs, &num_offsets, &layer);
4220
4221 /* lod query doesn't take a layer */
4222 if (layer && op_type == LP_SAMPLER_OP_LODQ)
4223 layer = 0;
4224
4225 bool need_cache = false;
4226 if (dynamic_state->cache_ptr) {
4227 const struct util_format_description *format_desc;
4228 format_desc = util_format_description(static_texture_state->format);
4229 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4230 need_cache = true;
4231 }
4232 }
4233
4234 /*
4235 * texture function matches are found by name.
4236 * Thus the name has to include both the texture and sampler unit
4237 * (which covers all static state) plus the actual texture function
4238 * (including things like offsets, shadow coord, lod control).
4239 * Additionally lod_property has to be included too.
4240 */
4241 char func_name[64];
4242 snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
4243 texture_index, sampler_index, sample_key);
4244
4245 LLVMValueRef function = LLVMGetNamedFunction(module, func_name);
4246 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
4247 LLVMTypeRef ret_type;
4248 LLVMTypeRef val_type[4];
4249 unsigned num_param = 0;
4250
4251 /*
4252 * Generate the function prototype.
4253 */
4254
4255 arg_types[num_param++] = LLVMTypeOf(params->resources_ptr);
4256 if (params->aniso_filter_table)
4257 arg_types[num_param++] = LLVMTypeOf(params->aniso_filter_table);
4258 if (need_cache) {
4259 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
4260 }
4261 for (unsigned i = 0; i < num_coords; i++) {
4262 arg_types[num_param++] = LLVMTypeOf(coords[0]);
4263 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
4264 }
4265 if (layer) {
4266 arg_types[num_param++] = LLVMTypeOf(coords[layer]);
4267 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
4268 }
4269 if (sample_key & LP_SAMPLER_SHADOW) {
4270 arg_types[num_param++] = LLVMTypeOf(coords[0]);
4271 }
4272 if (sample_key & LP_SAMPLER_FETCH_MS) {
4273 arg_types[num_param++] = LLVMTypeOf(params->ms_index);
4274 }
4275 if (sample_key & LP_SAMPLER_OFFSETS) {
4276 for (unsigned i = 0; i < num_offsets; i++) {
4277 arg_types[num_param++] = LLVMTypeOf(offsets[0]);
4278 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
4279 }
4280 }
4281 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4282 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4283 arg_types[num_param++] = LLVMTypeOf(params->lod);
4284 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4285 for (unsigned i = 0; i < num_derivs; i++) {
4286 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
4287 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
4288 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
4289 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
4290 }
4291 }
4292
4293 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
4294 lp_build_vec_type(gallivm, params->type);
4295 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
4296 LLVMTypeRef function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
4297
4298 if (!function) {
4299 function = LLVMAddFunction(module, func_name, function_type);
4300
4301 for (unsigned i = 0; i < num_param; ++i) {
4302 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
4303
4304 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
4305 }
4306 }
4307
4308 LLVMSetFunctionCallConv(function, LLVMFastCallConv);
4309 LLVMSetLinkage(function, LLVMInternalLinkage);
4310
4311 lp_build_sample_gen_func(gallivm,
4312 static_texture_state,
4313 static_sampler_state,
4314 dynamic_state,
4315 params->type,
4316 params->resources_type,
4317 params->thread_data_type,
4318 texture_index,
4319 sampler_index,
4320 function,
4321 num_param,
4322 sample_key,
4323 params->aniso_filter_table ? true : false);
4324 }
4325
4326 unsigned num_args = 0;
4327 args[num_args++] = params->resources_ptr;
4328 if (params->aniso_filter_table)
4329 args[num_args++] = params->aniso_filter_table;
4330 if (need_cache) {
4331 args[num_args++] = params->thread_data_ptr;
4332 }
4333 for (unsigned i = 0; i < num_coords; i++) {
4334 args[num_args++] = coords[i];
4335 }
4336 if (layer) {
4337 args[num_args++] = coords[layer];
4338 }
4339 if (sample_key & LP_SAMPLER_SHADOW) {
4340 args[num_args++] = coords[4];
4341 }
4342 if (sample_key & LP_SAMPLER_FETCH_MS) {
4343 args[num_args++] = params->ms_index;
4344 }
4345 if (sample_key & LP_SAMPLER_OFFSETS) {
4346 for (unsigned i = 0; i < num_offsets; i++) {
4347 args[num_args++] = offsets[i];
4348 }
4349 }
4350 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4351 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4352 args[num_args++] = params->lod;
4353 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4354 for (unsigned i = 0; i < num_derivs; i++) {
4355 args[num_args++] = derivs->ddx[i];
4356 args[num_args++] = derivs->ddy[i];
4357 }
4358 }
4359
4360 assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
4361
4362 *tex_ret = LLVMBuildCall2(builder, function_type, function, args, num_args, "");
4363 LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder);
4364 LLVMValueRef inst = LLVMGetLastInstruction(bb);
4365 LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
4366 }
4367
4368
4369 /**
4370 * Build texture sampling code.
4371 * Either via a function call or inline it directly.
4372 */
4373 void
lp_build_sample_soa(const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_sampler_params * params)4374 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
4375 const struct lp_static_sampler_state *static_sampler_state,
4376 struct lp_sampler_dynamic_state *dynamic_state,
4377 struct gallivm_state *gallivm,
4378 const struct lp_sampler_params *params)
4379 {
4380 bool use_tex_func = false;
4381
4382 /*
4383 * Do not use a function call if the sampling is "simple enough".
4384 * We define this by
4385 * a) format
4386 * b) no mips (either one level only or no mip filter)
4387 * No mips will definitely make the code smaller, though
4388 * the format requirement is a bit iffy - there's some (SoA) formats
4389 * which definitely generate less code. This does happen to catch
4390 * some important cases though which are hurt quite a bit by using
4391 * a call (though not really because of the call overhead but because
4392 * they are reusing the same texture unit with some of the same
4393 * parameters).
4394 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
4395 */
4396
4397 if (USE_TEX_FUNC_CALL) {
4398 const struct util_format_description *format_desc =
4399 util_format_description(static_texture_state->format);
4400 const bool simple_format =
4401 (util_format_is_rgba8_variant(format_desc) &&
4402 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
4403 const enum lp_sampler_op_type op_type =
4404 (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
4405 LP_SAMPLER_OP_TYPE_SHIFT;
4406 const bool simple_tex =
4407 op_type != LP_SAMPLER_OP_TEXTURE ||
4408 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
4409 static_texture_state->level_zero_only == true) &&
4410 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
4411
4412 use_tex_func = !(simple_format && simple_tex);
4413 }
4414
4415 if (use_tex_func) {
4416 LLVMValueRef tex_ret;
4417 lp_build_sample_soa_func(gallivm,
4418 static_texture_state,
4419 static_sampler_state,
4420 dynamic_state,
4421 params, params->texture_index,
4422 params->sampler_index, &tex_ret);
4423
4424 for (unsigned i = 0; i < 4; i++) {
4425 params->texel[i] =
4426 LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
4427 }
4428 } else {
4429 lp_build_sample_soa_code(gallivm,
4430 static_texture_state,
4431 static_sampler_state,
4432 dynamic_state,
4433 params->type,
4434 params->sample_key,
4435 params->texture_index,
4436 params->sampler_index,
4437 params->resources_type,
4438 params->resources_ptr,
4439 params->thread_data_type,
4440 params->thread_data_ptr,
4441 params->coords,
4442 params->offsets,
4443 params->derivs,
4444 params->lod,
4445 params->ms_index,
4446 params->aniso_filter_table,
4447 params->texel);
4448 }
4449 }
4450
4451
/**
 * Build code for a texture/buffer size query (e.g. textureSize()/resinfo).
 *
 * Writes the per-channel results into params->sizes_out: width/height/
 * depth (minified by the explicit lod if given), the layer count for
 * array targets, and - for sviewinfo queries - the number of mip levels
 * in channel 3.  Unbound textures (format NONE) return all zero as
 * required by d3d10.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        const struct lp_sampler_size_query_params *params)
{
   LLVMValueRef first_level = NULL;
   const unsigned num_lods = 1;
   LLVMTypeRef resources_type = params->resources_type;
   LLVMValueRef resources_ptr = params->resources_ptr;
   const unsigned texture_unit = params->texture_unit;
   const enum pipe_texture_target target = params->target;
   LLVMValueRef texture_unit_offset = params->texture_unit_offset;
   const struct util_format_description *format_desc =
      util_format_description(static_state->format);
   const struct util_format_description *res_format_desc =
      util_format_description(static_state->res_format);

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
      for (unsigned chan = 0; chan < 4; chan++) {
         params->sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   const unsigned dims = texture_dims(target);

   const bool has_array = has_layer_coord(target);

   assert(!params->int_type.floating);

   /* Scalar i32x4 context used to assemble the size vector. */
   struct lp_build_context bld_int_vec4;
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (params->samples_only) {
      /* Sample-count query only (no size components). */
      LLVMValueRef num_samples;
      if (params->ms && static_state->level_zero_only) {
         /* multisample never has levels. */
         num_samples = dynamic_state->last_level(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
         num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                     bld_int_vec4.elem_type, "");
      } else {
         num_samples = lp_build_const_int32(gallivm, 0);
      }
      params->sizes_out[0] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_samples);
      return;
   }

   LLVMValueRef lod;
   LLVMValueRef level = 0;
   if (params->explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      first_level = get_first_level(gallivm, resources_type, resources_ptr,
                                    texture_unit, texture_unit_offset,
                                    static_state, dynamic_state);
      /* lod is relative to the view's first level */
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   LLVMValueRef size = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize_log2 = bld_int_vec4.undef;
   LLVMValueRef view_blocksize = bld_int_vec4.undef;

   uint32_t res_bw = res_format_desc->block.width;
   uint32_t res_bh = res_format_desc->block.height;
   uint32_t bw = format_desc->block.width;
   uint32_t bh = format_desc->block.height;

   /* only scale if the blocksizes are different. */
   if (res_bw == bw)
      res_bw = bw = 1;
   if (res_bh == bh)
      res_bh = bh = 1;

   /* Lane 0: width plus resource/view block widths for rescaling. */
   LLVMValueRef tex_width = dynamic_state->width(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 tex_width,
                                 lp_build_const_int32(gallivm, 0), "");
   tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                          lp_build_const_int32(gallivm, res_bw),
                                          lp_build_const_int32(gallivm, 0), "");
   tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                               lp_build_const_int32(gallivm, util_logbase2(res_bw)),
                                               lp_build_const_int32(gallivm, 0), "");
   view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                           lp_build_const_int32(gallivm, bw),
                                           lp_build_const_int32(gallivm, 0), "");
   if (dims >= 2) {
      /* Lane 1: height and block heights. */
      LLVMValueRef tex_height =
         dynamic_state->height(gallivm, resources_type,
                               resources_ptr, texture_unit, texture_unit_offset);
      tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
                                 bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_height,
                                    lp_build_const_int32(gallivm, 1), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, res_bh),
                                             lp_build_const_int32(gallivm, 1), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, util_logbase2(res_bh)),
                                                  lp_build_const_int32(gallivm, 1), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, bh),
                                              lp_build_const_int32(gallivm, 1), "");
   }

   if (dims >= 3) {
      /* Lane 2: depth (blocks are never compressed in z, so 1/0/1). */
      LLVMValueRef tex_depth =
         dynamic_state->depth(gallivm, resources_type,
                              resources_ptr, texture_unit, texture_unit_offset);
      tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
                                bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_depth,
                                    lp_build_const_int32(gallivm, 2), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, 1),
                                             lp_build_const_int32(gallivm, 2), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, 0),
                                                  lp_build_const_int32(gallivm, 2), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, 1),
                                              lp_build_const_int32(gallivm, 2), "");
   }

   /* Minify by the requested level, then rescale when the view format's
    * block size differs from the resource format's. */
   size = lp_build_minify(&bld_int_vec4, size, lod, true);
   size = lp_build_scale_view_dims(&bld_int_vec4, size, tex_blocksize,
                                   tex_blocksize_log2, view_blocksize);

   if (has_array) {
      /* Array layer count goes in the channel after the size dims. */
      LLVMValueRef layers = dynamic_state->depth(gallivm, resources_type,
                                                 resources_ptr, texture_unit,
                                                 texture_unit_offset);
      layers = LLVMBuildZExt(gallivm->builder, layers,
                             bld_int_vec4.elem_type, "");
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (params->explicit_lod && params->is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                  texture_unit, texture_unit_offset,
                                  static_state, dynamic_state);

      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      } else {
         /* TODO */
         assert(0);
      }
      /* Zero out the size lanes where the level is out of range. */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }

   /* Broadcast each computed lane into a full-width output vector. */
   unsigned i;
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      params->sizes_out[i] =
         lp_build_extract_broadcast(gallivm, bld_int_vec4.type,
                                    params->int_type,
                                    size,
                                    lp_build_const_int32(gallivm, i));
   }
   if (params->is_sviewinfo) {
      /* Remaining channels read as zero for sviewinfo. */
      for (; i < 4; i++) {
         params->sizes_out[i] = lp_build_const_vec(gallivm,
                                                   params->int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (params->is_sviewinfo && params->explicit_lod) {
      struct lp_build_context bld_int_scalar;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      LLVMValueRef num_levels;
      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      } else {
         /* num_levels = last_level - first_level + 1 */
         LLVMValueRef last_level;
         last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                     texture_unit, texture_unit_offset,
                                     static_state, dynamic_state);
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels,
                                   bld_int_scalar.one);
      }
      params->sizes_out[3] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_levels);
   }

   if (target == PIPE_BUFFER) {
      /* Clamp buffer size to the supported maximum element count. */
      struct lp_build_context bld_int;
      lp_build_context_init(&bld_int, gallivm, params->int_type);

      params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0],
                                          lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS));
   }
}
4724
4725
/**
 * Emit an image atomic operation (RMW or compare-and-swap) on a
 * 32-bit single-channel image.
 *
 * Only R32_UINT/R32_SINT/R32_FLOAT are supported, and the requested op
 * must match the format's integer-ness; unsupported combinations just
 * produce a zero result.  The operation is scalarized: a loop walks the
 * SIMD lanes, and each active, in-bounds lane performs its own atomic
 * on its byte offset into the image.
 *
 * atomic_result[0] receives the vector of per-lane old values.
 */
static void
lp_build_do_atomic_soa(struct gallivm_state *gallivm,
                       const struct util_format_description *format_desc,
                       struct lp_type type,
                       LLVMValueRef exec_mask,
                       LLVMValueRef base_ptr,
                       LLVMValueRef offset,
                       LLVMValueRef out_of_bounds,
                       unsigned img_op,
                       LLVMAtomicRMWBinOp op,
                       const LLVMValueRef rgba_in[4],
                       const LLVMValueRef rgba2_in[4],
                       LLVMValueRef atomic_result[4])
{
   const enum pipe_format format = format_desc->format;

   bool valid = format == PIPE_FORMAT_R32_UINT ||
                format == PIPE_FORMAT_R32_SINT ||
                format == PIPE_FORMAT_R32_FLOAT;

   /* Integer ops are only valid on integer formats, float ops only on
    * R32_FLOAT; CAS (the else branch) is integer-only. */
   bool integer = format != PIPE_FORMAT_R32_FLOAT;
   if (img_op == LP_IMG_ATOMIC) {
      switch (op) {
      case LLVMAtomicRMWBinOpAdd:
      case LLVMAtomicRMWBinOpSub:
      case LLVMAtomicRMWBinOpAnd:
      case LLVMAtomicRMWBinOpNand:
      case LLVMAtomicRMWBinOpOr:
      case LLVMAtomicRMWBinOpXor:
      case LLVMAtomicRMWBinOpMax:
      case LLVMAtomicRMWBinOpMin:
      case LLVMAtomicRMWBinOpUMax:
      case LLVMAtomicRMWBinOpUMin:
         valid &= integer;
         break;
      case LLVMAtomicRMWBinOpFAdd:
      case LLVMAtomicRMWBinOpFSub:
#if LLVM_VERSION_MAJOR >= 15
      case LLVMAtomicRMWBinOpFMax:
      case LLVMAtomicRMWBinOpFMin:
#endif
         valid &= !integer;
         break;
      default:
         break;
      }
   } else {
      valid &= integer;
   }

   if (!valid) {
      /* Unsupported format/op combination: result is all zero. */
      atomic_result[0] = lp_build_zero(gallivm, type);
      return;
   }

   /* Element type of the atomic access: f32 or i32 depending on format. */
   LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ?
      LLVMFloatTypeInContext(gallivm->context) :
      LLVMInt32TypeInContext(gallivm->context);

   /* Stack slot accumulating the per-lane old values. */
   LLVMTypeRef atom_res_elem_type =
      LLVMVectorType(ref_type, type.length);
   LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");

   /* Per-lane byte addresses: base_ptr advanced by each lane's offset. */
   offset = LLVMBuildGEP2(gallivm->builder,
                          LLVMInt8TypeInContext(gallivm->context),
                          base_ptr, &offset, 1, "");

   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   struct lp_build_if_state ifthen;
   LLVMValueRef cond;
   LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];

   /* A lane performs the atomic only if it is executing and in bounds. */
   LLVMValueRef should_store_mask =
      LLVMBuildAnd(gallivm->builder, exec_mask,
                   LLVMBuildNot(gallivm->builder, out_of_bounds, ""),
                   "store_mask");
   assert(exec_mask);

   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask,
                        lp_build_const_int_vec(gallivm, type, 0), "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond,
                                  loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   /* Extract this lane's operand and target address. */
   LLVMValueRef data =
      LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
   LLVMValueRef cast_base_ptr =
      LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
   cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
                                    LLVMPointerType(ref_type, 0), "");
   data = LLVMBuildBitCast(gallivm->builder, data,
                           ref_type, "");

   if (img_op == LP_IMG_ATOMIC_CAS) {
      /* cmpxchg: 'data' is the comparand, rgba2 carries the new value;
       * the old value is element 0 of the {value, success} result. */
      LLVMValueRef cas_src_ptr =
         LLVMBuildExtractElement(gallivm->builder, packed2,
                                 loop_state.counter, "");
      LLVMValueRef cas_src =
         LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
                          ref_type, "");
      data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
                                    cas_src,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    false);
      data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
   } else {
      data = LLVMBuildAtomicRMW(gallivm->builder, op,
                                cast_base_ptr, data,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                false);
   }

   /* Merge this lane's old value into the result vector. */
   LLVMValueRef temp_res =
      LLVMBuildLoad2(gallivm->builder, atom_res_elem_type, atom_res, "");
   temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data,
                                     loop_state.counter, "");
   LLVMBuildStore(gallivm->builder, temp_res, atom_res);

   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state,
                          lp_build_const_int32(gallivm, type.length),
                          NULL, LLVMIntUGE);
   atomic_result[0] = LLVMBuildLoad2(gallivm->builder, atom_res_elem_type,
                                     atom_res, "");
}
4853
4854
4855 static void
lp_build_img_op_no_format(struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef outdata[4])4856 lp_build_img_op_no_format(struct gallivm_state *gallivm,
4857 const struct lp_img_params *params,
4858 LLVMValueRef outdata[4])
4859 {
4860 /*
4861 * If there's nothing bound, format is NONE, and we must return
4862 * all zero as mandated by d3d10 in this case.
4863 */
4864 if (params->img_op != LP_IMG_STORE) {
4865 LLVMValueRef zero = lp_build_zero(gallivm, params->type);
4866 for (unsigned chan = 0; chan < (params->img_op == LP_IMG_LOAD ? 4 : 1);
4867 chan++) {
4868 outdata[chan] = zero;
4869 }
4870 }
4871 }
4872
4873
4874 void
lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
                    struct lp_sampler_dynamic_state *dynamic_state,
                    struct gallivm_state *gallivm,
                    const struct lp_img_params *params,
                    LLVMValueRef outdata[4])
{
   /*
    * Generate code for an image operation (load/store/atomic), SoA form.
    * Computes a per-lane byte offset from the integer coords, builds an
    * out-of-bounds mask, then dispatches on params->img_op.  For loads and
    * atomics the (masked) results are written to outdata[0..3].
    */
   const enum pipe_texture_target target = params->target;
   const unsigned dims = texture_dims(target);
   const struct util_format_description *format_desc =
      util_format_description(static_texture_state->format);
   const struct util_format_description *res_format_desc =
      util_format_description(static_texture_state->res_format);
   LLVMValueRef x = params->coords[0], y = params->coords[1],
      z = params->coords[2];
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;

   /* Unsigned integer coord type with the same vector length as params->type. */
   struct lp_type int_coord_type = lp_uint_type(params->type);
   struct lp_build_context int_coord_bld;
   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);

   /* Formatless views get the dedicated no-format path and we're done. */
   if (static_texture_state->format == PIPE_FORMAT_NONE) {
      lp_build_img_op_no_format(gallivm, params, outdata);
      return;

   }

   /* Fetch the dynamic (per-resource) state for this image unit. */
   LLVMValueRef row_stride = dynamic_state->row_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef img_stride = dynamic_state->img_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef base_ptr = dynamic_state->base_ptr(gallivm,
                                                   params->resources_type,
                                                   params->resources_ptr,
                                                   params->image_index, NULL);
   LLVMValueRef width = dynamic_state->width(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   LLVMValueRef height = dynamic_state->height(gallivm,
                                               params->resources_type,
                                               params->resources_ptr,
                                               params->image_index, NULL);
   /* height/depth are widened to the coord element type here; width is not —
    * presumably it is already that width in the jit resource struct.
    * NOTE(review): confirm against the dynamic-state layout.
    */
   height = LLVMBuildZExt(gallivm->builder, height,
                          int_coord_bld.elem_type, "");
   LLVMValueRef depth = dynamic_state->depth(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   depth = LLVMBuildZExt(gallivm->builder, depth,
                         int_coord_bld.elem_type, "");
   bool layer_coord = has_layer_coord(target);

   /* Rescale dimensions when the view format's block size differs from the
    * resource format's (e.g. a compressed resource viewed with an
    * uncompressed format), then splat the scalars to coord-wide vectors.
    */
   width = lp_build_scale_view_dim(gallivm, width, res_format_desc->block.width,
                                   format_desc->block.width);
   width = lp_build_broadcast_scalar(&int_coord_bld, width);
   if (dims >= 2) {
      height = lp_build_scale_view_dim(gallivm, height, res_format_desc->block.height,
                                       format_desc->block.height);
      height = lp_build_broadcast_scalar(&int_coord_bld, height);
      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
   }
   if (dims >= 3 || layer_coord) {
      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
   }

   /* Accumulate a per-lane out-of-bounds mask: a lane is OOB if any used
    * coordinate is >= the corresponding dimension.  Coords are treated as
    * unsigned, so negative inputs are also caught by the GEQUAL compare.
    */
   LLVMValueRef out_of_bounds = int_coord_bld.zero;
   LLVMValueRef out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }
   if (dims >= 3 || layer_coord) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }

   /* Per-lane byte offset into the image, plus sub-block coords i/j. */
   LLVMValueRef offset, i, j;
   lp_build_sample_offset(&int_coord_bld,
                          format_desc,
                          x, y, z, row_stride_vec, img_stride_vec,
                          &offset, &i, &j);

   if (params->ms_index && static_texture_state->level_zero_only) {
      /* Multisampled image: add the per-sample offset and extend the OOB
       * mask with the sample-index check.
       * NOTE(review): the sample count is fetched through the last_level
       * callback — it appears that slot doubles as the sample count for
       * MSAA images; confirm.
       */
      LLVMValueRef num_samples = dynamic_state->last_level(gallivm,
                                                           params->resources_type,
                                                           params->resources_ptr,
                                                           params->image_index, NULL);
      num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                  int_coord_bld.elem_type, "");
      LLVMValueRef sample_stride = dynamic_state->sample_stride(gallivm,
                                                                params->resources_type,
                                                                params->resources_ptr,
                                                                params->image_index, NULL);
      lp_build_sample_ms_offset(&int_coord_bld,
                                params->ms_index, num_samples,
                                sample_stride, &offset,
                                &out_of_bounds);
   }
   if (params->img_op == LP_IMG_LOAD) {
      struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);

      /* Clamp OOB lanes' offsets to 0 so the fetch itself stays in bounds;
       * their results are overwritten below.
       */
      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
      struct lp_build_context texel_bld;
      lp_build_context_init(&texel_bld, gallivm, texel_type);
      lp_build_fetch_rgba_soa(gallivm,
                              format_desc,
                              texel_type, true,
                              base_ptr, offset,
                              i, j,
                              NULL,
                              outdata);

      /* OOB lanes read as (0,0,0,0), or (0,0,0,1) when the format has no
       * stored alpha (swizzle forces alpha to one).
       */
      for (unsigned chan = 0; chan < 3; chan++) {
         outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
                                         texel_bld.zero, outdata[chan]);
      }
      if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.one, outdata[3]);
      } else {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.zero, outdata[3]);
      }
   } else if (params->img_op == LP_IMG_STORE) {
      /* Store path: OOB masking is handled inside the store helper. */
      lp_build_store_rgba_soa(gallivm, format_desc, params->type,
                              params->exec_mask, base_ptr, offset,
                              out_of_bounds, params->indata);
   } else {
      /* Atomic ops (and atomic compare-and-swap via indata2). */
      lp_build_do_atomic_soa(gallivm, format_desc, params->type,
                             params->exec_mask, base_ptr, offset,
                             out_of_bounds, params->img_op, params->op,
                             params->indata, params->indata2, outdata);
   }
}
5017
5018
/*
 * These functions are for indirect texture access support.
 *
 * Indirect textures are implemented using a switch statement, that
 * takes the texture index and jumps to the sampler functions for
 * that texture unit.
 */
5026
5027 /*
5028 * Initialise an indexed sampler switch block.
5029 *
5030 * This sets up the switch_info state and adds the LLVM flow control pieces.
5031 */
5032 void
lp_build_sample_array_init_soa(struct lp_build_sample_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_sampler_params * params,LLVMValueRef idx,unsigned base,unsigned range)5033 lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
5034 struct gallivm_state *gallivm,
5035 const struct lp_sampler_params *params,
5036 LLVMValueRef idx,
5037 unsigned base, unsigned range)
5038 {
5039 switch_info->gallivm = gallivm;
5040 switch_info->params = *params;
5041 switch_info->base = base;
5042 switch_info->range = range;
5043
5044 /* for generating the switch functions we don't want the texture index
5045 * offset
5046 */
5047 switch_info->params.texture_index_offset = 0;
5048
5049 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
5050 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");
5051
5052 switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
5053 switch_info->merge_ref,
5054 range - base);
5055
5056 LLVMTypeRef val_type[4];
5057 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
5058 lp_build_vec_type(gallivm, params->type);
5059
5060 LLVMTypeRef ret_type =
5061 LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
5062
5063 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
5064
5065 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5066
5067 switch_info->phi = LLVMBuildPhi(gallivm->builder, ret_type, "");
5068 LLVMAddIncoming(switch_info->phi, &undef_val, &initial_block, 1);
5069 }
5070
5071
5072 /*
5073 * Add an individual entry to the indirect texture switch.
5074 *
5075 * This builds the sample function and links a case for it into the switch
5076 * statement.
5077 */
5078 void
lp_build_sample_array_case_soa(struct lp_build_sample_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_texture_state)5079 lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
5080 int idx,
5081 const struct lp_static_texture_state *static_texture_state,
5082 const struct lp_static_sampler_state *static_sampler_state,
5083 struct lp_sampler_dynamic_state *dynamic_texture_state)
5084 {
5085 struct gallivm_state *gallivm = switch_info->gallivm;
5086 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "texblock");
5087
5088 LLVMAddCase(switch_info->switch_ref,
5089 LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0),
5090 this_block);
5091 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
5092
5093 LLVMValueRef tex_ret;
5094 lp_build_sample_soa_func(gallivm, static_texture_state,
5095 static_sampler_state, dynamic_texture_state,
5096 &switch_info->params, idx, idx, &tex_ret);
5097
5098 LLVMAddIncoming(switch_info->phi, &tex_ret, &this_block, 1);
5099 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
5100 }
5101
5102
/*
 * Finish a switch statement.
 *
 * This handles extracting the results from the switch.
 */
5108 void
lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch * switch_info)5109 lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
5110 {
5111 struct gallivm_state *gallivm = switch_info->gallivm;
5112
5113 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5114 for (unsigned i = 0; i < 4; i++) {
5115 switch_info->params.texel[i] =
5116 LLVMBuildExtractValue(gallivm->builder, switch_info->phi, i, "");
5117 }
5118 }
5119
5120
5121 void
lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef idx,unsigned base,unsigned range)5122 lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch *switch_info,
5123 struct gallivm_state *gallivm,
5124 const struct lp_img_params *params,
5125 LLVMValueRef idx,
5126 unsigned base, unsigned range)
5127 {
5128 switch_info->gallivm = gallivm;
5129 switch_info->params = *params;
5130 switch_info->base = base;
5131 switch_info->range = range;
5132
5133 /* for generating the switch functions we don't want the texture index
5134 * offset
5135 */
5136 switch_info->params.image_index_offset = 0;
5137
5138 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
5139 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "imgmerge");
5140
5141 switch_info->switch_ref =
5142 LLVMBuildSwitch(gallivm->builder, idx,
5143 switch_info->merge_ref, range - base);
5144
5145 if (params->img_op != LP_IMG_STORE) {
5146 LLVMTypeRef ret_type = lp_build_vec_type(gallivm, params->type);
5147 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
5148
5149 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5150
5151 for (unsigned i = 0; i < ((params->img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5152 switch_info->phi[i] = LLVMBuildPhi(gallivm->builder, ret_type, "");
5153 LLVMAddIncoming(switch_info->phi[i], &undef_val, &initial_block, 1);
5154 }
5155 }
5156 }
5157
5158
5159 void
lp_build_image_op_array_case(struct lp_build_img_op_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,struct lp_sampler_dynamic_state * dynamic_state)5160 lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
5161 int idx,
5162 const struct lp_static_texture_state *static_texture_state,
5163 struct lp_sampler_dynamic_state *dynamic_state)
5164 {
5165 struct gallivm_state *gallivm = switch_info->gallivm;
5166 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "img");
5167 LLVMValueRef tex_ret[4];
5168
5169 LLVMAddCase(switch_info->switch_ref,
5170 lp_build_const_int32(gallivm, idx), this_block);
5171 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
5172
5173 switch_info->params.image_index = idx;
5174
5175 lp_build_img_op_soa(static_texture_state, dynamic_state,
5176 switch_info->gallivm, &switch_info->params, tex_ret);
5177
5178 if (switch_info->params.img_op != LP_IMG_STORE) {
5179 for (unsigned i = 0;
5180 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5181 tex_ret[i] =
5182 LLVMBuildBitCast(gallivm->builder, tex_ret[i],
5183 lp_build_vec_type(gallivm,
5184 switch_info->params.type), "");
5185 }
5186
5187 this_block = LLVMGetInsertBlock(gallivm->builder);
5188 for (unsigned i = 0;
5189 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5190 LLVMAddIncoming(switch_info->phi[i], &tex_ret[i], &this_block, 1);
5191 }
5192 }
5193 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
5194 }
5195
5196
5197 void
lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch * switch_info)5198 lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info)
5199 {
5200 struct gallivm_state *gallivm = switch_info->gallivm;
5201
5202 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5203
5204 if (switch_info->params.img_op != LP_IMG_STORE) {
5205 for (unsigned i = 0;
5206 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5207 switch_info->params.outdata[i] = switch_info->phi[i];
5208 }
5209 }
5210 }
5211