1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/bitset.h"
40 #include "util/compiler.h"
41 #include "util/u_debug.h"
42 #include "util/u_dump.h"
43 #include "util/u_memory.h"
44 #include "util/u_math.h"
45 #include "util/format/u_format.h"
46 #include "util/u_cpu_detect.h"
47 #include "util/format_rgb9e5.h"
48 #include "lp_bld_debug.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_conv.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_bitarit.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_printf.h"
56 #include "lp_bld_swizzle.h"
57 #include "lp_bld_flow.h"
58 #include "lp_bld_gather.h"
59 #include "lp_bld_format.h"
60 #include "lp_bld_sample.h"
61 #include "lp_bld_sample_aos.h"
62 #include "lp_bld_struct.h"
63 #include "lp_bld_quad.h"
64 #include "lp_bld_pack.h"
65 #include "lp_bld_intr.h"
66 #include "lp_bld_misc.h"
67 #include "lp_bld_jit_types.h"
68
69 static void
lp_build_gather_resident(struct lp_build_context * bld,struct lp_sampler_dynamic_state * dynamic_state,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,LLVMValueRef offset,LLVMValueRef * out_resident)70 lp_build_gather_resident(struct lp_build_context *bld,
71 struct lp_sampler_dynamic_state *dynamic_state,
72 LLVMTypeRef resources_type,
73 LLVMValueRef resources_ptr,
74 LLVMValueRef offset,
75 LLVMValueRef *out_resident)
76 {
77 struct lp_type type = lp_int_type(bld->type);
78
79 struct gallivm_state *gallivm = bld->gallivm;
80 LLVMBuilderRef builder = gallivm->builder;
81
82 static_assert(sizeof(BITSET_WORD) == 4, "Unexpected BITSET_WORD size");
83
84 LLVMValueRef residency =
85 dynamic_state->residency(gallivm, resources_type, resources_ptr, 0, NULL);
86
87 LLVMValueRef tile_size_log2 =
88 lp_build_const_int_vec(gallivm, type, util_logbase2(64 * 1024));
89 LLVMValueRef tile_index = LLVMBuildLShr(builder, offset, tile_size_log2, "");
90
91 LLVMValueRef dword_bitsize_log2 =
92 lp_build_const_int_vec(gallivm, type, util_logbase2(32));
93 LLVMValueRef dword_index = LLVMBuildLShr(builder, tile_index, dword_bitsize_log2, "");
94
95 LLVMValueRef dword_size_log2 =
96 lp_build_const_int_vec(gallivm, type, util_logbase2(4));
97 LLVMValueRef dword_offset = LLVMBuildShl(builder, dword_index, dword_size_log2, "");
98
99 residency = lp_build_gather(gallivm, type.length, type.width, lp_elem_type(type),
100 true, residency, dword_offset, true);
101
102 LLVMValueRef dword_bit_mask =
103 lp_build_const_int_vec(gallivm, type, 31);
104 LLVMValueRef bit_index = LLVMBuildAnd(builder, tile_index, dword_bit_mask, "");
105 LLVMValueRef bit_mask = LLVMBuildShl(builder, lp_build_one(gallivm, type), bit_index, "");
106
107 LLVMValueRef resident = LLVMBuildAnd(builder, residency, bit_mask, "");
108 resident = LLVMBuildICmp(builder, LLVMIntNE, resident, lp_build_zero(gallivm, type), "");
109
110 if (*out_resident)
111 *out_resident = LLVMBuildAnd(builder, *out_resident, resident, "");
112 else
113 *out_resident = resident;
114 }
115
/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 *
 * \param width,height,depth  per-lane level dimensions (int vectors)
 * \param x,y,z       integer texel coordinates (int vectors)
 * \param y_stride,z_stride  row/slice strides in bytes (linear layout only)
 * \param data_ptr    base pointer of the texture data (or of the mip level,
 *                    when mipoffsets is NULL)
 * \param mipoffsets  per-lane byte offset of the selected mip level, or NULL
 *                    if data_ptr already points at the level
 * \param ilevel      selected mip level index (used for residency checking
 *                    when mipoffsets is NULL)
 * \param texel_out   receives the four channel vectors
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef ilevel,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   /* OR in the same out-of-bounds test for the t coord (2D/3D only) */
   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      } else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* OR in the same out-of-bounds test for the r coord (3D only) */
   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      } else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   if (bld->static_texture_state->tiled) {
      lp_build_tiled_sample_offset(&bld->int_coord_bld,
                                   bld->format_desc->format,
                                   bld->static_texture_state,
                                   x, y, z, width, height, z_stride,
                                   &offset, &i, &j);
   } else {
      lp_build_sample_offset(&bld->int_coord_bld,
                             bld->format_desc,
                             x, y, z, y_stride, z_stride,
                             &offset, &i, &j);
   }

   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image.  We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border.  This will cause
       * coords which are out of bounds to become zero.  Zero's guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   if (bld->residency) {
      LLVMValueRef real_offset = offset;

      /* The residency bitset is indexed by the offset from the texture
       * base, so when data_ptr was already the level pointer (mipoffsets
       * was NULL) the mip offset has to be added back in here, and
       * out-of-bounds lanes zeroed again like above.
       */
      if (!mipoffsets) {
         mipoffsets = lp_build_get_mip_offsets(bld, ilevel);
         real_offset = lp_build_add(&bld->int_coord_bld, real_offset, mipoffsets);

         if (use_border)
            real_offset = lp_build_andnot(&bld->int_coord_bld, real_offset, use_border);
      }

      lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
                               bld->resources_type, bld->resources_ptr,
                               real_offset, &bld->resident);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, true,
                           data_ptr, offset,
                           i, j,
                           bld->cache,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   } else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by sampler_view swizzle anyway but it's
       * easier too.
       */
      for (unsigned chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         if (util_format_has_stencil(format_desc)) {
            /* stencil formats: only channel 0 carries data */
            if (chan == 0)
               chan_s = 0;
            else
               break;
         } else {
            /* find the source channel the format swizzles into chan */
            for (chan_s = 0; chan_s < 4; chan_s++) {
               if (chan_s == format_desc->swizzle[chan]) {
                  break;
               }
            }
         }
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}
297
298 static LLVMValueRef
get_first_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)299 get_first_level(struct gallivm_state *gallivm,
300 LLVMTypeRef resources_type,
301 LLVMValueRef resources_ptr,
302 unsigned texture_unit,
303 LLVMValueRef texture_unit_offset,
304 const struct lp_static_texture_state *static_state,
305 struct lp_sampler_dynamic_state *dynamic_state)
306 {
307 if (static_state->level_zero_only)
308 return lp_build_const_int32(gallivm, 0);
309 else {
310 LLVMValueRef first_level;
311
312 first_level = dynamic_state->first_level(gallivm, resources_type,
313 resources_ptr, texture_unit,
314 texture_unit_offset);
315 first_level = LLVMBuildZExt(gallivm->builder, first_level,
316 LLVMInt32TypeInContext(gallivm->context), "");
317 return first_level;
318 }
319 }
320
321
322 static LLVMValueRef
get_last_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)323 get_last_level(struct gallivm_state *gallivm,
324 LLVMTypeRef resources_type,
325 LLVMValueRef resources_ptr,
326 unsigned texture_unit,
327 LLVMValueRef texture_unit_offset,
328 const struct lp_static_texture_state *static_state,
329 struct lp_sampler_dynamic_state *dynamic_state)
330 {
331 if (static_state->level_zero_only)
332 return lp_build_const_int32(gallivm, 0);
333 else {
334 LLVMValueRef last_level;
335
336 last_level = dynamic_state->last_level(gallivm, resources_type,
337 resources_ptr, texture_unit,
338 texture_unit_offset);
339 last_level = LLVMBuildZExt(gallivm->builder, last_level,
340 LLVMInt32TypeInContext(gallivm->context), "");
341 return last_level;
342 }
343 }
344
345 /**
346 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
347 * (Note that with pot sizes could do this much more easily post-scale
348 * with some bit arithmetic.)
349 */
350 static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context * bld,LLVMValueRef coord,bool posOnly)351 lp_build_coord_mirror(struct lp_build_sample_context *bld,
352 LLVMValueRef coord, bool posOnly)
353 {
354 struct lp_build_context *coord_bld = &bld->coord_bld;
355 LLVMValueRef fract;
356 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
357
358 /*
359 * We can just use 2*(x - round(0.5*x)) to do all the mirroring,
360 * it all works out. (The result is in range [-1, 1.0], negative if
361 * the coord is in the "odd" section, otherwise positive.)
362 */
363
364 coord = lp_build_mul(coord_bld, coord, half);
365 fract = lp_build_round(coord_bld, coord);
366 fract = lp_build_sub(coord_bld, coord, fract);
367 coord = lp_build_add(coord_bld, fract, fract);
368
369 if (posOnly) {
370 /*
371 * Theoretically it's not quite 100% accurate because the spec says
372 * that ultimately a scaled coord of -x.0 should map to int coord
373 * -x + 1 with mirroring, not -x (this does not matter for bilinear
374 * filtering).
375 */
376 coord = lp_build_abs(coord_bld, coord);
377 /* kill off NaNs */
378 /* XXX: not safe without arch rounding, fract can be anything. */
379 coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
380 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
381 }
382
383 return coord;
384 }
385
386
387 /**
388 * Helper to compute the first coord and the weight for
389 * linear wrap repeat npot textures
390 */
391 void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_f)392 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
393 LLVMValueRef coord_f,
394 LLVMValueRef length_i,
395 LLVMValueRef length_f,
396 LLVMValueRef *coord0_i,
397 LLVMValueRef *weight_f)
398 {
399 struct lp_build_context *coord_bld = &bld->coord_bld;
400 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
401 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
402 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
403 int_coord_bld->one);
404 LLVMValueRef mask;
405 /* wrap with normalized floats is just fract */
406 coord_f = lp_build_fract(coord_bld, coord_f);
407 /* mul by size and subtract 0.5 */
408 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
409 coord_f = lp_build_sub(coord_bld, coord_f, half);
410 /*
411 * we avoided the 0.5/length division before the repeat wrap,
412 * now need to fix up edge cases with selects
413 */
414 /*
415 * Note we do a float (unordered) compare so we can eliminate NaNs.
416 * (Otherwise would need fract_safe above).
417 */
418 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
419 PIPE_FUNC_LESS, coord_f, coord_bld->zero);
420
421 /* convert to int, compute lerp weight */
422 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
423 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
424 }
425
426
/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param is_gather   if true, compute the two texel coords for a gather
 *                    footprint (weight is then undefined) instead of
 *                    filter-ready coords + weight
 * \param coord       incoming texcoord (nominally in [0,1])
 * \param length      the texture size along one dimension, as int vector
 * \param length_f    the texture size along one dimension, as float vector
 * \param offset      texel offset along one dimension (int vector), or NULL
 * \param is_pot      if true, length is a power of two
 * \param wrap_mode   one of PIPE_TEX_WRAP_x
 * \param x0_out returns first integer texcoord
 * \param x1_out returns second integer texcoord
 * \param weight_out returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            bool is_gather,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            bool is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      } else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         /* wrap coord1 back to 0 when coord0 was the last texel */
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /*
       * clamp to [0, length]
       *
       * Unlike some other wrap modes, this should be correct for gather
       * too. GL_CLAMP explicitly does this clamp on the coord prior to
       * actual wrapping (which is per sample).
       */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         /* after the min/max clamps below the coord is non-negative,
          * so an unsigned ifloor_fract is safe */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = false;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         if (!is_gather) {
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         } else {
            /*
             * The non-gather path will end up with coords 0, 1 if coord was
             * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
             * really matter what the second coord is). But for gather, we
             * really need to end up with coords 0, 0.
             */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            coord0 = lp_build_sub(coord_bld, coord, half);
            coord1 = lp_build_add(coord_bld, coord, half);
            /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */
            coord0 = lp_build_itrunc(coord_bld, coord0);
            coord1 = lp_build_itrunc(coord_bld, coord1);
            weight = coord_bld->undef;
         }
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * We don't need any clamp. Technically, for very large (pos or neg)
       * (or infinite) values, clamp against [-length, length] would be
       * correct, but we don't need to guarantee any specific
       * result for such coords (the ifloor will be undefined, but for modes
       * requiring border all resulting coords are safe).
       */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      if (!is_gather) {
         /* compute mirror function */
         coord = lp_build_coord_mirror(bld, coord, true);

         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

         /* coord0 = max(coord0, 0) */
         coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      } else {
         /*
          * This is pretty reasonable in the end, all what the tests care
          * about is nasty edge cases (scaled coords x.5, so the individual
          * coords are actually integers, which is REALLY tricky to get right
          * due to this working differently both for negative numbers as well
          * as for even/odd cases). But with enough magic it's not too complex
          * after all.
          * Maybe should try a bit arithmetic one though for POT textures...
          */
         LLVMValueRef isNeg;
         /*
          * Wrapping just once still works, even though it means we can
          * get "wrong" sign due to performing mirror in the middle of the
          * two coords (because this can only happen very near the odd/even
          * edges, so both coords will actually end up as 0 or length - 1
          * in the end).
          * For GL4 gather with per-sample offsets we'd need to the mirroring
          * per coord too.
          */
         coord = lp_build_coord_mirror(bld, coord, false);
         coord = lp_build_mul(coord_bld, coord, length_f);

         /*
          * NaNs should be safe here, we'll do away with them with
          * the ones' complement plus min.
          */
         coord0 = lp_build_sub(coord_bld, coord, half);
         coord0 = lp_build_ifloor(coord_bld, coord0);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* ones complement for neg numbers (mirror(negX) = X - 1)  */
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord0, int_coord_bld->zero);
         coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord1, int_coord_bld->zero);
         coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
         coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

         weight = coord_bld->undef;
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * XXX: probably not correct for gather, albeit I'm not
       * entirely sure as it's poorly specified. The wrapping looks
       * correct according to the spec which is against gl 1.2.1,
       * however negative values will be swapped - gl re-specified
       * wrapping with newer versions (no more pre-clamp except with
       * GL_CLAMP).
       */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min_ext(coord_bld, coord, length_f,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         /* after abs + clamps the coord is non-negative, so an unsigned
          * ifloor_fract is safe */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = false;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!is_gather) {
            coord = lp_build_abs(coord_bld, coord);

            /* clamp to length max */
            coord = lp_build_min_ext(coord_bld, coord, length_f,
                                     GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);

            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            /* coord1 = min(coord1, length-1) */
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         } else {
            /*
             * The non-gather path will swap coord0/1 if coord was negative,
             * which is ok for filtering since the filter weight matches
             * accordingly. Also, if coord is close to zero, coord0/1 will
             * be 0 and 1, instead of 0 and 0 (again ok due to filter
             * weight being 0.0). Both issues need to be fixed for gather.
             */
            LLVMValueRef isNeg;

            /*
             * Actually wanted to cheat here and use:
             * coord1 = lp_build_iround(coord_bld, coord);
             * but it's not good enough for some tests (even piglit
             * textureGather is set up in a way so the coords area always
             * .5, that is right at the crossover points).
             * So do ordinary sub/floor, then do ones' complement
             * for negative numbers.
             * (Note can't just do sub|add/abs/itrunc per coord neither -
             * because the spec demands that mirror(3.0) = 3 but
             * mirror(-3.0) = 2.)
             */
            coord = lp_build_sub(coord_bld, coord, half);
            coord0 = lp_build_ifloor(coord_bld, coord);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
                                 int_coord_bld->zero);
            coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
            coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);

            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
                                 int_coord_bld->zero);
            coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

            weight = coord_bld->undef;
         }
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /*
          * XXX: probably not correct for gather due to swapped
          * order if coord is negative (same rationale as for
          * MIRROR_CLAMP).
          */
         coord = lp_build_abs(coord_bld, coord);

         /*
          * We don't need any clamp. Technically, for very large
          * (or infinite) values, clamp against length would be
          * correct, but we don't need to guarantee any specific
          * result for such coords (the ifloor will be undefined, but
          * for modes requiring border all resulting coords are safe).
          */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}
788
789
790 /**
791 * Build LLVM code for texture wrap mode for nearest filtering.
792 * \param coord the incoming texcoord (nominally in [0,1])
793 * \param length the texture size along one dimension, as int vector
794 * \param length_f the texture size along one dimension, as float vector
795 * \param offset texel offset along one dimension (as int vector)
796 * \param is_pot if TRUE, length is a power of two
797 * \param wrap_mode one of PIPE_TEX_WRAP_x
798 */
799 static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context * bld,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef length_f,LLVMValueRef offset,bool is_pot,unsigned wrap_mode)800 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
801 LLVMValueRef coord,
802 LLVMValueRef length,
803 LLVMValueRef length_f,
804 LLVMValueRef offset,
805 bool is_pot,
806 unsigned wrap_mode)
807 {
808 struct lp_build_context *coord_bld = &bld->coord_bld;
809 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
810 LLVMBuilderRef builder = bld->gallivm->builder;
811 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
812 LLVMValueRef icoord;
813
814 switch (wrap_mode) {
815 case PIPE_TEX_WRAP_REPEAT:
816 if (is_pot) {
817 coord = lp_build_mul(coord_bld, coord, length_f);
818 icoord = lp_build_ifloor(coord_bld, coord);
819 if (offset) {
820 icoord = lp_build_add(int_coord_bld, icoord, offset);
821 }
822 icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
823 } else {
824 if (offset) {
825 offset = lp_build_int_to_float(coord_bld, offset);
826 offset = lp_build_div(coord_bld, offset, length_f);
827 coord = lp_build_add(coord_bld, coord, offset);
828 }
829 /* take fraction, unnormalize */
830 coord = lp_build_fract_safe(coord_bld, coord);
831 coord = lp_build_mul(coord_bld, coord, length_f);
832 icoord = lp_build_itrunc(coord_bld, coord);
833 }
834 break;
835
836 case PIPE_TEX_WRAP_CLAMP:
837 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
838 if (bld->static_sampler_state->normalized_coords) {
839 /* scale coord to length */
840 coord = lp_build_mul(coord_bld, coord, length_f);
841 }
842
843 if (offset) {
844 offset = lp_build_int_to_float(coord_bld, offset);
845 coord = lp_build_add(coord_bld, coord, offset);
846 }
847 /* floor */
848 /* use itrunc instead since we clamp to 0 anyway */
849 icoord = lp_build_itrunc(coord_bld, coord);
850
851 /* clamp to [0, length - 1]. */
852 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
853 length_minus_one);
854 break;
855
856 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
857 if (bld->static_sampler_state->normalized_coords) {
858 /* scale coord to length */
859 coord = lp_build_mul(coord_bld, coord, length_f);
860 }
861 /* no clamp necessary, border masking will handle this */
862 icoord = lp_build_ifloor(coord_bld, coord);
863 if (offset) {
864 icoord = lp_build_add(int_coord_bld, icoord, offset);
865 }
866 break;
867
868 case PIPE_TEX_WRAP_MIRROR_REPEAT:
869 if (offset) {
870 offset = lp_build_int_to_float(coord_bld, offset);
871 offset = lp_build_div(coord_bld, offset, length_f);
872 coord = lp_build_add(coord_bld, coord, offset);
873 }
874 /* compute mirror function */
875 coord = lp_build_coord_mirror(bld, coord, true);
876
877 /* scale coord to length */
878 assert(bld->static_sampler_state->normalized_coords);
879 coord = lp_build_mul(coord_bld, coord, length_f);
880
881 /* itrunc == ifloor here */
882 icoord = lp_build_itrunc(coord_bld, coord);
883
884 /* clamp to [0, length - 1] */
885 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
886 break;
887
888 case PIPE_TEX_WRAP_MIRROR_CLAMP:
889 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
890 if (bld->static_sampler_state->normalized_coords) {
891 /* scale coord to length */
892 coord = lp_build_mul(coord_bld, coord, length_f);
893 }
894 if (offset) {
895 offset = lp_build_int_to_float(coord_bld, offset);
896 coord = lp_build_add(coord_bld, coord, offset);
897 }
898 coord = lp_build_abs(coord_bld, coord);
899
900 /* itrunc == ifloor here */
901 icoord = lp_build_itrunc(coord_bld, coord);
902 /*
903 * Use unsigned min due to possible undef values (NaNs, overflow)
904 */
905 {
906 struct lp_build_context abs_coord_bld = *int_coord_bld;
907 abs_coord_bld.type.sign = false;
908 /* clamp to [0, length - 1] */
909 icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
910 }
911 break;
912
913 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
914 if (bld->static_sampler_state->normalized_coords) {
915 /* scale coord to length */
916 coord = lp_build_mul(coord_bld, coord, length_f);
917 }
918 if (offset) {
919 offset = lp_build_int_to_float(coord_bld, offset);
920 coord = lp_build_add(coord_bld, coord, offset);
921 }
922 coord = lp_build_abs(coord_bld, coord);
923
924 /* itrunc == ifloor here */
925 icoord = lp_build_itrunc(coord_bld, coord);
926 break;
927
928 default:
929 assert(0);
930 icoord = NULL;
931 }
932
933 return icoord;
934 }
935
936
937 /**
938 * Do shadow test/comparison.
939 * \param p shadow ref value
940 * \param texel the texel to compare against
941 */
942 static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context * bld,LLVMValueRef p,LLVMValueRef texel)943 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
944 LLVMValueRef p,
945 LLVMValueRef texel)
946 {
947 struct lp_build_context *texel_bld = &bld->texel_bld;
948 LLVMValueRef res;
949
950 if (0) {
951 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
952 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
953 }
954
955 /* result = (p FUNC texel) ? 1 : 0 */
956 /*
957 * honor d3d10 floating point rules here, which state that comparisons
958 * are ordered except NOT_EQUAL which is unordered.
959 */
960 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
961 res = lp_build_cmp_ordered(texel_bld,
962 bld->static_sampler_state->compare_func,
963 p, texel);
964 } else {
965 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
966 p, texel);
967 }
968 return res;
969 }
970
971
972 /**
973 * Generate code to sample a mipmap level with nearest filtering.
974 * If sampling a cube texture, r = cube face in [0,5].
975 */
976 static void
lp_build_sample_image_nearest(struct lp_build_sample_context * bld,LLVMValueRef size,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef ilevel,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])977 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
978 LLVMValueRef size,
979 LLVMValueRef row_stride_vec,
980 LLVMValueRef img_stride_vec,
981 LLVMValueRef data_ptr,
982 LLVMValueRef mipoffsets,
983 LLVMValueRef ilevel,
984 const LLVMValueRef *coords,
985 const LLVMValueRef *offsets,
986 LLVMValueRef colors_out[4])
987 {
988 const unsigned dims = bld->dims;
989 LLVMValueRef width_vec;
990 LLVMValueRef height_vec;
991 LLVMValueRef depth_vec;
992 LLVMValueRef flt_size;
993 LLVMValueRef flt_width_vec;
994 LLVMValueRef flt_height_vec;
995 LLVMValueRef flt_depth_vec;
996 LLVMValueRef x, y = NULL, z = NULL;
997
998 lp_build_extract_image_sizes(bld,
999 &bld->int_size_bld,
1000 bld->int_coord_type,
1001 size,
1002 &width_vec, &height_vec, &depth_vec);
1003
1004 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
1005
1006 lp_build_extract_image_sizes(bld,
1007 &bld->float_size_bld,
1008 bld->coord_type,
1009 flt_size,
1010 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
1011
1012 /*
1013 * Compute integer texcoords.
1014 */
1015 x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
1016 flt_width_vec, offsets[0],
1017 bld->static_texture_state->pot_width,
1018 bld->static_sampler_state->wrap_s);
1019 lp_build_name(x, "tex.x.wrapped");
1020
1021 if (dims >= 2) {
1022 y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
1023 flt_height_vec, offsets[1],
1024 bld->static_texture_state->pot_height,
1025 bld->static_sampler_state->wrap_t);
1026 lp_build_name(y, "tex.y.wrapped");
1027
1028 if (dims == 3) {
1029 z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
1030 flt_depth_vec, offsets[2],
1031 bld->static_texture_state->pot_depth,
1032 bld->static_sampler_state->wrap_r);
1033 lp_build_name(z, "tex.z.wrapped");
1034 }
1035 }
1036 if (has_layer_coord(bld->static_texture_state->target)) {
1037 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1038 /* add cube layer to face */
1039 z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
1040 } else {
1041 z = coords[2];
1042 }
1043 lp_build_name(z, "tex.z.layer");
1044 }
1045
1046 /*
1047 * Get texture colors.
1048 */
1049 lp_build_sample_texel_soa(bld,
1050 width_vec, height_vec, depth_vec,
1051 x, y, z,
1052 row_stride_vec, img_stride_vec,
1053 data_ptr, mipoffsets, ilevel, colors_out);
1054
1055 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1056 LLVMValueRef cmpval;
1057 cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
1058 /* this is really just a AND 1.0, cmpval but llvm is clever enough */
1059 colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
1060 bld->texel_bld.one, bld->texel_bld.zero);
1061 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1062 }
1063
1064 }
1065
1066
1067 /**
1068 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
1069 */
1070 static LLVMValueRef
lp_build_masklerp(struct lp_build_context * bld,LLVMValueRef weight,LLVMValueRef mask0,LLVMValueRef mask1)1071 lp_build_masklerp(struct lp_build_context *bld,
1072 LLVMValueRef weight,
1073 LLVMValueRef mask0,
1074 LLVMValueRef mask1)
1075 {
1076 struct gallivm_state *gallivm = bld->gallivm;
1077 LLVMBuilderRef builder = gallivm->builder;
1078 LLVMValueRef weight2;
1079
1080 weight2 = lp_build_sub(bld, bld->one, weight);
1081 weight = LLVMBuildBitCast(builder, weight,
1082 lp_build_int_vec_type(gallivm, bld->type), "");
1083 weight2 = LLVMBuildBitCast(builder, weight2,
1084 lp_build_int_vec_type(gallivm, bld->type), "");
1085 weight = LLVMBuildAnd(builder, weight, mask1, "");
1086 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
1087 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
1088 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
1089 return lp_build_add(bld, weight, weight2);
1090 }
1091
1092 /**
1093 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
1094 */
1095 static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context * bld,LLVMValueRef weight0,LLVMValueRef weight1,LLVMValueRef mask00,LLVMValueRef mask01,LLVMValueRef mask10,LLVMValueRef mask11)1096 lp_build_masklerp2d(struct lp_build_context *bld,
1097 LLVMValueRef weight0,
1098 LLVMValueRef weight1,
1099 LLVMValueRef mask00,
1100 LLVMValueRef mask01,
1101 LLVMValueRef mask10,
1102 LLVMValueRef mask11)
1103 {
1104 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
1105 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
1106 return lp_build_lerp(bld, weight1, val0, val1, 0);
1107 }
1108
1109 /*
1110 * this is a bit excessive code for something OpenGL just recommends
1111 * but does not require.
1112 */
1113 #define ACCURATE_CUBE_CORNERS 1
1114
1115 /**
1116 * Generate code to sample a mipmap level with linear filtering.
1117 * If sampling a cube texture, r = cube face in [0,5].
1118 * If linear_mask is present, only pixels having their mask set
1119 * will receive linear filtering, the rest will use nearest.
1120 */
1121 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef size,LLVMValueRef linear_mask,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef ilevel,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])1122 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1123 bool is_gather,
1124 LLVMValueRef size,
1125 LLVMValueRef linear_mask,
1126 LLVMValueRef row_stride_vec,
1127 LLVMValueRef img_stride_vec,
1128 LLVMValueRef data_ptr,
1129 LLVMValueRef mipoffsets,
1130 LLVMValueRef ilevel,
1131 const LLVMValueRef *coords,
1132 const LLVMValueRef *offsets,
1133 LLVMValueRef colors_out[4])
1134 {
1135 LLVMBuilderRef builder = bld->gallivm->builder;
1136 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
1137 struct lp_build_context *coord_bld = &bld->coord_bld;
1138 struct lp_build_context *texel_bld = &bld->texel_bld;
1139 const unsigned dims = bld->dims;
1140 LLVMValueRef width_vec;
1141 LLVMValueRef height_vec;
1142 LLVMValueRef depth_vec;
1143 LLVMValueRef flt_size;
1144 LLVMValueRef flt_width_vec;
1145 LLVMValueRef flt_height_vec;
1146 LLVMValueRef flt_depth_vec;
1147 LLVMValueRef fall_off[4] = { 0 }, have_corners = NULL;
1148 LLVMValueRef z1 = NULL;
1149 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
1150 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
1151 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
1152 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
1153 LLVMValueRef xs[4], ys[4], zs[4];
1154 LLVMValueRef neighbors[2][2][4];
1155 bool seamless_cube_filter, accurate_cube_corners;
1156 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1157
1158 if (is_gather) {
1159 switch (bld->gather_comp) {
1160 case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
1161 case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
1162 case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
1163 case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
1164 default:
1165 break;
1166 }
1167 }
1168
1169 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
1170 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
1171 bld->static_sampler_state->seamless_cube_map;
1172
1173 /*
1174 * Disable accurate cube corners for integer textures, which should only
1175 * get here in the gather path.
1176 */
1177 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
1178 !util_format_is_pure_integer(bld->static_texture_state->format);
1179
1180 lp_build_extract_image_sizes(bld,
1181 &bld->int_size_bld,
1182 bld->int_coord_type,
1183 size,
1184 &width_vec, &height_vec, &depth_vec);
1185
1186 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
1187
1188 lp_build_extract_image_sizes(bld,
1189 &bld->float_size_bld,
1190 bld->coord_type,
1191 flt_size,
1192 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
1193
1194 LLVMTypeRef int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1195
1196 /*
1197 * Compute integer texcoords.
1198 */
1199
1200 if (!seamless_cube_filter) {
1201 lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
1202 flt_width_vec, offsets[0],
1203 bld->static_texture_state->pot_width,
1204 bld->static_sampler_state->wrap_s,
1205 &x00, &x01, &s_fpart);
1206 lp_build_name(x00, "tex.x0.wrapped");
1207 lp_build_name(x01, "tex.x1.wrapped");
1208 x10 = x00;
1209 x11 = x01;
1210
1211 if (dims >= 2) {
1212 lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
1213 flt_height_vec, offsets[1],
1214 bld->static_texture_state->pot_height,
1215 bld->static_sampler_state->wrap_t,
1216 &y00, &y10, &t_fpart);
1217 lp_build_name(y00, "tex.y0.wrapped");
1218 lp_build_name(y10, "tex.y1.wrapped");
1219 y01 = y00;
1220 y11 = y10;
1221
1222 if (dims == 3) {
1223 lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
1224 flt_depth_vec, offsets[2],
1225 bld->static_texture_state->pot_depth,
1226 bld->static_sampler_state->wrap_r,
1227 &z00, &z1, &r_fpart);
1228 z01 = z10 = z11 = z00;
1229 lp_build_name(z00, "tex.z0.wrapped");
1230 lp_build_name(z1, "tex.z1.wrapped");
1231 }
1232 }
1233 if (has_layer_coord(bld->static_texture_state->target)) {
1234 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1235 /* add cube layer to face */
1236 z00 = z01 = z10 = z11 = z1 =
1237 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
1238 } else {
1239 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
1240 }
1241 lp_build_name(z00, "tex.z0.layer");
1242 lp_build_name(z1, "tex.z1.layer");
1243 }
1244 } else {
1245 struct lp_build_if_state edge_if;
1246 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
1247 LLVMValueRef coord0, coord1, have_edge, have_corner;
1248 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
1249 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
1250 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
1251 LLVMValueRef face = coords[2];
1252 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
1253 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
1254 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
1255 height_vec = width_vec;
1256 flt_height_vec = flt_width_vec;
1257
1258 /* XXX the overflow logic is actually sort of duplicated with trilinear,
1259 * since an overflow in one mip should also have a corresponding overflow
1260 * in another.
1261 */
1262 /* should always have normalized coords, and offsets are undefined */
1263 assert(bld->static_sampler_state->normalized_coords);
1264 /*
1265 * The coords should all be between [0,1] however we can have NaNs,
1266 * which will wreak havoc. In particular the y1_clamped value below
1267 * can be -INT_MAX (on x86) and be propagated right through (probably
1268 * other values might be bogus in the end too).
1269 * So kill off the NaNs here.
1270 */
1271 coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
1272 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1273 coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
1274 /* instead of clamp, build mask if overflowed */
1275 coord0 = lp_build_sub(coord_bld, coord0, half);
1276 /* convert to int, compute lerp weight */
1277 /* not ideal with AVX (and no AVX2) */
1278 lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
1279 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
1280 coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
1281 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1282 coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
1283 coord1 = lp_build_sub(coord_bld, coord1, half);
1284 lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
1285 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
1286
1287 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
1288 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
1289 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
1290 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
1291
1292 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
1293 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
1294 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
1295 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
1296
1297 /* needed for accurate corner filtering branch later, rely on 0 init */
1298 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1299
1300 for (unsigned texel_index = 0; texel_index < 4; texel_index++) {
1301 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1302 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1303 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1304 }
1305
1306 lp_build_if(&edge_if, bld->gallivm, have_edge);
1307
1308 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1309 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1310 LLVMBuildStore(builder, have_corner, have_corners);
1311
1312 /*
1313 * Need to feed clamped values here for cheap corner handling,
1314 * but only for y coord (as when falling off both edges we only
1315 * fall off the x one) - this should be sufficient.
1316 */
1317 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1318 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1319
1320 /*
1321 * Get all possible new coords.
1322 */
1323 lp_build_cube_new_coords(ivec_bld, face,
1324 x0, x1, y0_clamped, y1_clamped,
1325 length_minus_one,
1326 new_faces, new_xcoords, new_ycoords);
1327
1328 /* handle fall off x-, x+ direction */
1329 /* determine new coords, face (not both fall_off vars can be true at same time) */
1330 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1331 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1332 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1333 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1334 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1335 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1336 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1337 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1338
1339 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1340 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1341
1342 /* handle fall off y-, y+ direction */
1343 /*
1344 * Cheap corner logic: just hack up things so a texel doesn't fall
1345 * off both sides (which means filter weights will be wrong but we'll only
1346 * use valid texels in the filter).
1347 * This means however (y) coords must additionally be clamped (see above).
1348 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1349 */
1350 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1351 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1352 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1353 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1354
1355 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1356 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1357 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1358 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1359 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1360 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1361 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1362 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1363
1364 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1365 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1366 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1367 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1368
1369 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1370 /* now can add cube layer to face (per sample) */
1371 z00 = lp_build_add(ivec_bld, z00, coords[3]);
1372 z01 = lp_build_add(ivec_bld, z01, coords[3]);
1373 z10 = lp_build_add(ivec_bld, z10, coords[3]);
1374 z11 = lp_build_add(ivec_bld, z11, coords[3]);
1375 }
1376
1377 LLVMBuildStore(builder, x00, xs[0]);
1378 LLVMBuildStore(builder, x01, xs[1]);
1379 LLVMBuildStore(builder, x10, xs[2]);
1380 LLVMBuildStore(builder, x11, xs[3]);
1381 LLVMBuildStore(builder, y00, ys[0]);
1382 LLVMBuildStore(builder, y01, ys[1]);
1383 LLVMBuildStore(builder, y10, ys[2]);
1384 LLVMBuildStore(builder, y11, ys[3]);
1385 LLVMBuildStore(builder, z00, zs[0]);
1386 LLVMBuildStore(builder, z01, zs[1]);
1387 LLVMBuildStore(builder, z10, zs[2]);
1388 LLVMBuildStore(builder, z11, zs[3]);
1389
1390 lp_build_else(&edge_if);
1391
1392 LLVMBuildStore(builder, x0, xs[0]);
1393 LLVMBuildStore(builder, x1, xs[1]);
1394 LLVMBuildStore(builder, x0, xs[2]);
1395 LLVMBuildStore(builder, x1, xs[3]);
1396 LLVMBuildStore(builder, y0, ys[0]);
1397 LLVMBuildStore(builder, y0, ys[1]);
1398 LLVMBuildStore(builder, y1, ys[2]);
1399 LLVMBuildStore(builder, y1, ys[3]);
1400 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1401 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1402 LLVMBuildStore(builder, cube_layer, zs[0]);
1403 LLVMBuildStore(builder, cube_layer, zs[1]);
1404 LLVMBuildStore(builder, cube_layer, zs[2]);
1405 LLVMBuildStore(builder, cube_layer, zs[3]);
1406 } else {
1407 LLVMBuildStore(builder, face, zs[0]);
1408 LLVMBuildStore(builder, face, zs[1]);
1409 LLVMBuildStore(builder, face, zs[2]);
1410 LLVMBuildStore(builder, face, zs[3]);
1411 }
1412
1413 lp_build_endif(&edge_if);
1414
1415 LLVMTypeRef type = ivec_bld->vec_type;
1416 x00 = LLVMBuildLoad2(builder, type, xs[0], "");
1417 x01 = LLVMBuildLoad2(builder, type, xs[1], "");
1418 x10 = LLVMBuildLoad2(builder, type, xs[2], "");
1419 x11 = LLVMBuildLoad2(builder, type, xs[3], "");
1420 y00 = LLVMBuildLoad2(builder, type, ys[0], "");
1421 y01 = LLVMBuildLoad2(builder, type, ys[1], "");
1422 y10 = LLVMBuildLoad2(builder, type, ys[2], "");
1423 y11 = LLVMBuildLoad2(builder, type, ys[3], "");
1424 z00 = LLVMBuildLoad2(builder, type, zs[0], "");
1425 z01 = LLVMBuildLoad2(builder, type, zs[1], "");
1426 z10 = LLVMBuildLoad2(builder, type, zs[2], "");
1427 z11 = LLVMBuildLoad2(builder, type, zs[3], "");
1428 }
1429
1430 if (linear_mask) {
1431 /*
1432 * Whack filter weights into place. Whatever texel had more weight is
1433 * the one which should have been selected by nearest filtering hence
1434 * just use 100% weight for it.
1435 */
1436 struct lp_build_context *c_bld = &bld->coord_bld;
1437 LLVMValueRef w1_mask, w1_weight;
1438 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1439
1440 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1441 /* this select is really just a "and" */
1442 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1443 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1444 if (dims >= 2) {
1445 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1446 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1447 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1448 if (dims == 3) {
1449 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1450 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1451 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1452 }
1453 }
1454 }
1455
1456 /*
1457 * Get texture colors.
1458 */
1459 /* get x0/x1 texels */
1460 lp_build_sample_texel_soa(bld,
1461 width_vec, height_vec, depth_vec,
1462 x00, y00, z00,
1463 row_stride_vec, img_stride_vec,
1464 data_ptr, mipoffsets, ilevel, neighbors[0][0]);
1465 lp_build_sample_texel_soa(bld,
1466 width_vec, height_vec, depth_vec,
1467 x01, y01, z01,
1468 row_stride_vec, img_stride_vec,
1469 data_ptr, mipoffsets, ilevel, neighbors[0][1]);
1470
1471 if (dims == 1) {
1472 assert(!is_gather);
1473 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1474 lp_build_reduce_filter(texel_bld,
1475 bld->static_sampler_state->reduction_mode,
1476 0,
1477 4,
1478 s_fpart,
1479 neighbors[0][0],
1480 neighbors[0][1],
1481 colors_out);
1482 } else {
1483 LLVMValueRef cmpval0, cmpval1;
1484 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1485 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1486 /* simplified lerp, AND mask with weight and add */
1487 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1488 cmpval0, cmpval1);
1489 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1490 }
1491 } else {
1492 /* 2D/3D texture */
1493 struct lp_build_if_state corner_if;
1494 LLVMValueRef colors0[4], colorss[4] = { 0 };
1495
1496 /* get x0/x1 texels at y1 */
1497 lp_build_sample_texel_soa(bld,
1498 width_vec, height_vec, depth_vec,
1499 x10, y10, z10,
1500 row_stride_vec, img_stride_vec,
1501 data_ptr, mipoffsets, ilevel, neighbors[1][0]);
1502 lp_build_sample_texel_soa(bld,
1503 width_vec, height_vec, depth_vec,
1504 x11, y11, z11,
1505 row_stride_vec, img_stride_vec,
1506 data_ptr, mipoffsets, ilevel, neighbors[1][1]);
1507
1508 /*
1509 * To avoid having to duplicate linear_mask / fetch code use
1510 * another branch (with corner condition though edge would work
1511 * as well) here.
1512 */
1513 if (have_corners && accurate_cube_corners &&
1514 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1515 LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
1516 LLVMValueRef have_corner, one_third;
1517
1518 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
1519 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
1520 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
1521 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
1522
1523 have_corner = LLVMBuildLoad2(builder, int1t, have_corners, "");
1524
1525 lp_build_if(&corner_if, bld->gallivm, have_corner);
1526
1527 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
1528 1.0f/3.0f);
1529
1530 /* find corner */
1531 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1532 c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1533 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1534 c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1535 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1536 c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1537 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1538 c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1539
1540 if (!is_gather) {
1541 /*
1542 * we can't use standard 2d lerp as we need per-element weight
1543 * in case of corners, so just calculate bilinear result as
1544 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1545 * (This is actually less work than using 2d lerp, 7 vs. 9
1546 * instructions, however calculating the weights needs another 6,
1547 * so actually probably not slower than 2d lerp only for 4 channels
1548 * as weights only need to be calculated once - of course fixing
1549 * the weights has additional cost.)
1550 */
1551 LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
1552 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1553 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1554 w00 = lp_build_mul(coord_bld, wx0, wy0);
1555 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1556 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1557 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1558
1559 /* find corner weight */
1560 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1561 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1562 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1563 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1564
1565 /*
1566 * add 1/3 of the corner weight to the weight of the 3 other
1567 * samples and null out corner weight.
1568 */
1569 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1570 w00 = lp_build_add(coord_bld, w00, c_weight);
1571 w00 = lp_build_andnot(coord_bld, w00, c00f);
1572 w01 = lp_build_add(coord_bld, w01, c_weight);
1573 w01 = lp_build_andnot(coord_bld, w01, c01f);
1574 w10 = lp_build_add(coord_bld, w10, c_weight);
1575 w10 = lp_build_andnot(coord_bld, w10, c10f);
1576 w11 = lp_build_add(coord_bld, w11, c_weight);
1577 w11 = lp_build_andnot(coord_bld, w11, c11f);
1578
1579 if (bld->static_sampler_state->compare_mode ==
1580 PIPE_TEX_COMPARE_NONE) {
1581 for (unsigned chan = 0; chan < 4; chan++) {
1582 colors0[chan] = lp_build_mul(coord_bld, w00,
1583 neighbors[0][0][chan]);
1584 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1585 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1586 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1587 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1588 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1589 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1590 }
1591 } else {
1592 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1593 cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
1594 neighbors[0][0][0]);
1595 cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
1596 neighbors[0][1][0]);
1597 cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
1598 neighbors[1][0][0]);
1599 cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
1600 neighbors[1][1][0]);
1601 /*
1602 * inputs to interpolation are just masks so just add
1603 * masked weights together
1604 */
1605 cmpval00 = LLVMBuildBitCast(builder, cmpval00,
1606 coord_bld->vec_type, "");
1607 cmpval01 = LLVMBuildBitCast(builder, cmpval01,
1608 coord_bld->vec_type, "");
1609 cmpval10 = LLVMBuildBitCast(builder, cmpval10,
1610 coord_bld->vec_type, "");
1611 cmpval11 = LLVMBuildBitCast(builder, cmpval11,
1612 coord_bld->vec_type, "");
1613 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1614 tmp = lp_build_and(coord_bld, w01, cmpval01);
1615 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1616 tmp = lp_build_and(coord_bld, w10, cmpval10);
1617 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1618 tmp = lp_build_and(coord_bld, w11, cmpval11);
1619 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1620 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1621 }
1622 } else {
1623 /*
1624 * We don't have any weights to adjust, so instead calculate
1625 * the fourth texel as simply the average of the other 3.
1626 * (This would work for non-gather too, however we'd have
1627 * a boatload more of the select stuff due to there being
1628 * 4 times as many colors as weights.)
1629 */
1630 LLVMValueRef col00, col01, col10, col11;
1631 LLVMValueRef colc, colc0, colc1;
1632 col10 = lp_build_swizzle_soa_channel(texel_bld,
1633 neighbors[1][0], chan_swiz);
1634 col11 = lp_build_swizzle_soa_channel(texel_bld,
1635 neighbors[1][1], chan_swiz);
1636 col01 = lp_build_swizzle_soa_channel(texel_bld,
1637 neighbors[0][1], chan_swiz);
1638 col00 = lp_build_swizzle_soa_channel(texel_bld,
1639 neighbors[0][0], chan_swiz);
1640
1641 /*
1642 * The spec says for comparison filtering, the comparison
1643 * must happen before synthesizing the new value.
1644 * This means all gathered values are always 0 or 1,
1645 * except for the non-existing texel, which can be 0,1/3,2/3,1...
1646 * Seems like we'd be allowed to just return 0 or 1 too, so we
1647 * could simplify and pass down the compare mask values to the
1648 * end (using int arithmetic/compare on the mask values to
1649 * construct the fourth texel) and only there convert to floats
1650 * but it's probably not worth it (it might be easier for the cpu
1651 * but not for the code)...
1652 */
1653 if (bld->static_sampler_state->compare_mode !=
1654 PIPE_TEX_COMPARE_NONE) {
1655 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1656 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
1657 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
1658 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
1659 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
1660 col00 = lp_build_select(texel_bld, cmpval00,
1661 texel_bld->one, texel_bld->zero);
1662 col01 = lp_build_select(texel_bld, cmpval01,
1663 texel_bld->one, texel_bld->zero);
1664 col10 = lp_build_select(texel_bld, cmpval10,
1665 texel_bld->one, texel_bld->zero);
1666 col11 = lp_build_select(texel_bld, cmpval11,
1667 texel_bld->one, texel_bld->zero);
1668 }
1669
1670 /*
1671 * Null out corner color.
1672 */
1673 col00 = lp_build_andnot(coord_bld, col00, c00f);
1674 col01 = lp_build_andnot(coord_bld, col01, c01f);
1675 col10 = lp_build_andnot(coord_bld, col10, c10f);
1676 col11 = lp_build_andnot(coord_bld, col11, c11f);
1677
1678 /*
1679 * New corner texel color is all colors added / 3.
1680 */
1681 colc0 = lp_build_add(coord_bld, col00, col01);
1682 colc1 = lp_build_add(coord_bld, col10, col11);
1683 colc = lp_build_add(coord_bld, colc0, colc1);
1684 colc = lp_build_mul(coord_bld, one_third, colc);
1685
1686 /*
1687 * Replace the corner texel color with the new value.
1688 */
1689 col00 = lp_build_select(coord_bld, c00, colc, col00);
1690 col01 = lp_build_select(coord_bld, c01, colc, col01);
1691 col10 = lp_build_select(coord_bld, c10, colc, col10);
1692 col11 = lp_build_select(coord_bld, c11, colc, col11);
1693
1694 colors0[0] = col10;
1695 colors0[1] = col11;
1696 colors0[2] = col01;
1697 colors0[3] = col00;
1698 }
1699
1700 LLVMBuildStore(builder, colors0[0], colorss[0]);
1701 LLVMBuildStore(builder, colors0[1], colorss[1]);
1702 LLVMBuildStore(builder, colors0[2], colorss[2]);
1703 LLVMBuildStore(builder, colors0[3], colorss[3]);
1704
1705 lp_build_else(&corner_if);
1706 }
1707
1708 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1709 if (is_gather) {
1710 /*
1711 * Just assign the red channel (no component selection yet).
1712 * This is a bit hackish, we usually do the swizzle at the
1713 * end of sampling (much less values to swizzle), but this
1714 * obviously cannot work when using gather.
1715 */
1716 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1717 neighbors[1][0],
1718 chan_swiz);
1719 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1720 neighbors[1][1],
1721 chan_swiz);
1722 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1723 neighbors[0][1],
1724 chan_swiz);
1725 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1726 neighbors[0][0],
1727 chan_swiz);
1728 } else {
1729 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1730 lp_build_reduce_filter_2d(texel_bld,
1731 bld->static_sampler_state->reduction_mode,
1732 0,
1733 4,
1734 s_fpart,
1735 t_fpart,
1736 neighbors[0][0],
1737 neighbors[0][1],
1738 neighbors[1][0],
1739 neighbors[1][1],
1740 colors0);
1741 }
1742 } else {
1743 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1744 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1745 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1746 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1747 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1748
1749 if (is_gather) {
1750 /* more hacks for swizzling, should be X, ONE or ZERO... */
1751 colors0[0] = lp_build_select(texel_bld, cmpval10,
1752 texel_bld->one, texel_bld->zero);
1753 colors0[1] = lp_build_select(texel_bld, cmpval11,
1754 texel_bld->one, texel_bld->zero);
1755 colors0[2] = lp_build_select(texel_bld, cmpval01,
1756 texel_bld->one, texel_bld->zero);
1757 colors0[3] = lp_build_select(texel_bld, cmpval00,
1758 texel_bld->one, texel_bld->zero);
1759 } else {
1760 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1761 cmpval00, cmpval01, cmpval10, cmpval11);
1762 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1763 }
1764 }
1765
1766 if (have_corners && accurate_cube_corners &&
1767 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1768 LLVMBuildStore(builder, colors0[0], colorss[0]);
1769 LLVMBuildStore(builder, colors0[1], colorss[1]);
1770 LLVMBuildStore(builder, colors0[2], colorss[2]);
1771 LLVMBuildStore(builder, colors0[3], colorss[3]);
1772
1773 lp_build_endif(&corner_if);
1774
1775 colors0[0] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[0], "");
1776 colors0[1] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[1], "");
1777 colors0[2] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[2], "");
1778 colors0[3] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[3], "");
1779 }
1780
1781 if (dims == 3) {
1782 LLVMValueRef neighbors1[2][2][4];
1783 LLVMValueRef colors1[4];
1784
1785 assert(!is_gather);
1786
1787 /* get x0/x1/y0/y1 texels at z1 */
1788 lp_build_sample_texel_soa(bld,
1789 width_vec, height_vec, depth_vec,
1790 x00, y00, z1,
1791 row_stride_vec, img_stride_vec,
1792 data_ptr, mipoffsets, ilevel, neighbors1[0][0]);
1793 lp_build_sample_texel_soa(bld,
1794 width_vec, height_vec, depth_vec,
1795 x01, y01, z1,
1796 row_stride_vec, img_stride_vec,
1797 data_ptr, mipoffsets, ilevel, neighbors1[0][1]);
1798 lp_build_sample_texel_soa(bld,
1799 width_vec, height_vec, depth_vec,
1800 x10, y10, z1,
1801 row_stride_vec, img_stride_vec,
1802 data_ptr, mipoffsets, ilevel, neighbors1[1][0]);
1803 lp_build_sample_texel_soa(bld,
1804 width_vec, height_vec, depth_vec,
1805 x11, y11, z1,
1806 row_stride_vec, img_stride_vec,
1807 data_ptr, mipoffsets, ilevel, neighbors1[1][1]);
1808
1809 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1810 /* Bilinear interpolate the four samples from the second Z slice */
1811 lp_build_reduce_filter_2d(texel_bld,
1812 bld->static_sampler_state->reduction_mode,
1813 0,
1814 4,
1815 s_fpart,
1816 t_fpart,
1817 neighbors1[0][0],
1818 neighbors1[0][1],
1819 neighbors1[1][0],
1820 neighbors1[1][1],
1821 colors1);
1822
1823 /* Linearly interpolate the two samples from the two 3D slices */
1824 lp_build_reduce_filter(texel_bld,
1825 bld->static_sampler_state->reduction_mode,
1826 0,
1827 4,
1828 r_fpart,
1829 colors0,
1830 colors1,
1831 colors_out);
1832 } else {
1833 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1834 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1835 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1836 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1837 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1838 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1839 cmpval00, cmpval01, cmpval10, cmpval11);
1840 /* Linearly interpolate the two samples from the two 3D slices */
1841 colors_out[0] = lp_build_lerp(texel_bld,
1842 r_fpart,
1843 colors0[0], colors1[0],
1844 0);
1845 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1846 }
1847 } else {
1848 /* 2D tex */
1849 for (unsigned chan = 0; chan < 4; chan++) {
1850 colors_out[chan] = colors0[chan];
1851 }
1852 }
1853 }
1854 if (is_gather) {
1855 /*
1856 * For gather, we can't do our usual channel swizzling done later,
1857 * so do it here. It only really matters for 0/1 swizzles in case
1858 * of comparison filtering, since in this case the results would be
1859 * wrong, without comparison it should all work out alright but it
1860 * can't hurt to do that here, since it will instantly drop all
1861 * calculations above, though it's a rather stupid idea to do
1862 * gather on a channel which will always return 0 or 1 in any case...
1863 */
1864 if (chan_swiz == PIPE_SWIZZLE_1) {
1865 for (unsigned chan = 0; chan < 4; chan++) {
1866 colors_out[chan] = texel_bld->one;
1867 }
1868 } else if (chan_swiz == PIPE_SWIZZLE_0) {
1869 for (unsigned chan = 0; chan < 4; chan++) {
1870 colors_out[chan] = texel_bld->zero;
1871 }
1872 }
1873 }
1874 }
1875
1876
1877 /**
1878 * Sample the texture/mipmap using given image filter and mip filter.
1879 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1880 * from (vectors or scalars).
1881 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1882 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       bool is_gather,
                       const LLVMValueRef *coords,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0[4], colors1[4];

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      /* Single mip per fetch: resolve the level's base pointer directly. */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld, size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, mipoff0, ilevel0, coords, offsets,
                                    colors0);
   } else {
      /* Only NEAREST and LINEAR image filters exist; gather implies LINEAR. */
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld, is_gather, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, ilevel0, coords, offsets,
                                   colors0);
   }

   /* Store the first level's colors in the output variables
    * (colors_out[chan] are pointers/allocas; the no-lerp path relies on
    * these unconditional stores for its result).
    */
   for (unsigned chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         /* Scalar lod: a single unordered-greater-than compare suffices.
          * UGT so a NaN lod_fpart still takes the lerp path.
          */
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
                                   lod_fpart, bld->lodf_bld.zero,
                                   "need_lerp");
      } else {
         /*
          * We'll do mip filtering if any of the quads (or individual
          * pixel in case of per-pixel lod) need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it (if there's one lod per quad).
          */
         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                      PIPE_FUNC_GREATER,
                                      lod_fpart, bld->lodf_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
         lp_build_name(need_lerp, "need_lerp");
      }

      /* Runtime branch: only fetch/filter the second level when required. */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         } else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld, size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, mipoff1, ilevel1, coords, offsets,
                                          colors1);
         } else {
            /* Second level is never gathered: gather reads one level only. */
            lp_build_sample_image_linear(bld, false, size1, NULL,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, mipoff1, ilevel1, coords, offsets,
                                         colors1);
         }

         /* interpolate samples from the two mipmap levels */

         /* Widen per-quad lod factors to per-texel width if they differ. */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (unsigned chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            /* Overwrite the level-0 result stored earlier. */
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
2010
2011
2012 /**
2013 * Sample the texture/mipmap using given mip filter, and using
2014 * both nearest and linear filtering at the same time depending
2015 * on linear_mask.
2016 * lod can be per quad but linear_mask is always per pixel.
2017 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
2018 * from (vectors or scalars).
2019 * If we're using nearest miplevel sampling the '1' values will be null/unused.
2020 */
static void
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
                            LLVMValueRef linear_mask,
                            unsigned mip_filter,
                            const LLVMValueRef *coords,
                            const LLVMValueRef *offsets,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef lod_fpart,
                            LLVMValueRef lod_positive,
                            LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0[4], colors1[4];

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* linear_mask selects per pixel between nearest and linear image
    * filtering inside this single call.
    */
   lp_build_sample_image_linear(bld, false, size0, linear_mask,
                                row_stride0_vec, img_stride0_vec,
                                data_ptr0, mipoff0, ilevel0, coords, offsets,
                                colors0);

   /* Store the first level's colors in the output variables
    * (colors_out[chan] are pointers/allocas; the no-lerp path relies on
    * these unconditional stores for its result).
    */
   for (unsigned chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /*
       * We'll do mip filtering if any of the quads (or individual
       * pixel in case of per-pixel lod) need it.
       * Note using lod_positive here not lod_fpart since it may be the same
       * condition as that used in the outer "if" in the caller hence llvm
       * should be able to merge the branches in this case.
       */
      need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
      lp_build_name(need_lerp, "need_lerp");

      /* Runtime branch: only fetch/filter the second level when required. */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         } else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         lp_build_sample_image_linear(bld, false, size1, linear_mask,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, mipoff1, ilevel1, coords, offsets,
                                      colors1);

         /* interpolate samples from the two mipmap levels */

         /* Widen per-quad lod factors to per-texel width if they differ. */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (unsigned chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            /* Overwrite the level-0 result stored earlier. */
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
2125
2126
2127 /**
2128 * Build (per-coord) layer value.
2129 * Either clamp layer to valid values or fill in optional out_of_bounds
2130 * value and just return value unclamped.
2131 */
2132 static LLVMValueRef
lp_build_layer_coord(struct lp_build_sample_context * bld,unsigned texture_unit,bool is_cube_array,LLVMValueRef layer,LLVMValueRef * out_of_bounds)2133 lp_build_layer_coord(struct lp_build_sample_context *bld,
2134 unsigned texture_unit,
2135 bool is_cube_array,
2136 LLVMValueRef layer,
2137 LLVMValueRef *out_of_bounds)
2138 {
2139 LLVMValueRef num_layers;
2140 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2141
2142 num_layers = bld->dynamic_state->depth(bld->gallivm, bld->resources_type,
2143 bld->resources_ptr, texture_unit, NULL);
2144 num_layers = LLVMBuildZExt(bld->gallivm->builder, num_layers,
2145 bld->int_bld.elem_type, "");
2146 if (out_of_bounds) {
2147 LLVMValueRef out1, out;
2148 assert(!is_cube_array);
2149 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
2150 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
2151 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
2152 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
2153 return layer;
2154 } else {
2155 LLVMValueRef maxlayer;
2156 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
2157 bld->int_bld.one;
2158 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
2159 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
2160 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
2161 }
2162 }
2163
2164 static void
lp_build_sample_ms_offset(struct lp_build_context * int_coord_bld,LLVMValueRef ms_index,LLVMValueRef num_samples,LLVMValueRef sample_stride,LLVMValueRef * offset,LLVMValueRef * out_of_bounds)2165 lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld,
2166 LLVMValueRef ms_index,
2167 LLVMValueRef num_samples,
2168 LLVMValueRef sample_stride,
2169 LLVMValueRef *offset,
2170 LLVMValueRef *out_of_bounds)
2171 {
2172 LLVMValueRef out1;
2173 num_samples = lp_build_broadcast_scalar(int_coord_bld, num_samples);
2174 sample_stride = lp_build_broadcast_scalar(int_coord_bld, sample_stride);
2175 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
2176 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2177 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, num_samples);
2178 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2179 LLVMValueRef sample_offset = lp_build_mul(int_coord_bld,
2180 sample_stride, ms_index);
2181 *offset = lp_build_add(int_coord_bld, *offset, sample_offset);
2182 }
2183
2184
2185 #define WEIGHT_LUT_SIZE 1024
2186
2187
/**
 * Anisotropic filtering: average several linear samples taken along the
 * major axis of anisotropy within mip level ilevel0.
 * colors_out[chan] are pointers/allocas the result is stored into.
 * NOTE(review): ilevel1 and lod_fpart are currently unused here — presumably
 * reserved for mip-linear aniso; confirm against callers.
 */
static void
lp_build_sample_aniso(struct lp_build_sample_context *bld,
                      const LLVMValueRef *coords,
                      const LLVMValueRef *offsets,
                      LLVMValueRef ilevel0,
                      LLVMValueRef ilevel1,
                      LLVMValueRef lod_fpart,
                      LLVMValueRef *colors_out)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context uint_coord_bld;

   LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
   LLVMValueRef data_ptr0, mipoff0 = NULL;

   lp_build_context_init(&uint_coord_bld, gallivm, lp_uint_type(int_coord_bld->type));

   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   LLVMValueRef float_size_lvl = lp_build_int_to_float(&bld->float_size_bld, size0);

   /* extract width and height into vectors for use later */
   static const unsigned char swizzle15[] = { /* broadcast height (elems 1/5 of the size vec) */
      1, 1, 1, 1, 5, 5, 5, 5
   };
   static const unsigned char swizzle04[] = { /* broadcast width (elems 0/4 of the size vec) */
      0, 0, 0, 0, 4, 4, 4, 4
   };
   LLVMValueRef width_dim, height_dim;

   width_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle04,
                                      bld->float_size_bld.type.length,
                                      bld->coord_bld.type.length);
   height_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle15,
                                       bld->float_size_bld.type.length,
                                       bld->coord_bld.type.length);

   /* Gradient of the u coordinate in screen space. */
   LLVMValueRef dudx = lp_build_ddx(coord_bld, coords[0]);
   LLVMValueRef dudy = lp_build_ddy(coord_bld, coords[0]);

   /* Gradient of the v coordinate in screen space. */
   LLVMValueRef dvdx = lp_build_ddx(coord_bld, coords[1]);
   LLVMValueRef dvdy = lp_build_ddy(coord_bld, coords[1]);

   /* Per-axis rate of change in texel space: max |gradient| scaled by dim. */
   LLVMValueRef rho_x = lp_build_mul(coord_bld, lp_build_max(coord_bld, lp_build_abs(coord_bld, dudx), lp_build_abs(coord_bld, dvdx)), width_dim);
   LLVMValueRef rho_y = lp_build_mul(coord_bld, lp_build_max(coord_bld, lp_build_abs(coord_bld, dudy), lp_build_abs(coord_bld, dvdy)), height_dim);

   /* Number of samples used for averaging. */
   LLVMValueRef N = lp_build_iceil(coord_bld, lp_build_max(coord_bld, rho_x, rho_y));

   /* Use uint min so in case of NaNs/overflows loop iterations are clamped to max aniso */
   N = lp_build_min(&uint_coord_bld, N, lp_build_const_int_vec(gallivm, int_coord_bld->type, bld->static_sampler_state->aniso));
   /* Horizontal max over the lanes: the shared loop runs wave_max_N times. */
   LLVMValueRef wave_max_N = NULL;
   for (uint32_t i = 0; i < coord_bld->type.length; i++) {
      LLVMValueRef invocation_N = LLVMBuildExtractElement(builder, N, lp_build_const_int32(gallivm, i), "");
      if (wave_max_N)
         wave_max_N = lp_build_max(&bld->int_bld, wave_max_N, invocation_N);
      else
         wave_max_N = invocation_N;
   }

   /* Step along whichever screen axis has the larger texel footprint. */
   LLVMValueRef sample_along_x_axis = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, rho_x, rho_y);
   LLVMValueRef dudk = lp_build_select(coord_bld, sample_along_x_axis, dudx, dudy);
   LLVMValueRef dvdk = lp_build_select(coord_bld, sample_along_x_axis, dvdx, dvdy);

   /* Per-channel running sums, zero-initialized by lp_build_alloca. */
   LLVMValueRef accumulator[4] = {
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "r"),
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "g"),
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "b"),
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "a"),
   };

   LLVMValueRef float_N = lp_build_int_to_float(coord_bld, N);
   LLVMValueRef rcp_N = lp_build_rcp(coord_bld, float_N);
   /* base_k centers the k samples around the pixel: k' = (k - (N-1)/2) / N. */
   LLVMValueRef base_k = LLVMBuildFMul(builder, float_N, lp_build_const_vec(gallivm, coord_bld->type, -0.5), "");
   base_k = lp_build_add(coord_bld, base_k, lp_build_const_vec(gallivm, coord_bld->type, 0.5));

   struct lp_build_for_loop_state loop_state;
   lp_build_for_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0),
                           LLVMIntULT, wave_max_N, lp_build_const_int32(gallivm, 1));
   {
      LLVMValueRef k = loop_state.counter;
      k = lp_build_broadcast_scalar(int_coord_bld, k);

      /* Normalized offset along the anisotropy axis for this iteration. */
      LLVMValueRef float_k = lp_build_int_to_float(coord_bld, k);
      float_k = lp_build_add(coord_bld, float_k, base_k);
      float_k = lp_build_mul(coord_bld, float_k, rcp_N);

      LLVMValueRef u_offset = lp_build_mul(coord_bld, float_k, dudk);
      LLVMValueRef v_offset = lp_build_mul(coord_bld, float_k, dvdk);

      LLVMValueRef sample_coords[5] = {
         lp_build_add(coord_bld, coords[0], u_offset),
         lp_build_add(coord_bld, coords[1], v_offset),
      };
      for (uint32_t i = 2; i < ARRAY_SIZE(sample_coords); i++)
         sample_coords[i] = coords[i];


      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
          bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* Make sure the coordinates stay in bounds for PIPE_TEXTURE_CUBE loads since
          * lp_build_sample_image_linear uses less clamping for them.
          */
         sample_coords[0] = lp_build_max(coord_bld, sample_coords[0], bld->coord_bld.zero);
         sample_coords[0] = lp_build_min(coord_bld, sample_coords[0], bld->coord_bld.one);
         sample_coords[1] = lp_build_max(coord_bld, sample_coords[1], bld->coord_bld.zero);
         sample_coords[1] = lp_build_min(coord_bld, sample_coords[1], bld->coord_bld.one);
      }

      LLVMValueRef sample_color[4];
      lp_build_sample_image_linear(bld, false, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, ilevel0, sample_coords, offsets,
                                   sample_color);

      /* Lanes whose own N is already exhausted keep their old sum. */
      LLVMValueRef oob = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, k, N);

      for (uint32_t c = 0; c < 4; c++) {
         LLVMValueRef tmp = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, accumulator[c], "");
         tmp = lp_build_select(&bld->texel_bld, oob, tmp, LLVMBuildFAdd(builder, tmp, sample_color[c], ""));
         LLVMBuildStore(builder, tmp, accumulator[c]);
      }
   }
   lp_build_for_loop_end(&loop_state);

   /* Final average: sum / N, written through the output pointers. */
   for (uint32_t c = 0; c < 4; c++) {
      LLVMValueRef sum = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, accumulator[c], "");
      LLVMBuildStore(builder, lp_build_mul(&bld->texel_bld, sum, rcp_N), colors_out[c]);
   }
}
2332
2333
2334 /**
2335 * Calculate cube face, lod, mip levels.
2336 */
2337 static void
lp_build_sample_common(struct lp_build_sample_context * bld,bool is_lodq,unsigned texture_index,unsigned sampler_index,LLVMValueRef * coords,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,LLVMValueRef * lod_pos_or_zero,LLVMValueRef * lod,LLVMValueRef * lod_fpart,LLVMValueRef * ilevel0,LLVMValueRef * ilevel1)2338 lp_build_sample_common(struct lp_build_sample_context *bld,
2339 bool is_lodq,
2340 unsigned texture_index,
2341 unsigned sampler_index,
2342 LLVMValueRef *coords,
2343 const struct lp_derivatives *derivs, /* optional */
2344 LLVMValueRef lod_bias, /* optional */
2345 LLVMValueRef explicit_lod, /* optional */
2346 LLVMValueRef *lod_pos_or_zero,
2347 LLVMValueRef *lod,
2348 LLVMValueRef *lod_fpart,
2349 LLVMValueRef *ilevel0,
2350 LLVMValueRef *ilevel1)
2351 {
2352 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
2353 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
2354 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
2355 const unsigned target = bld->static_texture_state->target;
2356 const bool aniso = bld->static_sampler_state->aniso;
2357 LLVMValueRef first_level, last_level;
2358 LLVMValueRef lod_ipart = NULL;
2359 struct lp_derivatives cube_derivs;
2360
2361 /*
2362 printf("%s mip %d min %d mag %d\n", __func__,
2363 mip_filter, min_filter, mag_filter);
2364 */
2365
2366 first_level = get_first_level(bld->gallivm,
2367 bld->resources_type,
2368 bld->resources_ptr,
2369 texture_index, NULL,
2370 bld->static_texture_state,
2371 bld->dynamic_state);
2372 last_level = get_last_level(bld->gallivm,
2373 bld->resources_type,
2374 bld->resources_ptr,
2375 texture_index, NULL,
2376 bld->static_texture_state,
2377 bld->dynamic_state);
2378
2379 /*
2380 * Choose cube face, recompute texcoords for the chosen face and
2381 * calculate / transform derivatives.
2382 */
2383 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
2384 bool need_derivs = ((min_filter != mag_filter ||
2385 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
2386 !bld->static_sampler_state->min_max_lod_equal &&
2387 !explicit_lod);
2388 lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs);
2389 if (need_derivs)
2390 derivs = &cube_derivs;
2391
2392 if (target == PIPE_TEXTURE_CUBE_ARRAY && !is_lodq) {
2393 /* calculate cube layer coord now */
2394 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
2395 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
2396 layer = lp_build_mul(&bld->int_coord_bld, layer, six);
2397 coords[3] = lp_build_layer_coord(bld, texture_index, true, layer, NULL);
2398 /* because of seamless filtering can't add it to face (coords[2]) here. */
2399 }
2400 } else if ((target == PIPE_TEXTURE_1D_ARRAY ||
2401 target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) {
2402 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
2403 coords[2] = lp_build_layer_coord(bld, texture_index, false, coords[2], NULL);
2404 }
2405
2406 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
2407 /*
2408 * Clamp p coords to [0,1] for fixed function depth texture format here.
2409 * Technically this is not entirely correct for unorm depth as the ref
2410 * value should be converted to the depth format (quantization!) and
2411 * comparison then done in texture format. This would actually help
2412 * performance (since only need to do it once and could save the
2413 * per-sample conversion of texels to floats instead), but it would need
2414 * more messy code (would need to push at least some bits down to actual
2415 * fetch so conversion could be skipped, and would have ugly interaction
2416 * with border color, would need to convert border color to that format
2417 * too or do some other tricks to make it work).
2418 */
2419 const struct util_format_description *format_desc = bld->format_desc;
2420 /* not entirely sure we couldn't end up with non-valid swizzle here */
2421 const enum util_format_type chan_type =
2422 format_desc->swizzle[0] <= PIPE_SWIZZLE_W
2423 ? format_desc->channel[format_desc->swizzle[0]].type
2424 : UTIL_FORMAT_TYPE_FLOAT;
2425 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2426 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2427 bld->coord_bld.zero, bld->coord_bld.one);
2428 }
2429 }
2430
2431 /*
2432 * Compute the level of detail (float).
2433 */
2434 if (min_filter != mag_filter ||
2435 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2436 /* Need to compute lod either to choose mipmap levels or to
2437 * distinguish between minification/magnification with one mipmap level.
2438 */
2439 LLVMValueRef first_level_vec =
2440 lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level);
2441 lp_build_lod_selector(bld, is_lodq, sampler_index,
2442 first_level_vec,
2443 coords[0], coords[1], coords[2],
2444 derivs, lod_bias, explicit_lod,
2445 mip_filter, lod,
2446 &lod_ipart, lod_fpart, lod_pos_or_zero);
2447 if (is_lodq) {
2448 last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2449 last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2450 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2451
2452 switch (mip_filter) {
2453 case PIPE_TEX_MIPFILTER_NONE:
2454 *lod_fpart = bld->lodf_bld.zero;
2455 break;
2456 case PIPE_TEX_MIPFILTER_NEAREST:
2457 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2458 FALLTHROUGH;
2459 case PIPE_TEX_MIPFILTER_LINEAR:
2460 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2461 bld->lodf_bld.zero, last_level);
2462 break;
2463 }
2464 return;
2465 }
2466 } else {
2467 lod_ipart = bld->lodi_bld.zero;
2468 *lod_pos_or_zero = bld->lodi_bld.zero;
2469 }
2470
2471 if ((bld->num_lods != bld->num_mips || bld->num_lods == 1) &&
2472 bld->lodi_bld.type.length != 1) {
2473 /* only makes sense if there's just a single mip level */
2474 assert(bld->num_mips == 1);
2475 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2476 }
2477
2478 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2479 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
2480
2481 /*
2482 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2483 */
2484
2485 if (aniso) {
2486 lp_build_nearest_mip_level(bld,
2487 first_level, last_level,
2488 lod_ipart, ilevel0, NULL);
2489 return;
2490 }
2491
2492 switch (mip_filter) {
2493 default:
2494 unreachable("Bad mip_filter value in lp_build_sample_soa()");
2495 case PIPE_TEX_MIPFILTER_NONE:
2496 /* always use mip level 0 */
2497 *ilevel0 = first_level;
2498 break;
2499 case PIPE_TEX_MIPFILTER_NEAREST:
2500 assert(lod_ipart);
2501 lp_build_nearest_mip_level(bld,
2502 first_level, last_level,
2503 lod_ipart, ilevel0, NULL);
2504 break;
2505 case PIPE_TEX_MIPFILTER_LINEAR:
2506 assert(lod_ipart);
2507 assert(*lod_fpart);
2508
2509 lp_build_linear_mip_levels(bld, texture_index,
2510 first_level, last_level,
2511 lod_ipart, lod_fpart,
2512 ilevel0, ilevel1);
2513 break;
2514 }
2515 }
2516
2517
/**
 * Clamp the sampler's border color to the representable range of the
 * bound texture format and stash the result (a 4-wide float or int
 * vector, one value per channel) in bld->border_color_clamped.
 *
 * GL mandates border values be clamped to the representable range of
 * the format (norm, pure int, and some small-float formats); d3d10
 * never reaches the uint/sint cases since it only allows those with ld.
 *
 * \param sampler_unit  index used to fetch the border color from the
 *                      dynamic sampler state
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(gallivm,
                                       bld->resources_type,
                                       bld->resources_ptr, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL expects also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */

   /* Clamp on a 4-wide vector (one lane per channel), not the full SoA width. */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   LLVMTypeRef border_color_type = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
   border_color_ptr = lp_build_array_get_ptr2(gallivm, border_color_type, border_color_ptr,
                                              lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad2(builder, vec4_bld.vec_type, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   LLVMSetAlignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure
    * out clamping necessary for each channel, simply use the first channel,
    * and treat mixed signed/unsigned normalized formats specially. (Mixed
    * non-normalized, which wouldn't work at all here, do not exist for a good
    * reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      } else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm: clamp to [-1, 1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            } else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm: clamp to [0, 1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            } else if (chan_pure) {
               /*
                * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
                * we use Z32_FLOAT_S8X24 to imply sampling depth component and
                * ignoring stencil, which will blow up here if we try to do a
                * uint clamp in a float texel build... And even if we had
                * that format, mesa st also thinks using z24s8 means depth
                * sampling ignoring stencil.
                */

               /*
                * Border color was stored as uint, hence never need min clamp,
                * and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
         {
            /* 10-bit rgb channels, 2-bit alpha: per-channel uint maxima */
            unsigned max10 = (1 << 10) - 1;
            max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                           max10, (1 << 2) - 1, NULL);
         }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         /* signed-norm rgb, unsigned-norm alpha */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         /* signed-norm rg, unsigned-norm b (alpha unused) */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   } else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
                 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
                 format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
         case PIPE_FORMAT_BPTC_RGBA_UNORM:
         case PIPE_FORMAT_BPTC_SRGBA:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_BPTC_RGB_FLOAT:
            /* not sure if we should clamp to max half float? */
            break;
         case PIPE_FORMAT_BPTC_RGB_UFLOAT:
            /* unsigned float: only clamp away negatives */
            min_clamp = vec4_bld.zero;
            break;
         default:
            assert(0);
            break;
         }
      } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         /*
          * all others from subsampled/other group, though we don't care
          * about yuv (and should not have any from zs here)
          */
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_G8B8_G8R8_UNORM:
         case PIPE_FORMAT_B8G8_R8G8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* Apply whatever clamps the format analysis above selected (if any). */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}
2741
2742
/**
 * General texture sampling codegen.
 * This function handles texture sampling for all texture targets (1D,
 * 2D, 3D, cube) and all filtering modes.
 *
 * \param sampler_unit  sampler index (border color fetch, value naming)
 * \param is_gather     true for gather4-style sampling
 * \param coords        texture coordinates
 * \param offsets       texel offsets, or NULLs
 * \param lod_positive  per-lod "lod > 0" mask selecting minification
 *                      (true) vs. magnification (false)
 * \param lod_fpart     fractional lod part for linear mip filtering
 * \param ilevel0       first integer mip level to fetch from
 * \param ilevel1       second mip level (linear mip filter only)
 * \param colors_out    receives the resulting r,g,b,a texel vectors
 */
static void
lp_build_sample_general(struct lp_build_sample_context *bld,
                        unsigned sampler_unit,
                        bool is_gather,
                        const LLVMValueRef *coords,
                        const LLVMValueRef *offsets,
                        LLVMValueRef lod_positive,
                        LLVMValueRef lod_fpart,
                        LLVMValueRef ilevel0,
                        LLVMValueRef ilevel1,
                        LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
   const unsigned mip_filter = sampler_state->min_mip_filter;
   const unsigned min_filter = sampler_state->min_img_filter;
   const unsigned mag_filter = sampler_state->mag_img_filter;
   LLVMValueRef texels[4];
   unsigned chan;

   /* if we need border color, (potentially) clamp it now */
   if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
                                              min_filter,
                                              mag_filter) ||
       (bld->dims > 1 &&
        lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
                                               min_filter,
                                               mag_filter)) ||
       (bld->dims > 2 &&
        lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
                                               min_filter,
                                               mag_filter))) {
      lp_build_clamp_border_color(bld, sampler_unit);
   }


   /*
    * Get/interpolate texture colors.
    */

   /*
    * Results go through allocas so the conditionally-emitted filter
    * paths below can all store into the same variables.
    */
   for (chan = 0; chan < 4; ++chan) {
      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
   }

   if (sampler_state->aniso) {
      lp_build_sample_aniso(bld, coords, offsets, ilevel0,
                            ilevel1, lod_fpart, texels);
   } else if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             is_gather,
                             coords, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             texels);
   } else {
      /*
       * Could also get rid of the if-logic and always use mipmap_both, both
       * for the single lod and multi-lod case if nothing really uses this.
       */
      if (bld->num_lods == 1) {
         /* Emit conditional to choose min image filter or mag image filter
          * depending on the lod being > 0 or <= 0, respectively.
          */
         struct lp_build_if_state if_ctx;

         /* single lod: the mask is a scalar condition, truncate to i1 */
         lod_positive = LLVMBuildTrunc(builder, lod_positive,
                                       LLVMInt1TypeInContext(bld->gallivm->context),
                                       "lod_pos");

         lp_build_if(&if_ctx, bld->gallivm, lod_positive);
         {
            /* Use the minification filter */
            lp_build_sample_mipmap(bld, min_filter, mip_filter, false,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_else(&if_ctx);
         {
            /* Use the magnification filter */
            lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                   false,
                                   coords, offsets,
                                   ilevel0, NULL, NULL,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      } else {
         /* multiple lods: different pixels may need different filters */
         LLVMValueRef need_linear, linear_mask;
         unsigned mip_filter_for_nearest;
         struct lp_build_if_state if_ctx;

         /* exactly one of min/mag differs and one of them is linear */
         if (min_filter == PIPE_TEX_FILTER_LINEAR) {
            linear_mask = lod_positive;
            mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
         } else {
            linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
            mip_filter_for_nearest = mip_filter;
         }
         need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
                                               linear_mask);
         lp_build_name(need_linear, "need_linear");

         /* widen the per-lod mask to per-element width if they differ */
         if (bld->num_lods != bld->coord_type.length) {
            linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                bld->lodi_type,
                                                                bld->int_coord_type,
                                                                linear_mask);
         }

         lp_build_if(&if_ctx, bld->gallivm, need_linear);
         {
            /*
             * Do sampling with both filters simultaneously. This means using
             * a linear filter and doing some tricks (with weights) for the
             * pixels which need nearest filter.
             * Note that it's probably rare some pixels need nearest and some
             * linear filter but the fixups required for the nearest pixels
             * aren't all that complicated so just always run a combined path
             * if at least some pixels require linear.
             */
            lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
                                        coords, offsets,
                                        ilevel0, ilevel1,
                                        lod_fpart, lod_positive,
                                        texels);
         }
         lp_build_else(&if_ctx);
         {
            /*
             * All pixels require just nearest filtering, which is way
             * cheaper than linear, hence do a separate path for that.
             */
            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
                                   mip_filter_for_nearest, false,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
   }

   /* reload the final values from the allocas */
   for (chan = 0; chan < 4; ++chan) {
      colors_out[chan] = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, texels[chan], "");
      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
   }
}
2897
2898
/**
 * Texel fetch function. In contrast to general sampling there is no
 * filtering, no coord minification, lod (if any) is always explicit uint,
 * coords are uints (in terms of texel units) directly to be applied to the
 * selected mip level (after adding texel offsets). This function handles
 * texel fetch for all targets where texel fetch is supported (no cube maps,
 * but 1d, 2d, 3d are supported, arrays and buffers should be too).
 *
 * \param texture_unit  texture index for dynamic state lookups
 * \param ms_index      sample index for multisampled fetch (fetch_ms)
 * \param coords        integer texel coordinates (x, y, z/layer)
 * \param explicit_lod  explicit integer lod, or NULL for level 0
 * \param offsets       texel offsets, or NULLs
 * \param colors_out    receives the fetched r,g,b,a texel vectors
 *                      (zero for out-of-bounds texels, per d3d10 /
 *                      ARB_robust_buffer_access_behavior)
 */
static void
lp_build_fetch_texel(struct lp_build_sample_context *bld,
                     unsigned texture_unit,
                     LLVMValueRef ms_index,
                     const LLVMValueRef *coords,
                     LLVMValueRef explicit_lod,
                     const LLVMValueRef *offsets,
                     LLVMValueRef *colors_out)
{
   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   unsigned dims = bld->dims, chan;
   unsigned target = bld->static_texture_state->target;
   bool out_of_bound_ret_zero = true;
   LLVMValueRef size, ilevel;
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
   LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
   LLVMValueRef width, height, depth, i, j;
   LLVMValueRef offset, out_of_bounds, out1;

   LLVMValueRef first_level;

   first_level = get_first_level(bld->gallivm,
                                 bld->resources_type,
                                 bld->resources_ptr,
                                 texture_unit, NULL,
                                 bld->static_texture_state,
                                 bld->dynamic_state);
   /* out_of_bounds is accumulated (OR-ed) as an all-ones/-zeros mask */
   out_of_bounds = int_coord_bld->zero;

   if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
      if (bld->num_mips != int_coord_bld->type.length) {
         /* fewer mips than elements: pack per-quad lods down */
         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                            perquadi_bld->type, explicit_lod, 0);
      } else {
         ilevel = explicit_lod;
      }

      LLVMValueRef last_level;

      last_level = get_last_level(bld->gallivm,
                                  bld->resources_type,
                                  bld->resources_ptr,
                                  texture_unit, NULL,
                                  bld->static_texture_state,
                                  bld->dynamic_state);

      /* clamp lod to [first_level, last_level], flagging out-of-range lods */
      first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
      last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
      lp_build_nearest_mip_level(bld,
                                 first_level, last_level,
                                 ilevel, &ilevel,
                                 out_of_bound_ret_zero ? &out_of_bounds : NULL);
   } else {
      assert(bld->num_mips == 1);
      if (bld->static_texture_state->target != PIPE_BUFFER) {
         ilevel = first_level;
      } else {
         ilevel = lp_build_const_int32(bld->gallivm, 0);
      }
   }
   lp_build_mipmap_level_sizes(bld, ilevel,
                               &size,
                               &row_stride_vec, &img_stride_vec);
   lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
                                size, &width, &height, &depth);

   if (target == PIPE_TEXTURE_1D_ARRAY ||
       target == PIPE_TEXTURE_2D_ARRAY) {
      /* clamp the layer coord, tracking out-of-range layers if needed */
      if (out_of_bound_ret_zero) {
         z = lp_build_layer_coord(bld, texture_unit, false, z, &out1);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      } else {
         z = lp_build_layer_coord(bld, texture_unit, false, z, NULL);
      }
   }

   /* This is a lot like border sampling */
   if (offsets[0]) {
      /*
       * coords are really unsigned, offsets are signed, but I don't think
       * exceeding 31 bits is possible
       */
      x = lp_build_add(int_coord_bld, x, offsets[0]);
   }
   /* bounds-check each dimension: 0 <= coord < size */
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      if (offsets[1]) {
         y = lp_build_add(int_coord_bld, y, offsets[1]);
      }
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

      if (dims >= 3) {
         if (offsets[2]) {
            z = lp_build_add(int_coord_bld, z, offsets[2]);
         }
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
   }

   /* compute byte offset of the texel within the mip level */
   if (bld->static_texture_state->tiled) {
      lp_build_tiled_sample_offset(&bld->int_coord_bld,
                                   bld->format_desc->format,
                                   bld->static_texture_state,
                                   x, y, z, width, height, img_stride_vec,
                                   &offset, &i, &j);
   } else {
      lp_build_sample_offset(int_coord_bld,
                             bld->format_desc,
                             x, y, z, row_stride_vec, img_stride_vec,
                             &offset, &i, &j);
   }

   if (bld->static_texture_state->target != PIPE_BUFFER) {
      offset = lp_build_add(int_coord_bld, offset,
                            lp_build_get_mip_offsets(bld, ilevel));
   }

   if (bld->fetch_ms && bld->static_texture_state->level_zero_only) {
      /*
       * NOTE(review): the last_level dynamic-state slot appears to be
       * reused to hold the sample count for msaa (level_zero_only)
       * textures -- confirm against the lp_jit texture state layout.
       */
      LLVMValueRef num_samples = bld->dynamic_state->last_level(bld->gallivm,
                                                                bld->resources_type,
                                                                bld->resources_ptr,
                                                                texture_unit, NULL);
      num_samples = LLVMBuildZExt(bld->gallivm->builder, num_samples,
                                  bld->int_bld.elem_type, "");
      LLVMValueRef sample_stride = lp_sample_load_mip_value(bld->gallivm,
                                                            bld->mip_offsets_type,
                                                            bld->mip_offsets,
                                                            lp_build_const_int32(bld->gallivm, LP_JIT_TEXTURE_SAMPLE_STRIDE));
      lp_build_sample_ms_offset(int_coord_bld, ms_index, num_samples, sample_stride,
                                &offset, &out_of_bounds);
   }

   if (bld->residency) {
      lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
                               bld->resources_type, bld->resources_ptr,
                               offset, &bld->resident);
   }

   /* zero the offset for out-of-bounds lanes so the fetch stays in range */
   offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, true,
                           bld->base_ptr, offset,
                           i, j,
                           bld->cache,
                           colors_out);

   if (out_of_bound_ret_zero) {
      /*
       * Only needed for ARB_robust_buffer_access_behavior and d3d10.
       * Could use min/max above instead of out-of-bounds comparisons
       * if we don't care about the result returned for out-of-bounds.
       */
      LLVMValueRef oob[4] = {
         bld->texel_bld.zero,
         bld->texel_bld.zero,
         bld->texel_bld.zero,
         bld->texel_bld.zero,
      };
      lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, oob, oob);
      for (chan = 0; chan < 4; chan++) {
         colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
                                            oob[chan], colors_out[chan]);
      }
   }
}
3085
3086
3087 /**
3088 * Just set texels to white instead of actually sampling the texture.
3089 * For debugging.
3090 */
3091 void
lp_build_sample_nop(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * coords,LLVMValueRef texel_out[4])3092 lp_build_sample_nop(struct gallivm_state *gallivm,
3093 struct lp_type type,
3094 const LLVMValueRef *coords,
3095 LLVMValueRef texel_out[4])
3096 {
3097 LLVMValueRef one = lp_build_one(gallivm, type);
3098 for (unsigned chan = 0; chan < 4; chan++) {
3099 texel_out[chan] = one;
3100 }
3101 }
3102
3103
3104 struct lp_type
lp_build_texel_type(struct lp_type texel_type,const struct util_format_description * format_desc)3105 lp_build_texel_type(struct lp_type texel_type,
3106 const struct util_format_description *format_desc)
3107 {
3108 /* always using the first channel hopefully should be safe,
3109 * if not things WILL break in other places anyway.
3110 */
3111 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
3112 format_desc->channel[0].pure_integer) {
3113 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
3114 texel_type = lp_type_int_vec(texel_type.width, texel_type.width * texel_type.length);
3115 } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
3116 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3117 }
3118 } else if (util_format_has_stencil(format_desc) &&
3119 !util_format_has_depth(format_desc)) {
3120 /* for stencil only formats, sample stencil (uint) */
3121 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3122 }
3123 return texel_type;
3124 }
3125
3126
3127 /**
3128 * Build the actual texture sampling code.
3129 * 'texel' will return a vector of four LLVMValueRefs corresponding to
3130 * R, G, B, A.
3131 * \param type vector float type to use for coords, etc.
3132 * \param sample_key
3133 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
3134 */
3135 void
lp_build_sample_soa_code(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,unsigned sample_key,unsigned texture_index,unsigned sampler_index,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,LLVMTypeRef thread_data_type,LLVMValueRef thread_data_ptr,const LLVMValueRef * coords,const LLVMValueRef * offsets,const struct lp_derivatives * derivs,LLVMValueRef lod,LLVMValueRef ms_index,LLVMValueRef * texel_out)3136 lp_build_sample_soa_code(struct gallivm_state *gallivm,
3137 const struct lp_static_texture_state *static_texture_state,
3138 const struct lp_static_sampler_state *static_sampler_state,
3139 struct lp_sampler_dynamic_state *dynamic_state,
3140 struct lp_type type,
3141 unsigned sample_key,
3142 unsigned texture_index,
3143 unsigned sampler_index,
3144 LLVMTypeRef resources_type,
3145 LLVMValueRef resources_ptr,
3146 LLVMTypeRef thread_data_type,
3147 LLVMValueRef thread_data_ptr,
3148 const LLVMValueRef *coords,
3149 const LLVMValueRef *offsets,
3150 const struct lp_derivatives *derivs, /* optional */
3151 LLVMValueRef lod, /* optional */
3152 LLVMValueRef ms_index, /* optional */
3153 LLVMValueRef *texel_out)
3154 {
3155 assert(static_texture_state);
3156 assert(static_texture_state->format < PIPE_FORMAT_COUNT);
3157 assert(static_sampler_state);
3158
3159 const enum pipe_texture_target target = static_texture_state->target;
3160 const unsigned dims = texture_dims(target);
3161 const unsigned num_quads = type.length == 1 ? 1 : type.length / 4;
3162 struct lp_build_sample_context bld;
3163 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
3164 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
3165 LLVMBuilderRef builder = gallivm->builder;
3166 const struct util_format_description *res_format_desc;
3167
3168 if (0) {
3169 enum pipe_format fmt = static_texture_state->format;
3170 debug_printf("Sample from %s\n", util_format_name(fmt));
3171 }
3172
3173 const enum lp_sampler_lod_property lod_property =
3174 (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
3175 LP_SAMPLER_LOD_PROPERTY_SHIFT;
3176 const enum lp_sampler_lod_control lod_control =
3177 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3178 LP_SAMPLER_LOD_CONTROL_SHIFT;
3179 const enum lp_sampler_op_type op_type =
3180 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3181 LP_SAMPLER_OP_TYPE_SHIFT;
3182
3183 const bool fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
3184 const bool op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
3185 const bool op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
3186 const bool op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
3187
3188 LLVMValueRef lod_bias = NULL;
3189 LLVMValueRef explicit_lod = NULL;
3190 if (lod_control == LP_SAMPLER_LOD_BIAS) {
3191 lod_bias = lod;
3192 assert(lod);
3193 assert(derivs == NULL);
3194 } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3195 explicit_lod = lod;
3196 derived_sampler_state.aniso = 0;
3197 assert(lod);
3198 assert(derivs == NULL);
3199 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3200 assert(derivs);
3201 assert(lod == NULL);
3202 } else {
3203 assert(derivs == NULL);
3204 assert(lod == NULL);
3205 }
3206
3207 if (static_texture_state->format == PIPE_FORMAT_NONE) {
3208 /*
3209 * If there's nothing bound, format is NONE, and we must return
3210 * all zero as mandated by d3d10 in this case.
3211 */
3212 LLVMValueRef zero = lp_build_zero(gallivm, type);
3213 for (unsigned chan = 0; chan < 4; chan++) {
3214 texel_out[chan] = zero;
3215 }
3216 return;
3217 }
3218
3219 if (texture_dims(target) != 2) {
3220 derived_sampler_state.aniso = 0;
3221 }
3222
3223 assert(type.floating);
3224
3225 /* Setup our build context */
3226 memset(&bld, 0, sizeof bld);
3227 bld.gallivm = gallivm;
3228 bld.resources_type = resources_type;
3229 bld.resources_ptr = resources_ptr;
3230 bld.static_sampler_state = &derived_sampler_state;
3231 bld.static_texture_state = static_texture_state;
3232 bld.dynamic_state = dynamic_state;
3233 bld.format_desc = util_format_description(static_texture_state->format);
3234 bld.dims = dims;
3235
3236 res_format_desc = util_format_description(static_texture_state->res_format);
3237
3238 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
3239 bld.no_quad_lod = true;
3240 }
3241 if (!(gallivm_perf & GALLIVM_PERF_RHO_APPROX) || op_is_lodq) {
3242 bld.no_rho_approx = true;
3243 }
3244 if (!(gallivm_perf & GALLIVM_PERF_BRILINEAR) || op_is_lodq || lod_bias || explicit_lod) {
3245 bld.no_brilinear = true;
3246 }
3247
3248 bld.vector_width = lp_type_width(type);
3249
3250 bld.float_type = lp_type_float(32);
3251 bld.int_type = lp_type_int(32);
3252 bld.coord_type = type;
3253 bld.int_coord_type = lp_int_type(type);
3254 bld.float_size_in_type = lp_type_float(32);
3255 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
3256 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
3257
3258 bld.texel_type = lp_build_texel_type(type, bld.format_desc);
3259
3260 if (!static_texture_state->level_zero_only ||
3261 !static_sampler_state->max_lod_pos || op_is_lodq) {
3262 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
3263 } else {
3264 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3265 }
3266
3267 if (op_is_gather) {
3268 /*
3269 * gather4 is exactly like GL_LINEAR filtering but in the end skipping
3270 * the actual filtering. Using mostly the same paths, so cube face
3271 * selection, coord wrapping etc. all naturally uses the same code.
3272 */
3273 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3274 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
3275 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
3276 }
3277
3278 const enum pipe_tex_mipfilter mip_filter =
3279 derived_sampler_state.min_mip_filter;
3280
3281 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3282 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3283 /*
3284 * Seamless filtering ignores wrap modes.
3285 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
3286 * bilinear it's not correct but way better than using for instance
3287 * repeat. Note we even set this for non-seamless. Technically GL
3288 * allows any wrap mode, which made sense when supporting true borders
3289 * (can get seamless effect with border and CLAMP_TO_BORDER), but
3290 * gallium doesn't support borders and d3d9 requires wrap modes to be
3291 * ignored and it's a pain to fix up the sampler state (as it makes it
3292 * texture dependent).
3293 */
3294 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3295 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3296 }
3297
3298 /*
3299 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
3300 * so AoS path could be used. Not sure it's worth the trouble...
3301 */
3302 const enum pipe_tex_filter min_img_filter =
3303 derived_sampler_state.min_img_filter;
3304 const enum pipe_tex_filter mag_img_filter =
3305 derived_sampler_state.mag_img_filter;
3306
3307 /*
3308 * This is all a bit complicated different paths are chosen for performance
3309 * reasons.
3310 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
3311 * everything (the last two options are equivalent for 4-wide case).
3312 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
3313 * lod is calculated then the lod value extracted afterwards so making this
3314 * case basically the same as far as lod handling is concerned for the
3315 * further sample/filter code as the 1 lod for everything case.
3316 * Different lod handling mostly shows up when building mipmap sizes
3317 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
3318 * (getting the fractional part of the lod to the right texels).
3319 */
3320
3321 /*
3322 * There are other situations where at least the multiple int lods could be
3323 * avoided like min and max lod being equal.
3324 */
3325 bld.num_mips = bld.num_lods = 1;
3326
3327 if ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
3328 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3329 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
3330 op_is_lodq) {
3331 /*
3332 * special case for using per-pixel lod even for implicit lod,
3333 * which is generally never required (ok by APIs) except to please
3334 * some (somewhat broken imho) tests (because per-pixel face selection
3335 * can cause derivatives to be different for pixels outside the primitive
3336 * due to the major axis division even if pre-project derivatives are
3337 * looking normal).
3338 * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
3339 * cube maps we do indeed get per-pixel lod values).
3340 */
3341 bld.num_mips = type.length;
3342 bld.num_lods = type.length;
3343 } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
3344 (explicit_lod || lod_bias || derivs)) {
3345 if ((!op_is_tex && target != PIPE_BUFFER) ||
3346 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3347 bld.num_mips = type.length;
3348 bld.num_lods = type.length;
3349 } else if (op_is_tex && min_img_filter != mag_img_filter) {
3350 bld.num_mips = 1;
3351 bld.num_lods = type.length;
3352 }
3353 }
3354 /* TODO: for true scalar_lod should only use 1 lod value */
3355 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
3356 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3357 bld.num_mips = num_quads;
3358 bld.num_lods = num_quads;
3359 } else if (op_is_tex && min_img_filter != mag_img_filter) {
3360 bld.num_mips = 1;
3361 bld.num_lods = num_quads;
3362 }
3363
3364 bld.fetch_ms = fetch_ms;
3365 bld.residency = !!(sample_key & LP_SAMPLER_RESIDENCY);
3366 if (op_is_gather)
3367 bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
3368 bld.lodf_type = type;
3369 /* we want native vector size to be able to use our intrinsics */
3370 if (bld.num_lods != type.length) {
3371 /* TODO: this currently always has to be per-quad or per-element */
3372 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
3373 }
3374 bld.lodi_type = lp_int_type(bld.lodf_type);
3375 bld.levelf_type = bld.lodf_type;
3376 if (bld.num_mips == 1) {
3377 bld.levelf_type.length = 1;
3378 }
3379 bld.leveli_type = lp_int_type(bld.levelf_type);
3380 bld.float_size_type = bld.float_size_in_type;
3381
3382 /* Note: size vectors may not be native. They contain minified w/h/d/_
3383 * values, with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to
3384 * 8x4f32
3385 */
3386 if (bld.num_mips > 1) {
3387 bld.float_size_type.length = bld.num_mips == type.length ?
3388 bld.num_mips * bld.float_size_in_type.length :
3389 type.length;
3390 }
3391 bld.int_size_type = lp_int_type(bld.float_size_type);
3392
3393 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
3394 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
3395 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
3396 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
3397 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
3398 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
3399 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
3400 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
3401 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
3402 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
3403 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
3404 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
3405 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
3406 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
3407
3408 /* Get the dynamic state */
3409 LLVMValueRef tex_width = dynamic_state->width(gallivm, resources_type,
3410 resources_ptr, texture_index,
3411 NULL);
3412 bld.row_stride_array = dynamic_state->row_stride(gallivm, resources_type,
3413 resources_ptr, texture_index, NULL,
3414 &bld.row_stride_type);
3415 bld.img_stride_array = dynamic_state->img_stride(gallivm, resources_type,
3416 resources_ptr, texture_index, NULL,
3417 &bld.img_stride_type);
3418 bld.base_ptr = dynamic_state->base_ptr(gallivm, resources_type,
3419 resources_ptr, texture_index, NULL);
3420 bld.mip_offsets = dynamic_state->mip_offsets(gallivm, resources_type,
3421 resources_ptr, texture_index, NULL,
3422 &bld.mip_offsets_type);
3423
3424 /* Note that mip_offsets is an array[level] of offsets to texture images */
3425
3426 if (dynamic_state->cache_ptr && thread_data_ptr) {
3427 bld.cache = dynamic_state->cache_ptr(gallivm, thread_data_type,
3428 thread_data_ptr, texture_index);
3429 }
3430
3431 uint32_t res_bw = res_format_desc->block.width;
3432 uint32_t res_bh = res_format_desc->block.height;
3433 uint32_t bw = bld.format_desc->block.width;
3434 uint32_t bh = bld.format_desc->block.height;
3435
3436 /* only scale if the blocksizes are different. */
3437 if (res_bw == bw)
3438 res_bw = bw = 1;
3439 if (res_bh == bh)
3440 res_bh = bh = 1;
3441
3442 /* width, height, depth as single int vector */
3443 if (dims <= 1) {
3444 bld.int_size = tex_width;
3445 bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0);
3446 bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0);
3447 bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0);
3448 } else {
3449 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3450 tex_width,
3451 LLVMConstInt(i32t, 0, 0), "");
3452 bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3453 LLVMConstInt(i32t, res_bw, 0),
3454 LLVMConstInt(i32t, 0, 0), "");
3455 bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3456 LLVMConstInt(i32t, util_logbase2(res_bw), 0),
3457 LLVMConstInt(i32t, 0, 0), "");
3458 bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3459 LLVMConstInt(i32t, bw, 0),
3460 LLVMConstInt(i32t, 0, 0), "");
3461 if (dims >= 2) {
3462 LLVMValueRef tex_height =
3463 dynamic_state->height(gallivm, resources_type,
3464 resources_ptr, texture_index, NULL);
3465 tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
3466 bld.int_bld.elem_type, "");
3467 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3468 tex_height,
3469 LLVMConstInt(i32t, 1, 0), "");
3470 bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
3471 LLVMConstInt(i32t, res_bh, 0),
3472 LLVMConstInt(i32t, 1, 0), "");
3473 bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
3474 LLVMConstInt(i32t, util_logbase2(res_bh), 0),
3475 LLVMConstInt(i32t, 1, 0), "");
3476 bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
3477 LLVMConstInt(i32t, bh, 0),
3478 LLVMConstInt(i32t, 1, 0), "");
3479 if (dims >= 3) {
3480 LLVMValueRef tex_depth =
3481 dynamic_state->depth(gallivm, resources_type, resources_ptr,
3482 texture_index, NULL);
3483 tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
3484 bld.int_bld.elem_type, "");
3485 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3486 tex_depth,
3487 LLVMConstInt(i32t, 2, 0), "");
3488 bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
3489 LLVMConstInt(i32t, 1, 0),
3490 LLVMConstInt(i32t, 2, 0), "");
3491 bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
3492 LLVMConstInt(i32t, 0, 0),
3493 LLVMConstInt(i32t, 2, 0), "");
3494 bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
3495 LLVMConstInt(i32t, 1, 0),
3496 LLVMConstInt(i32t, 2, 0), "");
3497 }
3498 }
3499 }
3500
3501 LLVMValueRef newcoords[5];
3502 for (unsigned i = 0; i < 5; i++) {
3503 newcoords[i] = coords[i];
3504 }
3505
3506 if (util_format_is_pure_integer(static_texture_state->format) &&
3507 !util_format_has_depth(bld.format_desc) && op_is_tex &&
3508 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
3509 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3510 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3511 /*
3512 * Bail if impossible filtering is specified (the awkard additional
3513 * depth check is because it is legal in gallium to have things like
3514 * S8Z24 here which would say it's pure int despite such formats should
3515 * sample the depth component).
3516 * In GL such filters make the texture incomplete, this makes it robust
3517 * against gallium frontends which set this up regardless (we'd crash in
3518 * the lerp later otherwise).
3519 * At least in some apis it may be legal to use such filters with lod
3520 * queries and/or gather (at least for gather d3d10 says only the wrap
3521 * bits are really used hence filter bits are likely simply ignored).
3522 * For fetch, we don't get valid samplers either way here.
3523 */
3524 LLVMValueRef zero = lp_build_zero(gallivm, type);
3525 for (unsigned chan = 0; chan < 4; chan++) {
3526 texel_out[chan] = zero;
3527 }
3528 return;
3529 }
3530
3531 if (0) {
3532 /* For debug: no-op texture sampling */
3533 lp_build_sample_nop(gallivm,
3534 bld.texel_type,
3535 newcoords,
3536 texel_out);
3537 } else if (op_type == LP_SAMPLER_OP_FETCH) {
3538 lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
3539 lod, offsets, texel_out);
3540 if (bld.residency)
3541 texel_out[4] = bld.resident;
3542 } else {
3543 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
3544 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
3545 bool use_aos = util_format_fits_8unorm(bld.format_desc) &&
3546 op_is_tex &&
3547 /* not sure this is strictly needed or simply impossible */
3548 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
3549 derived_sampler_state.aniso == 0 &&
3550 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
3551
3552 use_aos &= bld.num_lods <= num_quads ||
3553 derived_sampler_state.min_img_filter ==
3554 derived_sampler_state.mag_img_filter;
3555
3556 use_aos &= !static_texture_state->tiled;
3557
3558 if (gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
3559 use_aos = 0;
3560 }
3561
3562 if (dims > 1) {
3563 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
3564 if (dims > 2) {
3565 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
3566 }
3567 }
3568 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
3569 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3570 derived_sampler_state.seamless_cube_map &&
3571 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3572 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3573 /* theoretically possible with AoS filtering but not implemented (complex!) */
3574 use_aos = 0;
3575 }
3576
3577 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
3578 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
3579 debug_printf("%s: using floating point linear filtering for %s\n",
3580 __func__, bld.format_desc->short_name);
3581 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
3582 " wraps %d wrapt %d wrapr %d\n",
3583 derived_sampler_state.min_img_filter,
3584 derived_sampler_state.mag_img_filter,
3585 derived_sampler_state.min_mip_filter,
3586 static_texture_state->target,
3587 derived_sampler_state.seamless_cube_map,
3588 derived_sampler_state.wrap_s,
3589 derived_sampler_state.wrap_t,
3590 derived_sampler_state.wrap_r);
3591 }
3592
3593 lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
3594 newcoords, derivs, lod_bias, explicit_lod,
3595 &lod_positive, &lod, &lod_fpart,
3596 &ilevel0, &ilevel1);
3597
3598 if (op_is_lodq) {
3599 texel_out[0] = lod_fpart;
3600 texel_out[1] = lod;
3601 texel_out[2] = texel_out[3] = bld.coord_bld.zero;
3602 if (bld.residency)
3603 texel_out[4] = bld.resident;
3604 return;
3605 }
3606
3607 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3608 /* The aos path doesn't do seamless filtering so simply add cube layer
3609 * to face now.
3610 */
3611 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
3612 }
3613
3614 /*
3615 * we only try 8-wide sampling with soa or if we have AVX2
3616 * as it appears to be a loss with just AVX)
3617 */
3618 if (num_quads == 1 || !use_aos ||
3619 (util_get_cpu_caps()->has_avx2 &&
3620 (bld.num_lods == 1 ||
3621 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
3622 if (use_aos) {
3623 /* do sampling/filtering with fixed pt arithmetic */
3624 lp_build_sample_aos(&bld,
3625 newcoords[0], newcoords[1],
3626 newcoords[2],
3627 offsets, lod_positive, lod_fpart,
3628 ilevel0, ilevel1,
3629 texel_out);
3630 } else {
3631 lp_build_sample_general(&bld, sampler_index,
3632 op_type == LP_SAMPLER_OP_GATHER,
3633 newcoords, offsets,
3634 lod_positive, lod_fpart,
3635 ilevel0, ilevel1,
3636 texel_out);
3637 if (bld.residency)
3638 texel_out[4] = bld.resident;
3639 }
3640 } else {
3641 struct lp_build_sample_context bld4;
3642 struct lp_type type4 = type;
3643 LLVMValueRef texelout4[4];
3644 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
3645
3646 type4.length = 4;
3647
3648 /* Setup our build context */
3649 memset(&bld4, 0, sizeof bld4);
3650 bld4.no_quad_lod = bld.no_quad_lod;
3651 bld4.no_rho_approx = bld.no_rho_approx;
3652 bld4.no_brilinear = bld.no_brilinear;
3653 bld4.gallivm = bld.gallivm;
3654 bld4.resources_type = bld.resources_type;
3655 bld4.resources_ptr = bld.resources_ptr;
3656 bld4.static_texture_state = bld.static_texture_state;
3657 bld4.static_sampler_state = bld.static_sampler_state;
3658 bld4.dynamic_state = bld.dynamic_state;
3659 bld4.format_desc = bld.format_desc;
3660 bld4.dims = bld.dims;
3661 bld4.row_stride_type = bld.row_stride_type;
3662 bld4.row_stride_array = bld.row_stride_array;
3663 bld4.img_stride_type = bld.img_stride_type;
3664 bld4.img_stride_array = bld.img_stride_array;
3665 bld4.base_ptr = bld.base_ptr;
3666 bld4.mip_offsets_type = bld.mip_offsets_type;
3667 bld4.mip_offsets = bld.mip_offsets;
3668 bld4.int_size = bld.int_size;
3669 bld4.int_tex_blocksize = bld.int_tex_blocksize;
3670 bld4.int_tex_blocksize_log2 = bld.int_tex_blocksize_log2;
3671 bld4.int_view_blocksize = bld.int_view_blocksize;
3672 bld4.cache = bld.cache;
3673
3674 bld4.vector_width = lp_type_width(type4);
3675
3676 bld4.float_type = lp_type_float(32);
3677 bld4.int_type = lp_type_int(32);
3678 bld4.coord_type = type4;
3679 bld4.int_coord_type = lp_int_type(type4);
3680 bld4.float_size_in_type = lp_type_float(32);
3681 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
3682 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
3683 bld4.texel_type = bld.texel_type;
3684 bld4.texel_type.length = 4;
3685
3686 bld4.num_mips = bld4.num_lods = 1;
3687 if (bld4.no_quad_lod && bld4.no_rho_approx &&
3688 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3689 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3690 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3691 bld4.num_mips = type4.length;
3692 bld4.num_lods = type4.length;
3693 }
3694 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
3695 (explicit_lod || lod_bias || derivs)) {
3696 if ((!op_is_tex && target != PIPE_BUFFER) ||
3697 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3698 bld4.num_mips = type4.length;
3699 bld4.num_lods = type4.length;
3700 } else if (op_is_tex && min_img_filter != mag_img_filter) {
3701 bld4.num_mips = 1;
3702 bld4.num_lods = type4.length;
3703 }
3704 }
3705
3706 /* we want native vector size to be able to use our intrinsics */
3707 bld4.lodf_type = type4;
3708 if (bld4.num_lods != type4.length) {
3709 bld4.lodf_type.length = 1;
3710 }
3711 bld4.lodi_type = lp_int_type(bld4.lodf_type);
3712 bld4.levelf_type = type4;
3713 if (bld4.num_mips != type4.length) {
3714 bld4.levelf_type.length = 1;
3715 }
3716 bld4.leveli_type = lp_int_type(bld4.levelf_type);
3717 bld4.float_size_type = bld4.float_size_in_type;
3718 if (bld4.num_mips > 1) {
3719 bld4.float_size_type.length = bld4.num_mips == type4.length ?
3720 bld4.num_mips * bld4.float_size_in_type.length :
3721 type4.length;
3722 }
3723 bld4.int_size_type = lp_int_type(bld4.float_size_type);
3724
3725 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
3726 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
3727 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
3728 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
3729 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
3730 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
3731 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
3732 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
3733 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
3734 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
3735 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
3736 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
3737 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
3738 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
3739
3740 for (unsigned i = 0; i < num_quads; i++) {
3741 LLVMValueRef s4, t4, r4;
3742 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
3743 LLVMValueRef ilevel04, ilevel14 = NULL;
3744 LLVMValueRef offsets4[4] = { NULL };
3745 unsigned num_lods = bld4.num_lods;
3746
3747 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
3748 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
3749 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
3750
3751 if (offsets[0]) {
3752 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
3753 if (dims > 1) {
3754 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
3755 if (dims > 2) {
3756 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
3757 }
3758 }
3759 }
3760 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
3761 ilevel04 = bld.num_mips == 1 ? ilevel0 :
3762 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
3763 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
3764 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
3765 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
3766 }
3767
3768 if (use_aos) {
3769 /* do sampling/filtering with fixed pt arithmetic */
3770 lp_build_sample_aos(&bld4,
3771 s4, t4, r4, offsets4,
3772 lod_positive4, lod_fpart4,
3773 ilevel04, ilevel14,
3774 texelout4);
3775 } else {
3776 /* this path is currently unreachable and hence might break easily... */
3777 LLVMValueRef newcoords4[5];
3778 newcoords4[0] = s4;
3779 newcoords4[1] = t4;
3780 newcoords4[2] = r4;
3781 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
3782 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
3783
3784 lp_build_sample_general(&bld4, sampler_index,
3785 op_type == LP_SAMPLER_OP_GATHER,
3786 newcoords4, offsets4,
3787 lod_positive4, lod_fpart4,
3788 ilevel04, ilevel14,
3789 texelout4);
3790 }
3791 for (unsigned j = 0; j < 4; j++) {
3792 texelouttmp[j][i] = texelout4[j];
3793 }
3794 }
3795
3796 for (unsigned j = 0; j < 4; j++) {
3797 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
3798 }
3799 }
3800 }
3801
3802 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
3803 apply_sampler_swizzle(&bld, texel_out);
3804 }
3805
3806 /*
3807 * texel type can be a (32bit) int/uint (for pure int formats only),
3808 * however we are expected to always return floats (storage is untyped).
3809 */
3810 if (!bld.texel_type.floating) {
3811 unsigned chan;
3812 for (chan = 0; chan < 4; chan++) {
3813 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
3814 lp_build_vec_type(gallivm, type), "");
3815 }
3816 }
3817 }
3818
3819
3820 #define USE_TEX_FUNC_CALL 1
3821
3822 static inline void
get_target_info(enum pipe_texture_target target,unsigned * num_coords,unsigned * num_derivs,unsigned * num_offsets,unsigned * layer)3823 get_target_info(enum pipe_texture_target target,
3824 unsigned *num_coords, unsigned *num_derivs,
3825 unsigned *num_offsets, unsigned *layer)
3826 {
3827 unsigned dims = texture_dims(target);
3828 *num_coords = dims;
3829 *num_offsets = dims;
3830 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
3831 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
3832 *layer = has_layer_coord(target) ? 2: 0;
3833 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3834 /*
3835 * dims doesn't include r coord for cubes - this is handled
3836 * by layer instead, but need to fix up for cube arrays...
3837 */
3838 *layer = 3;
3839 *num_coords = 3;
3840 }
3841 }
3842
3843
3844 /**
3845 * Generate the function body for a texture sampling function.
3846 */
3847 static void
lp_build_sample_gen_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,LLVMTypeRef resources_type,LLVMTypeRef thread_data_type,unsigned texture_index,unsigned sampler_index,LLVMValueRef function,unsigned num_args,unsigned sample_key)3848 lp_build_sample_gen_func(struct gallivm_state *gallivm,
3849 const struct lp_static_texture_state *static_texture_state,
3850 const struct lp_static_sampler_state *static_sampler_state,
3851 struct lp_sampler_dynamic_state *dynamic_state,
3852 struct lp_type type,
3853 LLVMTypeRef resources_type,
3854 LLVMTypeRef thread_data_type,
3855 unsigned texture_index,
3856 unsigned sampler_index,
3857 LLVMValueRef function,
3858 unsigned num_args,
3859 unsigned sample_key)
3860 {
3861 LLVMBuilderRef old_builder;
3862 LLVMBasicBlockRef block;
3863 LLVMValueRef coords[5];
3864 LLVMValueRef offsets[3] = { NULL };
3865 LLVMValueRef lod = NULL;
3866 LLVMValueRef ms_index = NULL;
3867 LLVMValueRef resources_ptr;
3868 LLVMValueRef thread_data_ptr = NULL;
3869 LLVMValueRef texel_out[4];
3870 struct lp_derivatives derivs;
3871 struct lp_derivatives *deriv_ptr = NULL;
3872 unsigned num_param = 0;
3873 unsigned num_coords, num_derivs, num_offsets, layer;
3874 bool need_cache = false;
3875
3876 const enum lp_sampler_lod_control lod_control =
3877 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK)
3878 >> LP_SAMPLER_LOD_CONTROL_SHIFT;
3879
3880 const enum lp_sampler_op_type op_type =
3881 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
3882
3883 get_target_info(static_texture_state->target,
3884 &num_coords, &num_derivs, &num_offsets, &layer);
3885
3886 /* lod query doesn't take a layer */
3887 if (layer && op_type == LP_SAMPLER_OP_LODQ)
3888 layer = 0;
3889
3890 if (dynamic_state->cache_ptr) {
3891 const struct util_format_description *format_desc;
3892 format_desc = util_format_description(static_texture_state->format);
3893 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
3894 need_cache = true;
3895 }
3896 }
3897
3898 /* "unpack" arguments */
3899 resources_ptr = LLVMGetParam(function, num_param++);
3900 if (need_cache) {
3901 thread_data_ptr = LLVMGetParam(function, num_param++);
3902 }
3903 for (unsigned i = 0; i < num_coords; i++) {
3904 coords[i] = LLVMGetParam(function, num_param++);
3905 }
3906 for (unsigned i = num_coords; i < 5; i++) {
3907 /* This is rather unfortunate... */
3908 coords[i] = lp_build_undef(gallivm, type);
3909 }
3910 if (layer) {
3911 coords[layer] = LLVMGetParam(function, num_param++);
3912 }
3913 if (sample_key & LP_SAMPLER_SHADOW) {
3914 coords[4] = LLVMGetParam(function, num_param++);
3915 }
3916 if (sample_key & LP_SAMPLER_FETCH_MS) {
3917 ms_index = LLVMGetParam(function, num_param++);
3918 }
3919 if (sample_key & LP_SAMPLER_OFFSETS) {
3920 for (unsigned i = 0; i < num_offsets; i++) {
3921 offsets[i] = LLVMGetParam(function, num_param++);
3922 }
3923 }
3924 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3925 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3926 lod = LLVMGetParam(function, num_param++);
3927 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3928 for (unsigned i = 0; i < num_derivs; i++) {
3929 derivs.ddx[i] = LLVMGetParam(function, num_param++);
3930 derivs.ddy[i] = LLVMGetParam(function, num_param++);
3931 }
3932 deriv_ptr = &derivs;
3933 }
3934
3935 assert(num_args == num_param);
3936
3937 /*
3938 * Function body
3939 */
3940
3941 old_builder = gallivm->builder;
3942 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
3943 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
3944 LLVMPositionBuilderAtEnd(gallivm->builder, block);
3945
3946 lp_build_sample_soa_code(gallivm,
3947 static_texture_state,
3948 static_sampler_state,
3949 dynamic_state,
3950 type,
3951 sample_key,
3952 texture_index,
3953 sampler_index,
3954 resources_type,
3955 resources_ptr,
3956 thread_data_type,
3957 thread_data_ptr,
3958 coords,
3959 offsets,
3960 deriv_ptr,
3961 lod,
3962 ms_index,
3963 texel_out);
3964
3965 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
3966
3967 LLVMDisposeBuilder(gallivm->builder);
3968 gallivm->builder = old_builder;
3969
3970 gallivm_verify_function(gallivm, function);
3971 }
3972
3973
3974 /**
3975 * Call the matching function for texture sampling.
3976 * If there's no match, generate a new one.
3977 */
3978 static void
lp_build_sample_soa_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,const struct lp_sampler_params * params,unsigned texture_index,unsigned sampler_index,LLVMValueRef * tex_ret)3979 lp_build_sample_soa_func(struct gallivm_state *gallivm,
3980 const struct lp_static_texture_state *static_texture_state,
3981 const struct lp_static_sampler_state *static_sampler_state,
3982 struct lp_sampler_dynamic_state *dynamic_state,
3983 const struct lp_sampler_params *params,
3984 unsigned texture_index, unsigned sampler_index,
3985 LLVMValueRef *tex_ret)
3986 {
3987 LLVMBuilderRef builder = gallivm->builder;
3988 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
3989 LLVMGetInsertBlock(builder)));
3990 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
3991 unsigned sample_key = params->sample_key;
3992 const LLVMValueRef *coords = params->coords;
3993 const LLVMValueRef *offsets = params->offsets;
3994 const struct lp_derivatives *derivs = params->derivs;
3995
3996 const enum lp_sampler_lod_control lod_control =
3997 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3998 LP_SAMPLER_LOD_CONTROL_SHIFT;
3999
4000 const enum lp_sampler_op_type op_type =
4001 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4002
4003 unsigned num_coords, num_derivs, num_offsets, layer;
4004 get_target_info(static_texture_state->target,
4005 &num_coords, &num_derivs, &num_offsets, &layer);
4006
4007 /* lod query doesn't take a layer */
4008 if (layer && op_type == LP_SAMPLER_OP_LODQ)
4009 layer = 0;
4010
4011 bool need_cache = false;
4012 if (dynamic_state->cache_ptr) {
4013 const struct util_format_description *format_desc;
4014 format_desc = util_format_description(static_texture_state->format);
4015 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4016 need_cache = true;
4017 }
4018 }
4019
4020 /*
4021 * texture function matches are found by name.
4022 * Thus the name has to include both the texture and sampler unit
4023 * (which covers all static state) plus the actual texture function
4024 * (including things like offsets, shadow coord, lod control).
4025 * Additionally lod_property has to be included too.
4026 */
4027 char func_name[64];
4028 snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
4029 texture_index, sampler_index, sample_key);
4030
4031 LLVMValueRef function = LLVMGetNamedFunction(module, func_name);
4032 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
4033 LLVMTypeRef ret_type;
4034 LLVMTypeRef val_type[4];
4035 unsigned num_param = 0;
4036
4037 /*
4038 * Generate the function prototype.
4039 */
4040
4041 arg_types[num_param++] = LLVMTypeOf(params->resources_ptr);
4042 if (need_cache) {
4043 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
4044 }
4045 for (unsigned i = 0; i < num_coords; i++) {
4046 arg_types[num_param++] = LLVMTypeOf(coords[0]);
4047 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
4048 }
4049 if (layer) {
4050 arg_types[num_param++] = LLVMTypeOf(coords[layer]);
4051 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
4052 }
4053 if (sample_key & LP_SAMPLER_SHADOW) {
4054 arg_types[num_param++] = LLVMTypeOf(coords[0]);
4055 }
4056 if (sample_key & LP_SAMPLER_FETCH_MS) {
4057 arg_types[num_param++] = LLVMTypeOf(params->ms_index);
4058 }
4059 if (sample_key & LP_SAMPLER_OFFSETS) {
4060 for (unsigned i = 0; i < num_offsets; i++) {
4061 arg_types[num_param++] = LLVMTypeOf(offsets[0]);
4062 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
4063 }
4064 }
4065 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4066 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4067 arg_types[num_param++] = LLVMTypeOf(params->lod);
4068 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4069 for (unsigned i = 0; i < num_derivs; i++) {
4070 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
4071 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
4072 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
4073 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
4074 }
4075 }
4076
4077 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
4078 lp_build_vec_type(gallivm, params->type);
4079 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
4080 LLVMTypeRef function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
4081
4082 if (!function) {
4083 function = LLVMAddFunction(module, func_name, function_type);
4084
4085 for (unsigned i = 0; i < num_param; ++i) {
4086 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
4087
4088 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
4089 }
4090 }
4091
4092 LLVMSetFunctionCallConv(function, LLVMFastCallConv);
4093 LLVMSetLinkage(function, LLVMInternalLinkage);
4094
4095 lp_build_sample_gen_func(gallivm,
4096 static_texture_state,
4097 static_sampler_state,
4098 dynamic_state,
4099 params->type,
4100 params->resources_type,
4101 params->thread_data_type,
4102 texture_index,
4103 sampler_index,
4104 function,
4105 num_param,
4106 sample_key);
4107 }
4108
4109 unsigned num_args = 0;
4110 args[num_args++] = params->resources_ptr;
4111 if (need_cache) {
4112 args[num_args++] = params->thread_data_ptr;
4113 }
4114 for (unsigned i = 0; i < num_coords; i++) {
4115 args[num_args++] = coords[i];
4116 }
4117 if (layer) {
4118 args[num_args++] = coords[layer];
4119 }
4120 if (sample_key & LP_SAMPLER_SHADOW) {
4121 args[num_args++] = coords[4];
4122 }
4123 if (sample_key & LP_SAMPLER_FETCH_MS) {
4124 args[num_args++] = params->ms_index;
4125 }
4126 if (sample_key & LP_SAMPLER_OFFSETS) {
4127 for (unsigned i = 0; i < num_offsets; i++) {
4128 args[num_args++] = offsets[i];
4129 }
4130 }
4131 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4132 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4133 args[num_args++] = params->lod;
4134 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4135 for (unsigned i = 0; i < num_derivs; i++) {
4136 args[num_args++] = derivs->ddx[i];
4137 args[num_args++] = derivs->ddy[i];
4138 }
4139 }
4140
4141 assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
4142
4143 *tex_ret = LLVMBuildCall2(builder, function_type, function, args, num_args, "");
4144 LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder);
4145 LLVMValueRef inst = LLVMGetLastInstruction(bb);
4146 LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
4147 }
4148
4149
4150 /**
4151 * Build texture sampling code.
4152 * Either via a function call or inline it directly.
4153 */
4154 void
lp_build_sample_soa(const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_sampler_params * params)4155 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
4156 const struct lp_static_sampler_state *static_sampler_state,
4157 struct lp_sampler_dynamic_state *dynamic_state,
4158 struct gallivm_state *gallivm,
4159 const struct lp_sampler_params *params)
4160 {
4161 bool use_tex_func = false;
4162
4163 /*
4164 * Do not use a function call if the sampling is "simple enough".
4165 * We define this by
4166 * a) format
4167 * b) no mips (either one level only or no mip filter)
4168 * No mips will definitely make the code smaller, though
4169 * the format requirement is a bit iffy - there's some (SoA) formats
4170 * which definitely generate less code. This does happen to catch
4171 * some important cases though which are hurt quite a bit by using
4172 * a call (though not really because of the call overhead but because
4173 * they are reusing the same texture unit with some of the same
4174 * parameters).
4175 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
4176 */
4177
4178 if (USE_TEX_FUNC_CALL) {
4179 const struct util_format_description *format_desc =
4180 util_format_description(static_texture_state->format);
4181 const bool simple_format =
4182 (util_format_is_rgba8_variant(format_desc) &&
4183 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
4184 const enum lp_sampler_op_type op_type =
4185 (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
4186 LP_SAMPLER_OP_TYPE_SHIFT;
4187 const bool simple_tex =
4188 op_type != LP_SAMPLER_OP_TEXTURE ||
4189 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
4190 static_texture_state->level_zero_only == true) &&
4191 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
4192
4193 use_tex_func = !(simple_format && simple_tex);
4194 }
4195
4196 if (use_tex_func) {
4197 LLVMValueRef tex_ret;
4198 lp_build_sample_soa_func(gallivm,
4199 static_texture_state,
4200 static_sampler_state,
4201 dynamic_state,
4202 params, params->texture_index,
4203 params->sampler_index, &tex_ret);
4204
4205 for (unsigned i = 0; i < 4; i++) {
4206 params->texel[i] =
4207 LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
4208 }
4209 } else {
4210 lp_build_sample_soa_code(gallivm,
4211 static_texture_state,
4212 static_sampler_state,
4213 dynamic_state,
4214 params->type,
4215 params->sample_key,
4216 params->texture_index,
4217 params->sampler_index,
4218 params->resources_type,
4219 params->resources_ptr,
4220 params->thread_data_type,
4221 params->thread_data_ptr,
4222 params->coords,
4223 params->offsets,
4224 params->derivs,
4225 params->lod,
4226 params->ms_index,
4227 params->texel);
4228 }
4229 }
4230
4231
/**
 * Build code for a texture size / sample-count query (e.g. textureSize(),
 * d3d10 resinfo / sviewinfo).
 *
 * Fetches the level-0 dimensions from the dynamic state, minifies them by
 * the explicit lod (if any), rescales between resource and view block
 * sizes, and broadcasts each resulting channel into params->sizes_out[].
 * For is_sviewinfo queries the mip count is returned in sizes_out[3] and
 * out-of-range levels yield zero sizes, per d3d10 rules.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        const struct lp_sampler_size_query_params *params)
{
   LLVMValueRef first_level = NULL;
   /* only a single (scalar) lod is handled so far, see FIXME below */
   const unsigned num_lods = 1;
   LLVMTypeRef resources_type = params->resources_type;
   LLVMValueRef resources_ptr = params->resources_ptr;
   const unsigned texture_unit = params->texture_unit;
   const enum pipe_texture_target target = params->target;
   LLVMValueRef texture_unit_offset = params->texture_unit_offset;
   const struct util_format_description *format_desc =
      util_format_description(static_state->format);
   const struct util_format_description *res_format_desc =
      util_format_description(static_state->res_format);

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
      for (unsigned chan = 0; chan < 4; chan++) {
         params->sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   const unsigned dims = texture_dims(target);

   const bool has_array = has_layer_coord(target);

   /* the query result type must be an integer vector */
   assert(!params->int_type.floating);

   /* int32x4 scratch vector: x/y/z sizes plus (array) layer count */
   struct lp_build_context bld_int_vec4;
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (params->samples_only) {
      /* sample-count query: return only the number of samples in channel 0 */
      LLVMValueRef num_samples;
      if (params->ms && static_state->level_zero_only) {
         /* multisample never has levels. */
         /* NOTE: the last_level slot doubles as the sample count for MSAA
          * resources (they never have mip levels). */
         num_samples = dynamic_state->last_level(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
         num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                     bld_int_vec4.elem_type, "");
      } else {
         num_samples = lp_build_const_int32(gallivm, 0);
      }
      params->sizes_out[0] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_samples);
      return;
   }

   LLVMValueRef lod;
   LLVMValueRef level = 0;
   if (params->explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      first_level = get_first_level(gallivm, resources_type, resources_ptr,
                                    texture_unit, texture_unit_offset,
                                    static_state, dynamic_state);
      /* shader lods are relative to the view's first level */
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   LLVMValueRef size = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize_log2 = bld_int_vec4.undef;
   LLVMValueRef view_blocksize = bld_int_vec4.undef;

   uint32_t res_bw = res_format_desc->block.width;
   uint32_t res_bh = res_format_desc->block.height;
   uint32_t bw = format_desc->block.width;
   uint32_t bh = format_desc->block.height;

   /* only scale if the blocksizes are different. */
   if (res_bw == bw)
      res_bw = bw = 1;
   if (res_bh == bh)
      res_bh = bh = 1;

   /* Channel 0: width (plus the per-axis block-size scaling inputs).
    * NOTE(review): unlike height/depth below, tex_width is inserted without
    * a zext — presumably width() already returns a 32-bit value; confirm
    * against the dynamic-state implementation. */
   LLVMValueRef tex_width = dynamic_state->width(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 tex_width,
                                 lp_build_const_int32(gallivm, 0), "");
   tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                          lp_build_const_int32(gallivm, res_bw),
                                          lp_build_const_int32(gallivm, 0), "");
   tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                               lp_build_const_int32(gallivm, util_logbase2(res_bw)),
                                               lp_build_const_int32(gallivm, 0), "");
   view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                           lp_build_const_int32(gallivm, bw),
                                           lp_build_const_int32(gallivm, 0), "");
   /* Channel 1: height */
   if (dims >= 2) {
      LLVMValueRef tex_height =
         dynamic_state->height(gallivm, resources_type,
                               resources_ptr, texture_unit, texture_unit_offset);
      tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
                                 bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_height,
                                    lp_build_const_int32(gallivm, 1), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, res_bh),
                                             lp_build_const_int32(gallivm, 1), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, util_logbase2(res_bh)),
                                                  lp_build_const_int32(gallivm, 1), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, bh),
                                              lp_build_const_int32(gallivm, 1), "");
   }

   /* Channel 2: depth (block size is always 1 in z) */
   if (dims >= 3) {
      LLVMValueRef tex_depth =
         dynamic_state->depth(gallivm, resources_type,
                              resources_ptr, texture_unit, texture_unit_offset);
      tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
                                bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_depth,
                                    lp_build_const_int32(gallivm, 2), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, 1),
                                             lp_build_const_int32(gallivm, 2), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, 0),
                                                  lp_build_const_int32(gallivm, 2), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, 1),
                                              lp_build_const_int32(gallivm, 2), "");
   }

   /* shrink to the requested mip level, then convert from resource block
    * units to view block units */
   size = lp_build_minify(&bld_int_vec4, size, lod, true);
   size = lp_build_scale_view_dims(&bld_int_vec4, size, tex_blocksize,
                                   tex_blocksize_log2, view_blocksize);

   if (has_array) {
      /* array textures report the layer count in the channel after z;
       * the layer count lives in the depth slot of the dynamic state */
      LLVMValueRef layers = dynamic_state->depth(gallivm, resources_type,
                                                 resources_ptr, texture_unit,
                                                 texture_unit_offset);
      layers = LLVMBuildZExt(gallivm->builder, layers,
                             bld_int_vec4.elem_type, "");
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (params->explicit_lod && params->is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                  texture_unit, texture_unit_offset,
                                  static_state, dynamic_state);

      /* out-of-range when level < first_level or level > last_level */
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      } else {
         /* TODO */
         assert(0);
      }
      /* mask all size channels to zero where the level is out of range */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }

   /* broadcast each computed channel into the caller's output vectors */
   unsigned i;
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      params->sizes_out[i] =
         lp_build_extract_broadcast(gallivm, bld_int_vec4.type,
                                    params->int_type,
                                    size,
                                    lp_build_const_int32(gallivm, i));
   }
   if (params->is_sviewinfo) {
      /* sviewinfo always writes all four channels; pad with zero */
      for (; i < 4; i++) {
         params->sizes_out[i] = lp_build_const_vec(gallivm,
                                                   params->int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (params->is_sviewinfo && params->explicit_lod) {
      struct lp_build_context bld_int_scalar;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      /* mip count = last_level - first_level + 1 (or 1 if single level) */
      LLVMValueRef num_levels;
      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      } else {
         LLVMValueRef last_level;
         last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                     texture_unit, texture_unit_offset,
                                     static_state, dynamic_state);
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels,
                                   bld_int_scalar.one);
      }
      params->sizes_out[3] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_levels);
   }

   if (target == PIPE_BUFFER) {
      /* clamp reported buffer size to the implementation limit */
      struct lp_build_context bld_int;
      lp_build_context_init(&bld_int, gallivm, params->int_type);

      params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0],
                                          lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS));
   }
}
4504
4505
/**
 * Emit a per-lane image atomic operation (atomic RMW or compare-exchange).
 *
 * Only single-channel 32-bit formats (R32_UINT / R32_SINT / R32_FLOAT) are
 * legal for image atomics; any other format, or a float format combined
 * with an integer op (and vice versa), results in a zero return value.
 *
 * Because LLVM atomics are scalar, the SoA vector is processed with a
 * scalar loop: for each active (exec mask set, in-bounds) lane, the lane's
 * address and data are extracted, the atomic is performed, and the old
 * value is accumulated into an alloca that is reloaded as the vector
 * result in atomic_result[0].
 *
 * \param exec_mask     per-lane execution mask (vector of all-ones/zero)
 * \param out_of_bounds per-lane OOB mask; OOB lanes are skipped
 * \param img_op        LP_IMG_ATOMIC or LP_IMG_ATOMIC_CAS
 * \param op            LLVM RMW opcode (ignored for CAS)
 * \param rgba_in       source operand (channel 0 used)
 * \param rgba2_in      comparison source for CAS (channel 0 used)
 */
static void
lp_build_do_atomic_soa(struct gallivm_state *gallivm,
                       const struct util_format_description *format_desc,
                       struct lp_type type,
                       LLVMValueRef exec_mask,
                       LLVMValueRef base_ptr,
                       LLVMValueRef offset,
                       LLVMValueRef out_of_bounds,
                       unsigned img_op,
                       LLVMAtomicRMWBinOp op,
                       const LLVMValueRef rgba_in[4],
                       const LLVMValueRef rgba2_in[4],
                       LLVMValueRef atomic_result[4])
{
   const enum pipe_format format = format_desc->format;

   bool valid = format == PIPE_FORMAT_R32_UINT ||
                format == PIPE_FORMAT_R32_SINT ||
                format == PIPE_FORMAT_R32_FLOAT;

   /* the op must match the format's signedness class: integer RMW ops need
    * an integer format, float ops need R32_FLOAT */
   bool integer = format != PIPE_FORMAT_R32_FLOAT;
   if (img_op == LP_IMG_ATOMIC) {
      switch (op) {
      case LLVMAtomicRMWBinOpAdd:
      case LLVMAtomicRMWBinOpSub:
      case LLVMAtomicRMWBinOpAnd:
      case LLVMAtomicRMWBinOpNand:
      case LLVMAtomicRMWBinOpOr:
      case LLVMAtomicRMWBinOpXor:
      case LLVMAtomicRMWBinOpMax:
      case LLVMAtomicRMWBinOpMin:
      case LLVMAtomicRMWBinOpUMax:
      case LLVMAtomicRMWBinOpUMin:
         valid &= integer;
         break;
      case LLVMAtomicRMWBinOpFAdd:
      case LLVMAtomicRMWBinOpFSub:
#if LLVM_VERSION_MAJOR >= 15
      case LLVMAtomicRMWBinOpFMax:
      case LLVMAtomicRMWBinOpFMin:
#endif
         valid &= !integer;
         break;
      default:
         break;
      }
   } else {
      /* compare-exchange is integer-only here */
      valid &= integer;
   }

   if (!valid) {
      /* invalid format/op combination: return zero, emit nothing */
      atomic_result[0] = lp_build_zero(gallivm, type);
      return;
   }

   /* scalar element type the atomic operates on */
   LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ?
      LLVMFloatTypeInContext(gallivm->context) :
      LLVMInt32TypeInContext(gallivm->context);

   /* alloca collecting the per-lane old values into a result vector */
   LLVMTypeRef atom_res_elem_type =
      LLVMVectorType(ref_type, type.length);
   LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");

   /* turn the per-lane byte offsets into per-lane absolute addresses */
   offset = LLVMBuildGEP2(gallivm->builder,
                          LLVMInt8TypeInContext(gallivm->context),
                          base_ptr, &offset, 1, "");

   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   struct lp_build_if_state ifthen;
   LLVMValueRef cond;
   LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];

   /* a lane participates if it is executing and not out of bounds */
   LLVMValueRef should_store_mask =
      LLVMBuildAnd(gallivm->builder, exec_mask,
                   LLVMBuildNot(gallivm->builder, out_of_bounds, ""),
                   "store_mask");
   assert(exec_mask);

   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask,
                        lp_build_const_int_vec(gallivm, type, 0), "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond,
                                  loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   /* extract this lane's data and address, reinterpreted as ref_type */
   LLVMValueRef data =
      LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
   LLVMValueRef cast_base_ptr =
      LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
   cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
                                    LLVMPointerType(ref_type, 0), "");
   data = LLVMBuildBitCast(gallivm->builder, data,
                           ref_type, "");

   if (img_op == LP_IMG_ATOMIC_CAS) {
      LLVMValueRef cas_src_ptr =
         LLVMBuildExtractElement(gallivm->builder, packed2,
                                 loop_state.counter, "");
      LLVMValueRef cas_src =
         LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
                          ref_type, "");
      /* cmpxchg returns {old value, success}; keep the old value */
      data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
                                    cas_src,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    false);
      data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
   } else {
      data = LLVMBuildAtomicRMW(gallivm->builder, op,
                                cast_base_ptr, data,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                false);
   }

   /* insert the old value into this lane of the result vector */
   LLVMValueRef temp_res =
      LLVMBuildLoad2(gallivm->builder, atom_res_elem_type, atom_res, "");
   temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data,
                                     loop_state.counter, "");
   LLVMBuildStore(gallivm->builder, temp_res, atom_res);

   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state,
                          lp_build_const_int32(gallivm, type.length),
                          NULL, LLVMIntUGE);
   atomic_result[0] = LLVMBuildLoad2(gallivm->builder, atom_res_elem_type,
                                     atom_res, "");
}
4633
4634
4635 static void
lp_build_img_op_no_format(struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef outdata[4])4636 lp_build_img_op_no_format(struct gallivm_state *gallivm,
4637 const struct lp_img_params *params,
4638 LLVMValueRef outdata[4])
4639 {
4640 /*
4641 * If there's nothing bound, format is NONE, and we must return
4642 * all zero as mandated by d3d10 in this case.
4643 */
4644 if (params->img_op != LP_IMG_STORE) {
4645 LLVMValueRef zero = lp_build_zero(gallivm, params->type);
4646 for (unsigned chan = 0; chan < (params->img_op == LP_IMG_LOAD ? 4 : 1);
4647 chan++) {
4648 outdata[chan] = zero;
4649 }
4650 }
4651 }
4652
4653
/**
 * Emit code for an image load / store / atomic operation (SoA layout).
 *
 * Fetches the image's base pointer, strides, and dimensions from the
 * dynamic state, computes a per-lane byte offset from the coordinates,
 * builds an out-of-bounds mask, and then dispatches on params->img_op:
 * loads go through lp_build_fetch_rgba_soa (OOB lanes forced to the
 * border value), stores through lp_build_store_rgba_soa, and atomics
 * through lp_build_do_atomic_soa.
 *
 * For sparse loads on tiled textures the residency status is additionally
 * written to outdata[4].
 */
void
lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
                    struct lp_sampler_dynamic_state *dynamic_state,
                    struct gallivm_state *gallivm,
                    const struct lp_img_params *params,
                    LLVMValueRef *outdata)
{
   const enum pipe_texture_target target = params->target;
   const unsigned dims = texture_dims(target);
   const struct util_format_description *format_desc =
      util_format_description(static_texture_state->format);
   const struct util_format_description *res_format_desc =
      util_format_description(static_texture_state->res_format);
   LLVMValueRef x = params->coords[0], y = params->coords[1],
      z = params->coords[2];
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;

   /* unsigned integer vector type matching params->type, used for coords
    * and offset arithmetic */
   struct lp_type int_coord_type = lp_uint_type(params->type);
   struct lp_build_context int_coord_bld;
   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);

   if (static_texture_state->format == PIPE_FORMAT_NONE) {
      /* nothing bound: zero-fill (or drop) per d3d10 rules */
      lp_build_img_op_no_format(gallivm, params, outdata);
      return;
   }

   /* pull per-image layout/dimension info out of the dynamic state */
   LLVMValueRef row_stride = dynamic_state->row_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef img_stride = dynamic_state->img_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef base_ptr = dynamic_state->base_ptr(gallivm,
                                                   params->resources_type,
                                                   params->resources_ptr,
                                                   params->image_index, NULL);
   LLVMValueRef width = dynamic_state->width(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   LLVMValueRef height = dynamic_state->height(gallivm,
                                               params->resources_type,
                                               params->resources_ptr,
                                               params->image_index, NULL);
   /* height/depth are stored narrower than 32 bits — widen for arithmetic
    * (width is presumably 32-bit already; no zext emitted for it) */
   height = LLVMBuildZExt(gallivm->builder, height,
                          int_coord_bld.elem_type, "");
   LLVMValueRef depth = dynamic_state->depth(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   depth = LLVMBuildZExt(gallivm->builder, depth,
                         int_coord_bld.elem_type, "");
   bool layer_coord = has_layer_coord(target);

   /* rescale view dimensions when view/resource block sizes differ,
    * then broadcast scalars to coord-vector width */
   width = lp_build_scale_view_dim(gallivm, width, res_format_desc->block.width,
                                   format_desc->block.width);
   width = lp_build_broadcast_scalar(&int_coord_bld, width);
   if (dims >= 2) {
      height = lp_build_scale_view_dim(gallivm, height, res_format_desc->block.height,
                                       format_desc->block.height);
      height = lp_build_broadcast_scalar(&int_coord_bld, height);
      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
   }
   if (dims >= 3 || layer_coord) {
      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
   }

   /* accumulate a per-lane out-of-bounds mask across all used axes;
    * coords are treated as unsigned so negative values are also OOB */
   LLVMValueRef out_of_bounds = int_coord_bld.zero;
   LLVMValueRef out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }
   if (dims >= 3 || layer_coord) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }

   /* per-lane byte offset (and, for compressed formats, subblock i/j) */
   LLVMValueRef offset, i, j;
   if (static_texture_state->tiled) {
      lp_build_tiled_sample_offset(&int_coord_bld,
                                   format_desc->format,
                                   static_texture_state,
                                   x, y, z, width, height, img_stride_vec,
                                   &offset, &i, &j);
   } else {
      lp_build_sample_offset(&int_coord_bld,
                             format_desc,
                             x, y, z, row_stride_vec, img_stride_vec,
                             &offset, &i, &j);
   }

   if (params->ms_index && static_texture_state->level_zero_only) {
      /* NOTE: for multisample images the last_level slot holds the sample
       * count (MSAA resources never have mip levels) */
      LLVMValueRef num_samples = dynamic_state->last_level(gallivm,
                                                           params->resources_type,
                                                           params->resources_ptr,
                                                           params->image_index, NULL);
      num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                  int_coord_bld.elem_type, "");
      LLVMValueRef sample_stride = dynamic_state->sample_stride(gallivm,
                                                                params->resources_type,
                                                                params->resources_ptr,
                                                                params->image_index, NULL);
      lp_build_sample_ms_offset(&int_coord_bld,
                                params->ms_index, num_samples,
                                sample_stride, &offset,
                                &out_of_bounds);
   }
   if (params->img_op == LP_IMG_LOAD || params->img_op == LP_IMG_LOAD_SPARSE) {
      struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);

      if (params->img_op == LP_IMG_LOAD_SPARSE && static_texture_state->tiled) {
         /* sparse load: report page residency in the 5th output channel */
         LLVMValueRef base_offset =
            dynamic_state->base_offset(gallivm, params->resources_type,
                                       params->resources_ptr, params->image_index, NULL);
         base_offset = lp_build_broadcast_scalar(&int_coord_bld, base_offset);

         LLVMValueRef full_offset = LLVMBuildAdd(gallivm->builder, base_offset, offset, "");

         lp_build_gather_resident(&int_coord_bld, dynamic_state,
                                  params->resources_type, params->resources_ptr,
                                  full_offset, &outdata[4]);
      }

      /* clamp OOB lanes' offsets to 0 so the fetch stays in memory; their
       * results are overwritten with the border value below */
      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
      struct lp_build_context texel_bld;
      lp_build_context_init(&texel_bld, gallivm, texel_type);
      lp_build_fetch_rgba_soa(gallivm,
                              format_desc,
                              texel_type, true,
                              base_ptr, offset,
                              i, j,
                              NULL,
                              outdata);

      /* OOB reads return (0,0,0,0) or (0,0,0,1) depending on whether the
       * format has an alpha channel */
      for (unsigned chan = 0; chan < 3; chan++) {
         outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
                                         texel_bld.zero, outdata[chan]);
      }
      if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.one, outdata[3]);
      } else {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.zero, outdata[3]);
      }
   } else if (params->img_op == LP_IMG_STORE) {
      lp_build_store_rgba_soa(gallivm, format_desc, params->type,
                              params->exec_mask, base_ptr, offset,
                              out_of_bounds, params->indata);
   } else {
      lp_build_do_atomic_soa(gallivm, format_desc, params->type,
                             params->exec_mask, base_ptr, offset,
                             out_of_bounds, params->img_op, params->op,
                             params->indata, params->indata2, outdata);
   }
}
4818
4819
4820 /*
 * These functions are for indirect texture access support.
4822 *
4823 * Indirect textures are implemented using a switch statement, that
4824 * takes the texture index and jumps to the sampler functions for
4825 * that texture unit.
4826 */
4827
4828 /*
4829 * Initialise an indexed sampler switch block.
4830 *
4831 * This sets up the switch_info state and adds the LLVM flow control pieces.
4832 */
/**
 * Initialise an indexed sampler switch block.
 *
 * This sets up the switch_info state and adds the LLVM flow control pieces:
 * a switch on the texture index whose default edge goes to a new merge
 * block, and in the merge block a phi of the 4-channel result struct that
 * each case added later (lp_build_sample_array_case_soa) feeds into.
 * The builder is left positioned in the merge block; callers position it
 * per-case and must finish with lp_build_sample_array_fini_soa.
 */
void
lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
                               struct gallivm_state *gallivm,
                               const struct lp_sampler_params *params,
                               LLVMValueRef idx,
                               unsigned base, unsigned range)
{
   switch_info->gallivm = gallivm;
   switch_info->params = *params;
   switch_info->base = base;
   switch_info->range = range;

   /* for generating the switch functions we don't want the texture index
    * offset
    */
   switch_info->params.texture_index_offset = 0;

   LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
   switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");

   /* default (index out of range) falls straight through to the merge */
   switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
                                             switch_info->merge_ref,
                                             range - base);

   /* result type: struct of 4 texel-channel vectors, matching what the
    * per-unit sample functions return */
   LLVMTypeRef val_type[4];
   val_type[0] = val_type[1] = val_type[2] = val_type[3] =
      lp_build_vec_type(gallivm, params->type);

   LLVMTypeRef ret_type =
      LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);

   /* the default edge contributes an undef result */
   LLVMValueRef undef_val = LLVMGetUndef(ret_type);

   LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);

   switch_info->phi = LLVMBuildPhi(gallivm->builder, ret_type, "");
   LLVMAddIncoming(switch_info->phi, &undef_val, &initial_block, 1);
}
4871
4872
4873 /*
4874 * Add an individual entry to the indirect texture switch.
4875 *
4876 * This builds the sample function and links a case for it into the switch
4877 * statement.
4878 */
/**
 * Add one case (one texture unit) to an indexed sampler switch started by
 * lp_build_sample_array_init_soa.
 *
 * Creates a new basic block, links it as the switch case for \p idx,
 * emits the sampling function call for that unit into it, feeds the
 * result into the merge-block phi, and branches to the merge block.
 * Leaves the builder positioned in the (now terminated) case block.
 */
void
lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
                               int idx,
                               const struct lp_static_texture_state *static_texture_state,
                               const struct lp_static_sampler_state *static_sampler_state,
                               struct lp_sampler_dynamic_state *dynamic_texture_state)
{
   struct gallivm_state *gallivm = switch_info->gallivm;
   LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "texblock");

   LLVMAddCase(switch_info->switch_ref,
               LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0),
               this_block);
   LLVMPositionBuilderAtEnd(gallivm->builder, this_block);

   /* idx doubles as both texture and sampler index here */
   LLVMValueRef tex_ret;
   lp_build_sample_soa_func(gallivm, static_texture_state,
                            static_sampler_state, dynamic_texture_state,
                            &switch_info->params, idx, idx, &tex_ret);

   LLVMAddIncoming(switch_info->phi, &tex_ret, &this_block, 1);
   LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
}
4902
4903
4904 /*
4905 * Finish a switch statement.
4906 *
 * This handles extracting the results from the switch.
4908 */
4909 void
lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch * switch_info)4910 lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
4911 {
4912 struct gallivm_state *gallivm = switch_info->gallivm;
4913
4914 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
4915 for (unsigned i = 0; i < 4; i++) {
4916 switch_info->params.texel[i] =
4917 LLVMBuildExtractValue(gallivm->builder, switch_info->phi, i, "");
4918 }
4919 }
4920
4921
/**
 * Initialise an indexed image-op switch block (the image-op counterpart of
 * lp_build_sample_array_init_soa).
 *
 * Builds a switch on the image index with a fresh merge block as the
 * default target. For ops that produce results (anything but a store),
 * one phi per result channel is created in the merge block — 4 channels
 * for loads, 1 for atomics — each seeded with undef from the entry edge.
 * Leaves the builder positioned in the merge block.
 */
void
lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch *switch_info,
                             struct gallivm_state *gallivm,
                             const struct lp_img_params *params,
                             LLVMValueRef idx,
                             unsigned base, unsigned range)
{
   switch_info->gallivm = gallivm;
   switch_info->params = *params;
   switch_info->base = base;
   switch_info->range = range;

   /* for generating the switch functions we don't want the texture index
    * offset
    */
   switch_info->params.image_index_offset = 0;

   LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
   switch_info->merge_ref = lp_build_insert_new_block(gallivm, "imgmerge");

   /* default (index out of range) falls straight through to the merge */
   switch_info->switch_ref =
      LLVMBuildSwitch(gallivm->builder, idx,
                      switch_info->merge_ref, range - base);

   if (params->img_op != LP_IMG_STORE) {
      LLVMTypeRef ret_type = lp_build_vec_type(gallivm, params->type);
      LLVMValueRef undef_val = LLVMGetUndef(ret_type);

      LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);

      /* loads return 4 channels, atomics return 1 */
      for (unsigned i = 0; i < ((params->img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
         switch_info->phi[i] = LLVMBuildPhi(gallivm->builder, ret_type, "");
         LLVMAddIncoming(switch_info->phi[i], &undef_val, &initial_block, 1);
      }
   }
}
4958
4959
4960 void
lp_build_image_op_array_case(struct lp_build_img_op_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,struct lp_sampler_dynamic_state * dynamic_state)4961 lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
4962 int idx,
4963 const struct lp_static_texture_state *static_texture_state,
4964 struct lp_sampler_dynamic_state *dynamic_state)
4965 {
4966 struct gallivm_state *gallivm = switch_info->gallivm;
4967 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "img");
4968 LLVMValueRef tex_ret[4];
4969
4970 LLVMAddCase(switch_info->switch_ref,
4971 lp_build_const_int32(gallivm, idx), this_block);
4972 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
4973
4974 switch_info->params.image_index = idx;
4975
4976 lp_build_img_op_soa(static_texture_state, dynamic_state,
4977 switch_info->gallivm, &switch_info->params, tex_ret);
4978
4979 if (switch_info->params.img_op != LP_IMG_STORE) {
4980 for (unsigned i = 0;
4981 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
4982 tex_ret[i] =
4983 LLVMBuildBitCast(gallivm->builder, tex_ret[i],
4984 lp_build_vec_type(gallivm,
4985 switch_info->params.type), "");
4986 }
4987
4988 this_block = LLVMGetInsertBlock(gallivm->builder);
4989 for (unsigned i = 0;
4990 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
4991 LLVMAddIncoming(switch_info->phi[i], &tex_ret[i], &this_block, 1);
4992 }
4993 }
4994 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
4995 }
4996
4997
4998 void
lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch * switch_info)4999 lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info)
5000 {
5001 struct gallivm_state *gallivm = switch_info->gallivm;
5002
5003 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5004
5005 if (switch_info->params.img_op != LP_IMG_STORE) {
5006 for (unsigned i = 0;
5007 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5008 switch_info->params.outdata[i] = switch_info->phi[i];
5009 }
5010 }
5011 }
5012