/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/bitset.h"
40 #include "util/compiler.h"
41 #include "util/u_debug.h"
42 #include "util/u_dump.h"
43 #include "util/u_memory.h"
44 #include "util/u_math.h"
45 #include "util/format/u_format.h"
46 #include "util/u_cpu_detect.h"
47 #include "util/format_rgb9e5.h"
48 #include "lp_bld_debug.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_conv.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_bitarit.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_printf.h"
56 #include "lp_bld_swizzle.h"
57 #include "lp_bld_flow.h"
58 #include "lp_bld_gather.h"
59 #include "lp_bld_format.h"
60 #include "lp_bld_sample.h"
61 #include "lp_bld_sample_aos.h"
62 #include "lp_bld_struct.h"
63 #include "lp_bld_quad.h"
64 #include "lp_bld_pack.h"
65 #include "lp_bld_intr.h"
66 #include "lp_bld_misc.h"
67 #include "lp_bld_jit_types.h"
68
69 static void
lp_build_gather_resident(struct lp_build_context * bld,struct lp_sampler_dynamic_state * dynamic_state,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,LLVMValueRef offset,LLVMValueRef * out_resident)70 lp_build_gather_resident(struct lp_build_context *bld,
71 struct lp_sampler_dynamic_state *dynamic_state,
72 LLVMTypeRef resources_type,
73 LLVMValueRef resources_ptr,
74 LLVMValueRef offset,
75 LLVMValueRef *out_resident)
76 {
77 struct lp_type type = lp_int_type(bld->type);
78
79 struct gallivm_state *gallivm = bld->gallivm;
80 LLVMBuilderRef builder = gallivm->builder;
81
82 static_assert(sizeof(BITSET_WORD) == 4, "Unexpected BITSET_WORD size");
83
84 LLVMValueRef residency =
85 dynamic_state->residency(gallivm, resources_type, resources_ptr, 0, NULL);
86
87 LLVMValueRef tile_size_log2 =
88 lp_build_const_int_vec(gallivm, type, util_logbase2(64 * 1024));
89 LLVMValueRef tile_index = LLVMBuildLShr(builder, offset, tile_size_log2, "");
90
91 LLVMValueRef dword_bitsize_log2 =
92 lp_build_const_int_vec(gallivm, type, util_logbase2(32));
93 LLVMValueRef dword_index = LLVMBuildLShr(builder, tile_index, dword_bitsize_log2, "");
94
95 LLVMValueRef dword_size_log2 =
96 lp_build_const_int_vec(gallivm, type, util_logbase2(4));
97 LLVMValueRef dword_offset = LLVMBuildShl(builder, dword_index, dword_size_log2, "");
98
99 residency = lp_build_gather(gallivm, type.length, type.width, lp_elem_type(type),
100 true, residency, dword_offset, true);
101
102 LLVMValueRef dword_bit_mask =
103 lp_build_const_int_vec(gallivm, type, 31);
104 LLVMValueRef bit_index = LLVMBuildAnd(builder, tile_index, dword_bit_mask, "");
105 LLVMValueRef bit_mask = LLVMBuildShl(builder, lp_build_one(gallivm, type), bit_index, "");
106
107 LLVMValueRef resident = LLVMBuildAnd(builder, residency, bit_mask, "");
108 resident = LLVMBuildICmp(builder, LLVMIntNE, resident, lp_build_zero(gallivm, type), "");
109
110 if (*out_resident)
111 *out_resident = LLVMBuildAnd(builder, *out_resident, resident, "");
112 else
113 *out_resident = resident;
114 }
115
116 /**
117 * Generate code to fetch a texel from a texture at int coords (x, y, z).
118 * The computation depends on whether the texture is 1D, 2D or 3D.
119 * The result, texel, will be float vectors:
120 * texel[0] = red values
121 * texel[1] = green values
122 * texel[2] = blue values
123 * texel[3] = alpha values
124 */
125 static void
lp_build_sample_texel_soa(struct lp_build_sample_context * bld,LLVMValueRef width,LLVMValueRef height,LLVMValueRef depth,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef ilevel,LLVMValueRef texel_out[4])126 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
127 LLVMValueRef width,
128 LLVMValueRef height,
129 LLVMValueRef depth,
130 LLVMValueRef x,
131 LLVMValueRef y,
132 LLVMValueRef z,
133 LLVMValueRef y_stride,
134 LLVMValueRef z_stride,
135 LLVMValueRef data_ptr,
136 LLVMValueRef mipoffsets,
137 LLVMValueRef ilevel,
138 LLVMValueRef texel_out[4])
139 {
140 const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
141 const unsigned dims = bld->dims;
142 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
143 LLVMBuilderRef builder = bld->gallivm->builder;
144 LLVMValueRef offset;
145 LLVMValueRef i, j;
146 LLVMValueRef use_border = NULL;
147
148 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
149 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
150 static_state->min_img_filter,
151 static_state->mag_img_filter)) {
152 LLVMValueRef b1, b2;
153 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
154 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
155 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
156 }
157
158 if (dims >= 2 &&
159 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
160 static_state->min_img_filter,
161 static_state->mag_img_filter)) {
162 LLVMValueRef b1, b2;
163 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
164 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
165 if (use_border) {
166 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
167 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
168 } else {
169 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
170 }
171 }
172
173 if (dims == 3 &&
174 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
175 static_state->min_img_filter,
176 static_state->mag_img_filter)) {
177 LLVMValueRef b1, b2;
178 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
179 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
180 if (use_border) {
181 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
182 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
183 } else {
184 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
185 }
186 }
187
188 /* convert x,y,z coords to linear offset from start of texture, in bytes */
189 if (bld->static_texture_state->tiled) {
190 lp_build_tiled_sample_offset(&bld->int_coord_bld,
191 bld->format_desc->format,
192 bld->static_texture_state,
193 x, y, z, width, height, z_stride,
194 &offset, &i, &j);
195 } else {
196 lp_build_sample_offset(&bld->int_coord_bld,
197 bld->format_desc,
198 x, y, z, y_stride, z_stride,
199 &offset, &i, &j);
200 }
201
202 if (mipoffsets) {
203 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
204 }
205
206 if (use_border) {
207 /* If we can sample the border color, it means that texcoords may
208 * lie outside the bounds of the texture image. We need to do
209 * something to prevent reading out of bounds and causing a segfault.
210 *
211 * Simply AND the texture coords with !use_border. This will cause
212 * coords which are out of bounds to become zero. Zero's guaranteed
213 * to be inside the texture image.
214 */
215 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
216 }
217
218 if (bld->residency) {
219 LLVMValueRef real_offset = offset;
220
221 if (!mipoffsets) {
222 mipoffsets = lp_build_get_mip_offsets(bld, ilevel);
223 real_offset = lp_build_add(&bld->int_coord_bld, real_offset, mipoffsets);
224
225 if (use_border)
226 real_offset = lp_build_andnot(&bld->int_coord_bld, real_offset, use_border);
227 }
228
229 lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
230 bld->resources_type, bld->resources_ptr,
231 real_offset, &bld->resident);
232 }
233
234 lp_build_fetch_rgba_soa(bld->gallivm,
235 bld->format_desc,
236 bld->texel_type, true,
237 data_ptr, offset,
238 i, j,
239 bld->cache,
240 texel_out);
241
242 /*
243 * Note: if we find an app which frequently samples the texture border
244 * we might want to implement a true conditional here to avoid sampling
245 * the texture whenever possible (since that's quite a bit of code).
246 * Ex:
247 * if (use_border) {
248 * texel = border_color;
249 * } else {
250 * texel = sample_texture(coord);
251 * }
252 * As it is now, we always sample the texture, then selectively replace
253 * the texel color results with the border color.
254 */
255
256 if (use_border) {
257 /* select texel color or border color depending on use_border. */
258 const struct util_format_description *format_desc = bld->format_desc;
259 struct lp_type border_type = bld->texel_type;
260 border_type.length = 4;
261 /*
262 * Only replace channels which are actually present. The others should
263 * get optimized away eventually by sampler_view swizzle anyway but it's
264 * easier too.
265 */
266 for (unsigned chan = 0; chan < 4; chan++) {
267 unsigned chan_s;
268 /* reverse-map channel... */
269 if (util_format_has_stencil(format_desc)) {
270 if (chan == 0)
271 chan_s = 0;
272 else
273 break;
274 } else {
275 for (chan_s = 0; chan_s < 4; chan_s++) {
276 if (chan_s == format_desc->swizzle[chan]) {
277 break;
278 }
279 }
280 }
281 if (chan_s <= 3) {
282 /* use the already clamped color */
283 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
284 LLVMValueRef border_chan;
285
286 border_chan = lp_build_extract_broadcast(bld->gallivm,
287 border_type,
288 bld->texel_type,
289 bld->border_color_clamped,
290 idx);
291 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
292 border_chan, texel_out[chan]);
293 }
294 }
295 }
296 }
297
298 static LLVMValueRef
get_first_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)299 get_first_level(struct gallivm_state *gallivm,
300 LLVMTypeRef resources_type,
301 LLVMValueRef resources_ptr,
302 unsigned texture_unit,
303 LLVMValueRef texture_unit_offset,
304 const struct lp_static_texture_state *static_state,
305 struct lp_sampler_dynamic_state *dynamic_state)
306 {
307 if (static_state->level_zero_only)
308 return lp_build_const_int32(gallivm, 0);
309 else {
310 LLVMValueRef first_level;
311
312 first_level = dynamic_state->first_level(gallivm, resources_type,
313 resources_ptr, texture_unit,
314 texture_unit_offset);
315 first_level = LLVMBuildZExt(gallivm->builder, first_level,
316 LLVMInt32TypeInContext(gallivm->context), "");
317 return first_level;
318 }
319 }
320
321
322 static LLVMValueRef
get_last_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)323 get_last_level(struct gallivm_state *gallivm,
324 LLVMTypeRef resources_type,
325 LLVMValueRef resources_ptr,
326 unsigned texture_unit,
327 LLVMValueRef texture_unit_offset,
328 const struct lp_static_texture_state *static_state,
329 struct lp_sampler_dynamic_state *dynamic_state)
330 {
331 if (static_state->level_zero_only)
332 return lp_build_const_int32(gallivm, 0);
333 else {
334 LLVMValueRef last_level;
335
336 last_level = dynamic_state->last_level(gallivm, resources_type,
337 resources_ptr, texture_unit,
338 texture_unit_offset);
339 last_level = LLVMBuildZExt(gallivm->builder, last_level,
340 LLVMInt32TypeInContext(gallivm->context), "");
341 return last_level;
342 }
343 }
344
345 /**
346 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
347 * (Note that with pot sizes could do this much more easily post-scale
348 * with some bit arithmetic.)
349 */
350 static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context * bld,LLVMValueRef coord,bool posOnly)351 lp_build_coord_mirror(struct lp_build_sample_context *bld,
352 LLVMValueRef coord, bool posOnly)
353 {
354 struct lp_build_context *coord_bld = &bld->coord_bld;
355 LLVMValueRef fract;
356 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
357
358 /*
359 * We can just use 2*(x - round(0.5*x)) to do all the mirroring,
360 * it all works out. (The result is in range [-1, 1.0], negative if
361 * the coord is in the "odd" section, otherwise positive.)
362 */
363
364 coord = lp_build_mul(coord_bld, coord, half);
365 fract = lp_build_round(coord_bld, coord);
366 fract = lp_build_sub(coord_bld, coord, fract);
367 coord = lp_build_add(coord_bld, fract, fract);
368
369 if (posOnly) {
370 /*
371 * Theoretically it's not quite 100% accurate because the spec says
372 * that ultimately a scaled coord of -x.0 should map to int coord
373 * -x + 1 with mirroring, not -x (this does not matter for bilinear
374 * filtering).
375 */
376 coord = lp_build_abs(coord_bld, coord);
377 /* kill off NaNs */
378 /* XXX: not safe without arch rounding, fract can be anything. */
379 coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
380 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
381 }
382
383 return coord;
384 }
385
386
387 /**
388 * Helper to compute the first coord and the weight for
389 * linear wrap repeat npot textures
390 */
391 void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_f)392 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
393 LLVMValueRef coord_f,
394 LLVMValueRef length_i,
395 LLVMValueRef length_f,
396 LLVMValueRef *coord0_i,
397 LLVMValueRef *weight_f)
398 {
399 struct lp_build_context *coord_bld = &bld->coord_bld;
400 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
401 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
402 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
403 int_coord_bld->one);
404 LLVMValueRef mask;
405 /* wrap with normalized floats is just fract */
406 coord_f = lp_build_fract(coord_bld, coord_f);
407 /* mul by size and subtract 0.5 */
408 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
409 coord_f = lp_build_sub(coord_bld, coord_f, half);
410 /*
411 * we avoided the 0.5/length division before the repeat wrap,
412 * now need to fix up edge cases with selects
413 */
414 /*
415 * Note we do a float (unordered) compare so we can eliminate NaNs.
416 * (Otherwise would need fract_safe above).
417 */
418 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
419 PIPE_FUNC_LESS, coord_f, coord_bld->zero);
420
421 /* convert to int, compute lerp weight */
422 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
423 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
424 }
425
426
427 /**
428 * Build LLVM code for texture wrap mode for linear filtering.
429 * \param x0_out returns first integer texcoord
430 * \param x1_out returns second integer texcoord
431 * \param weight_out returns linear interpolation weight
432 */
433 static void
lp_build_sample_wrap_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef length_f,LLVMValueRef offset,bool is_pot,unsigned wrap_mode,LLVMValueRef * x0_out,LLVMValueRef * x1_out,LLVMValueRef * weight_out)434 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
435 bool is_gather,
436 LLVMValueRef coord,
437 LLVMValueRef length,
438 LLVMValueRef length_f,
439 LLVMValueRef offset,
440 bool is_pot,
441 unsigned wrap_mode,
442 LLVMValueRef *x0_out,
443 LLVMValueRef *x1_out,
444 LLVMValueRef *weight_out)
445 {
446 struct lp_build_context *coord_bld = &bld->coord_bld;
447 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
448 LLVMBuilderRef builder = bld->gallivm->builder;
449 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
450 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
451 LLVMValueRef coord0, coord1, weight;
452
453 switch (wrap_mode) {
454 case PIPE_TEX_WRAP_REPEAT:
455 if (is_pot) {
456 /* mul by size and subtract 0.5 */
457 coord = lp_build_mul(coord_bld, coord, length_f);
458 coord = lp_build_sub(coord_bld, coord, half);
459 if (offset) {
460 offset = lp_build_int_to_float(coord_bld, offset);
461 coord = lp_build_add(coord_bld, coord, offset);
462 }
463 /* convert to int, compute lerp weight */
464 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
465 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
466 /* repeat wrap */
467 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
468 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
469 } else {
470 LLVMValueRef mask;
471 if (offset) {
472 offset = lp_build_int_to_float(coord_bld, offset);
473 offset = lp_build_div(coord_bld, offset, length_f);
474 coord = lp_build_add(coord_bld, coord, offset);
475 }
476 lp_build_coord_repeat_npot_linear(bld, coord,
477 length, length_f,
478 &coord0, &weight);
479 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
480 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
481 coord1 = LLVMBuildAnd(builder,
482 lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
483 mask, "");
484 }
485 break;
486
487 case PIPE_TEX_WRAP_CLAMP:
488 if (bld->static_sampler_state->normalized_coords) {
489 /* scale coord to length */
490 coord = lp_build_mul(coord_bld, coord, length_f);
491 }
492 if (offset) {
493 offset = lp_build_int_to_float(coord_bld, offset);
494 coord = lp_build_add(coord_bld, coord, offset);
495 }
496
497 /*
498 * clamp to [0, length]
499 *
500 * Unlike some other wrap modes, this should be correct for gather
501 * too. GL_CLAMP explicitly does this clamp on the coord prior to
502 * actual wrapping (which is per sample).
503 */
504 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
505
506 coord = lp_build_sub(coord_bld, coord, half);
507
508 /* convert to int, compute lerp weight */
509 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
510 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
511 break;
512
513 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
514 {
515 struct lp_build_context abs_coord_bld = bld->coord_bld;
516 abs_coord_bld.type.sign = false;
517
518 if (bld->static_sampler_state->normalized_coords) {
519 /* mul by tex size */
520 coord = lp_build_mul(coord_bld, coord, length_f);
521 }
522 if (offset) {
523 offset = lp_build_int_to_float(coord_bld, offset);
524 coord = lp_build_add(coord_bld, coord, offset);
525 }
526
527 /* clamp to length max */
528 coord = lp_build_min_ext(coord_bld, coord, length_f,
529 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
530 if (!is_gather) {
531 /* subtract 0.5 */
532 coord = lp_build_sub(coord_bld, coord, half);
533 /* clamp to [0, length - 0.5] */
534 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
535 /* convert to int, compute lerp weight */
536 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
537 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
538 } else {
539 /*
540 * The non-gather path will end up with coords 0, 1 if coord was
541 * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
542 * really matter what the second coord is). But for gather, we
543 * really need to end up with coords 0, 0.
544 */
545 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
546 coord0 = lp_build_sub(coord_bld, coord, half);
547 coord1 = lp_build_add(coord_bld, coord, half);
548 /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */
549 coord0 = lp_build_itrunc(coord_bld, coord0);
550 coord1 = lp_build_itrunc(coord_bld, coord1);
551 weight = coord_bld->undef;
552 }
553 /* coord1 = min(coord1, length-1) */
554 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
555 break;
556 }
557
558 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
559 if (bld->static_sampler_state->normalized_coords) {
560 /* scale coord to length */
561 coord = lp_build_mul(coord_bld, coord, length_f);
562 }
563 if (offset) {
564 offset = lp_build_int_to_float(coord_bld, offset);
565 coord = lp_build_add(coord_bld, coord, offset);
566 }
567 /*
568 * We don't need any clamp. Technically, for very large (pos or neg)
569 * (or infinite) values, clamp against [-length, length] would be
570 * correct, but we don't need to guarantee any specific
571 * result for such coords (the ifloor will be undefined, but for modes
572 * requiring border all resulting coords are safe).
573 */
574 coord = lp_build_sub(coord_bld, coord, half);
575 /* convert to int, compute lerp weight */
576 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
577 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
578 break;
579
580 case PIPE_TEX_WRAP_MIRROR_REPEAT:
581 if (offset) {
582 offset = lp_build_int_to_float(coord_bld, offset);
583 offset = lp_build_div(coord_bld, offset, length_f);
584 coord = lp_build_add(coord_bld, coord, offset);
585 }
586 if (!is_gather) {
587 /* compute mirror function */
588 coord = lp_build_coord_mirror(bld, coord, true);
589
590 /* scale coord to length */
591 coord = lp_build_mul(coord_bld, coord, length_f);
592 coord = lp_build_sub(coord_bld, coord, half);
593
594 /* convert to int, compute lerp weight */
595 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
596 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
597
598 /* coord0 = max(coord0, 0) */
599 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
600 /* coord1 = min(coord1, length-1) */
601 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
602 } else {
603 /*
604 * This is pretty reasonable in the end, all what the tests care
605 * about is nasty edge cases (scaled coords x.5, so the individual
606 * coords are actually integers, which is REALLY tricky to get right
607 * due to this working differently both for negative numbers as well
608 * as for even/odd cases). But with enough magic it's not too complex
609 * after all.
610 * Maybe should try a bit arithmetic one though for POT textures...
611 */
612 LLVMValueRef isNeg;
613 /*
614 * Wrapping just once still works, even though it means we can
615 * get "wrong" sign due to performing mirror in the middle of the
616 * two coords (because this can only happen very near the odd/even
617 * edges, so both coords will actually end up as 0 or length - 1
618 * in the end).
619 * For GL4 gather with per-sample offsets we'd need to the mirroring
620 * per coord too.
621 */
622 coord = lp_build_coord_mirror(bld, coord, false);
623 coord = lp_build_mul(coord_bld, coord, length_f);
624
625 /*
626 * NaNs should be safe here, we'll do away with them with
627 * the ones' complement plus min.
628 */
629 coord0 = lp_build_sub(coord_bld, coord, half);
630 coord0 = lp_build_ifloor(coord_bld, coord0);
631 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
632 /* ones complement for neg numbers (mirror(negX) = X - 1) */
633 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
634 coord0, int_coord_bld->zero);
635 coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
636 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
637 coord1, int_coord_bld->zero);
638 coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
639 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
640 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
641
642 weight = coord_bld->undef;
643 }
644 break;
645
646 case PIPE_TEX_WRAP_MIRROR_CLAMP:
647 if (bld->static_sampler_state->normalized_coords) {
648 /* scale coord to length */
649 coord = lp_build_mul(coord_bld, coord, length_f);
650 }
651 if (offset) {
652 offset = lp_build_int_to_float(coord_bld, offset);
653 coord = lp_build_add(coord_bld, coord, offset);
654 }
655 /*
656 * XXX: probably not correct for gather, albeit I'm not
657 * entirely sure as it's poorly specified. The wrapping looks
658 * correct according to the spec which is against gl 1.2.1,
659 * however negative values will be swapped - gl re-specified
660 * wrapping with newer versions (no more pre-clamp except with
661 * GL_CLAMP).
662 */
663 coord = lp_build_abs(coord_bld, coord);
664
665 /* clamp to [0, length] */
666 coord = lp_build_min_ext(coord_bld, coord, length_f,
667 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
668
669 coord = lp_build_sub(coord_bld, coord, half);
670
671 /* convert to int, compute lerp weight */
672 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
673 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
674 break;
675
676 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
677 {
678 struct lp_build_context abs_coord_bld = bld->coord_bld;
679 abs_coord_bld.type.sign = false;
680
681 if (bld->static_sampler_state->normalized_coords) {
682 /* scale coord to length */
683 coord = lp_build_mul(coord_bld, coord, length_f);
684 }
685 if (offset) {
686 offset = lp_build_int_to_float(coord_bld, offset);
687 coord = lp_build_add(coord_bld, coord, offset);
688 }
689 if (!is_gather) {
690 coord = lp_build_abs(coord_bld, coord);
691
692 /* clamp to length max */
693 coord = lp_build_min_ext(coord_bld, coord, length_f,
694 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
695 /* subtract 0.5 */
696 coord = lp_build_sub(coord_bld, coord, half);
697 /* clamp to [0, length - 0.5] */
698 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
699
700 /* convert to int, compute lerp weight */
701 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
702 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
703 /* coord1 = min(coord1, length-1) */
704 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
705 } else {
706 /*
707 * The non-gather path will swap coord0/1 if coord was negative,
708 * which is ok for filtering since the filter weight matches
709 * accordingly. Also, if coord is close to zero, coord0/1 will
710 * be 0 and 1, instead of 0 and 0 (again ok due to filter
711 * weight being 0.0). Both issues need to be fixed for gather.
712 */
713 LLVMValueRef isNeg;
714
715 /*
716 * Actually wanted to cheat here and use:
717 * coord1 = lp_build_iround(coord_bld, coord);
718 * but it's not good enough for some tests (even piglit
719 * textureGather is set up in a way so the coords area always
720 * .5, that is right at the crossover points).
721 * So do ordinary sub/floor, then do ones' complement
722 * for negative numbers.
723 * (Note can't just do sub|add/abs/itrunc per coord neither -
724 * because the spec demands that mirror(3.0) = 3 but
725 * mirror(-3.0) = 2.)
726 */
727 coord = lp_build_sub(coord_bld, coord, half);
728 coord0 = lp_build_ifloor(coord_bld, coord);
729 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
730 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
731 int_coord_bld->zero);
732 coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
733 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
734
735 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
736 int_coord_bld->zero);
737 coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
738 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
739
740 weight = coord_bld->undef;
741 }
742 }
743 break;
744
745 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
746 {
747 if (bld->static_sampler_state->normalized_coords) {
748 /* scale coord to length */
749 coord = lp_build_mul(coord_bld, coord, length_f);
750 }
751 if (offset) {
752 offset = lp_build_int_to_float(coord_bld, offset);
753 coord = lp_build_add(coord_bld, coord, offset);
754 }
755 /*
756 * XXX: probably not correct for gather due to swapped
757 * order if coord is negative (same rationale as for
758 * MIRROR_CLAMP).
759 */
760 coord = lp_build_abs(coord_bld, coord);
761
762 /*
763 * We don't need any clamp. Technically, for very large
764 * (or infinite) values, clamp against length would be
765 * correct, but we don't need to guarantee any specific
766 * result for such coords (the ifloor will be undefined, but
767 * for modes requiring border all resulting coords are safe).
768 */
769 coord = lp_build_sub(coord_bld, coord, half);
770
771 /* convert to int, compute lerp weight */
772 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
773 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
774 }
775 break;
776
777 default:
778 assert(0);
779 coord0 = NULL;
780 coord1 = NULL;
781 weight = NULL;
782 }
783
784 *x0_out = coord0;
785 *x1_out = coord1;
786 *weight_out = weight;
787 }
788
789
790 /**
791 * Build LLVM code for texture wrap mode for nearest filtering.
792 * \param coord the incoming texcoord (nominally in [0,1])
793 * \param length the texture size along one dimension, as int vector
794 * \param length_f the texture size along one dimension, as float vector
795 * \param offset texel offset along one dimension (as int vector)
796 * \param is_pot if TRUE, length is a power of two
797 * \param wrap_mode one of PIPE_TEX_WRAP_x
798 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             bool is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   /* length - 1, reused both as clamp limit and as pot AND-mask */
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* unnormalize, apply texel offset, then wrap with a single
          * bitwise AND (valid because length is a power of two) */
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      } else {
         /* non-pot: fold the texel offset into the normalized coord
          * (offset / length), then wrap by taking the fractional part */
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      /* offset is in texels, applied after unnormalizing */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* offset must be folded in before mirroring (normalized space) */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord, true);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror about zero via absolute value */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = false;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror about zero; out-of-range handled by border masking */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}
935
936
937 /**
938 * Do shadow test/comparison.
939 * \param p shadow ref value
940 * \param texel the texel to compare against
941 */
942 static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context * bld,LLVMValueRef p,LLVMValueRef texel)943 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
944 LLVMValueRef p,
945 LLVMValueRef texel)
946 {
947 struct lp_build_context *texel_bld = &bld->texel_bld;
948 LLVMValueRef res;
949
950 if (0) {
951 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
952 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
953 }
954
955 /* result = (p FUNC texel) ? 1 : 0 */
956 /*
957 * honor d3d10 floating point rules here, which state that comparisons
958 * are ordered except NOT_EQUAL which is unordered.
959 */
960 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
961 res = lp_build_cmp_ordered(texel_bld,
962 bld->static_sampler_state->compare_func,
963 p, texel);
964 } else {
965 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
966 p, texel);
967 }
968 return res;
969 }
970
971
972 /**
973 * Generate code to sample a mipmap level with nearest filtering.
974 * If sampling a cube texture, r = cube face in [0,5].
975 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef ilevel,
                              const LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   /* per-dimension sizes as int vectors (for wrapping/clamping) */
   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   /* same sizes as float vectors (for unnormalizing coords) */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   /* for array/cube-array targets the layer (or face+layer) goes into z */
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      } else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, ilevel, colors_out);

   /* shadow comparison: replace fetched texel with 0/1 compare result,
    * broadcast to all four channels (coords[4] holds the shadow ref) */
   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}
1065
1066
1067 /**
1068 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
1069 */
1070 static LLVMValueRef
lp_build_masklerp(struct lp_build_context * bld,LLVMValueRef weight,LLVMValueRef mask0,LLVMValueRef mask1)1071 lp_build_masklerp(struct lp_build_context *bld,
1072 LLVMValueRef weight,
1073 LLVMValueRef mask0,
1074 LLVMValueRef mask1)
1075 {
1076 struct gallivm_state *gallivm = bld->gallivm;
1077 LLVMBuilderRef builder = gallivm->builder;
1078 LLVMValueRef weight2;
1079
1080 weight2 = lp_build_sub(bld, bld->one, weight);
1081 weight = LLVMBuildBitCast(builder, weight,
1082 lp_build_int_vec_type(gallivm, bld->type), "");
1083 weight2 = LLVMBuildBitCast(builder, weight2,
1084 lp_build_int_vec_type(gallivm, bld->type), "");
1085 weight = LLVMBuildAnd(builder, weight, mask1, "");
1086 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
1087 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
1088 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
1089 return lp_build_add(bld, weight, weight2);
1090 }
1091
1092 /**
1093 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
1094 */
1095 static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context * bld,LLVMValueRef weight0,LLVMValueRef weight1,LLVMValueRef mask00,LLVMValueRef mask01,LLVMValueRef mask10,LLVMValueRef mask11)1096 lp_build_masklerp2d(struct lp_build_context *bld,
1097 LLVMValueRef weight0,
1098 LLVMValueRef weight1,
1099 LLVMValueRef mask00,
1100 LLVMValueRef mask01,
1101 LLVMValueRef mask10,
1102 LLVMValueRef mask11)
1103 {
1104 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
1105 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
1106 return lp_build_lerp(bld, weight1, val0, val1, 0);
1107 }
1108
1109 /*
1110 * this is a bit excessive code for something OpenGL just recommends
1111 * but does not require.
1112 */
1113 #define ACCURATE_CUBE_CORNERS 1
1114
1115 /**
1116 * Generate code to sample a mipmap level with linear filtering.
1117 * If sampling a cube texture, r = cube face in [0,5].
1118 * If linear_mask is present, only pixels having their mask set
1119 * will receive linear filtering, the rest will use nearest.
1120 */
1121 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef size,LLVMValueRef linear_mask,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef ilevel,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])1122 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1123 bool is_gather,
1124 LLVMValueRef size,
1125 LLVMValueRef linear_mask,
1126 LLVMValueRef row_stride_vec,
1127 LLVMValueRef img_stride_vec,
1128 LLVMValueRef data_ptr,
1129 LLVMValueRef mipoffsets,
1130 LLVMValueRef ilevel,
1131 const LLVMValueRef *coords,
1132 const LLVMValueRef *offsets,
1133 LLVMValueRef colors_out[4])
1134 {
1135 LLVMBuilderRef builder = bld->gallivm->builder;
1136 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
1137 struct lp_build_context *coord_bld = &bld->coord_bld;
1138 struct lp_build_context *texel_bld = &bld->texel_bld;
1139 const unsigned dims = bld->dims;
1140 LLVMValueRef width_vec;
1141 LLVMValueRef height_vec;
1142 LLVMValueRef depth_vec;
1143 LLVMValueRef flt_size;
1144 LLVMValueRef flt_width_vec;
1145 LLVMValueRef flt_height_vec;
1146 LLVMValueRef flt_depth_vec;
1147 LLVMValueRef fall_off[4] = { 0 }, have_corners = NULL;
1148 LLVMValueRef z1 = NULL;
1149 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
1150 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
1151 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
1152 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
1153 LLVMValueRef xs[4], ys[4], zs[4];
1154 LLVMValueRef neighbors[2][2][4];
1155 bool seamless_cube_filter, accurate_cube_corners;
1156 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1157
1158 if (is_gather) {
1159 switch (bld->gather_comp) {
1160 case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
1161 case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
1162 case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
1163 case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
1164 default:
1165 break;
1166 }
1167 }
1168
1169 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
1170 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
1171 bld->static_sampler_state->seamless_cube_map;
1172
1173 /*
1174 * Disable accurate cube corners for integer textures, which should only
1175 * get here in the gather path.
1176 */
1177 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
1178 !util_format_is_pure_integer(bld->static_texture_state->format);
1179
1180 lp_build_extract_image_sizes(bld,
1181 &bld->int_size_bld,
1182 bld->int_coord_type,
1183 size,
1184 &width_vec, &height_vec, &depth_vec);
1185
1186 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
1187
1188 lp_build_extract_image_sizes(bld,
1189 &bld->float_size_bld,
1190 bld->coord_type,
1191 flt_size,
1192 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
1193
1194 LLVMTypeRef int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1195
1196 /*
1197 * Compute integer texcoords.
1198 */
1199
1200 if (!seamless_cube_filter) {
1201 lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
1202 flt_width_vec, offsets[0],
1203 bld->static_texture_state->pot_width,
1204 bld->static_sampler_state->wrap_s,
1205 &x00, &x01, &s_fpart);
1206 lp_build_name(x00, "tex.x0.wrapped");
1207 lp_build_name(x01, "tex.x1.wrapped");
1208 x10 = x00;
1209 x11 = x01;
1210
1211 if (dims >= 2) {
1212 lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
1213 flt_height_vec, offsets[1],
1214 bld->static_texture_state->pot_height,
1215 bld->static_sampler_state->wrap_t,
1216 &y00, &y10, &t_fpart);
1217 lp_build_name(y00, "tex.y0.wrapped");
1218 lp_build_name(y10, "tex.y1.wrapped");
1219 y01 = y00;
1220 y11 = y10;
1221
1222 if (dims == 3) {
1223 lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
1224 flt_depth_vec, offsets[2],
1225 bld->static_texture_state->pot_depth,
1226 bld->static_sampler_state->wrap_r,
1227 &z00, &z1, &r_fpart);
1228 z01 = z10 = z11 = z00;
1229 lp_build_name(z00, "tex.z0.wrapped");
1230 lp_build_name(z1, "tex.z1.wrapped");
1231 }
1232 }
1233 if (has_layer_coord(bld->static_texture_state->target)) {
1234 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1235 /* add cube layer to face */
1236 z00 = z01 = z10 = z11 = z1 =
1237 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
1238 } else {
1239 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
1240 }
1241 lp_build_name(z00, "tex.z0.layer");
1242 lp_build_name(z1, "tex.z1.layer");
1243 }
1244 } else {
1245 struct lp_build_if_state edge_if;
1246 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
1247 LLVMValueRef coord0, coord1, have_edge, have_corner;
1248 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
1249 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
1250 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
1251 LLVMValueRef face = coords[2];
1252 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
1253 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
1254 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
1255 height_vec = width_vec;
1256 flt_height_vec = flt_width_vec;
1257
1258 /* XXX the overflow logic is actually sort of duplicated with trilinear,
1259 * since an overflow in one mip should also have a corresponding overflow
1260 * in another.
1261 */
1262 /* should always have normalized coords, and offsets are undefined */
1263 assert(bld->static_sampler_state->normalized_coords);
1264 /*
1265 * The coords should all be between [0,1] however we can have NaNs,
1266 * which will wreak havoc. In particular the y1_clamped value below
1267 * can be -INT_MAX (on x86) and be propagated right through (probably
1268 * other values might be bogus in the end too).
1269 * So kill off the NaNs here.
1270 */
1271 coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
1272 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1273 coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
1274 /* instead of clamp, build mask if overflowed */
1275 coord0 = lp_build_sub(coord_bld, coord0, half);
1276 /* convert to int, compute lerp weight */
1277 /* not ideal with AVX (and no AVX2) */
1278 lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
1279 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
1280 coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
1281 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1282 coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
1283 coord1 = lp_build_sub(coord_bld, coord1, half);
1284 lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
1285 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
1286
1287 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
1288 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
1289 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
1290 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
1291
1292 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
1293 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
1294 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
1295 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
1296
1297 /* needed for accurate corner filtering branch later, rely on 0 init */
1298 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1299
1300 for (unsigned texel_index = 0; texel_index < 4; texel_index++) {
1301 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1302 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1303 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1304 }
1305
1306 lp_build_if(&edge_if, bld->gallivm, have_edge);
1307
1308 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1309 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1310 LLVMBuildStore(builder, have_corner, have_corners);
1311
1312 /*
1313 * Need to feed clamped values here for cheap corner handling,
1314 * but only for y coord (as when falling off both edges we only
1315 * fall off the x one) - this should be sufficient.
1316 */
1317 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1318 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1319
1320 /*
1321 * Get all possible new coords.
1322 */
1323 lp_build_cube_new_coords(ivec_bld, face,
1324 x0, x1, y0_clamped, y1_clamped,
1325 length_minus_one,
1326 new_faces, new_xcoords, new_ycoords);
1327
1328 /* handle fall off x-, x+ direction */
1329 /* determine new coords, face (not both fall_off vars can be true at same time) */
1330 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1331 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1332 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1333 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1334 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1335 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1336 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1337 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1338
1339 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1340 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1341
1342 /* handle fall off y-, y+ direction */
1343 /*
1344 * Cheap corner logic: just hack up things so a texel doesn't fall
1345 * off both sides (which means filter weights will be wrong but we'll only
1346 * use valid texels in the filter).
1347 * This means however (y) coords must additionally be clamped (see above).
1348 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1349 */
1350 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1351 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1352 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1353 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1354
1355 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1356 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1357 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1358 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1359 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1360 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1361 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1362 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1363
1364 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1365 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1366 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1367 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1368
1369 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1370 /* now can add cube layer to face (per sample) */
1371 z00 = lp_build_add(ivec_bld, z00, coords[3]);
1372 z01 = lp_build_add(ivec_bld, z01, coords[3]);
1373 z10 = lp_build_add(ivec_bld, z10, coords[3]);
1374 z11 = lp_build_add(ivec_bld, z11, coords[3]);
1375 }
1376
1377 LLVMBuildStore(builder, x00, xs[0]);
1378 LLVMBuildStore(builder, x01, xs[1]);
1379 LLVMBuildStore(builder, x10, xs[2]);
1380 LLVMBuildStore(builder, x11, xs[3]);
1381 LLVMBuildStore(builder, y00, ys[0]);
1382 LLVMBuildStore(builder, y01, ys[1]);
1383 LLVMBuildStore(builder, y10, ys[2]);
1384 LLVMBuildStore(builder, y11, ys[3]);
1385 LLVMBuildStore(builder, z00, zs[0]);
1386 LLVMBuildStore(builder, z01, zs[1]);
1387 LLVMBuildStore(builder, z10, zs[2]);
1388 LLVMBuildStore(builder, z11, zs[3]);
1389
1390 lp_build_else(&edge_if);
1391
1392 LLVMBuildStore(builder, x0, xs[0]);
1393 LLVMBuildStore(builder, x1, xs[1]);
1394 LLVMBuildStore(builder, x0, xs[2]);
1395 LLVMBuildStore(builder, x1, xs[3]);
1396 LLVMBuildStore(builder, y0, ys[0]);
1397 LLVMBuildStore(builder, y0, ys[1]);
1398 LLVMBuildStore(builder, y1, ys[2]);
1399 LLVMBuildStore(builder, y1, ys[3]);
1400 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1401 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1402 LLVMBuildStore(builder, cube_layer, zs[0]);
1403 LLVMBuildStore(builder, cube_layer, zs[1]);
1404 LLVMBuildStore(builder, cube_layer, zs[2]);
1405 LLVMBuildStore(builder, cube_layer, zs[3]);
1406 } else {
1407 LLVMBuildStore(builder, face, zs[0]);
1408 LLVMBuildStore(builder, face, zs[1]);
1409 LLVMBuildStore(builder, face, zs[2]);
1410 LLVMBuildStore(builder, face, zs[3]);
1411 }
1412
1413 lp_build_endif(&edge_if);
1414
1415 LLVMTypeRef type = ivec_bld->vec_type;
1416 x00 = LLVMBuildLoad2(builder, type, xs[0], "");
1417 x01 = LLVMBuildLoad2(builder, type, xs[1], "");
1418 x10 = LLVMBuildLoad2(builder, type, xs[2], "");
1419 x11 = LLVMBuildLoad2(builder, type, xs[3], "");
1420 y00 = LLVMBuildLoad2(builder, type, ys[0], "");
1421 y01 = LLVMBuildLoad2(builder, type, ys[1], "");
1422 y10 = LLVMBuildLoad2(builder, type, ys[2], "");
1423 y11 = LLVMBuildLoad2(builder, type, ys[3], "");
1424 z00 = LLVMBuildLoad2(builder, type, zs[0], "");
1425 z01 = LLVMBuildLoad2(builder, type, zs[1], "");
1426 z10 = LLVMBuildLoad2(builder, type, zs[2], "");
1427 z11 = LLVMBuildLoad2(builder, type, zs[3], "");
1428 }
1429
1430 if (linear_mask) {
1431 /*
1432 * Whack filter weights into place. Whatever texel had more weight is
1433 * the one which should have been selected by nearest filtering hence
1434 * just use 100% weight for it.
1435 */
1436 struct lp_build_context *c_bld = &bld->coord_bld;
1437 LLVMValueRef w1_mask, w1_weight;
1438 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1439
1440 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1441 /* this select is really just a "and" */
1442 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1443 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1444 if (dims >= 2) {
1445 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1446 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1447 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1448 if (dims == 3) {
1449 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1450 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1451 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1452 }
1453 }
1454 }
1455
1456 /*
1457 * Get texture colors.
1458 */
1459 /* get x0/x1 texels */
1460 lp_build_sample_texel_soa(bld,
1461 width_vec, height_vec, depth_vec,
1462 x00, y00, z00,
1463 row_stride_vec, img_stride_vec,
1464 data_ptr, mipoffsets, ilevel, neighbors[0][0]);
1465 lp_build_sample_texel_soa(bld,
1466 width_vec, height_vec, depth_vec,
1467 x01, y01, z01,
1468 row_stride_vec, img_stride_vec,
1469 data_ptr, mipoffsets, ilevel, neighbors[0][1]);
1470
1471 if (dims == 1) {
1472 assert(!is_gather);
1473 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1474 lp_build_reduce_filter(texel_bld,
1475 bld->static_sampler_state->reduction_mode,
1476 0,
1477 4,
1478 s_fpart,
1479 neighbors[0][0],
1480 neighbors[0][1],
1481 colors_out);
1482 } else {
1483 LLVMValueRef cmpval0, cmpval1;
1484 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1485 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1486 /* simplified lerp, AND mask with weight and add */
1487 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1488 cmpval0, cmpval1);
1489 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1490 }
1491 } else {
1492 /* 2D/3D texture */
1493 struct lp_build_if_state corner_if;
1494 LLVMValueRef colors0[4], colorss[4] = { 0 };
1495
1496 /* get x0/x1 texels at y1 */
1497 lp_build_sample_texel_soa(bld,
1498 width_vec, height_vec, depth_vec,
1499 x10, y10, z10,
1500 row_stride_vec, img_stride_vec,
1501 data_ptr, mipoffsets, ilevel, neighbors[1][0]);
1502 lp_build_sample_texel_soa(bld,
1503 width_vec, height_vec, depth_vec,
1504 x11, y11, z11,
1505 row_stride_vec, img_stride_vec,
1506 data_ptr, mipoffsets, ilevel, neighbors[1][1]);
1507
1508 /*
1509 * To avoid having to duplicate linear_mask / fetch code use
1510 * another branch (with corner condition though edge would work
1511 * as well) here.
1512 */
1513 if (have_corners && accurate_cube_corners &&
1514 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1515 LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
1516 LLVMValueRef have_corner, one_third;
1517
1518 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
1519 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
1520 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
1521 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
1522
1523 have_corner = LLVMBuildLoad2(builder, int1t, have_corners, "");
1524
1525 lp_build_if(&corner_if, bld->gallivm, have_corner);
1526
1527 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
1528 1.0f/3.0f);
1529
1530 /* find corner */
1531 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1532 c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1533 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1534 c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1535 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1536 c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1537 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1538 c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1539
1540 if (!is_gather) {
1541 /*
1542 * we can't use standard 2d lerp as we need per-element weight
1543 * in case of corners, so just calculate bilinear result as
1544 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1545 * (This is actually less work than using 2d lerp, 7 vs. 9
1546 * instructions, however calculating the weights needs another 6,
1547 * so actually probably not slower than 2d lerp only for 4 channels
1548 * as weights only need to be calculated once - of course fixing
1549 * the weights has additional cost.)
1550 */
1551 LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
1552 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1553 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1554 w00 = lp_build_mul(coord_bld, wx0, wy0);
1555 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1556 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1557 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1558
1559 /* find corner weight */
1560 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1561 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1562 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1563 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1564
1565 /*
1566 * add 1/3 of the corner weight to the weight of the 3 other
1567 * samples and null out corner weight.
1568 */
1569 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1570 w00 = lp_build_add(coord_bld, w00, c_weight);
1571 w00 = lp_build_andnot(coord_bld, w00, c00f);
1572 w01 = lp_build_add(coord_bld, w01, c_weight);
1573 w01 = lp_build_andnot(coord_bld, w01, c01f);
1574 w10 = lp_build_add(coord_bld, w10, c_weight);
1575 w10 = lp_build_andnot(coord_bld, w10, c10f);
1576 w11 = lp_build_add(coord_bld, w11, c_weight);
1577 w11 = lp_build_andnot(coord_bld, w11, c11f);
1578
1579 if (bld->static_sampler_state->compare_mode ==
1580 PIPE_TEX_COMPARE_NONE) {
1581 for (unsigned chan = 0; chan < 4; chan++) {
1582 colors0[chan] = lp_build_mul(coord_bld, w00,
1583 neighbors[0][0][chan]);
1584 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1585 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1586 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1587 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1588 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1589 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1590 }
1591 } else {
1592 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1593 cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
1594 neighbors[0][0][0]);
1595 cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
1596 neighbors[0][1][0]);
1597 cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
1598 neighbors[1][0][0]);
1599 cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
1600 neighbors[1][1][0]);
1601 /*
1602 * inputs to interpolation are just masks so just add
1603 * masked weights together
1604 */
1605 cmpval00 = LLVMBuildBitCast(builder, cmpval00,
1606 coord_bld->vec_type, "");
1607 cmpval01 = LLVMBuildBitCast(builder, cmpval01,
1608 coord_bld->vec_type, "");
1609 cmpval10 = LLVMBuildBitCast(builder, cmpval10,
1610 coord_bld->vec_type, "");
1611 cmpval11 = LLVMBuildBitCast(builder, cmpval11,
1612 coord_bld->vec_type, "");
1613 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1614 tmp = lp_build_and(coord_bld, w01, cmpval01);
1615 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1616 tmp = lp_build_and(coord_bld, w10, cmpval10);
1617 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1618 tmp = lp_build_and(coord_bld, w11, cmpval11);
1619 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1620 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1621 }
1622 } else {
1623 /*
1624 * We don't have any weights to adjust, so instead calculate
1625 * the fourth texel as simply the average of the other 3.
1626 * (This would work for non-gather too, however we'd have
1627 * a boatload more of the select stuff due to there being
1628 * 4 times as many colors as weights.)
1629 */
1630 LLVMValueRef col00, col01, col10, col11;
1631 LLVMValueRef colc, colc0, colc1;
1632 col10 = lp_build_swizzle_soa_channel(texel_bld,
1633 neighbors[1][0], chan_swiz);
1634 col11 = lp_build_swizzle_soa_channel(texel_bld,
1635 neighbors[1][1], chan_swiz);
1636 col01 = lp_build_swizzle_soa_channel(texel_bld,
1637 neighbors[0][1], chan_swiz);
1638 col00 = lp_build_swizzle_soa_channel(texel_bld,
1639 neighbors[0][0], chan_swiz);
1640
1641 /*
1642 * The spec says for comparison filtering, the comparison
1643 * must happen before synthesizing the new value.
1644 * This means all gathered values are always 0 or 1,
1645 * except for the non-existing texel, which can be 0,1/3,2/3,1...
1646 * Seems like we'd be allowed to just return 0 or 1 too, so we
1647 * could simplify and pass down the compare mask values to the
1648 * end (using int arithmetic/compare on the mask values to
1649 * construct the fourth texel) and only there convert to floats
1650 * but it's probably not worth it (it might be easier for the cpu
1651 * but not for the code)...
1652 */
1653 if (bld->static_sampler_state->compare_mode !=
1654 PIPE_TEX_COMPARE_NONE) {
1655 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1656 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
1657 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
1658 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
1659 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
1660 col00 = lp_build_select(texel_bld, cmpval00,
1661 texel_bld->one, texel_bld->zero);
1662 col01 = lp_build_select(texel_bld, cmpval01,
1663 texel_bld->one, texel_bld->zero);
1664 col10 = lp_build_select(texel_bld, cmpval10,
1665 texel_bld->one, texel_bld->zero);
1666 col11 = lp_build_select(texel_bld, cmpval11,
1667 texel_bld->one, texel_bld->zero);
1668 }
1669
1670 /*
1671 * Null out corner color.
1672 */
1673 col00 = lp_build_andnot(coord_bld, col00, c00f);
1674 col01 = lp_build_andnot(coord_bld, col01, c01f);
1675 col10 = lp_build_andnot(coord_bld, col10, c10f);
1676 col11 = lp_build_andnot(coord_bld, col11, c11f);
1677
1678 /*
1679 * New corner texel color is all colors added / 3.
1680 */
1681 colc0 = lp_build_add(coord_bld, col00, col01);
1682 colc1 = lp_build_add(coord_bld, col10, col11);
1683 colc = lp_build_add(coord_bld, colc0, colc1);
1684 colc = lp_build_mul(coord_bld, one_third, colc);
1685
1686 /*
1687 * Replace the corner texel color with the new value.
1688 */
1689 col00 = lp_build_select(coord_bld, c00, colc, col00);
1690 col01 = lp_build_select(coord_bld, c01, colc, col01);
1691 col10 = lp_build_select(coord_bld, c10, colc, col10);
1692 col11 = lp_build_select(coord_bld, c11, colc, col11);
1693
1694 colors0[0] = col10;
1695 colors0[1] = col11;
1696 colors0[2] = col01;
1697 colors0[3] = col00;
1698 }
1699
1700 LLVMBuildStore(builder, colors0[0], colorss[0]);
1701 LLVMBuildStore(builder, colors0[1], colorss[1]);
1702 LLVMBuildStore(builder, colors0[2], colorss[2]);
1703 LLVMBuildStore(builder, colors0[3], colorss[3]);
1704
1705 lp_build_else(&corner_if);
1706 }
1707
1708 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1709 if (is_gather) {
1710 /*
1711 * Just assign the red channel (no component selection yet).
1712 * This is a bit hackish, we usually do the swizzle at the
1713 * end of sampling (much less values to swizzle), but this
1714 * obviously cannot work when using gather.
1715 */
1716 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1717 neighbors[1][0],
1718 chan_swiz);
1719 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1720 neighbors[1][1],
1721 chan_swiz);
1722 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1723 neighbors[0][1],
1724 chan_swiz);
1725 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1726 neighbors[0][0],
1727 chan_swiz);
1728 } else {
1729 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1730 lp_build_reduce_filter_2d(texel_bld,
1731 bld->static_sampler_state->reduction_mode,
1732 0,
1733 4,
1734 s_fpart,
1735 t_fpart,
1736 neighbors[0][0],
1737 neighbors[0][1],
1738 neighbors[1][0],
1739 neighbors[1][1],
1740 colors0);
1741 }
1742 } else {
1743 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1744 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1745 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1746 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1747 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1748
1749 if (is_gather) {
1750 /* more hacks for swizzling, should be X, ONE or ZERO... */
1751 colors0[0] = lp_build_select(texel_bld, cmpval10,
1752 texel_bld->one, texel_bld->zero);
1753 colors0[1] = lp_build_select(texel_bld, cmpval11,
1754 texel_bld->one, texel_bld->zero);
1755 colors0[2] = lp_build_select(texel_bld, cmpval01,
1756 texel_bld->one, texel_bld->zero);
1757 colors0[3] = lp_build_select(texel_bld, cmpval00,
1758 texel_bld->one, texel_bld->zero);
1759 } else {
1760 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1761 cmpval00, cmpval01, cmpval10, cmpval11);
1762 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1763 }
1764 }
1765
1766 if (have_corners && accurate_cube_corners &&
1767 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1768 LLVMBuildStore(builder, colors0[0], colorss[0]);
1769 LLVMBuildStore(builder, colors0[1], colorss[1]);
1770 LLVMBuildStore(builder, colors0[2], colorss[2]);
1771 LLVMBuildStore(builder, colors0[3], colorss[3]);
1772
1773 lp_build_endif(&corner_if);
1774
1775 colors0[0] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[0], "");
1776 colors0[1] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[1], "");
1777 colors0[2] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[2], "");
1778 colors0[3] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[3], "");
1779 }
1780
1781 if (dims == 3) {
1782 LLVMValueRef neighbors1[2][2][4];
1783 LLVMValueRef colors1[4];
1784
1785 assert(!is_gather);
1786
1787 /* get x0/x1/y0/y1 texels at z1 */
1788 lp_build_sample_texel_soa(bld,
1789 width_vec, height_vec, depth_vec,
1790 x00, y00, z1,
1791 row_stride_vec, img_stride_vec,
1792 data_ptr, mipoffsets, ilevel, neighbors1[0][0]);
1793 lp_build_sample_texel_soa(bld,
1794 width_vec, height_vec, depth_vec,
1795 x01, y01, z1,
1796 row_stride_vec, img_stride_vec,
1797 data_ptr, mipoffsets, ilevel, neighbors1[0][1]);
1798 lp_build_sample_texel_soa(bld,
1799 width_vec, height_vec, depth_vec,
1800 x10, y10, z1,
1801 row_stride_vec, img_stride_vec,
1802 data_ptr, mipoffsets, ilevel, neighbors1[1][0]);
1803 lp_build_sample_texel_soa(bld,
1804 width_vec, height_vec, depth_vec,
1805 x11, y11, z1,
1806 row_stride_vec, img_stride_vec,
1807 data_ptr, mipoffsets, ilevel, neighbors1[1][1]);
1808
1809 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1810 /* Bilinear interpolate the four samples from the second Z slice */
1811 lp_build_reduce_filter_2d(texel_bld,
1812 bld->static_sampler_state->reduction_mode,
1813 0,
1814 4,
1815 s_fpart,
1816 t_fpart,
1817 neighbors1[0][0],
1818 neighbors1[0][1],
1819 neighbors1[1][0],
1820 neighbors1[1][1],
1821 colors1);
1822
1823 /* Linearly interpolate the two samples from the two 3D slices */
1824 lp_build_reduce_filter(texel_bld,
1825 bld->static_sampler_state->reduction_mode,
1826 0,
1827 4,
1828 r_fpart,
1829 colors0,
1830 colors1,
1831 colors_out);
1832 } else {
1833 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1834 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1835 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1836 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1837 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1838 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1839 cmpval00, cmpval01, cmpval10, cmpval11);
1840 /* Linearly interpolate the two samples from the two 3D slices */
1841 colors_out[0] = lp_build_lerp(texel_bld,
1842 r_fpart,
1843 colors0[0], colors1[0],
1844 0);
1845 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1846 }
1847 } else {
1848 /* 2D tex */
1849 for (unsigned chan = 0; chan < 4; chan++) {
1850 colors_out[chan] = colors0[chan];
1851 }
1852 }
1853 }
1854 if (is_gather) {
1855 /*
1856 * For gather, we can't do our usual channel swizzling done later,
1857 * so do it here. It only really matters for 0/1 swizzles in case
1858 * of comparison filtering, since in this case the results would be
1859 * wrong, without comparison it should all work out alright but it
1860 * can't hurt to do that here, since it will instantly drop all
1861 * calculations above, though it's a rather stupid idea to do
1862 * gather on a channel which will always return 0 or 1 in any case...
1863 */
1864 if (chan_swiz == PIPE_SWIZZLE_1) {
1865 for (unsigned chan = 0; chan < 4; chan++) {
1866 colors_out[chan] = texel_bld->one;
1867 }
1868 } else if (chan_swiz == PIPE_SWIZZLE_0) {
1869 for (unsigned chan = 0; chan < 4; chan++) {
1870 colors_out[chan] = texel_bld->zero;
1871 }
1872 }
1873 }
1874 }
1875
1876
1877 /**
1878 * Sample the texture/mipmap using given image filter and mip filter.
1879 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1880 * from (vectors or scalars).
1881 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1882 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       bool is_gather,
                       const LLVMValueRef *coords,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   /* colors0/colors1 hold the per-channel results of filtering each of the
    * two mip levels; colors_out[] are pointers to result variables that are
    * written with LLVMBuildStore below.
    */
   LLVMValueRef colors0[4], colors1[4];

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      /* single mip: address the level's data directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld, size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, mipoff0, ilevel0, coords, offsets,
                                    colors0);
   } else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld, is_gather, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, ilevel0, coords, offsets,
                                   colors0);
   }

   /* Store the first level's colors in the output variables */
   for (unsigned chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         /* Single lod: an unordered > comparison on the scalar fraction. */
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
                                   lod_fpart, bld->lodf_bld.zero,
                                   "need_lerp");
      } else {
         /*
          * We'll do mip filtering if any of the quads (or individual
          * pixel in case of per-pixel lod) need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it (if there's one lod per quad).
          */
         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                      PIPE_FUNC_GREATER,
                                      lod_fpart, bld->lodf_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
         lp_build_name(need_lerp, "need_lerp");
      }

      /* Only fetch/filter the second level when some lane actually needs it. */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         } else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld, size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, mipoff1, ilevel1, coords, offsets,
                                          colors1);
         } else {
            /* NOTE(review): is_gather is deliberately false here — presumably
             * gather never takes the mip-lerp path; confirm against callers. */
            lp_build_sample_image_linear(bld, false, size1, NULL,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, mipoff1, ilevel1, coords, offsets,
                                         colors1);
         }

         /* interpolate samples from the two mipmap levels */

         /* widen per-quad lod fractions to one value per texel if needed */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (unsigned chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            /* overwrite the level-0 result stored earlier */
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
2010
2011
2012 /**
2013 * Sample the texture/mipmap using given mip filter, and using
2014 * both nearest and linear filtering at the same time depending
2015 * on linear_mask.
2016 * lod can be per quad but linear_mask is always per pixel.
2017 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
2018 * from (vectors or scalars).
2019 * If we're using nearest miplevel sampling the '1' values will be null/unused.
2020 */
static void
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
                            LLVMValueRef linear_mask,
                            unsigned mip_filter,
                            const LLVMValueRef *coords,
                            const LLVMValueRef *offsets,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef lod_fpart,
                            LLVMValueRef lod_positive,
                            LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   /* colors_out[] are pointers to result variables, written via
    * LLVMBuildStore below. */
   LLVMValueRef colors0[4], colors1[4];

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* linear_mask selects per-pixel between nearest and linear filtering
    * inside lp_build_sample_image_linear. */
   lp_build_sample_image_linear(bld, false, size0, linear_mask,
                                row_stride0_vec, img_stride0_vec,
                                data_ptr0, mipoff0, ilevel0, coords, offsets,
                                colors0);

   /* Store the first level's colors in the output variables */
   for (unsigned chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /*
       * We'll do mip filtering if any of the quads (or individual
       * pixel in case of per-pixel lod) need it.
       * Note using lod_positive here not lod_fpart since it may be the same
       * condition as that used in the outer "if" in the caller hence llvm
       * should be able to merge the branches in this case.
       */
      need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
      lp_build_name(need_lerp, "need_lerp");

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         } else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         lp_build_sample_image_linear(bld, false, size1, linear_mask,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, mipoff1, ilevel1, coords, offsets,
                                      colors1);

         /* interpolate samples from the two mipmap levels */

         /* widen per-quad lod fractions to one value per texel if needed */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (unsigned chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            /* overwrite the level-0 result stored earlier */
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
2125
2126
2127 /**
2128 * Build (per-coord) layer value.
2129 * Either clamp layer to valid values or fill in optional out_of_bounds
2130 * value and just return value unclamped.
2131 */
2132 static LLVMValueRef
lp_build_layer_coord(struct lp_build_sample_context * bld,unsigned texture_unit,bool is_cube_array,LLVMValueRef layer,LLVMValueRef * out_of_bounds)2133 lp_build_layer_coord(struct lp_build_sample_context *bld,
2134 unsigned texture_unit,
2135 bool is_cube_array,
2136 LLVMValueRef layer,
2137 LLVMValueRef *out_of_bounds)
2138 {
2139 LLVMValueRef num_layers;
2140 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2141
2142 num_layers = bld->dynamic_state->depth(bld->gallivm, bld->resources_type,
2143 bld->resources_ptr, texture_unit, NULL);
2144 num_layers = LLVMBuildZExt(bld->gallivm->builder, num_layers,
2145 bld->int_bld.elem_type, "");
2146 if (out_of_bounds) {
2147 LLVMValueRef out1, out;
2148 assert(!is_cube_array);
2149 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
2150 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
2151 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
2152 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
2153 return layer;
2154 } else {
2155 LLVMValueRef maxlayer;
2156 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
2157 bld->int_bld.one;
2158 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
2159 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
2160 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
2161 }
2162 }
2163
2164 static void
lp_build_sample_ms_offset(struct lp_build_context * int_coord_bld,LLVMValueRef ms_index,LLVMValueRef num_samples,LLVMValueRef sample_stride,LLVMValueRef * offset,LLVMValueRef * out_of_bounds)2165 lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld,
2166 LLVMValueRef ms_index,
2167 LLVMValueRef num_samples,
2168 LLVMValueRef sample_stride,
2169 LLVMValueRef *offset,
2170 LLVMValueRef *out_of_bounds)
2171 {
2172 LLVMValueRef out1;
2173 num_samples = lp_build_broadcast_scalar(int_coord_bld, num_samples);
2174 sample_stride = lp_build_broadcast_scalar(int_coord_bld, sample_stride);
2175 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
2176 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2177 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, num_samples);
2178 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2179 LLVMValueRef sample_offset = lp_build_mul(int_coord_bld,
2180 sample_stride, ms_index);
2181 *offset = lp_build_add(int_coord_bld, *offset, sample_offset);
2182 }
2183
2184
2185 #define WEIGHT_LUT_SIZE 1024
2186
2187
/*
 * Anisotropic filtering: average up to static_sampler_state->aniso
 * linear samples taken along the major axis of anisotropy on mip level
 * ilevel0. ilevel1 and lod_fpart are unused here.
 */
static void
lp_build_sample_aniso(struct lp_build_sample_context *bld,
                      const LLVMValueRef *coords,
                      const LLVMValueRef *offsets,
                      LLVMValueRef ilevel0,
                      LLVMValueRef ilevel1,
                      LLVMValueRef lod_fpart,
                      LLVMValueRef *colors_out)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
   LLVMValueRef data_ptr0, mipoff0 = NULL;

   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   LLVMValueRef float_size_lvl = lp_build_int_to_float(&bld->float_size_bld, size0);

   /* extract width and height into vectors for use later */
   static const unsigned char swizzle15[] = { /* replicate height (elems 1/5) */
      1, 1, 1, 1, 5, 5, 5, 5
   };
   static const unsigned char swizzle04[] = { /* replicate width (elems 0/4) */
      0, 0, 0, 0, 4, 4, 4, 4
   };
   LLVMValueRef width_dim, height_dim;

   width_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle04,
                                      bld->float_size_bld.type.length,
                                      bld->coord_bld.type.length);
   height_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle15,
                                       bld->float_size_bld.type.length,
                                       bld->coord_bld.type.length);

   /* Gradient of the u coordinate in screen space. */
   LLVMValueRef dudx = lp_build_ddx(coord_bld, coords[0]);
   LLVMValueRef dudy = lp_build_ddy(coord_bld, coords[0]);

   /* Gradient of the v coordinate in screen space. */
   LLVMValueRef dvdx = lp_build_ddx(coord_bld, coords[1]);
   LLVMValueRef dvdy = lp_build_ddy(coord_bld, coords[1]);

   /* Rate of change in texels along each screen axis. */
   LLVMValueRef rho_x = lp_build_mul(coord_bld, lp_build_max(coord_bld, lp_build_abs(coord_bld, dudx), lp_build_abs(coord_bld, dvdx)), width_dim);
   LLVMValueRef rho_y = lp_build_mul(coord_bld, lp_build_max(coord_bld, lp_build_abs(coord_bld, dudy), lp_build_abs(coord_bld, dvdy)), height_dim);

   /* Number of samples used for averaging. */
   LLVMValueRef N = lp_build_iceil(coord_bld, lp_build_max(coord_bld, rho_x, rho_y));
   /* cap at the max anisotropy configured in the sampler state */
   N = lp_build_min(int_coord_bld, N, lp_build_const_int_vec(gallivm, int_coord_bld->type, bld->static_sampler_state->aniso));
   /* Scalar maximum of N over all lanes: loop trip count for the wave. */
   LLVMValueRef wave_max_N = NULL;
   for (uint32_t i = 0; i < coord_bld->type.length; i++) {
      LLVMValueRef invocation_N = LLVMBuildExtractElement(builder, N, lp_build_const_int32(gallivm, i), "");
      if (wave_max_N)
         wave_max_N = lp_build_max(&bld->int_bld, wave_max_N, invocation_N);
      else
         wave_max_N = invocation_N;
   }

   /* Step along whichever screen axis has the larger footprint. */
   LLVMValueRef sample_along_x_axis = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, rho_x, rho_y);
   LLVMValueRef dudk = lp_build_select(coord_bld, sample_along_x_axis, dudx, dudy);
   LLVMValueRef dvdk = lp_build_select(coord_bld, sample_along_x_axis, dvdx, dvdy);

   /* Per-channel running sums, kept in allocas across the loop. */
   LLVMValueRef accumulator[4] = {
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "r"),
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "g"),
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "b"),
      lp_build_alloca(gallivm, bld->texel_bld.vec_type, "a"),
   };

   /* NOTE(review): rcp_N is 1/N without a guard for N == 0 — presumably
    * rho >= 0 makes N >= 1 in practice; confirm. */
   LLVMValueRef float_N = lp_build_int_to_float(coord_bld, N);
   LLVMValueRef rcp_N = lp_build_rcp(coord_bld, float_N);

   struct lp_build_for_loop_state loop_state;
   lp_build_for_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0),
                           LLVMIntULT, wave_max_N, lp_build_const_int32(gallivm, 1));
   {
      LLVMValueRef k = loop_state.counter;
      k = lp_build_broadcast_scalar(int_coord_bld, k);

      /* float_k = k/N - 0.5, i.e. sample positions spread across the
       * anisotropic footprint centered on the original coordinate */
      LLVMValueRef float_k = lp_build_int_to_float(coord_bld, k);
      float_k = lp_build_mul(coord_bld, float_k, rcp_N);
      float_k = lp_build_add(coord_bld, float_k, lp_build_const_vec(gallivm, coord_bld->type, -0.5));

      LLVMValueRef u_offset = lp_build_mul(coord_bld, float_k, dudk);
      LLVMValueRef v_offset = lp_build_mul(coord_bld, float_k, dvdk);

      LLVMValueRef sample_coords[4] = {
         lp_build_add(coord_bld, coords[0], u_offset),
         lp_build_add(coord_bld, coords[1], v_offset),
         coords[2],
         coords[3],
      };

      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
          bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* Make sure the coordinates stay in bounds for PIPE_TEXTURE_CUBE loads since
          * lp_build_sample_image_linear uses less clamping for them.
          */
         sample_coords[0] = lp_build_max(coord_bld, sample_coords[0], bld->coord_bld.zero);
         sample_coords[0] = lp_build_min(coord_bld, sample_coords[0], bld->coord_bld.one);
         sample_coords[1] = lp_build_max(coord_bld, sample_coords[1], bld->coord_bld.zero);
         sample_coords[1] = lp_build_min(coord_bld, sample_coords[1], bld->coord_bld.one);
      }

      LLVMValueRef sample_color[4];
      lp_build_sample_image_linear(bld, false, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, ilevel0, sample_coords, offsets,
                                   sample_color);

      /* Lanes whose own N is already exhausted (k >= N) must not
       * accumulate — the loop runs to the wave-wide maximum. */
      LLVMValueRef oob = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, k, N);

      for (uint32_t c = 0; c < 4; c++) {
         LLVMValueRef tmp = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, accumulator[c], "");
         tmp = lp_build_select(&bld->texel_bld, oob, tmp, LLVMBuildFAdd(builder, tmp, sample_color[c], ""));
         LLVMBuildStore(builder, tmp, accumulator[c]);
      }
   }
   lp_build_for_loop_end(&loop_state);

   /* Final color = sum / N; colors_out[] are result variables. */
   for (uint32_t c = 0; c < 4; c++) {
      LLVMValueRef sum = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, accumulator[c], "");
      LLVMBuildStore(builder, lp_build_mul(&bld->texel_bld, sum, rcp_N), colors_out[c]);
   }
}
2323
2324
2325 /**
2326 * Calculate cube face, lod, mip levels.
2327 */
2328 static void
lp_build_sample_common(struct lp_build_sample_context * bld,bool is_lodq,unsigned texture_index,unsigned sampler_index,LLVMValueRef * coords,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,LLVMValueRef * lod_pos_or_zero,LLVMValueRef * lod,LLVMValueRef * lod_fpart,LLVMValueRef * ilevel0,LLVMValueRef * ilevel1)2329 lp_build_sample_common(struct lp_build_sample_context *bld,
2330 bool is_lodq,
2331 unsigned texture_index,
2332 unsigned sampler_index,
2333 LLVMValueRef *coords,
2334 const struct lp_derivatives *derivs, /* optional */
2335 LLVMValueRef lod_bias, /* optional */
2336 LLVMValueRef explicit_lod, /* optional */
2337 LLVMValueRef *lod_pos_or_zero,
2338 LLVMValueRef *lod,
2339 LLVMValueRef *lod_fpart,
2340 LLVMValueRef *ilevel0,
2341 LLVMValueRef *ilevel1)
2342 {
2343 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
2344 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
2345 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
2346 const unsigned target = bld->static_texture_state->target;
2347 const bool aniso = bld->static_sampler_state->aniso;
2348 LLVMValueRef first_level, last_level;
2349 LLVMValueRef lod_ipart = NULL;
2350 struct lp_derivatives cube_derivs;
2351
2352 /*
2353 printf("%s mip %d min %d mag %d\n", __func__,
2354 mip_filter, min_filter, mag_filter);
2355 */
2356
2357 first_level = get_first_level(bld->gallivm,
2358 bld->resources_type,
2359 bld->resources_ptr,
2360 texture_index, NULL,
2361 bld->static_texture_state,
2362 bld->dynamic_state);
2363 last_level = get_last_level(bld->gallivm,
2364 bld->resources_type,
2365 bld->resources_ptr,
2366 texture_index, NULL,
2367 bld->static_texture_state,
2368 bld->dynamic_state);
2369
2370 /*
2371 * Choose cube face, recompute texcoords for the chosen face and
2372 * calculate / transform derivatives.
2373 */
2374 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
2375 bool need_derivs = ((min_filter != mag_filter ||
2376 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
2377 !bld->static_sampler_state->min_max_lod_equal &&
2378 !explicit_lod);
2379 lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs);
2380 if (need_derivs)
2381 derivs = &cube_derivs;
2382
2383 if (target == PIPE_TEXTURE_CUBE_ARRAY && !is_lodq) {
2384 /* calculate cube layer coord now */
2385 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
2386 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
2387 layer = lp_build_mul(&bld->int_coord_bld, layer, six);
2388 coords[3] = lp_build_layer_coord(bld, texture_index, true, layer, NULL);
2389 /* because of seamless filtering can't add it to face (coords[2]) here. */
2390 }
2391 } else if ((target == PIPE_TEXTURE_1D_ARRAY ||
2392 target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) {
2393 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
2394 coords[2] = lp_build_layer_coord(bld, texture_index, false, coords[2], NULL);
2395 }
2396
2397 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
2398 /*
2399 * Clamp p coords to [0,1] for fixed function depth texture format here.
2400 * Technically this is not entirely correct for unorm depth as the ref
2401 * value should be converted to the depth format (quantization!) and
2402 * comparison then done in texture format. This would actually help
2403 * performance (since only need to do it once and could save the
2404 * per-sample conversion of texels to floats instead), but it would need
2405 * more messy code (would need to push at least some bits down to actual
2406 * fetch so conversion could be skipped, and would have ugly interaction
2407 * with border color, would need to convert border color to that format
2408 * too or do some other tricks to make it work).
2409 */
2410 const struct util_format_description *format_desc = bld->format_desc;
2411 /* not entirely sure we couldn't end up with non-valid swizzle here */
2412 const enum util_format_type chan_type =
2413 format_desc->swizzle[0] <= PIPE_SWIZZLE_W
2414 ? format_desc->channel[format_desc->swizzle[0]].type
2415 : UTIL_FORMAT_TYPE_FLOAT;
2416 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2417 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2418 bld->coord_bld.zero, bld->coord_bld.one);
2419 }
2420 }
2421
2422 /*
2423 * Compute the level of detail (float).
2424 */
2425 if (min_filter != mag_filter ||
2426 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2427 /* Need to compute lod either to choose mipmap levels or to
2428 * distinguish between minification/magnification with one mipmap level.
2429 */
2430 LLVMValueRef first_level_vec =
2431 lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level);
2432 lp_build_lod_selector(bld, is_lodq, sampler_index,
2433 first_level_vec,
2434 coords[0], coords[1], coords[2],
2435 derivs, lod_bias, explicit_lod,
2436 mip_filter, lod,
2437 &lod_ipart, lod_fpart, lod_pos_or_zero);
2438 if (is_lodq) {
2439 last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2440 last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2441 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2442
2443 switch (mip_filter) {
2444 case PIPE_TEX_MIPFILTER_NONE:
2445 *lod_fpart = bld->lodf_bld.zero;
2446 break;
2447 case PIPE_TEX_MIPFILTER_NEAREST:
2448 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2449 FALLTHROUGH;
2450 case PIPE_TEX_MIPFILTER_LINEAR:
2451 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2452 bld->lodf_bld.zero, last_level);
2453 break;
2454 }
2455 return;
2456 }
2457 } else {
2458 lod_ipart = bld->lodi_bld.zero;
2459 *lod_pos_or_zero = bld->lodi_bld.zero;
2460 }
2461
2462 if ((bld->num_lods != bld->num_mips || bld->num_lods == 1) &&
2463 bld->lodi_bld.type.length != 1) {
2464 /* only makes sense if there's just a single mip level */
2465 assert(bld->num_mips == 1);
2466 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2467 }
2468
2469 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2470 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
2471
2472 /*
2473 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2474 */
2475
2476 if (aniso) {
2477 lp_build_nearest_mip_level(bld,
2478 first_level, last_level,
2479 lod_ipart, ilevel0, NULL);
2480 return;
2481 }
2482
2483 switch (mip_filter) {
2484 default:
2485 unreachable("Bad mip_filter value in lp_build_sample_soa()");
2486 case PIPE_TEX_MIPFILTER_NONE:
2487 /* always use mip level 0 */
2488 *ilevel0 = first_level;
2489 break;
2490 case PIPE_TEX_MIPFILTER_NEAREST:
2491 assert(lod_ipart);
2492 lp_build_nearest_mip_level(bld,
2493 first_level, last_level,
2494 lod_ipart, ilevel0, NULL);
2495 break;
2496 case PIPE_TEX_MIPFILTER_LINEAR:
2497 assert(lod_ipart);
2498 assert(*lod_fpart);
2499
2500 lp_build_linear_mip_levels(bld, texture_index,
2501 first_level, last_level,
2502 lod_ipart, lod_fpart,
2503 ilevel0, ilevel1);
2504 break;
2505 }
2506 }
2507
2508
/**
 * Clamp the sampler border color to the representable range of the
 * bound texture format and stash the result in bld->border_color_clamped
 * as a 4-element vector (float or int depending on bld->texel_type).
 *
 * GL requires border values to be clamped to the format's representable
 * range for normalized as well as pure integer formats; d3d10 never hits
 * the uint/sint cases here since it only allows them with ld.
 *
 * \param sampler_unit  sampler slot used to look up the dynamic border color
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(gallivm,
                                       bld->resources_type,
                                       bld->resources_ptr, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   LLVMTypeRef border_color_type = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
   border_color_ptr = lp_build_array_get_ptr2(gallivm, border_color_type, border_color_ptr,
                                              lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad2(builder, vec4_bld.vec_type, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   LLVMSetAlignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure
    * out clamping necessary for each channel, simply use the first channel,
    * and treat mixed signed/unsigned normalized formats specially. (Mixed
    * non-normalized, which wouldn't work at all here, do not exist for a good
    * reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      } else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm: clamp to [-1, 1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            } else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm: clamp to [0, 1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            } else if (chan_pure) {
               /*
                * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
                * we use Z32_FLOAT_S8X24 to imply sampling depth component and
                * ignoring stencil, which will blow up here if we try to do a
                * uint clamp in a float texel build... And even if we had
                * that format, mesa st also thinks using z24s8 means depth
                * sampling ignoring stencil.
                */

               /*
                * Border color was stored as uint, hence never need min clamp,
                * and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
         {
            unsigned max10 = (1 << 10) - 1;
            max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                           max10, (1 << 2) - 1, NULL);
         }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   } else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
                 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
                 format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
         case PIPE_FORMAT_BPTC_RGBA_UNORM:
         case PIPE_FORMAT_BPTC_SRGBA:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_BPTC_RGB_FLOAT:
            /* not sure if we should clamp to max half float? */
            break;
         case PIPE_FORMAT_BPTC_RGB_UFLOAT:
            /* unsigned float: only disallow negatives */
            min_clamp = vec4_bld.zero;
            break;
         default:
            assert(0);
            break;
         }
      } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         /*
          * all others from subsampled/other group, though we don't care
          * about yuv (and should not have any from zs here)
          */
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_G8B8_G8R8_UNORM:
         case PIPE_FORMAT_B8G8_R8G8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* apply whichever bounds were determined above */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}
2732
2733
2734 /**
2735 * General texture sampling codegen.
2736 * This function handles texture sampling for all texture targets (1D,
2737 * 2D, 3D, cube) and all filtering modes.
2738 */
2739 static void
lp_build_sample_general(struct lp_build_sample_context * bld,unsigned sampler_unit,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef * colors_out)2740 lp_build_sample_general(struct lp_build_sample_context *bld,
2741 unsigned sampler_unit,
2742 bool is_gather,
2743 const LLVMValueRef *coords,
2744 const LLVMValueRef *offsets,
2745 LLVMValueRef lod_positive,
2746 LLVMValueRef lod_fpart,
2747 LLVMValueRef ilevel0,
2748 LLVMValueRef ilevel1,
2749 LLVMValueRef *colors_out)
2750 {
2751 LLVMBuilderRef builder = bld->gallivm->builder;
2752 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2753 const unsigned mip_filter = sampler_state->min_mip_filter;
2754 const unsigned min_filter = sampler_state->min_img_filter;
2755 const unsigned mag_filter = sampler_state->mag_img_filter;
2756 LLVMValueRef texels[4];
2757 unsigned chan;
2758
2759 /* if we need border color, (potentially) clamp it now */
2760 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
2761 min_filter,
2762 mag_filter) ||
2763 (bld->dims > 1 &&
2764 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
2765 min_filter,
2766 mag_filter)) ||
2767 (bld->dims > 2 &&
2768 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
2769 min_filter,
2770 mag_filter))) {
2771 lp_build_clamp_border_color(bld, sampler_unit);
2772 }
2773
2774
2775 /*
2776 * Get/interpolate texture colors.
2777 */
2778
2779 for (chan = 0; chan < 4; ++chan) {
2780 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
2781 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
2782 }
2783
2784 if (sampler_state->aniso) {
2785 lp_build_sample_aniso(bld, coords, offsets, ilevel0,
2786 ilevel1, lod_fpart, texels);
2787 } else if (min_filter == mag_filter) {
2788 /* no need to distinguish between minification and magnification */
2789 lp_build_sample_mipmap(bld, min_filter, mip_filter,
2790 is_gather,
2791 coords, offsets,
2792 ilevel0, ilevel1, lod_fpart,
2793 texels);
2794 } else {
2795 /*
2796 * Could also get rid of the if-logic and always use mipmap_both, both
2797 * for the single lod and multi-lod case if nothing really uses this.
2798 */
2799 if (bld->num_lods == 1) {
2800 /* Emit conditional to choose min image filter or mag image filter
2801 * depending on the lod being > 0 or <= 0, respectively.
2802 */
2803 struct lp_build_if_state if_ctx;
2804
2805 lod_positive = LLVMBuildTrunc(builder, lod_positive,
2806 LLVMInt1TypeInContext(bld->gallivm->context),
2807 "lod_pos");
2808
2809 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
2810 {
2811 /* Use the minification filter */
2812 lp_build_sample_mipmap(bld, min_filter, mip_filter, false,
2813 coords, offsets,
2814 ilevel0, ilevel1, lod_fpart,
2815 texels);
2816 }
2817 lp_build_else(&if_ctx);
2818 {
2819 /* Use the magnification filter */
2820 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
2821 false,
2822 coords, offsets,
2823 ilevel0, NULL, NULL,
2824 texels);
2825 }
2826 lp_build_endif(&if_ctx);
2827 } else {
2828 LLVMValueRef need_linear, linear_mask;
2829 unsigned mip_filter_for_nearest;
2830 struct lp_build_if_state if_ctx;
2831
2832 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
2833 linear_mask = lod_positive;
2834 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
2835 } else {
2836 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
2837 mip_filter_for_nearest = mip_filter;
2838 }
2839 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
2840 linear_mask);
2841 lp_build_name(need_linear, "need_linear");
2842
2843 if (bld->num_lods != bld->coord_type.length) {
2844 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2845 bld->lodi_type,
2846 bld->int_coord_type,
2847 linear_mask);
2848 }
2849
2850 lp_build_if(&if_ctx, bld->gallivm, need_linear);
2851 {
2852 /*
2853 * Do sampling with both filters simultaneously. This means using
2854 * a linear filter and doing some tricks (with weights) for the
2855 * pixels which need nearest filter.
2856 * Note that it's probably rare some pixels need nearest and some
2857 * linear filter but the fixups required for the nearest pixels
2858 * aren't all that complicated so just always run a combined path
2859 * if at least some pixels require linear.
2860 */
2861 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
2862 coords, offsets,
2863 ilevel0, ilevel1,
2864 lod_fpart, lod_positive,
2865 texels);
2866 }
2867 lp_build_else(&if_ctx);
2868 {
2869 /*
2870 * All pixels require just nearest filtering, which is way
2871 * cheaper than linear, hence do a separate path for that.
2872 */
2873 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
2874 mip_filter_for_nearest, false,
2875 coords, offsets,
2876 ilevel0, ilevel1, lod_fpart,
2877 texels);
2878 }
2879 lp_build_endif(&if_ctx);
2880 }
2881 }
2882
2883 for (chan = 0; chan < 4; ++chan) {
2884 colors_out[chan] = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, texels[chan], "");
2885 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
2886 }
2887 }
2888
2889
2890 /**
2891 * Texel fetch function. In contrast to general sampling there is no
2892 * filtering, no coord minification, lod (if any) is always explicit uint,
2893 * coords are uints (in terms of texel units) directly to be applied to the
2894 * selected mip level (after adding texel offsets). This function handles
2895 * texel fetch for all targets where texel fetch is supported (no cube maps,
2896 * but 1d, 2d, 3d are supported, arrays and buffers should be too).
2897 */
2898 static void
lp_build_fetch_texel(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef ms_index,const LLVMValueRef * coords,LLVMValueRef explicit_lod,const LLVMValueRef * offsets,LLVMValueRef * colors_out)2899 lp_build_fetch_texel(struct lp_build_sample_context *bld,
2900 unsigned texture_unit,
2901 LLVMValueRef ms_index,
2902 const LLVMValueRef *coords,
2903 LLVMValueRef explicit_lod,
2904 const LLVMValueRef *offsets,
2905 LLVMValueRef *colors_out)
2906 {
2907 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
2908 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2909 unsigned dims = bld->dims, chan;
2910 unsigned target = bld->static_texture_state->target;
2911 bool out_of_bound_ret_zero = true;
2912 LLVMValueRef size, ilevel;
2913 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
2914 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
2915 LLVMValueRef width, height, depth, i, j;
2916 LLVMValueRef offset, out_of_bounds, out1;
2917
2918 LLVMValueRef first_level;
2919
2920 first_level = get_first_level(bld->gallivm,
2921 bld->resources_type,
2922 bld->resources_ptr,
2923 texture_unit, NULL,
2924 bld->static_texture_state,
2925 bld->dynamic_state);
2926 out_of_bounds = int_coord_bld->zero;
2927
2928 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
2929 if (bld->num_mips != int_coord_bld->type.length) {
2930 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
2931 perquadi_bld->type, explicit_lod, 0);
2932 } else {
2933 ilevel = explicit_lod;
2934 }
2935
2936 LLVMValueRef last_level;
2937
2938 last_level = get_last_level(bld->gallivm,
2939 bld->resources_type,
2940 bld->resources_ptr,
2941 texture_unit, NULL,
2942 bld->static_texture_state,
2943 bld->dynamic_state);
2944
2945 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2946 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
2947 lp_build_nearest_mip_level(bld,
2948 first_level, last_level,
2949 ilevel, &ilevel,
2950 out_of_bound_ret_zero ? &out_of_bounds : NULL);
2951 } else {
2952 assert(bld->num_mips == 1);
2953 if (bld->static_texture_state->target != PIPE_BUFFER) {
2954 ilevel = first_level;
2955 } else {
2956 ilevel = lp_build_const_int32(bld->gallivm, 0);
2957 }
2958 }
2959 lp_build_mipmap_level_sizes(bld, ilevel,
2960 &size,
2961 &row_stride_vec, &img_stride_vec);
2962 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
2963 size, &width, &height, &depth);
2964
2965 if (target == PIPE_TEXTURE_1D_ARRAY ||
2966 target == PIPE_TEXTURE_2D_ARRAY) {
2967 if (out_of_bound_ret_zero) {
2968 z = lp_build_layer_coord(bld, texture_unit, false, z, &out1);
2969 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2970 } else {
2971 z = lp_build_layer_coord(bld, texture_unit, false, z, NULL);
2972 }
2973 }
2974
2975 /* This is a lot like border sampling */
2976 if (offsets[0]) {
2977 /*
2978 * coords are really unsigned, offsets are signed, but I don't think
2979 * exceeding 31 bits is possible
2980 */
2981 x = lp_build_add(int_coord_bld, x, offsets[0]);
2982 }
2983 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
2984 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2985 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
2986 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2987
2988 if (dims >= 2) {
2989 if (offsets[1]) {
2990 y = lp_build_add(int_coord_bld, y, offsets[1]);
2991 }
2992 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
2993 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2994 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
2995 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2996
2997 if (dims >= 3) {
2998 if (offsets[2]) {
2999 z = lp_build_add(int_coord_bld, z, offsets[2]);
3000 }
3001 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
3002 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3003 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
3004 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3005 }
3006 }
3007
3008 if (bld->static_texture_state->tiled) {
3009 lp_build_tiled_sample_offset(&bld->int_coord_bld,
3010 bld->format_desc->format,
3011 bld->static_texture_state,
3012 x, y, z, width, height, img_stride_vec,
3013 &offset, &i, &j);
3014 } else {
3015 lp_build_sample_offset(int_coord_bld,
3016 bld->format_desc,
3017 x, y, z, row_stride_vec, img_stride_vec,
3018 &offset, &i, &j);
3019 }
3020
3021 if (bld->static_texture_state->target != PIPE_BUFFER) {
3022 offset = lp_build_add(int_coord_bld, offset,
3023 lp_build_get_mip_offsets(bld, ilevel));
3024 }
3025
3026 if (bld->fetch_ms && bld->static_texture_state->level_zero_only) {
3027 LLVMValueRef num_samples = bld->dynamic_state->last_level(bld->gallivm,
3028 bld->resources_type,
3029 bld->resources_ptr,
3030 texture_unit, NULL);
3031 num_samples = LLVMBuildZExt(bld->gallivm->builder, num_samples,
3032 bld->int_bld.elem_type, "");
3033 LLVMValueRef sample_stride = lp_sample_load_mip_value(bld->gallivm,
3034 bld->mip_offsets_type,
3035 bld->mip_offsets,
3036 lp_build_const_int32(bld->gallivm, LP_JIT_TEXTURE_SAMPLE_STRIDE));
3037 lp_build_sample_ms_offset(int_coord_bld, ms_index, num_samples, sample_stride,
3038 &offset, &out_of_bounds);
3039 }
3040
3041 if (bld->residency) {
3042 lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
3043 bld->resources_type, bld->resources_ptr,
3044 offset, &bld->resident);
3045 }
3046
3047 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
3048
3049 lp_build_fetch_rgba_soa(bld->gallivm,
3050 bld->format_desc,
3051 bld->texel_type, true,
3052 bld->base_ptr, offset,
3053 i, j,
3054 bld->cache,
3055 colors_out);
3056
3057 if (out_of_bound_ret_zero) {
3058 /*
3059 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
3060 * Could use min/max above instead of out-of-bounds comparisons
3061 * if we don't care about the result returned for out-of-bounds.
3062 */
3063 LLVMValueRef oob[4] = {
3064 bld->texel_bld.zero,
3065 bld->texel_bld.zero,
3066 bld->texel_bld.zero,
3067 bld->texel_bld.zero,
3068 };
3069 lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, oob, oob);
3070 for (chan = 0; chan < 4; chan++) {
3071 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
3072 oob[chan], colors_out[chan]);
3073 }
3074 }
3075 }
3076
3077
3078 /**
3079 * Just set texels to white instead of actually sampling the texture.
3080 * For debugging.
3081 */
3082 void
lp_build_sample_nop(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * coords,LLVMValueRef texel_out[4])3083 lp_build_sample_nop(struct gallivm_state *gallivm,
3084 struct lp_type type,
3085 const LLVMValueRef *coords,
3086 LLVMValueRef texel_out[4])
3087 {
3088 LLVMValueRef one = lp_build_one(gallivm, type);
3089 for (unsigned chan = 0; chan < 4; chan++) {
3090 texel_out[chan] = one;
3091 }
3092 }
3093
3094
3095 struct lp_type
lp_build_texel_type(struct lp_type texel_type,const struct util_format_description * format_desc)3096 lp_build_texel_type(struct lp_type texel_type,
3097 const struct util_format_description *format_desc)
3098 {
3099 /* always using the first channel hopefully should be safe,
3100 * if not things WILL break in other places anyway.
3101 */
3102 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
3103 format_desc->channel[0].pure_integer) {
3104 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
3105 texel_type = lp_type_int_vec(texel_type.width, texel_type.width * texel_type.length);
3106 } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
3107 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3108 }
3109 } else if (util_format_has_stencil(format_desc) &&
3110 !util_format_has_depth(format_desc)) {
3111 /* for stencil only formats, sample stencil (uint) */
3112 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3113 }
3114 return texel_type;
3115 }
3116
3117
3118 /**
3119 * Build the actual texture sampling code.
3120 * 'texel' will return a vector of four LLVMValueRefs corresponding to
3121 * R, G, B, A.
3122 * \param type vector float type to use for coords, etc.
3123 * \param sample_key
3124 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
3125 */
3126 void
lp_build_sample_soa_code(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,unsigned sample_key,unsigned texture_index,unsigned sampler_index,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,LLVMTypeRef thread_data_type,LLVMValueRef thread_data_ptr,const LLVMValueRef * coords,const LLVMValueRef * offsets,const struct lp_derivatives * derivs,LLVMValueRef lod,LLVMValueRef ms_index,LLVMValueRef * texel_out)3127 lp_build_sample_soa_code(struct gallivm_state *gallivm,
3128 const struct lp_static_texture_state *static_texture_state,
3129 const struct lp_static_sampler_state *static_sampler_state,
3130 struct lp_sampler_dynamic_state *dynamic_state,
3131 struct lp_type type,
3132 unsigned sample_key,
3133 unsigned texture_index,
3134 unsigned sampler_index,
3135 LLVMTypeRef resources_type,
3136 LLVMValueRef resources_ptr,
3137 LLVMTypeRef thread_data_type,
3138 LLVMValueRef thread_data_ptr,
3139 const LLVMValueRef *coords,
3140 const LLVMValueRef *offsets,
3141 const struct lp_derivatives *derivs, /* optional */
3142 LLVMValueRef lod, /* optional */
3143 LLVMValueRef ms_index, /* optional */
3144 LLVMValueRef *texel_out)
3145 {
3146 assert(static_texture_state);
3147 assert(static_texture_state->format < PIPE_FORMAT_COUNT);
3148 assert(static_sampler_state);
3149
3150 const enum pipe_texture_target target = static_texture_state->target;
3151 const unsigned dims = texture_dims(target);
3152 const unsigned num_quads = type.length == 1 ? 1 : type.length / 4;
3153 struct lp_build_sample_context bld;
3154 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
3155 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
3156 LLVMBuilderRef builder = gallivm->builder;
3157 const struct util_format_description *res_format_desc;
3158
3159 if (0) {
3160 enum pipe_format fmt = static_texture_state->format;
3161 debug_printf("Sample from %s\n", util_format_name(fmt));
3162 }
3163
3164 const enum lp_sampler_lod_property lod_property =
3165 (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
3166 LP_SAMPLER_LOD_PROPERTY_SHIFT;
3167 const enum lp_sampler_lod_control lod_control =
3168 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3169 LP_SAMPLER_LOD_CONTROL_SHIFT;
3170 const enum lp_sampler_op_type op_type =
3171 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3172 LP_SAMPLER_OP_TYPE_SHIFT;
3173
3174 const bool fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
3175 const bool op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
3176 const bool op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
3177 const bool op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
3178
3179 LLVMValueRef lod_bias = NULL;
3180 LLVMValueRef explicit_lod = NULL;
3181 if (lod_control == LP_SAMPLER_LOD_BIAS) {
3182 lod_bias = lod;
3183 assert(lod);
3184 assert(derivs == NULL);
3185 } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3186 explicit_lod = lod;
3187 derived_sampler_state.aniso = 0;
3188 assert(lod);
3189 assert(derivs == NULL);
3190 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3191 assert(derivs);
3192 assert(lod == NULL);
3193 } else {
3194 assert(derivs == NULL);
3195 assert(lod == NULL);
3196 }
3197
3198 if (static_texture_state->format == PIPE_FORMAT_NONE) {
3199 /*
3200 * If there's nothing bound, format is NONE, and we must return
3201 * all zero as mandated by d3d10 in this case.
3202 */
3203 LLVMValueRef zero = lp_build_zero(gallivm, type);
3204 for (unsigned chan = 0; chan < 4; chan++) {
3205 texel_out[chan] = zero;
3206 }
3207 return;
3208 }
3209
3210 assert(type.floating);
3211
3212 /* Setup our build context */
3213 memset(&bld, 0, sizeof bld);
3214 bld.gallivm = gallivm;
3215 bld.resources_type = resources_type;
3216 bld.resources_ptr = resources_ptr;
3217 bld.static_sampler_state = &derived_sampler_state;
3218 bld.static_texture_state = static_texture_state;
3219 bld.dynamic_state = dynamic_state;
3220 bld.format_desc = util_format_description(static_texture_state->format);
3221 bld.dims = dims;
3222
3223 res_format_desc = util_format_description(static_texture_state->res_format);
3224
3225 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
3226 bld.no_quad_lod = true;
3227 }
3228 if (!(gallivm_perf & GALLIVM_PERF_RHO_APPROX) || op_is_lodq) {
3229 bld.no_rho_approx = true;
3230 }
3231 if (!(gallivm_perf & GALLIVM_PERF_BRILINEAR) || op_is_lodq || lod_bias || explicit_lod) {
3232 bld.no_brilinear = true;
3233 }
3234
3235 bld.vector_width = lp_type_width(type);
3236
3237 bld.float_type = lp_type_float(32);
3238 bld.int_type = lp_type_int(32);
3239 bld.coord_type = type;
3240 bld.int_coord_type = lp_int_type(type);
3241 bld.float_size_in_type = lp_type_float(32);
3242 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
3243 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
3244
3245 bld.texel_type = lp_build_texel_type(type, bld.format_desc);
3246
3247 if (!static_texture_state->level_zero_only ||
3248 !static_sampler_state->max_lod_pos || op_is_lodq) {
3249 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
3250 } else {
3251 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3252 }
3253
3254 if (op_is_gather) {
3255 /*
3256 * gather4 is exactly like GL_LINEAR filtering but in the end skipping
3257 * the actual filtering. Using mostly the same paths, so cube face
3258 * selection, coord wrapping etc. all naturally uses the same code.
3259 */
3260 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
3261 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
3262 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
3263 }
3264
3265 const enum pipe_tex_mipfilter mip_filter =
3266 derived_sampler_state.min_mip_filter;
3267
3268 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3269 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3270 /*
3271 * Seamless filtering ignores wrap modes.
3272 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
3273 * bilinear it's not correct but way better than using for instance
3274 * repeat. Note we even set this for non-seamless. Technically GL
3275 * allows any wrap mode, which made sense when supporting true borders
3276 * (can get seamless effect with border and CLAMP_TO_BORDER), but
3277 * gallium doesn't support borders and d3d9 requires wrap modes to be
3278 * ignored and it's a pain to fix up the sampler state (as it makes it
3279 * texture dependent).
3280 */
3281 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3282 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
3283 }
3284
3285 /*
3286 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
3287 * so AoS path could be used. Not sure it's worth the trouble...
3288 */
3289 const enum pipe_tex_filter min_img_filter =
3290 derived_sampler_state.min_img_filter;
3291 const enum pipe_tex_filter mag_img_filter =
3292 derived_sampler_state.mag_img_filter;
3293
3294 /*
3295 * This is all a bit complicated different paths are chosen for performance
3296 * reasons.
3297 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
3298 * everything (the last two options are equivalent for 4-wide case).
3299 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
3300 * lod is calculated then the lod value extracted afterwards so making this
3301 * case basically the same as far as lod handling is concerned for the
3302 * further sample/filter code as the 1 lod for everything case.
3303 * Different lod handling mostly shows up when building mipmap sizes
3304 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
3305 * (getting the fractional part of the lod to the right texels).
3306 */
3307
3308 /*
3309 * There are other situations where at least the multiple int lods could be
3310 * avoided like min and max lod being equal.
3311 */
3312 bld.num_mips = bld.num_lods = 1;
3313
3314 if ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
3315 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3316 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
3317 op_is_lodq) {
3318 /*
3319 * special case for using per-pixel lod even for implicit lod,
3320 * which is generally never required (ok by APIs) except to please
3321 * some (somewhat broken imho) tests (because per-pixel face selection
3322 * can cause derivatives to be different for pixels outside the primitive
3323 * due to the major axis division even if pre-project derivatives are
3324 * looking normal).
3325 * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
3326 * cube maps we do indeed get per-pixel lod values).
3327 */
3328 bld.num_mips = type.length;
3329 bld.num_lods = type.length;
3330 } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
3331 (explicit_lod || lod_bias || derivs)) {
3332 if ((!op_is_tex && target != PIPE_BUFFER) ||
3333 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3334 bld.num_mips = type.length;
3335 bld.num_lods = type.length;
3336 } else if (op_is_tex && min_img_filter != mag_img_filter) {
3337 bld.num_mips = 1;
3338 bld.num_lods = type.length;
3339 }
3340 }
3341 /* TODO: for true scalar_lod should only use 1 lod value */
3342 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
3343 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3344 bld.num_mips = num_quads;
3345 bld.num_lods = num_quads;
3346 } else if (op_is_tex && min_img_filter != mag_img_filter) {
3347 bld.num_mips = 1;
3348 bld.num_lods = num_quads;
3349 }
3350
3351 bld.fetch_ms = fetch_ms;
3352 bld.residency = !!(sample_key & LP_SAMPLER_RESIDENCY);
3353 if (op_is_gather)
3354 bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
3355 bld.lodf_type = type;
3356 /* we want native vector size to be able to use our intrinsics */
3357 if (bld.num_lods != type.length) {
3358 /* TODO: this currently always has to be per-quad or per-element */
3359 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
3360 }
3361 bld.lodi_type = lp_int_type(bld.lodf_type);
3362 bld.levelf_type = bld.lodf_type;
3363 if (bld.num_mips == 1) {
3364 bld.levelf_type.length = 1;
3365 }
3366 bld.leveli_type = lp_int_type(bld.levelf_type);
3367 bld.float_size_type = bld.float_size_in_type;
3368
3369 /* Note: size vectors may not be native. They contain minified w/h/d/_
3370 * values, with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to
3371 * 8x4f32
3372 */
3373 if (bld.num_mips > 1) {
3374 bld.float_size_type.length = bld.num_mips == type.length ?
3375 bld.num_mips * bld.float_size_in_type.length :
3376 type.length;
3377 }
3378 bld.int_size_type = lp_int_type(bld.float_size_type);
3379
3380 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
3381 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
3382 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
3383 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
3384 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
3385 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
3386 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
3387 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
3388 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
3389 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
3390 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
3391 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
3392 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
3393 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
3394
3395 /* Get the dynamic state */
3396 LLVMValueRef tex_width = dynamic_state->width(gallivm, resources_type,
3397 resources_ptr, texture_index,
3398 NULL);
3399 bld.row_stride_array = dynamic_state->row_stride(gallivm, resources_type,
3400 resources_ptr, texture_index, NULL,
3401 &bld.row_stride_type);
3402 bld.img_stride_array = dynamic_state->img_stride(gallivm, resources_type,
3403 resources_ptr, texture_index, NULL,
3404 &bld.img_stride_type);
3405 bld.base_ptr = dynamic_state->base_ptr(gallivm, resources_type,
3406 resources_ptr, texture_index, NULL);
3407 bld.mip_offsets = dynamic_state->mip_offsets(gallivm, resources_type,
3408 resources_ptr, texture_index, NULL,
3409 &bld.mip_offsets_type);
3410
3411 /* Note that mip_offsets is an array[level] of offsets to texture images */
3412
3413 if (dynamic_state->cache_ptr && thread_data_ptr) {
3414 bld.cache = dynamic_state->cache_ptr(gallivm, thread_data_type,
3415 thread_data_ptr, texture_index);
3416 }
3417
3418 uint32_t res_bw = res_format_desc->block.width;
3419 uint32_t res_bh = res_format_desc->block.height;
3420 uint32_t bw = bld.format_desc->block.width;
3421 uint32_t bh = bld.format_desc->block.height;
3422
3423 /* only scale if the blocksizes are different. */
3424 if (res_bw == bw)
3425 res_bw = bw = 1;
3426 if (res_bh == bh)
3427 res_bh = bh = 1;
3428
3429 /* width, height, depth as single int vector */
3430 if (dims <= 1) {
3431 bld.int_size = tex_width;
3432 bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0);
3433 bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0);
3434 bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0);
3435 } else {
3436 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3437 tex_width,
3438 LLVMConstInt(i32t, 0, 0), "");
3439 bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3440 LLVMConstInt(i32t, res_bw, 0),
3441 LLVMConstInt(i32t, 0, 0), "");
3442 bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3443 LLVMConstInt(i32t, util_logbase2(res_bw), 0),
3444 LLVMConstInt(i32t, 0, 0), "");
3445 bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
3446 LLVMConstInt(i32t, bw, 0),
3447 LLVMConstInt(i32t, 0, 0), "");
3448 if (dims >= 2) {
3449 LLVMValueRef tex_height =
3450 dynamic_state->height(gallivm, resources_type,
3451 resources_ptr, texture_index, NULL);
3452 tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
3453 bld.int_bld.elem_type, "");
3454 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3455 tex_height,
3456 LLVMConstInt(i32t, 1, 0), "");
3457 bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
3458 LLVMConstInt(i32t, res_bh, 0),
3459 LLVMConstInt(i32t, 1, 0), "");
3460 bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
3461 LLVMConstInt(i32t, util_logbase2(res_bh), 0),
3462 LLVMConstInt(i32t, 1, 0), "");
3463 bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
3464 LLVMConstInt(i32t, bh, 0),
3465 LLVMConstInt(i32t, 1, 0), "");
3466 if (dims >= 3) {
3467 LLVMValueRef tex_depth =
3468 dynamic_state->depth(gallivm, resources_type, resources_ptr,
3469 texture_index, NULL);
3470 tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
3471 bld.int_bld.elem_type, "");
3472 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
3473 tex_depth,
3474 LLVMConstInt(i32t, 2, 0), "");
3475 bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
3476 LLVMConstInt(i32t, 1, 0),
3477 LLVMConstInt(i32t, 2, 0), "");
3478 bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
3479 LLVMConstInt(i32t, 0, 0),
3480 LLVMConstInt(i32t, 2, 0), "");
3481 bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
3482 LLVMConstInt(i32t, 1, 0),
3483 LLVMConstInt(i32t, 2, 0), "");
3484 }
3485 }
3486 }
3487
3488 LLVMValueRef newcoords[5];
3489 for (unsigned i = 0; i < 5; i++) {
3490 newcoords[i] = coords[i];
3491 }
3492
3493 if (util_format_is_pure_integer(static_texture_state->format) &&
3494 !util_format_has_depth(bld.format_desc) && op_is_tex &&
3495 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
3496 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3497 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3498 /*
3499 * Bail if impossible filtering is specified (the awkard additional
3500 * depth check is because it is legal in gallium to have things like
3501 * S8Z24 here which would say it's pure int despite such formats should
3502 * sample the depth component).
3503 * In GL such filters make the texture incomplete, this makes it robust
3504 * against gallium frontends which set this up regardless (we'd crash in
3505 * the lerp later otherwise).
3506 * At least in some apis it may be legal to use such filters with lod
3507 * queries and/or gather (at least for gather d3d10 says only the wrap
3508 * bits are really used hence filter bits are likely simply ignored).
3509 * For fetch, we don't get valid samplers either way here.
3510 */
3511 LLVMValueRef zero = lp_build_zero(gallivm, type);
3512 for (unsigned chan = 0; chan < 4; chan++) {
3513 texel_out[chan] = zero;
3514 }
3515 return;
3516 }
3517
3518 if (0) {
3519 /* For debug: no-op texture sampling */
3520 lp_build_sample_nop(gallivm,
3521 bld.texel_type,
3522 newcoords,
3523 texel_out);
3524 } else if (op_type == LP_SAMPLER_OP_FETCH) {
3525 lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
3526 lod, offsets, texel_out);
3527 if (bld.residency)
3528 texel_out[4] = bld.resident;
3529 } else {
3530 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
3531 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
3532 bool use_aos = util_format_fits_8unorm(bld.format_desc) &&
3533 op_is_tex &&
3534 /* not sure this is strictly needed or simply impossible */
3535 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
3536 derived_sampler_state.aniso == 0 &&
3537 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
3538
3539 use_aos &= bld.num_lods <= num_quads ||
3540 derived_sampler_state.min_img_filter ==
3541 derived_sampler_state.mag_img_filter;
3542
3543 use_aos &= !static_texture_state->tiled;
3544
3545 if (gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
3546 use_aos = 0;
3547 }
3548
3549 if (dims > 1) {
3550 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
3551 if (dims > 2) {
3552 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
3553 }
3554 }
3555 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
3556 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3557 derived_sampler_state.seamless_cube_map &&
3558 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3559 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3560 /* theoretically possible with AoS filtering but not implemented (complex!) */
3561 use_aos = 0;
3562 }
3563
3564 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
3565 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
3566 debug_printf("%s: using floating point linear filtering for %s\n",
3567 __func__, bld.format_desc->short_name);
3568 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
3569 " wraps %d wrapt %d wrapr %d\n",
3570 derived_sampler_state.min_img_filter,
3571 derived_sampler_state.mag_img_filter,
3572 derived_sampler_state.min_mip_filter,
3573 static_texture_state->target,
3574 derived_sampler_state.seamless_cube_map,
3575 derived_sampler_state.wrap_s,
3576 derived_sampler_state.wrap_t,
3577 derived_sampler_state.wrap_r);
3578 }
3579
3580 lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
3581 newcoords, derivs, lod_bias, explicit_lod,
3582 &lod_positive, &lod, &lod_fpart,
3583 &ilevel0, &ilevel1);
3584
3585 if (op_is_lodq) {
3586 texel_out[0] = lod_fpart;
3587 texel_out[1] = lod;
3588 texel_out[2] = texel_out[3] = bld.coord_bld.zero;
3589 if (bld.residency)
3590 texel_out[4] = bld.resident;
3591 return;
3592 }
3593
3594 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3595 /* The aos path doesn't do seamless filtering so simply add cube layer
3596 * to face now.
3597 */
3598 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
3599 }
3600
3601 /*
3602 * we only try 8-wide sampling with soa or if we have AVX2
3603 * as it appears to be a loss with just AVX)
3604 */
3605 if (num_quads == 1 || !use_aos ||
3606 (util_get_cpu_caps()->has_avx2 &&
3607 (bld.num_lods == 1 ||
3608 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
3609 if (use_aos) {
3610 /* do sampling/filtering with fixed pt arithmetic */
3611 lp_build_sample_aos(&bld,
3612 newcoords[0], newcoords[1],
3613 newcoords[2],
3614 offsets, lod_positive, lod_fpart,
3615 ilevel0, ilevel1,
3616 texel_out);
3617 } else {
3618 lp_build_sample_general(&bld, sampler_index,
3619 op_type == LP_SAMPLER_OP_GATHER,
3620 newcoords, offsets,
3621 lod_positive, lod_fpart,
3622 ilevel0, ilevel1,
3623 texel_out);
3624 if (bld.residency)
3625 texel_out[4] = bld.resident;
3626 }
3627 } else {
3628 struct lp_build_sample_context bld4;
3629 struct lp_type type4 = type;
3630 LLVMValueRef texelout4[4];
3631 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
3632
3633 type4.length = 4;
3634
3635 /* Setup our build context */
3636 memset(&bld4, 0, sizeof bld4);
3637 bld4.no_quad_lod = bld.no_quad_lod;
3638 bld4.no_rho_approx = bld.no_rho_approx;
3639 bld4.no_brilinear = bld.no_brilinear;
3640 bld4.gallivm = bld.gallivm;
3641 bld4.resources_type = bld.resources_type;
3642 bld4.resources_ptr = bld.resources_ptr;
3643 bld4.static_texture_state = bld.static_texture_state;
3644 bld4.static_sampler_state = bld.static_sampler_state;
3645 bld4.dynamic_state = bld.dynamic_state;
3646 bld4.format_desc = bld.format_desc;
3647 bld4.dims = bld.dims;
3648 bld4.row_stride_type = bld.row_stride_type;
3649 bld4.row_stride_array = bld.row_stride_array;
3650 bld4.img_stride_type = bld.img_stride_type;
3651 bld4.img_stride_array = bld.img_stride_array;
3652 bld4.base_ptr = bld.base_ptr;
3653 bld4.mip_offsets_type = bld.mip_offsets_type;
3654 bld4.mip_offsets = bld.mip_offsets;
3655 bld4.int_size = bld.int_size;
3656 bld4.int_tex_blocksize = bld.int_tex_blocksize;
3657 bld4.int_tex_blocksize_log2 = bld.int_tex_blocksize_log2;
3658 bld4.int_view_blocksize = bld.int_view_blocksize;
3659 bld4.cache = bld.cache;
3660
3661 bld4.vector_width = lp_type_width(type4);
3662
3663 bld4.float_type = lp_type_float(32);
3664 bld4.int_type = lp_type_int(32);
3665 bld4.coord_type = type4;
3666 bld4.int_coord_type = lp_int_type(type4);
3667 bld4.float_size_in_type = lp_type_float(32);
3668 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
3669 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
3670 bld4.texel_type = bld.texel_type;
3671 bld4.texel_type.length = 4;
3672
3673 bld4.num_mips = bld4.num_lods = 1;
3674 if (bld4.no_quad_lod && bld4.no_rho_approx &&
3675 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3676 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3677 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3678 bld4.num_mips = type4.length;
3679 bld4.num_lods = type4.length;
3680 }
3681 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
3682 (explicit_lod || lod_bias || derivs)) {
3683 if ((!op_is_tex && target != PIPE_BUFFER) ||
3684 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3685 bld4.num_mips = type4.length;
3686 bld4.num_lods = type4.length;
3687 } else if (op_is_tex && min_img_filter != mag_img_filter) {
3688 bld4.num_mips = 1;
3689 bld4.num_lods = type4.length;
3690 }
3691 }
3692
3693 /* we want native vector size to be able to use our intrinsics */
3694 bld4.lodf_type = type4;
3695 if (bld4.num_lods != type4.length) {
3696 bld4.lodf_type.length = 1;
3697 }
3698 bld4.lodi_type = lp_int_type(bld4.lodf_type);
3699 bld4.levelf_type = type4;
3700 if (bld4.num_mips != type4.length) {
3701 bld4.levelf_type.length = 1;
3702 }
3703 bld4.leveli_type = lp_int_type(bld4.levelf_type);
3704 bld4.float_size_type = bld4.float_size_in_type;
3705 if (bld4.num_mips > 1) {
3706 bld4.float_size_type.length = bld4.num_mips == type4.length ?
3707 bld4.num_mips * bld4.float_size_in_type.length :
3708 type4.length;
3709 }
3710 bld4.int_size_type = lp_int_type(bld4.float_size_type);
3711
3712 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
3713 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
3714 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
3715 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
3716 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
3717 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
3718 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
3719 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
3720 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
3721 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
3722 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
3723 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
3724 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
3725 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
3726
3727 for (unsigned i = 0; i < num_quads; i++) {
3728 LLVMValueRef s4, t4, r4;
3729 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
3730 LLVMValueRef ilevel04, ilevel14 = NULL;
3731 LLVMValueRef offsets4[4] = { NULL };
3732 unsigned num_lods = bld4.num_lods;
3733
3734 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
3735 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
3736 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
3737
3738 if (offsets[0]) {
3739 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
3740 if (dims > 1) {
3741 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
3742 if (dims > 2) {
3743 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
3744 }
3745 }
3746 }
3747 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
3748 ilevel04 = bld.num_mips == 1 ? ilevel0 :
3749 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
3750 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
3751 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
3752 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
3753 }
3754
3755 if (use_aos) {
3756 /* do sampling/filtering with fixed pt arithmetic */
3757 lp_build_sample_aos(&bld4,
3758 s4, t4, r4, offsets4,
3759 lod_positive4, lod_fpart4,
3760 ilevel04, ilevel14,
3761 texelout4);
3762 } else {
3763 /* this path is currently unreachable and hence might break easily... */
3764 LLVMValueRef newcoords4[5];
3765 newcoords4[0] = s4;
3766 newcoords4[1] = t4;
3767 newcoords4[2] = r4;
3768 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
3769 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
3770
3771 lp_build_sample_general(&bld4, sampler_index,
3772 op_type == LP_SAMPLER_OP_GATHER,
3773 newcoords4, offsets4,
3774 lod_positive4, lod_fpart4,
3775 ilevel04, ilevel14,
3776 texelout4);
3777 }
3778 for (unsigned j = 0; j < 4; j++) {
3779 texelouttmp[j][i] = texelout4[j];
3780 }
3781 }
3782
3783 for (unsigned j = 0; j < 4; j++) {
3784 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
3785 }
3786 }
3787 }
3788
3789 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
3790 apply_sampler_swizzle(&bld, texel_out);
3791 }
3792
3793 /*
3794 * texel type can be a (32bit) int/uint (for pure int formats only),
3795 * however we are expected to always return floats (storage is untyped).
3796 */
3797 if (!bld.texel_type.floating) {
3798 unsigned chan;
3799 for (chan = 0; chan < 4; chan++) {
3800 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
3801 lp_build_vec_type(gallivm, type), "");
3802 }
3803 }
3804 }
3805
3806
3807 #define USE_TEX_FUNC_CALL 1
3808
3809 static inline void
get_target_info(enum pipe_texture_target target,unsigned * num_coords,unsigned * num_derivs,unsigned * num_offsets,unsigned * layer)3810 get_target_info(enum pipe_texture_target target,
3811 unsigned *num_coords, unsigned *num_derivs,
3812 unsigned *num_offsets, unsigned *layer)
3813 {
3814 unsigned dims = texture_dims(target);
3815 *num_coords = dims;
3816 *num_offsets = dims;
3817 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
3818 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
3819 *layer = has_layer_coord(target) ? 2: 0;
3820 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3821 /*
3822 * dims doesn't include r coord for cubes - this is handled
3823 * by layer instead, but need to fix up for cube arrays...
3824 */
3825 *layer = 3;
3826 *num_coords = 3;
3827 }
3828 }
3829
3830
3831 /**
3832 * Generate the function body for a texture sampling function.
3833 */
static void
lp_build_sample_gen_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         struct lp_type type,
                         LLVMTypeRef resources_type,
                         LLVMTypeRef thread_data_type,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMValueRef function,
                         unsigned num_args,
                         unsigned sample_key)
{
   LLVMBuilderRef old_builder;
   LLVMBasicBlockRef block;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef lod = NULL;
   LLVMValueRef ms_index = NULL;
   LLVMValueRef resources_ptr;
   LLVMValueRef thread_data_ptr = NULL;
   LLVMValueRef texel_out[4];
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   unsigned num_param = 0;
   unsigned num_coords, num_derivs, num_offsets, layer;
   bool need_cache = false;

   /* lod control and op type are both encoded in sample_key */
   const enum lp_sampler_lod_control lod_control =
      (sample_key & LP_SAMPLER_LOD_CONTROL_MASK)
      >> LP_SAMPLER_LOD_CONTROL_SHIFT;

   const enum lp_sampler_op_type op_type =
      (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /* lod query doesn't take a layer */
   if (layer && op_type == LP_SAMPLER_OP_LODQ)
      layer = 0;

   /* a texture cache (thread data) is only used for compressed S3TC formats */
   if (dynamic_state->cache_ptr) {
      const struct util_format_description *format_desc;
      format_desc = util_format_description(static_texture_state->format);
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         need_cache = true;
      }
   }

   /*
    * "unpack" arguments.
    * NOTE: the order and conditions here must exactly mirror the argument
    * packing done in lp_build_sample_soa_func() (both the prototype build
    * and the call-site args array) — any mismatch silently reads the
    * wrong parameter.
    */
   resources_ptr = LLVMGetParam(function, num_param++);
   if (need_cache) {
      thread_data_ptr = LLVMGetParam(function, num_param++);
   }
   for (unsigned i = 0; i < num_coords; i++) {
      coords[i] = LLVMGetParam(function, num_param++);
   }
   for (unsigned i = num_coords; i < 5; i++) {
      /* This is rather unfortunate... */
      coords[i] = lp_build_undef(gallivm, type);
   }
   if (layer) {
      coords[layer] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      /* shadow comparison value always travels in coords[4] */
      coords[4] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_FETCH_MS) {
      ms_index = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (unsigned i = 0; i < num_offsets; i++) {
         offsets[i] = LLVMGetParam(function, num_param++);
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      lod = LLVMGetParam(function, num_param++);
   } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      /* ddx/ddy are interleaved per coordinate in the argument list */
      for (unsigned i = 0; i < num_derivs; i++) {
         derivs.ddx[i] = LLVMGetParam(function, num_param++);
         derivs.ddy[i] = LLVMGetParam(function, num_param++);
      }
      deriv_ptr = &derivs;
   }

   /* caller computed the same count when building the prototype */
   assert(num_args == num_param);

   /*
    * Function body
    */

   /* build into a fresh builder so the caller's insertion point is untouched */
   old_builder = gallivm->builder;
   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
   LLVMPositionBuilderAtEnd(gallivm->builder, block);

   lp_build_sample_soa_code(gallivm,
                            static_texture_state,
                            static_sampler_state,
                            dynamic_state,
                            type,
                            sample_key,
                            texture_index,
                            sampler_index,
                            resources_type,
                            resources_ptr,
                            thread_data_type,
                            thread_data_ptr,
                            coords,
                            offsets,
                            deriv_ptr,
                            lod,
                            ms_index,
                            texel_out);

   /* return the 4 texel channels as one aggregate (struct) value */
   LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);

   LLVMDisposeBuilder(gallivm->builder);
   gallivm->builder = old_builder;

   gallivm_verify_function(gallivm, function);
}
3959
3960
3961 /**
3962 * Call the matching function for texture sampling.
3963 * If there's no match, generate a new one.
3964 */
static void
lp_build_sample_soa_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         const struct lp_sampler_params *params,
                         unsigned texture_index, unsigned sampler_index,
                         LLVMValueRef *tex_ret)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* recover the module from the builder's current insertion block */
   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
                             LLVMGetInsertBlock(builder)));
   LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
   unsigned sample_key = params->sample_key;
   const LLVMValueRef *coords = params->coords;
   const LLVMValueRef *offsets = params->offsets;
   const struct lp_derivatives *derivs = params->derivs;

   const enum lp_sampler_lod_control lod_control =
      (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
      LP_SAMPLER_LOD_CONTROL_SHIFT;

   const enum lp_sampler_op_type op_type =
      (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;

   unsigned num_coords, num_derivs, num_offsets, layer;
   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /* lod query doesn't take a layer */
   if (layer && op_type == LP_SAMPLER_OP_LODQ)
      layer = 0;

   /* a texture cache argument is only passed for compressed S3TC formats */
   bool need_cache = false;
   if (dynamic_state->cache_ptr) {
      const struct util_format_description *format_desc;
      format_desc = util_format_description(static_texture_state->format);
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         need_cache = true;
      }
   }

   /*
    * texture function matches are found by name.
    * Thus the name has to include both the texture and sampler unit
    * (which covers all static state) plus the actual texture function
    * (including things like offsets, shadow coord, lod control).
    * Additionally lod_property has to be included too.
    */
   char func_name[64];
   snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
            texture_index, sampler_index, sample_key);

   LLVMValueRef function = LLVMGetNamedFunction(module, func_name);
   LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
   LLVMTypeRef ret_type;
   LLVMTypeRef val_type[4];
   unsigned num_param = 0;

   /*
    * Generate the function prototype.
    * NOTE: the order and conditions here must exactly match the parameter
    * unpacking in lp_build_sample_gen_func(), and the args[] packing below
    * must match this prototype one-to-one.
    */

   arg_types[num_param++] = LLVMTypeOf(params->resources_ptr);
   if (need_cache) {
      arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
   }
   for (unsigned i = 0; i < num_coords; i++) {
      /* all coords share one vector type */
      arg_types[num_param++] = LLVMTypeOf(coords[0]);
      assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
   }
   if (layer) {
      arg_types[num_param++] = LLVMTypeOf(coords[layer]);
      assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      /* shadow comparison value has the same type as the coords */
      arg_types[num_param++] = LLVMTypeOf(coords[0]);
   }
   if (sample_key & LP_SAMPLER_FETCH_MS) {
      arg_types[num_param++] = LLVMTypeOf(params->ms_index);
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (unsigned i = 0; i < num_offsets; i++) {
         arg_types[num_param++] = LLVMTypeOf(offsets[0]);
         assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      arg_types[num_param++] = LLVMTypeOf(params->lod);
   } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      /* ddx/ddy interleaved per coordinate, mirroring the unpack side */
      for (unsigned i = 0; i < num_derivs; i++) {
         arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
         arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
         assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
         assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
      }
   }

   /* return type: struct of 4 texel-channel vectors (r,g,b,a) */
   val_type[0] = val_type[1] = val_type[2] = val_type[3] =
      lp_build_vec_type(gallivm, params->type);
   ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
   LLVMTypeRef function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);

   if (!function) {
      /* no cached match in the module yet - create and generate the body */
      function = LLVMAddFunction(module, func_name, function_type);

      for (unsigned i = 0; i < num_param; ++i) {
         if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {

            /* pointer args (resources, cache) never alias each other */
            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
         }
      }

      /* internal fastcc: not externally visible, cheapest calling convention */
      LLVMSetFunctionCallConv(function, LLVMFastCallConv);
      LLVMSetLinkage(function, LLVMInternalLinkage);

      lp_build_sample_gen_func(gallivm,
                               static_texture_state,
                               static_sampler_state,
                               dynamic_state,
                               params->type,
                               params->resources_type,
                               params->thread_data_type,
                               texture_index,
                               sampler_index,
                               function,
                               num_param,
                               sample_key);
   }

   /* pack the call arguments - must match the prototype built above */
   unsigned num_args = 0;
   args[num_args++] = params->resources_ptr;
   if (need_cache) {
      args[num_args++] = params->thread_data_ptr;
   }
   for (unsigned i = 0; i < num_coords; i++) {
      args[num_args++] = coords[i];
   }
   if (layer) {
      args[num_args++] = coords[layer];
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      args[num_args++] = coords[4];
   }
   if (sample_key & LP_SAMPLER_FETCH_MS) {
      args[num_args++] = params->ms_index;
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (unsigned i = 0; i < num_offsets; i++) {
         args[num_args++] = offsets[i];
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      args[num_args++] = params->lod;
   } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      for (unsigned i = 0; i < num_derivs; i++) {
         args[num_args++] = derivs->ddx[i];
         args[num_args++] = derivs->ddy[i];
      }
   }

   assert(num_args <= LP_MAX_TEX_FUNC_ARGS);

   *tex_ret = LLVMBuildCall2(builder, function_type, function, args, num_args, "");
   /* the call instruction's convention must match the callee's (fastcc) */
   LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder);
   LLVMValueRef inst = LLVMGetLastInstruction(bb);
   LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
}
4135
4136
4137 /**
4138 * Build texture sampling code.
4139 * Either via a function call or inline it directly.
4140 */
4141 void
lp_build_sample_soa(const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_sampler_params * params)4142 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
4143 const struct lp_static_sampler_state *static_sampler_state,
4144 struct lp_sampler_dynamic_state *dynamic_state,
4145 struct gallivm_state *gallivm,
4146 const struct lp_sampler_params *params)
4147 {
4148 bool use_tex_func = false;
4149
4150 /*
4151 * Do not use a function call if the sampling is "simple enough".
4152 * We define this by
4153 * a) format
4154 * b) no mips (either one level only or no mip filter)
4155 * No mips will definitely make the code smaller, though
4156 * the format requirement is a bit iffy - there's some (SoA) formats
4157 * which definitely generate less code. This does happen to catch
4158 * some important cases though which are hurt quite a bit by using
4159 * a call (though not really because of the call overhead but because
4160 * they are reusing the same texture unit with some of the same
4161 * parameters).
4162 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
4163 */
4164
4165 if (USE_TEX_FUNC_CALL) {
4166 const struct util_format_description *format_desc =
4167 util_format_description(static_texture_state->format);
4168 const bool simple_format =
4169 (util_format_is_rgba8_variant(format_desc) &&
4170 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
4171 const enum lp_sampler_op_type op_type =
4172 (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
4173 LP_SAMPLER_OP_TYPE_SHIFT;
4174 const bool simple_tex =
4175 op_type != LP_SAMPLER_OP_TEXTURE ||
4176 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
4177 static_texture_state->level_zero_only == true) &&
4178 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
4179
4180 use_tex_func = !(simple_format && simple_tex);
4181 }
4182
4183 if (use_tex_func) {
4184 LLVMValueRef tex_ret;
4185 lp_build_sample_soa_func(gallivm,
4186 static_texture_state,
4187 static_sampler_state,
4188 dynamic_state,
4189 params, params->texture_index,
4190 params->sampler_index, &tex_ret);
4191
4192 for (unsigned i = 0; i < 4; i++) {
4193 params->texel[i] =
4194 LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
4195 }
4196 } else {
4197 lp_build_sample_soa_code(gallivm,
4198 static_texture_state,
4199 static_sampler_state,
4200 dynamic_state,
4201 params->type,
4202 params->sample_key,
4203 params->texture_index,
4204 params->sampler_index,
4205 params->resources_type,
4206 params->resources_ptr,
4207 params->thread_data_type,
4208 params->thread_data_ptr,
4209 params->coords,
4210 params->offsets,
4211 params->derivs,
4212 params->lod,
4213 params->ms_index,
4214 params->texel);
4215 }
4216 }
4217
4218
/**
 * Build code for a texture size / sample-count query.
 *
 * Writes the requested per-dimension sizes (plus array layer count and,
 * for sviewinfo-style queries, the number of mip levels in element 3)
 * into params->sizes_out[] as vectors of params->int_type.  If nothing
 * is bound (format NONE) all channels are forced to zero, as d3d10
 * requires.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        const struct lp_sampler_size_query_params *params)
{
   LLVMValueRef first_level = NULL;
   const unsigned num_lods = 1;
   LLVMTypeRef resources_type = params->resources_type;
   LLVMValueRef resources_ptr = params->resources_ptr;
   const unsigned texture_unit = params->texture_unit;
   const enum pipe_texture_target target = params->target;
   LLVMValueRef texture_unit_offset = params->texture_unit_offset;
   const struct util_format_description *format_desc =
      util_format_description(static_state->format);
   /* block layout of the underlying resource (may differ from the view) */
   const struct util_format_description *res_format_desc =
      util_format_description(static_state->res_format);

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
      for (unsigned chan = 0; chan < 4; chan++) {
         params->sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   const unsigned dims = texture_dims(target);

   const bool has_array = has_layer_coord(target);

   assert(!params->int_type.floating);

   /* scalar-ish int32x4 context used to assemble width/height/depth */
   struct lp_build_context bld_int_vec4;
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (params->samples_only) {
      LLVMValueRef num_samples;
      if (params->ms && static_state->level_zero_only) {
         /* multisample never has levels. */
         /* NOTE(review): last_level is reused to store the sample count for
          * MS resources here — confirm against the dynamic state layout. */
         num_samples = dynamic_state->last_level(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
         num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                     bld_int_vec4.elem_type, "");
      } else {
         num_samples = lp_build_const_int32(gallivm, 0);
      }
      params->sizes_out[0] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_samples);
      return;
   }

   LLVMValueRef lod;
   LLVMValueRef level = 0;
   if (params->explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      first_level = get_first_level(gallivm, resources_type, resources_ptr,
                                    texture_unit, texture_unit_offset,
                                    static_state, dynamic_state);
      /* the shader-supplied lod is relative to the view's first level */
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   LLVMValueRef size = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize_log2 = bld_int_vec4.undef;
   LLVMValueRef view_blocksize = bld_int_vec4.undef;

   uint32_t res_bw = res_format_desc->block.width;
   uint32_t res_bh = res_format_desc->block.height;
   uint32_t bw = format_desc->block.width;
   uint32_t bh = format_desc->block.height;

   /* only scale if the blocksizes are different. */
   if (res_bw == bw)
      res_bw = bw = 1;
   if (res_bh == bh)
      res_bh = bh = 1;

   /* element 0: width plus resource/view block widths for rescaling */
   LLVMValueRef tex_width = dynamic_state->width(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 tex_width,
                                 lp_build_const_int32(gallivm, 0), "");
   tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                          lp_build_const_int32(gallivm, res_bw),
                                          lp_build_const_int32(gallivm, 0), "");
   tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                               lp_build_const_int32(gallivm, util_logbase2(res_bw)),
                                               lp_build_const_int32(gallivm, 0), "");
   view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                           lp_build_const_int32(gallivm, bw),
                                           lp_build_const_int32(gallivm, 0), "");
   if (dims >= 2) {
      /* element 1: height and the corresponding block heights */
      LLVMValueRef tex_height =
         dynamic_state->height(gallivm, resources_type,
                               resources_ptr, texture_unit, texture_unit_offset);
      tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
                                 bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_height,
                                    lp_build_const_int32(gallivm, 1), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, res_bh),
                                             lp_build_const_int32(gallivm, 1), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, util_logbase2(res_bh)),
                                                  lp_build_const_int32(gallivm, 1), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, bh),
                                              lp_build_const_int32(gallivm, 1), "");
   }

   if (dims >= 3) {
      /* element 2: depth; block size is always 1 in the z direction */
      LLVMValueRef tex_depth =
         dynamic_state->depth(gallivm, resources_type,
                              resources_ptr, texture_unit, texture_unit_offset);
      tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
                                bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_depth,
                                    lp_build_const_int32(gallivm, 2), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, 1),
                                             lp_build_const_int32(gallivm, 2), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, 0),
                                                  lp_build_const_int32(gallivm, 2), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, 1),
                                              lp_build_const_int32(gallivm, 2), "");
   }

   /* minify to the queried level, then rescale for view/resource
    * block-size mismatches */
   size = lp_build_minify(&bld_int_vec4, size, lod, true);
   size = lp_build_scale_view_dims(&bld_int_vec4, size, tex_blocksize,
                                   tex_blocksize_log2, view_blocksize);

   if (has_array) {
      LLVMValueRef layers = dynamic_state->depth(gallivm, resources_type,
                                                 resources_ptr, texture_unit,
                                                 texture_unit_offset);
      layers = LLVMBuildZExt(gallivm->builder, layers,
                             bld_int_vec4.elem_type, "");
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (params->explicit_lod && params->is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                  texture_unit, texture_unit_offset,
                                  static_state, dynamic_state);

      /* out-of-range mask: level < first_level || level > last_level */
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      } else {
         /* TODO */
         assert(0);
      }
      /* zero out the sizes where the level is out of range */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }

   /* broadcast each queried component out to the shader's vector type */
   unsigned i;
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      params->sizes_out[i] =
         lp_build_extract_broadcast(gallivm, bld_int_vec4.type,
                                    params->int_type,
                                    size,
                                    lp_build_const_int32(gallivm, i));
   }
   if (params->is_sviewinfo) {
      /* unused components read as zero for sviewinfo */
      for (; i < 4; i++) {
         params->sizes_out[i] = lp_build_const_vec(gallivm,
                                                   params->int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (params->is_sviewinfo && params->explicit_lod) {
      struct lp_build_context bld_int_scalar;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      LLVMValueRef num_levels;
      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      } else {
         LLVMValueRef last_level;
         last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                     texture_unit, texture_unit_offset,
                                     static_state, dynamic_state);
         /* num_levels = last_level - first_level + 1 */
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels,
                                   bld_int_scalar.one);
      }
      params->sizes_out[3] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_levels);
   }

   if (target == PIPE_BUFFER) {
      struct lp_build_context bld_int;
      lp_build_context_init(&bld_int, gallivm, params->int_type);

      /* clamp buffer size to the implementation's texel-buffer limit */
      params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0],
                                          lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS));
   }
}
4491
4492
/**
 * Emit a per-lane atomic image operation (atomicrmw or cmpxchg).
 *
 * Only single-channel 32-bit formats (R32_UINT/R32_SINT/R32_FLOAT) can be
 * operated on atomically; for any other format/op combination a zero
 * result is returned.  Lanes that are inactive in exec_mask or flagged
 * out_of_bounds are skipped.  The (packed, per-lane) previous values are
 * gathered into atomic_result[0].
 *
 * rgba_in[0] holds the packed operand; rgba2_in[0] holds the comparison
 * source for LP_IMG_ATOMIC_CAS.
 */
static void
lp_build_do_atomic_soa(struct gallivm_state *gallivm,
                       const struct util_format_description *format_desc,
                       struct lp_type type,
                       LLVMValueRef exec_mask,
                       LLVMValueRef base_ptr,
                       LLVMValueRef offset,
                       LLVMValueRef out_of_bounds,
                       unsigned img_op,
                       LLVMAtomicRMWBinOp op,
                       const LLVMValueRef rgba_in[4],
                       const LLVMValueRef rgba2_in[4],
                       LLVMValueRef atomic_result[4])
{
   const enum pipe_format format = format_desc->format;

   bool valid = format == PIPE_FORMAT_R32_UINT ||
                format == PIPE_FORMAT_R32_SINT ||
                format == PIPE_FORMAT_R32_FLOAT;

   /* integer ops are only valid on integer formats and vice versa */
   bool integer = format != PIPE_FORMAT_R32_FLOAT;
   if (img_op == LP_IMG_ATOMIC) {
      switch (op) {
      case LLVMAtomicRMWBinOpAdd:
      case LLVMAtomicRMWBinOpSub:
      case LLVMAtomicRMWBinOpAnd:
      case LLVMAtomicRMWBinOpNand:
      case LLVMAtomicRMWBinOpOr:
      case LLVMAtomicRMWBinOpXor:
      case LLVMAtomicRMWBinOpMax:
      case LLVMAtomicRMWBinOpMin:
      case LLVMAtomicRMWBinOpUMax:
      case LLVMAtomicRMWBinOpUMin:
         valid &= integer;
         break;
      case LLVMAtomicRMWBinOpFAdd:
      case LLVMAtomicRMWBinOpFSub:
#if LLVM_VERSION_MAJOR >= 15
      case LLVMAtomicRMWBinOpFMax:
      case LLVMAtomicRMWBinOpFMin:
#endif
         valid &= !integer;
         break;
      default:
         break;
      }
   } else {
      /* cmpxchg only operates on integers */
      valid &= integer;
   }

   if (!valid) {
      atomic_result[0] = lp_build_zero(gallivm, type);
      return;
   }

   LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ?
      LLVMFloatTypeInContext(gallivm->context) :
      LLVMInt32TypeInContext(gallivm->context);

   /* stack slot collecting the per-lane previous values */
   LLVMTypeRef atom_res_elem_type =
      LLVMVectorType(ref_type, type.length);
   LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");

   /* per-lane byte addresses into the image */
   offset = LLVMBuildGEP2(gallivm->builder,
                          LLVMInt8TypeInContext(gallivm->context),
                          base_ptr, &offset, 1, "");

   /* scalarize: loop over the vector lanes, one atomic op per active lane */
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   struct lp_build_if_state ifthen;
   LLVMValueRef cond;
   LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];

   /* active lanes = executing and not out of bounds */
   LLVMValueRef should_store_mask =
      LLVMBuildAnd(gallivm->builder, exec_mask,
                   LLVMBuildNot(gallivm->builder, out_of_bounds, ""),
                   "store_mask");
   assert(exec_mask);

   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask,
                        lp_build_const_int_vec(gallivm, type, 0), "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond,
                                  loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   LLVMValueRef data =
      LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
   LLVMValueRef cast_base_ptr =
      LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
   cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
                                    LLVMPointerType(ref_type, 0), "");
   data = LLVMBuildBitCast(gallivm->builder, data,
                           ref_type, "");

   if (img_op == LP_IMG_ATOMIC_CAS) {
      LLVMValueRef cas_src_ptr =
         LLVMBuildExtractElement(gallivm->builder, packed2,
                                 loop_state.counter, "");
      LLVMValueRef cas_src =
         LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
                          ref_type, "");
      data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
                                    cas_src,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    false);
      /* cmpxchg returns { old-value, success } - keep the old value */
      data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
   } else {
      data = LLVMBuildAtomicRMW(gallivm->builder, op,
                                cast_base_ptr, data,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                false);
   }

   /* insert this lane's old value into the accumulator slot */
   LLVMValueRef temp_res =
      LLVMBuildLoad2(gallivm->builder, atom_res_elem_type, atom_res, "");
   temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data,
                                     loop_state.counter, "");
   LLVMBuildStore(gallivm->builder, temp_res, atom_res);

   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state,
                          lp_build_const_int32(gallivm, type.length),
                          NULL, LLVMIntUGE);
   atomic_result[0] = LLVMBuildLoad2(gallivm->builder, atom_res_elem_type,
                                     atom_res, "");
}
4620
4621
4622 static void
lp_build_img_op_no_format(struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef outdata[4])4623 lp_build_img_op_no_format(struct gallivm_state *gallivm,
4624 const struct lp_img_params *params,
4625 LLVMValueRef outdata[4])
4626 {
4627 /*
4628 * If there's nothing bound, format is NONE, and we must return
4629 * all zero as mandated by d3d10 in this case.
4630 */
4631 if (params->img_op != LP_IMG_STORE) {
4632 LLVMValueRef zero = lp_build_zero(gallivm, params->type);
4633 for (unsigned chan = 0; chan < (params->img_op == LP_IMG_LOAD ? 4 : 1);
4634 chan++) {
4635 outdata[chan] = zero;
4636 }
4637 }
4638 }
4639
4640
/**
 * Build code for an image load/store/atomic operation.
 *
 * Computes the per-lane texel offset (linear or tiled layout), applies
 * out-of-bounds masking, then dispatches to the fetch, store, or atomic
 * path.  Results (loads: 4 channels, atomics: 1, sparse loads: resident
 * flag in outdata[4]) are written to outdata; stores consume
 * params->indata instead.
 */
void
lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
                    struct lp_sampler_dynamic_state *dynamic_state,
                    struct gallivm_state *gallivm,
                    const struct lp_img_params *params,
                    LLVMValueRef *outdata)
{
   const enum pipe_texture_target target = params->target;
   const unsigned dims = texture_dims(target);
   const struct util_format_description *format_desc =
      util_format_description(static_texture_state->format);
   /* block layout of the underlying resource (may differ from the view) */
   const struct util_format_description *res_format_desc =
      util_format_description(static_texture_state->res_format);
   LLVMValueRef x = params->coords[0], y = params->coords[1],
      z = params->coords[2];
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;

   /** regular scalar int type */
   struct lp_type int_coord_type = lp_uint_type(params->type);
   struct lp_build_context int_coord_bld;
   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);

   /* unbound image: return zeros / drop stores as d3d10 requires */
   if (static_texture_state->format == PIPE_FORMAT_NONE) {
      lp_build_img_op_no_format(gallivm, params, outdata);
      return;

   }

   LLVMValueRef row_stride = dynamic_state->row_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef img_stride = dynamic_state->img_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef base_ptr = dynamic_state->base_ptr(gallivm,
                                                   params->resources_type,
                                                   params->resources_ptr,
                                                   params->image_index, NULL);
   LLVMValueRef width = dynamic_state->width(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   LLVMValueRef height = dynamic_state->height(gallivm,
                                               params->resources_type,
                                               params->resources_ptr,
                                               params->image_index, NULL);
   height = LLVMBuildZExt(gallivm->builder, height,
                          int_coord_bld.elem_type, "");
   LLVMValueRef depth = dynamic_state->depth(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   depth = LLVMBuildZExt(gallivm->builder, depth,
                         int_coord_bld.elem_type, "");
   bool layer_coord = has_layer_coord(target);

   /* rescale view dims for resource/view block-size mismatches, then
    * broadcast the scalar sizes/strides out to coordinate vectors */
   width = lp_build_scale_view_dim(gallivm, width, res_format_desc->block.width,
                                   format_desc->block.width);
   width = lp_build_broadcast_scalar(&int_coord_bld, width);
   if (dims >= 2) {
      height = lp_build_scale_view_dim(gallivm, height, res_format_desc->block.height,
                                       format_desc->block.height);
      height = lp_build_broadcast_scalar(&int_coord_bld, height);
      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
   }
   if (dims >= 3 || layer_coord) {
      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
   }

   /* accumulate an out-of-bounds mask over each used coordinate */
   LLVMValueRef out_of_bounds = int_coord_bld.zero;
   LLVMValueRef out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }
   if (dims >= 3 || layer_coord) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }

   /* per-lane byte offset plus subtexel coords for compressed formats */
   LLVMValueRef offset, i, j;
   if (static_texture_state->tiled) {
      lp_build_tiled_sample_offset(&int_coord_bld,
                                   format_desc->format,
                                   static_texture_state,
                                   x, y, z, width, height, img_stride_vec,
                                   &offset, &i, &j);
   } else {
      lp_build_sample_offset(&int_coord_bld,
                             format_desc,
                             x, y, z, row_stride_vec, img_stride_vec,
                             &offset, &i, &j);
   }

   if (params->ms_index && static_texture_state->level_zero_only) {
      /* NOTE(review): last_level is reused to store the sample count for
       * MS resources here — confirm against the dynamic state layout. */
      LLVMValueRef num_samples = dynamic_state->last_level(gallivm,
                                                           params->resources_type,
                                                           params->resources_ptr,
                                                           params->image_index, NULL);
      num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                  int_coord_bld.elem_type, "");
      LLVMValueRef sample_stride = dynamic_state->sample_stride(gallivm,
                                                                params->resources_type,
                                                                params->resources_ptr,
                                                                params->image_index, NULL);
      lp_build_sample_ms_offset(&int_coord_bld,
                                params->ms_index, num_samples,
                                sample_stride, &offset,
                                &out_of_bounds);
   }
   if (params->img_op == LP_IMG_LOAD || params->img_op == LP_IMG_LOAD_SPARSE) {
      struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);

      if (params->img_op == LP_IMG_LOAD_SPARSE && static_texture_state->tiled) {
         /* sparse residency: outdata[4] receives the resident mask */
         LLVMValueRef base_offset =
            dynamic_state->base_offset(gallivm, params->resources_type,
                                       params->resources_ptr, params->image_index, NULL);
         base_offset = lp_build_broadcast_scalar(&int_coord_bld, base_offset);

         LLVMValueRef full_offset = LLVMBuildAdd(gallivm->builder, base_offset, offset, "");

         lp_build_gather_resident(&int_coord_bld, dynamic_state,
                                  params->resources_type, params->resources_ptr,
                                  full_offset, &outdata[4]);
      }

      /* clamp out-of-bounds lanes to offset 0 so the fetch stays in memory */
      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
      struct lp_build_context texel_bld;
      lp_build_context_init(&texel_bld, gallivm, texel_type);
      lp_build_fetch_rgba_soa(gallivm,
                              format_desc,
                              texel_type, true,
                              base_ptr, offset,
                              i, j,
                              NULL,
                              outdata);

      /* out-of-bounds loads return (0,0,0,0) — or (0,0,0,1) if the
       * format has no alpha channel */
      for (unsigned chan = 0; chan < 3; chan++) {
         outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
                                         texel_bld.zero, outdata[chan]);
      }
      if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.one, outdata[3]);
      } else {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.zero, outdata[3]);
      }
   } else if (params->img_op == LP_IMG_STORE) {
      lp_build_store_rgba_soa(gallivm, format_desc, params->type,
                              params->exec_mask, base_ptr, offset,
                              out_of_bounds, params->indata);
   } else {
      lp_build_do_atomic_soa(gallivm, format_desc, params->type,
                             params->exec_mask, base_ptr, offset,
                             out_of_bounds, params->img_op, params->op,
                             params->indata, params->indata2, outdata);
   }
}
4805
4806
4807 /*
 * These functions are for indirect texture access support.
4809 *
4810 * Indirect textures are implemented using a switch statement, that
4811 * takes the texture index and jumps to the sampler functions for
4812 * that texture unit.
4813 */
4814
4815 /*
4816 * Initialise an indexed sampler switch block.
4817 *
4818 * This sets up the switch_info state and adds the LLVM flow control pieces.
4819 */
4820 void
lp_build_sample_array_init_soa(struct lp_build_sample_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_sampler_params * params,LLVMValueRef idx,unsigned base,unsigned range)4821 lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
4822 struct gallivm_state *gallivm,
4823 const struct lp_sampler_params *params,
4824 LLVMValueRef idx,
4825 unsigned base, unsigned range)
4826 {
4827 switch_info->gallivm = gallivm;
4828 switch_info->params = *params;
4829 switch_info->base = base;
4830 switch_info->range = range;
4831
4832 /* for generating the switch functions we don't want the texture index
4833 * offset
4834 */
4835 switch_info->params.texture_index_offset = 0;
4836
4837 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
4838 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");
4839
4840 switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
4841 switch_info->merge_ref,
4842 range - base);
4843
4844 LLVMTypeRef val_type[4];
4845 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
4846 lp_build_vec_type(gallivm, params->type);
4847
4848 LLVMTypeRef ret_type =
4849 LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
4850
4851 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
4852
4853 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
4854
4855 switch_info->phi = LLVMBuildPhi(gallivm->builder, ret_type, "");
4856 LLVMAddIncoming(switch_info->phi, &undef_val, &initial_block, 1);
4857 }
4858
4859
4860 /*
4861 * Add an individual entry to the indirect texture switch.
4862 *
4863 * This builds the sample function and links a case for it into the switch
4864 * statement.
4865 */
4866 void
lp_build_sample_array_case_soa(struct lp_build_sample_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_texture_state)4867 lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
4868 int idx,
4869 const struct lp_static_texture_state *static_texture_state,
4870 const struct lp_static_sampler_state *static_sampler_state,
4871 struct lp_sampler_dynamic_state *dynamic_texture_state)
4872 {
4873 struct gallivm_state *gallivm = switch_info->gallivm;
4874 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "texblock");
4875
4876 LLVMAddCase(switch_info->switch_ref,
4877 LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0),
4878 this_block);
4879 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
4880
4881 LLVMValueRef tex_ret;
4882 lp_build_sample_soa_func(gallivm, static_texture_state,
4883 static_sampler_state, dynamic_texture_state,
4884 &switch_info->params, idx, idx, &tex_ret);
4885
4886 LLVMAddIncoming(switch_info->phi, &tex_ret, &this_block, 1);
4887 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
4888 }
4889
4890
4891 /*
4892 * Finish a switch statement.
4893 *
 * This handles extracting the results from the switch.
4895 */
4896 void
lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch * switch_info)4897 lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
4898 {
4899 struct gallivm_state *gallivm = switch_info->gallivm;
4900
4901 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
4902 for (unsigned i = 0; i < 4; i++) {
4903 switch_info->params.texel[i] =
4904 LLVMBuildExtractValue(gallivm->builder, switch_info->phi, i, "");
4905 }
4906 }
4907
4908
4909 void
lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef idx,unsigned base,unsigned range)4910 lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch *switch_info,
4911 struct gallivm_state *gallivm,
4912 const struct lp_img_params *params,
4913 LLVMValueRef idx,
4914 unsigned base, unsigned range)
4915 {
4916 switch_info->gallivm = gallivm;
4917 switch_info->params = *params;
4918 switch_info->base = base;
4919 switch_info->range = range;
4920
4921 /* for generating the switch functions we don't want the texture index
4922 * offset
4923 */
4924 switch_info->params.image_index_offset = 0;
4925
4926 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
4927 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "imgmerge");
4928
4929 switch_info->switch_ref =
4930 LLVMBuildSwitch(gallivm->builder, idx,
4931 switch_info->merge_ref, range - base);
4932
4933 if (params->img_op != LP_IMG_STORE) {
4934 LLVMTypeRef ret_type = lp_build_vec_type(gallivm, params->type);
4935 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
4936
4937 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
4938
4939 for (unsigned i = 0; i < ((params->img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
4940 switch_info->phi[i] = LLVMBuildPhi(gallivm->builder, ret_type, "");
4941 LLVMAddIncoming(switch_info->phi[i], &undef_val, &initial_block, 1);
4942 }
4943 }
4944 }
4945
4946
4947 void
lp_build_image_op_array_case(struct lp_build_img_op_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,struct lp_sampler_dynamic_state * dynamic_state)4948 lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
4949 int idx,
4950 const struct lp_static_texture_state *static_texture_state,
4951 struct lp_sampler_dynamic_state *dynamic_state)
4952 {
4953 struct gallivm_state *gallivm = switch_info->gallivm;
4954 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "img");
4955 LLVMValueRef tex_ret[4];
4956
4957 LLVMAddCase(switch_info->switch_ref,
4958 lp_build_const_int32(gallivm, idx), this_block);
4959 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
4960
4961 switch_info->params.image_index = idx;
4962
4963 lp_build_img_op_soa(static_texture_state, dynamic_state,
4964 switch_info->gallivm, &switch_info->params, tex_ret);
4965
4966 if (switch_info->params.img_op != LP_IMG_STORE) {
4967 for (unsigned i = 0;
4968 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
4969 tex_ret[i] =
4970 LLVMBuildBitCast(gallivm->builder, tex_ret[i],
4971 lp_build_vec_type(gallivm,
4972 switch_info->params.type), "");
4973 }
4974
4975 this_block = LLVMGetInsertBlock(gallivm->builder);
4976 for (unsigned i = 0;
4977 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
4978 LLVMAddIncoming(switch_info->phi[i], &tex_ret[i], &this_block, 1);
4979 }
4980 }
4981 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
4982 }
4983
4984
4985 void
lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch * switch_info)4986 lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info)
4987 {
4988 struct gallivm_state *gallivm = switch_info->gallivm;
4989
4990 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
4991
4992 if (switch_info->params.img_op != LP_IMG_STORE) {
4993 for (unsigned i = 0;
4994 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
4995 switch_info->params.outdata[i] = switch_info->phi[i];
4996 }
4997 }
4998 }
4999