1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 * + texture projector lowering: converts the coordinate division for
28 * texture projection to be done in ALU instructions instead of
29 * asking the texture operation to do so.
30 * + lowering RECT: converts the un-normalized RECT texture coordinates
31 * to normalized coordinates with txs plus ALU instructions
32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 * inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 * Note that this automatically triggers texture projector lowering if
35 * needed, since clamping must happen after projector lowering.
36 */
37
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_builtin_builder.h"
41 #include "nir_format_convert.h"
42
/*
 * YUV -> RGB conversion matrices consumed by convert_yuv_to_rgb().  Each
 * table is three groups of three weights: the first group is multiplied by
 * Y, the second by U and the third by V, and within a group the entries feed
 * the first, second and third result channels respectively.
 */
static float bt601_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.39176229f, 2.01723214f,
   /* V */ 1.59602678f, -0.81296764f, 0.0f,
};

static float bt709_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.21324861f, 2.11240179f,
   /* V */ 1.79274107f, -0.53290933f, 0.0f,
};

static float bt2020_csc_coeffs[9] = {
   /* Y */ 1.16438356f,  1.16438356f, 1.16438356f,
   /* U */ 0.0f,        -0.18732610f, 2.14177232f,
   /* V */ 1.67867411f, -0.65042432f, 0.0f,
};

/*
 * Constant terms added to the first three result channels after the matrix
 * multiply, one table per colour standard.
 */
static float bt601_csc_offsets[3] = {
   -0.874202218f,
   0.531667823f,
   -1.085630789f,
};

static float bt709_csc_offsets[3] = {
   -0.972945075f,
   0.301482665f,
   -1.133402218f,
};

static float bt2020_csc_offsets[3] = {
   -0.915687932f,
   0.347458499f,
   -1.148145075f,
};
68
69 static bool
project_src(nir_builder * b,nir_tex_instr * tex)70 project_src(nir_builder *b, nir_tex_instr *tex)
71 {
72 /* Find the projector in the srcs list, if present. */
73 int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
74 if (proj_index < 0)
75 return false;
76
77 b->cursor = nir_before_instr(&tex->instr);
78
79 nir_ssa_def *inv_proj =
80 nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
81
82 /* Walk through the sources projecting the arguments. */
83 for (unsigned i = 0; i < tex->num_srcs; i++) {
84 switch (tex->src[i].src_type) {
85 case nir_tex_src_coord:
86 case nir_tex_src_comparator:
87 break;
88 default:
89 continue;
90 }
91 nir_ssa_def *unprojected =
92 nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
93 nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
94
95 /* Array indices don't get projected, so make an new vector with the
96 * coordinate's array index untouched.
97 */
98 if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
99 switch (tex->coord_components) {
100 case 4:
101 projected = nir_vec4(b,
102 nir_channel(b, projected, 0),
103 nir_channel(b, projected, 1),
104 nir_channel(b, projected, 2),
105 nir_channel(b, unprojected, 3));
106 break;
107 case 3:
108 projected = nir_vec3(b,
109 nir_channel(b, projected, 0),
110 nir_channel(b, projected, 1),
111 nir_channel(b, unprojected, 2));
112 break;
113 case 2:
114 projected = nir_vec2(b,
115 nir_channel(b, projected, 0),
116 nir_channel(b, unprojected, 1));
117 break;
118 default:
119 unreachable("bad texture coord count for array");
120 break;
121 }
122 }
123
124 nir_instr_rewrite_src(&tex->instr,
125 &tex->src[i].src,
126 nir_src_for_ssa(projected));
127 }
128
129 nir_tex_instr_remove_src(tex, proj_index);
130 return true;
131 }
132
133 static bool
lower_offset(nir_builder * b,nir_tex_instr * tex)134 lower_offset(nir_builder *b, nir_tex_instr *tex)
135 {
136 int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
137 if (offset_index < 0)
138 return false;
139
140 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
141 assert(coord_index >= 0);
142
143 assert(tex->src[offset_index].src.is_ssa);
144 assert(tex->src[coord_index].src.is_ssa);
145 nir_ssa_def *offset = tex->src[offset_index].src.ssa;
146 nir_ssa_def *coord = tex->src[coord_index].src.ssa;
147
148 b->cursor = nir_before_instr(&tex->instr);
149
150 nir_ssa_def *offset_coord;
151 if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
152 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
153 offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
154 } else {
155 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
156 nir_ssa_def *scale = nir_frcp(b, txs);
157
158 offset_coord = nir_fadd(b, coord,
159 nir_fmul(b,
160 nir_i2f32(b, offset),
161 scale));
162 }
163 } else {
164 offset_coord = nir_iadd(b, coord, offset);
165 }
166
167 if (tex->is_array) {
168 /* The offset is not applied to the array index */
169 if (tex->coord_components == 2) {
170 offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
171 nir_channel(b, coord, 1));
172 } else if (tex->coord_components == 3) {
173 offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
174 nir_channel(b, offset_coord, 1),
175 nir_channel(b, coord, 2));
176 } else {
177 unreachable("Invalid number of components");
178 }
179 }
180
181 nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
182 nir_src_for_ssa(offset_coord));
183
184 nir_tex_instr_remove_src(tex, offset_index);
185
186 return true;
187 }
188
189 static void
lower_rect(nir_builder * b,nir_tex_instr * tex)190 lower_rect(nir_builder *b, nir_tex_instr *tex)
191 {
192 /* Set the sampler_dim to 2D here so that get_texture_size picks up the
193 * right dimensionality.
194 */
195 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
196
197 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
198 nir_ssa_def *scale = nir_frcp(b, txs);
199
200 /* Walk through the sources normalizing the requested arguments. */
201 for (unsigned i = 0; i < tex->num_srcs; i++) {
202 if (tex->src[i].src_type != nir_tex_src_coord)
203 continue;
204
205 nir_ssa_def *coords =
206 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
207 nir_instr_rewrite_src(&tex->instr,
208 &tex->src[i].src,
209 nir_src_for_ssa(nir_fmul(b, coords, scale)));
210 }
211 }
212
213 static void
lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)214 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
215 {
216 assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
217 assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
218 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
219 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
220
221 b->cursor = nir_before_instr(&tex->instr);
222
223 nir_ssa_def *lod = nir_get_texture_lod(b, tex);
224
225 int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
226 if (bias_idx >= 0) {
227 /* If we have a bias, add it in */
228 lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
229 nir_tex_instr_remove_src(tex, bias_idx);
230 }
231
232 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
233 if (min_lod_idx >= 0) {
234 /* If we have a minimum LOD, clamp LOD accordingly */
235 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
236 nir_tex_instr_remove_src(tex, min_lod_idx);
237 }
238
239 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
240 tex->op = nir_texop_txl;
241 }
242
243 static nir_ssa_def *
sample_plane(nir_builder * b,nir_tex_instr * tex,int plane,const nir_lower_tex_options * options)244 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
245 const nir_lower_tex_options *options)
246 {
247 assert(tex->dest.is_ssa);
248 assert(nir_tex_instr_dest_size(tex) == 4);
249 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
250 assert(tex->op == nir_texop_tex);
251 assert(tex->coord_components == 2);
252
253 nir_tex_instr *plane_tex =
254 nir_tex_instr_create(b->shader, tex->num_srcs + 1);
255 for (unsigned i = 0; i < tex->num_srcs; i++) {
256 nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src, plane_tex);
257 plane_tex->src[i].src_type = tex->src[i].src_type;
258 }
259 plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
260 plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
261 plane_tex->op = nir_texop_tex;
262 plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
263 plane_tex->dest_type = nir_type_float;
264 plane_tex->coord_components = 2;
265
266 plane_tex->texture_index = tex->texture_index;
267 plane_tex->sampler_index = tex->sampler_index;
268
269 nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
270 nir_dest_bit_size(tex->dest), NULL);
271
272 nir_builder_instr_insert(b, &plane_tex->instr);
273
274 /* If scaling_factor is set, return a scaled value. */
275 if (options->scale_factors[tex->texture_index])
276 return nir_fmul_imm(b, &plane_tex->dest.ssa,
277 options->scale_factors[tex->texture_index]);
278
279 return &plane_tex->dest.ssa;
280 }
281
282 static void
convert_yuv_to_rgb(nir_builder * b,nir_tex_instr * tex,nir_ssa_def * y,nir_ssa_def * u,nir_ssa_def * v,nir_ssa_def * a,const nir_lower_tex_options * options)283 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
284 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
285 nir_ssa_def *a,
286 const nir_lower_tex_options *options)
287 {
288
289 float *offset_vals;
290 float *m_vals;
291 assert((options->bt709_external & options->bt2020_external) == 0);
292 if (options->bt709_external & (1 << tex->texture_index)) {
293 m_vals = bt709_csc_coeffs;
294 offset_vals = bt709_csc_offsets;
295 } else if (options->bt2020_external & (1 << tex->texture_index)) {
296 m_vals = bt2020_csc_coeffs;
297 offset_vals = bt2020_csc_offsets;
298 } else {
299 m_vals = bt601_csc_coeffs;
300 offset_vals = bt601_csc_offsets;
301 }
302
303 nir_const_value m[3][4] = {
304 { { .f32 = m_vals[0] }, { .f32 = m_vals[1] }, { .f32 = m_vals[2] }, { .f32 = 0.0f } },
305 { { .f32 = m_vals[3] }, { .f32 = m_vals[4] }, { .f32 = m_vals[5] }, { .f32 = 0.0f } },
306 { { .f32 = m_vals[6] }, { .f32 = m_vals[7] }, { .f32 = m_vals[8] }, { .f32 = 0.0f } },
307 };
308 unsigned bit_size = nir_dest_bit_size(tex->dest);
309
310 nir_ssa_def *offset =
311 nir_vec4(b,
312 nir_imm_float(b, offset_vals[0]),
313 nir_imm_float(b, offset_vals[1]),
314 nir_imm_float(b, offset_vals[2]),
315 a);
316
317 offset = nir_f2fN(b, offset, bit_size);
318
319 nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[0]), bit_size);
320 nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[1]), bit_size);
321 nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m[2]), bit_size);
322
323 nir_ssa_def *result =
324 nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));
325
326 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
327 }
328
329 static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)330 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
331 const nir_lower_tex_options *options)
332 {
333 b->cursor = nir_after_instr(&tex->instr);
334
335 nir_ssa_def *y = sample_plane(b, tex, 0, options);
336 nir_ssa_def *uv = sample_plane(b, tex, 1, options);
337
338 convert_yuv_to_rgb(b, tex,
339 nir_channel(b, y, 0),
340 nir_channel(b, uv, 0),
341 nir_channel(b, uv, 1),
342 nir_imm_float(b, 1.0f),
343 options);
344 }
345
346 static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)347 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
348 const nir_lower_tex_options *options)
349 {
350 b->cursor = nir_after_instr(&tex->instr);
351
352 nir_ssa_def *y = sample_plane(b, tex, 0, options);
353 nir_ssa_def *u = sample_plane(b, tex, 1, options);
354 nir_ssa_def *v = sample_plane(b, tex, 2, options);
355
356 convert_yuv_to_rgb(b, tex,
357 nir_channel(b, y, 0),
358 nir_channel(b, u, 0),
359 nir_channel(b, v, 0),
360 nir_imm_float(b, 1.0f),
361 options);
362 }
363
364 static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)365 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
366 const nir_lower_tex_options *options)
367 {
368 b->cursor = nir_after_instr(&tex->instr);
369
370 nir_ssa_def *y = sample_plane(b, tex, 0, options);
371 nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
372
373 convert_yuv_to_rgb(b, tex,
374 nir_channel(b, y, 0),
375 nir_channel(b, xuxv, 1),
376 nir_channel(b, xuxv, 3),
377 nir_imm_float(b, 1.0f),
378 options);
379 }
380
381 static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)382 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
383 const nir_lower_tex_options *options)
384 {
385 b->cursor = nir_after_instr(&tex->instr);
386
387 nir_ssa_def *y = sample_plane(b, tex, 0, options);
388 nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
389
390 convert_yuv_to_rgb(b, tex,
391 nir_channel(b, y, 1),
392 nir_channel(b, uxvx, 0),
393 nir_channel(b, uxvx, 2),
394 nir_imm_float(b, 1.0f),
395 options);
396 }
397
398 static void
lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)399 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
400 const nir_lower_tex_options *options)
401 {
402 b->cursor = nir_after_instr(&tex->instr);
403
404 nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
405
406 convert_yuv_to_rgb(b, tex,
407 nir_channel(b, ayuv, 2),
408 nir_channel(b, ayuv, 1),
409 nir_channel(b, ayuv, 0),
410 nir_channel(b, ayuv, 3),
411 options);
412 }
413
414 static void
lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)415 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
416 const nir_lower_tex_options *options)
417 {
418 b->cursor = nir_after_instr(&tex->instr);
419
420 nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
421
422 convert_yuv_to_rgb(b, tex,
423 nir_channel(b, xyuv, 2),
424 nir_channel(b, xyuv, 1),
425 nir_channel(b, xyuv, 0),
426 nir_imm_float(b, 1.0f),
427 options);
428 }
429
430 static void
lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)431 lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
432 const nir_lower_tex_options *options)
433 {
434 b->cursor = nir_after_instr(&tex->instr);
435
436 nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
437
438 convert_yuv_to_rgb(b, tex,
439 nir_channel(b, yuv, 0),
440 nir_channel(b, yuv, 1),
441 nir_channel(b, yuv, 2),
442 nir_imm_float(b, 1.0f),
443 options);
444 }
445
446 /*
447 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
448 * computed from the gradients.
449 */
450 static void
replace_gradient_with_lod(nir_builder * b,nir_ssa_def * lod,nir_tex_instr * tex)451 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
452 {
453 assert(tex->op == nir_texop_txd);
454
455 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
456 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
457
458 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
459 if (min_lod_idx >= 0) {
460 /* If we have a minimum LOD, clamp LOD accordingly */
461 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
462 nir_tex_instr_remove_src(tex, min_lod_idx);
463 }
464
465 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
466 tex->op = nir_texop_txl;
467 }
468
469 static void
lower_gradient_cube_map(nir_builder * b,nir_tex_instr * tex)470 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
471 {
472 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
473 assert(tex->op == nir_texop_txd);
474 assert(tex->dest.is_ssa);
475
476 /* Use textureSize() to get the width and height of LOD 0 */
477 nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
478
479 /* Cubemap texture lookups first generate a texture coordinate normalized
480 * to [-1, 1] on the appropiate face. The appropiate face is determined
481 * by which component has largest magnitude and its sign. The texture
482 * coordinate is the quotient of the remaining texture coordinates against
483 * that absolute value of the component of largest magnitude. This
484 * division requires that the computing of the derivative of the texel
485 * coordinate must use the quotient rule. The high level GLSL code is as
486 * follows:
487 *
488 * Step 1: selection
489 *
490 * vec3 abs_p, Q, dQdx, dQdy;
491 * abs_p = abs(ir->coordinate);
492 * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
493 * Q = ir->coordinate.yzx;
494 * dQdx = ir->lod_info.grad.dPdx.yzx;
495 * dQdy = ir->lod_info.grad.dPdy.yzx;
496 * }
497 * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
498 * Q = ir->coordinate.xzy;
499 * dQdx = ir->lod_info.grad.dPdx.xzy;
500 * dQdy = ir->lod_info.grad.dPdy.xzy;
501 * }
502 * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
503 * Q = ir->coordinate;
504 * dQdx = ir->lod_info.grad.dPdx;
505 * dQdy = ir->lod_info.grad.dPdy;
506 * }
507 *
508 * Step 2: use quotient rule to compute derivative. The normalized to
509 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
510 * only concerned with the magnitudes of the derivatives whose values are
511 * not affected by the sign. We drop the sign from the computation.
512 *
513 * vec2 dx, dy;
514 * float recip;
515 *
516 * recip = 1.0 / Q.z;
517 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
518 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
519 *
520 * Step 3: compute LOD. At this point we have the derivatives of the
521 * texture coordinates normalized to [-1,1]. We take the LOD to be
522 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
523 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
524 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
525 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
526 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
527 * where L is the dimension of the cubemap. The code is:
528 *
529 * float M, result;
530 * M = max(dot(dx, dx), dot(dy, dy));
531 * L = textureSize(sampler, 0).x;
532 * result = -1.0 + 0.5 * log2(L * L * M);
533 */
534
535 /* coordinate */
536 nir_ssa_def *p =
537 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
538
539 /* unmodified dPdx, dPdy values */
540 nir_ssa_def *dPdx =
541 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
542 nir_ssa_def *dPdy =
543 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
544
545 nir_ssa_def *abs_p = nir_fabs(b, p);
546 nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
547 nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
548 nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
549
550 /* 1. compute selector */
551 nir_ssa_def *Q, *dQdx, *dQdy;
552
553 nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
554 nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
555
556 unsigned yzx[3] = { 1, 2, 0 };
557 unsigned xzy[3] = { 0, 2, 1 };
558
559 Q = nir_bcsel(b, cond_z,
560 p,
561 nir_bcsel(b, cond_y,
562 nir_swizzle(b, p, xzy, 3),
563 nir_swizzle(b, p, yzx, 3)));
564
565 dQdx = nir_bcsel(b, cond_z,
566 dPdx,
567 nir_bcsel(b, cond_y,
568 nir_swizzle(b, dPdx, xzy, 3),
569 nir_swizzle(b, dPdx, yzx, 3)));
570
571 dQdy = nir_bcsel(b, cond_z,
572 dPdy,
573 nir_bcsel(b, cond_y,
574 nir_swizzle(b, dPdy, xzy, 3),
575 nir_swizzle(b, dPdy, yzx, 3)));
576
577 /* 2. quotient rule */
578
579 /* tmp = Q.xy * recip;
580 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
581 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
582 */
583 nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
584
585 nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
586 nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
587
588 nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
589 nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
590 nir_ssa_def *dx =
591 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
592
593 nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
594 nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
595 nir_ssa_def *dy =
596 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
597
598 /* M = max(dot(dx, dx), dot(dy, dy)); */
599 nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
600
601 /* size has textureSize() of LOD 0 */
602 nir_ssa_def *L = nir_channel(b, size, 0);
603
604 /* lod = -1.0 + 0.5 * log2(L * L * M); */
605 nir_ssa_def *lod =
606 nir_fadd(b,
607 nir_imm_float(b, -1.0f),
608 nir_fmul(b,
609 nir_imm_float(b, 0.5f),
610 nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
611
612 /* 3. Replace the gradient instruction with an equivalent lod instruction */
613 replace_gradient_with_lod(b, lod, tex);
614 }
615
616 static void
lower_gradient(nir_builder * b,nir_tex_instr * tex)617 lower_gradient(nir_builder *b, nir_tex_instr *tex)
618 {
619 /* Cubes are more complicated and have their own function */
620 if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
621 lower_gradient_cube_map(b, tex);
622 return;
623 }
624
625 assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
626 assert(tex->op == nir_texop_txd);
627 assert(tex->dest.is_ssa);
628
629 /* Use textureSize() to get the width and height of LOD 0 */
630 unsigned component_mask;
631 switch (tex->sampler_dim) {
632 case GLSL_SAMPLER_DIM_3D:
633 component_mask = 7;
634 break;
635 case GLSL_SAMPLER_DIM_1D:
636 component_mask = 1;
637 break;
638 default:
639 component_mask = 3;
640 break;
641 }
642
643 nir_ssa_def *size =
644 nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
645 component_mask);
646
647 /* Scale the gradients by width and height. Effectively, the incoming
648 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
649 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
650 */
651 nir_ssa_def *ddx =
652 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
653 nir_ssa_def *ddy =
654 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
655
656 nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
657 nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
658
659 nir_ssa_def *rho;
660 if (dPdx->num_components == 1) {
661 rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
662 } else {
663 rho = nir_fmax(b,
664 nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
665 nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
666 }
667
668 /* lod = log2(rho). We're ignoring GL state biases for now. */
669 nir_ssa_def *lod = nir_flog2(b, rho);
670
671 /* Replace the gradient instruction with an equivalent lod instruction */
672 replace_gradient_with_lod(b, lod, tex);
673 }
674
675 static void
saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)676 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
677 {
678 b->cursor = nir_before_instr(&tex->instr);
679
680 /* Walk through the sources saturating the requested arguments. */
681 for (unsigned i = 0; i < tex->num_srcs; i++) {
682 if (tex->src[i].src_type != nir_tex_src_coord)
683 continue;
684
685 nir_ssa_def *src =
686 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
687
688 /* split src into components: */
689 nir_ssa_def *comp[4];
690
691 assume(tex->coord_components >= 1);
692
693 for (unsigned j = 0; j < tex->coord_components; j++)
694 comp[j] = nir_channel(b, src, j);
695
696 /* clamp requested components, array index does not get clamped: */
697 unsigned ncomp = tex->coord_components;
698 if (tex->is_array)
699 ncomp--;
700
701 for (unsigned j = 0; j < ncomp; j++) {
702 if ((1 << j) & sat_mask) {
703 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
704 /* non-normalized texture coords, so clamp to texture
705 * size rather than [0.0, 1.0]
706 */
707 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
708 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
709 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
710 } else {
711 comp[j] = nir_fsat(b, comp[j]);
712 }
713 }
714 }
715
716 /* and move the result back into a single vecN: */
717 src = nir_vec(b, comp, tex->coord_components);
718
719 nir_instr_rewrite_src(&tex->instr,
720 &tex->src[i].src,
721 nir_src_for_ssa(src));
722 }
723 }
724
725 static nir_ssa_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)726 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
727 {
728 nir_const_value v[4];
729
730 memset(&v, 0, sizeof(v));
731
732 if (swizzle_val == 4) {
733 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
734 } else {
735 assert(swizzle_val == 5);
736 if (type == nir_type_float)
737 v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
738 else
739 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
740 }
741
742 return nir_build_imm(b, 4, 32, v);
743 }
744
745 static void
swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)746 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
747 {
748 assert(tex->dest.is_ssa);
749
750 b->cursor = nir_after_instr(&tex->instr);
751
752 assert(nir_tex_instr_dest_size(tex) == 4);
753 unsigned swiz[4] = { 2, 3, 1, 0 };
754 nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
755
756 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
757 swizzled->parent_instr);
758 }
759
760 static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])761 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
762 {
763 assert(tex->dest.is_ssa);
764
765 b->cursor = nir_after_instr(&tex->instr);
766
767 nir_ssa_def *swizzled;
768 if (tex->op == nir_texop_tg4) {
769 if (swizzle[tex->component] < 4) {
770 /* This one's easy */
771 tex->component = swizzle[tex->component];
772 return;
773 } else {
774 swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
775 }
776 } else {
777 assert(nir_tex_instr_dest_size(tex) == 4);
778 if (swizzle[0] < 4 && swizzle[1] < 4 &&
779 swizzle[2] < 4 && swizzle[3] < 4) {
780 unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
781 /* We have no 0s or 1s, just emit a swizzling MOV */
782 swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
783 } else {
784 nir_ssa_def *srcs[4];
785 for (unsigned i = 0; i < 4; i++) {
786 if (swizzle[i] < 4) {
787 srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
788 } else {
789 srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
790 }
791 }
792 swizzled = nir_vec(b, srcs, 4);
793 }
794 }
795
796 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
797 swizzled->parent_instr);
798 }
799
800 static void
linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)801 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
802 {
803 assert(tex->dest.is_ssa);
804 assert(nir_tex_instr_dest_size(tex) == 4);
805 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
806
807 b->cursor = nir_after_instr(&tex->instr);
808
809 nir_ssa_def *rgb =
810 nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
811
812 /* alpha is untouched: */
813 nir_ssa_def *result = nir_vec4(b,
814 nir_channel(b, rgb, 0),
815 nir_channel(b, rgb, 1),
816 nir_channel(b, rgb, 2),
817 nir_channel(b, &tex->dest.ssa, 3));
818
819 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
820 result->parent_instr);
821 }
822
823 /**
824 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
825 * i16, or u16, or a single unorm4x8 value.
826 *
827 * Note that we don't change the destination num_components, because
828 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
829 * to not store the other channels, given that nothing at the NIR level will
830 * read them.
831 */
832 static void
lower_tex_packing(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)833 lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
834 const nir_lower_tex_options *options)
835 {
836 nir_ssa_def *color = &tex->dest.ssa;
837
838 b->cursor = nir_after_instr(&tex->instr);
839
840 switch (options->lower_tex_packing[tex->sampler_index]) {
841 case nir_lower_tex_packing_none:
842 return;
843
844 case nir_lower_tex_packing_16: {
845 static const unsigned bits[4] = {16, 16, 16, 16};
846
847 switch (nir_alu_type_get_base_type(tex->dest_type)) {
848 case nir_type_float:
849 switch (nir_tex_instr_dest_size(tex)) {
850 case 1:
851 assert(tex->is_shadow && tex->is_new_style_shadow);
852 color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
853 break;
854 case 2: {
855 nir_ssa_def *rg = nir_channel(b, color, 0);
856 color = nir_vec2(b,
857 nir_unpack_half_2x16_split_x(b, rg),
858 nir_unpack_half_2x16_split_y(b, rg));
859 break;
860 }
861 case 4: {
862 nir_ssa_def *rg = nir_channel(b, color, 0);
863 nir_ssa_def *ba = nir_channel(b, color, 1);
864 color = nir_vec4(b,
865 nir_unpack_half_2x16_split_x(b, rg),
866 nir_unpack_half_2x16_split_y(b, rg),
867 nir_unpack_half_2x16_split_x(b, ba),
868 nir_unpack_half_2x16_split_y(b, ba));
869 break;
870 }
871 default:
872 unreachable("wrong dest_size");
873 }
874 break;
875
876 case nir_type_int:
877 color = nir_format_unpack_sint(b, color, bits, 4);
878 break;
879
880 case nir_type_uint:
881 color = nir_format_unpack_uint(b, color, bits, 4);
882 break;
883
884 default:
885 unreachable("unknown base type");
886 }
887 break;
888 }
889
890 case nir_lower_tex_packing_8:
891 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
892 color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
893 break;
894 }
895
896 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(color),
897 color->parent_instr);
898 }
899
900 static bool
sampler_index_lt(nir_tex_instr * tex,unsigned max)901 sampler_index_lt(nir_tex_instr *tex, unsigned max)
902 {
903 assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
904
905 unsigned sampler_index = tex->sampler_index;
906
907 int sampler_offset_idx =
908 nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
909 if (sampler_offset_idx >= 0) {
910 if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
911 return false;
912
913 sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
914 }
915
916 return sampler_index < max;
917 }
918
919 static bool
lower_tg4_offsets(nir_builder * b,nir_tex_instr * tex)920 lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
921 {
922 assert(tex->op == nir_texop_tg4);
923 assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
924 assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);
925
926 b->cursor = nir_after_instr(&tex->instr);
927
928 nir_ssa_def *dest[4];
929 for (unsigned i = 0; i < 4; ++i) {
930 nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
931 tex_copy->op = tex->op;
932 tex_copy->coord_components = tex->coord_components;
933 tex_copy->sampler_dim = tex->sampler_dim;
934 tex_copy->is_array = tex->is_array;
935 tex_copy->is_shadow = tex->is_shadow;
936 tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
937 tex_copy->component = tex->component;
938 tex_copy->dest_type = tex->dest_type;
939
940 for (unsigned j = 0; j < tex->num_srcs; ++j) {
941 nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src, tex_copy);
942 tex_copy->src[j].src_type = tex->src[j].src_type;
943 }
944
945 nir_tex_src src;
946 src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
947 tex->tg4_offsets[i][1]));
948 src.src_type = nir_tex_src_offset;
949 tex_copy->src[tex_copy->num_srcs - 1] = src;
950
951 nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
952 nir_tex_instr_dest_size(tex), 32, NULL);
953
954 nir_builder_instr_insert(b, &tex_copy->instr);
955
956 dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
957 }
958
959 nir_ssa_def *res = nir_vec4(b, dest[0], dest[1], dest[2], dest[3]);
960 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(res));
961 nir_instr_remove(&tex->instr);
962
963 return true;
964 }
965
966 static bool
nir_lower_txs_lod(nir_builder * b,nir_tex_instr * tex)967 nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
968 {
969 int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
970 if (lod_idx < 0 ||
971 (nir_src_is_const(tex->src[lod_idx].src) &&
972 nir_src_as_int(tex->src[lod_idx].src) == 0))
973 return false;
974
975 unsigned dest_size = nir_tex_instr_dest_size(tex);
976
977 b->cursor = nir_before_instr(&tex->instr);
978 nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);
979
980 /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
981 nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
982 nir_src_for_ssa(nir_imm_int(b, 0)));
983
984 /* TXS(LOD) = max(TXS(0) >> LOD, 1) */
985 b->cursor = nir_after_instr(&tex->instr);
986 nir_ssa_def *minified = nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
987 nir_imm_int(b, 1));
988
989 /* Make sure the component encoding the array size (if any) is not
990 * minified.
991 */
992 if (tex->is_array) {
993 nir_ssa_def *comp[3];
994
995 assert(dest_size <= ARRAY_SIZE(comp));
996 for (unsigned i = 0; i < dest_size - 1; i++)
997 comp[i] = nir_channel(b, minified, i);
998
999 comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
1000 minified = nir_vec(b, comp, dest_size);
1001 }
1002
1003 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(minified),
1004 minified->parent_instr);
1005 return true;
1006 }
1007
1008 static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options)1009 nir_lower_tex_block(nir_block *block, nir_builder *b,
1010 const nir_lower_tex_options *options)
1011 {
1012 bool progress = false;
1013
1014 nir_foreach_instr_safe(instr, block) {
1015 if (instr->type != nir_instr_type_tex)
1016 continue;
1017
1018 nir_tex_instr *tex = nir_instr_as_tex(instr);
1019 bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1020
1021 /* mask of src coords to saturate (clamp): */
1022 unsigned sat_mask = 0;
1023
1024 if ((1 << tex->sampler_index) & options->saturate_r)
1025 sat_mask |= (1 << 2); /* .z */
1026 if ((1 << tex->sampler_index) & options->saturate_t)
1027 sat_mask |= (1 << 1); /* .y */
1028 if ((1 << tex->sampler_index) & options->saturate_s)
1029 sat_mask |= (1 << 0); /* .x */
1030
1031 /* If we are clamping any coords, we must lower projector first
1032 * as clamping happens *after* projection:
1033 */
1034 if (lower_txp || sat_mask) {
1035 progress |= project_src(b, tex);
1036 }
1037
1038 if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1039 (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1040 (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1041 options->lower_rect_offset)) {
1042 progress = lower_offset(b, tex) || progress;
1043 }
1044
1045 if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1046 tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
1047 lower_rect(b, tex);
1048 progress = true;
1049 }
1050
1051 if ((1 << tex->texture_index) & options->lower_y_uv_external) {
1052 lower_y_uv_external(b, tex, options);
1053 progress = true;
1054 }
1055
1056 if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
1057 lower_y_u_v_external(b, tex, options);
1058 progress = true;
1059 }
1060
1061 if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
1062 lower_yx_xuxv_external(b, tex, options);
1063 progress = true;
1064 }
1065
1066 if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
1067 lower_xy_uxvx_external(b, tex, options);
1068 progress = true;
1069 }
1070
1071 if ((1 << tex->texture_index) & options->lower_ayuv_external) {
1072 lower_ayuv_external(b, tex, options);
1073 progress = true;
1074 }
1075
1076 if ((1 << tex->texture_index) & options->lower_xyuv_external) {
1077 lower_xyuv_external(b, tex, options);
1078 progress = true;
1079 }
1080
1081 if ((1 << tex->texture_index) & options->lower_yuv_external) {
1082 lower_yuv_external(b, tex, options);
1083 progress = true;
1084 }
1085
1086 if (sat_mask) {
1087 saturate_src(b, tex, sat_mask);
1088 progress = true;
1089 }
1090
1091 if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1092 swizzle_tg4_broadcom(b, tex);
1093 progress = true;
1094 }
1095
1096 if (((1 << tex->texture_index) & options->swizzle_result) &&
1097 !nir_tex_instr_is_query(tex) &&
1098 !(tex->is_shadow && tex->is_new_style_shadow)) {
1099 swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1100 progress = true;
1101 }
1102
1103 /* should be after swizzle so we know which channels are rgb: */
1104 if (((1 << tex->texture_index) & options->lower_srgb) &&
1105 !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1106 linearize_srgb_result(b, tex);
1107 progress = true;
1108 }
1109
1110 const bool has_min_lod =
1111 nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1112 const bool has_offset =
1113 nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1114
1115 if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1116 options->lower_txb_shadow_clamp) {
1117 lower_implicit_lod(b, tex);
1118 progress = true;
1119 }
1120
1121 if (options->lower_tex_packing[tex->sampler_index] !=
1122 nir_lower_tex_packing_none &&
1123 tex->op != nir_texop_txs &&
1124 tex->op != nir_texop_query_levels &&
1125 tex->op != nir_texop_texture_samples) {
1126 lower_tex_packing(b, tex, options);
1127 progress = true;
1128 }
1129
1130 if (tex->op == nir_texop_txd &&
1131 (options->lower_txd ||
1132 (options->lower_txd_shadow && tex->is_shadow) ||
1133 (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1134 (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1135 (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1136 nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1137 (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1138 has_min_lod && !sampler_index_lt(tex, 16)) ||
1139 (options->lower_txd_cube_map &&
1140 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1141 (options->lower_txd_3d &&
1142 tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1143 lower_gradient(b, tex);
1144 progress = true;
1145 continue;
1146 }
1147
1148 bool shader_supports_implicit_lod =
1149 b->shader->info.stage == MESA_SHADER_FRAGMENT ||
1150 (b->shader->info.stage == MESA_SHADER_COMPUTE &&
1151 b->shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);
1152
1153 /* TXF, TXS and TXL require a LOD but not everything we implement using those
1154 * three opcodes provides one. Provide a default LOD of 0.
1155 */
1156 if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1157 (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1158 tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
1159 (tex->op == nir_texop_tex && !shader_supports_implicit_lod))) {
1160 b->cursor = nir_before_instr(&tex->instr);
1161 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1162 if (tex->op == nir_texop_tex && options->lower_tex_without_implicit_lod)
1163 tex->op = nir_texop_txl;
1164 progress = true;
1165 continue;
1166 }
1167
1168 if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1169 progress |= nir_lower_txs_lod(b, tex);
1170 continue;
1171 }
1172
1173 /* has to happen after all the other lowerings as the original tg4 gets
1174 * replaced by 4 tg4 instructions.
1175 */
1176 if (tex->op == nir_texop_tg4 &&
1177 nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1178 options->lower_tg4_offsets) {
1179 progress |= lower_tg4_offsets(b, tex);
1180 continue;
1181 }
1182 }
1183
1184 return progress;
1185 }
1186
1187 static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options)1188 nir_lower_tex_impl(nir_function_impl *impl,
1189 const nir_lower_tex_options *options)
1190 {
1191 bool progress = false;
1192 nir_builder builder;
1193 nir_builder_init(&builder, impl);
1194
1195 nir_foreach_block(block, impl) {
1196 progress |= nir_lower_tex_block(block, &builder, options);
1197 }
1198
1199 nir_metadata_preserve(impl, nir_metadata_block_index |
1200 nir_metadata_dominance);
1201 return progress;
1202 }
1203
1204 bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1205 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1206 {
1207 bool progress = false;
1208
1209 nir_foreach_function(function, shader) {
1210 if (function->impl)
1211 progress |= nir_lower_tex_impl(function->impl, options);
1212 }
1213
1214 return progress;
1215 }
1216