/*
 * Copyright © 2023 Valve Corporation
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 *   + YUV-to-RGB conversion: to allow sampling YUV values as RGB values
 *     according to a specific YUV color space and range.
 */

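/*
 * A minimal usage sketch (hypothetical driver code, not part of this pass):
 * the driver fills nir_lower_tex_options with whatever lowerings its
 * hardware needs (here: projector lowering for every sampler dim and RECT
 * normalization; the values are illustrative only) and runs the pass:
 *
 *    nir_lower_tex_options opts = {
 *       .lower_txp = ~0u,
 *       .lower_rect = true,
 *    };
 *    bool progress = nir_lower_tex(shader, &opts);
 */
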
#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_format_convert.h"

typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;

static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.34413629f }, { .f32 = 1.772f } },
   { { .f32 = 1.402f }, { .f32 = -0.71413629f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732427f }, { .f32 = 1.8556f } },
   { { .f32 = 1.5748f }, { .f32 = -0.46812427f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.16455313f }, { .f32 = 1.88140000f } },
   { { .f32 = 1.4747f }, { .f32 = -0.57139187f }, { .f32 = 0.0f } },
} };

static const float bt601_limited_range_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt601_full_range_csc_offsets[3] = {
   -0.701000000f, 0.529136286f, -0.886000000f
};
static const float bt709_limited_range_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt709_full_range_csc_offsets[3] = {
   -0.787400000f, 0.327724273f, -0.927800000f
};
static const float bt2020_limited_range_csc_offsets[3] = {
   -0.915745075f, 0.347480639f, -1.148145075f
};
static const float bt2020_full_range_csc_offsets[3] = {
   -0.737350000f, 0.367972500f, -0.940700000f
};
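
/* These offsets fold the fixed YCbCr channel biases through the matrix
 * columns above (approximately a 16/255 luma bias and a 128/255 chroma bias
 * for the limited-range variants, and a 0.5 chroma bias for the full-range
 * ones), letting convert_yuv_to_rgb() below apply matrix and bias with a
 * single ffma chain.
 */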

static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *proj = nir_steal_tex_src(tex, nir_tex_src_projector);
   if (!proj)
      return false;

   b->cursor = nir_before_instr(&tex->instr);
   nir_def *inv_proj = nir_frcp(b, proj);

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_def *unprojected =
         tex->src[i].src.ssa;
      nir_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_src_rewrite(&tex->src[i].src, projected);
   }

   return true;
}

static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
   if (!offset)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   nir_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_def *scale = NULL;

         if (b->shader->options->has_texture_scaling) {
            nir_def *idx = nir_imm_int(b, tex->texture_index);
            scale = nir_load_texture_scale(b, 32, idx);
         } else {
            nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
            scale = nir_frcp(b, txs);
         }

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_src_rewrite(&tex->src[coord_index].src, offset_coord);

   return true;
}

static void
lower_rect(nir_builder *b, nir_tex_instr *tex)
{
   /* Set the sampler_dim to 2D here so that get_texture_size picks up the
    * right dimensionality.
    */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_def *scale = nir_frcp(b, txs);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *coords =
         tex->src[coord_index].src.ssa;
      nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
   }
}

static void
lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *idx = nir_imm_int(b, tex->texture_index);
   nir_def *scale = nir_load_texture_scale(b, 32, idx);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *coords =
         tex->src[coord_index].src.ssa;
      nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
   }
}

static void
lower_1d(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *coords = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
   nir_def *ddx = nir_steal_tex_src(tex, nir_tex_src_ddx);
   nir_def *ddy = nir_steal_tex_src(tex, nir_tex_src_ddy);

   /* Add in 2D sources to become a 2D operation */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   if (coords) {
      /* We want to fetch texel 0 along the Y-axis. To do so, we sample at 0.5
       * to get texel 0 with correct handling of wrap modes.
       */
      nir_def *y = nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5,
                                    coords->bit_size);

      tex->coord_components++;

      if (tex->is_array && tex->op != nir_texop_lod) {
         assert(tex->coord_components == 3);

         nir_def *x = nir_channel(b, coords, 0);
         nir_def *idx = nir_channel(b, coords, 1);
         coords = nir_vec3(b, x, y, idx);
      } else {
         assert(tex->coord_components == 2);
         coords = nir_vec2(b, coords, y);
      }

      nir_tex_instr_add_src(tex, nir_tex_src_coord, coords);
   }

   if (offset) {
      nir_tex_instr_add_src(tex, nir_tex_src_offset,
                            nir_pad_vector_imm_int(b, offset, 0, 2));
   }

   if (ddx || ddy) {
      nir_tex_instr_add_src(tex, nir_tex_src_ddx,
                            nir_pad_vector_imm_int(b, ddx, 0, 2));

      nir_tex_instr_add_src(tex, nir_tex_src_ddy,
                            nir_pad_vector_imm_int(b, ddy, 0, 2));
   }

   /* Handle destination component mismatch for txs. */
   if (tex->op == nir_texop_txs) {
      b->cursor = nir_after_instr(&tex->instr);

      nir_def *dst;
      if (tex->is_array) {
         assert(tex->def.num_components == 2);
         tex->def.num_components = 3;

         /* For array, we take .xz to skip the newly added height */
         dst = nir_channels(b, &tex->def, (1 << 0) | (1 << 2));
      } else {
         assert(tex->def.num_components == 1);
         tex->def.num_components = 2;

         dst = nir_channel(b, &tex->def, 0);
      }

      nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
   }
}

static void
lower_lod(nir_builder *b, nir_tex_instr *tex, nir_def *lod)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   /* If we have a bias, add it in */
   nir_def *bias = nir_steal_tex_src(tex, nir_tex_src_bias);
   if (bias)
      lod = nir_fadd(b, lod, bias);

   /* If we have a minimum LOD, clamp LOD accordingly */
   nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
   if (min_lod)
      lod = nir_fmax(b, lod, min_lod);

   nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
   tex->op = nir_texop_txl;
}

static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   lower_lod(b, tex, nir_get_texture_lod(b, tex));
}

static void
lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   if (tex->op == nir_texop_lod) {
      nir_def_rewrite_uses(&tex->def, nir_imm_int(b, 0));
      nir_instr_remove(&tex->instr);
      return;
   }

   lower_lod(b, tex, nir_imm_int(b, 0));
}

static nir_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      plane_tex->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_plane,
                                                       nir_imm_int(b, plane));
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | tex->def.bit_size;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_def_init(&plane_tex->instr, &plane_tex->def, 4,
                tex->def.bit_size);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If a scale factor is set for this texture, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->def,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->def;
}

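/* Computes rgb = y * m[0] + u * m[1] + v * m[2] + offset as an ffma chain.
 * The caller-provided alpha rides along in the .w component of the offset
 * vector, so it passes through the color conversion untouched.
 */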
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_def *y, nir_def *u, nir_def *v,
                   nir_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   const float *offset_vals;
   const nir_const_value_3_4 *m;
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->yuv_full_range_external & (1u << texture_index)) {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_full_range_csc_coeffs;
         offset_vals = bt709_full_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_full_range_csc_coeffs;
         offset_vals = bt2020_full_range_csc_offsets;
      } else {
         m = &bt601_full_range_csc_coeffs;
         offset_vals = bt601_full_range_csc_offsets;
      }
   } else {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_limited_range_csc_coeffs;
         offset_vals = bt709_limited_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_limited_range_csc_coeffs;
         offset_vals = bt2020_limited_range_csc_offsets;
      } else {
         m = &bt601_limited_range_csc_coeffs;
         offset_vals = bt601_limited_range_csc_offsets;
      }
   }

   unsigned bit_size = tex->def.bit_size;

   nir_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   nir_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   nir_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_def_rewrite_uses(&tex->def, result);
}

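/* The two-plane lowerings below handle formats where plane 0 carries luma
 * and plane 1 carries interleaved chroma (e.g. NV12-style layouts), so Y
 * comes from the first plane's sample and U/V from channels of the second.
 */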
static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *uv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, uv, 0),
                      nir_channel(b, uv, 1),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_y_vu_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *vu = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, vu, 1),
                      nir_channel(b, vu, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *u = sample_plane(b, tex, 1, options);
   nir_def *v = sample_plane(b, tex, 2, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, u, 0),
                      nir_channel(b, v, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *xuxv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xuxv, 1),
                      nir_channel(b, xuxv, 3),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yx_xvxu_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *xvxu = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xvxu, 3),
                      nir_channel(b, xvxu, 1),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *uxvx = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, uxvx, 0),
                      nir_channel(b, uxvx, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_xy_vxux_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *vxux = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, vxux, 2),
                      nir_channel(b, vxux, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *ayuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, ayuv, 2),
                      nir_channel(b, ayuv, 1),
                      nir_channel(b, ayuv, 0),
                      nir_channel(b, ayuv, 3),
                      options,
                      texture_index);
}

static void
lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y41x = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y41x, 1),
                      nir_channel(b, y41x, 0),
                      nir_channel(b, y41x, 2),
                      nir_channel(b, y41x, 3),
                      options,
                      texture_index);
}

static void
lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *xyuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, xyuv, 2),
                      nir_channel(b, xyuv, 1),
                      nir_channel(b, xyuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 0),
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_channel(b, yuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yv_yu_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 2),
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   /* If we have a minimum LOD, clamp LOD accordingly */
   nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
   if (min_lod)
      lod = nir_fmax(b, lod, min_lod);

   nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
   tex->op = nir_texop_txl;
}

static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face.  The appropriate face is determined
    * by which component has the largest magnitude and its sign.  The texture
    * coordinate is the quotient of the remaining texture coordinates and the
    * absolute value of the component of largest magnitude.  This division
    * requires that the derivative of the texel coordinate be computed with
    * the quotient rule.  The high level GLSL code is as follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use the quotient rule to compute the derivatives.  The texel
    * coordinate normalized to [-1, 1] is given by Q.xy / (sign(Q.z) * Q.z).
    * We are only concerned with the magnitudes of the derivatives, which are
    * not affected by the sign, so we drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD.  At this point we have the derivatives of the
    * texture coordinates normalized to [-1, 1].  We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy, dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy, dy)))
    * where L is the dimension of the cubemap.  The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_def *abs_p = nir_fabs(b, p);
   nir_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_def *Q, *dQdx, *dQdy;

   nir_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_def *Q_xy = nir_trim_vector(b, Q, 2);
   nir_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_def *dQdx_xy = nir_trim_vector(b, dQdx, 2);
   nir_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_def *dQdy_xy = nir_trim_vector(b, dQdy, 2);
   nir_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                   component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_def *dPdx = nir_fmul(b, ddx, size);
   nir_def *dPdy = nir_fmul(b, ddy, size);

   nir_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;
   txd->is_array = tex->is_array;
   txd->is_shadow = tex->is_shadow;
   txd->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      txd->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);
   nir_def *coord = tex->src[coord_idx].src.ssa;
   /* don't take the derivative of the array index */
   if (tex->is_array)
      coord = nir_channels(b, coord, nir_component_mask(coord->num_components - 1));
   nir_def *dfdx = nir_fddx(b, coord);
   nir_def *dfdy = nir_fddy(b, coord);
   txd->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_ddx, dfdx);
   txd->src[tex->num_srcs + 1] = nir_tex_src_for_ssa(nir_tex_src_ddy, dfdy);

   nir_def_init(&txd->instr, &txd->def,
                tex->def.num_components,
                tex->def.bit_size);
   nir_builder_instr_insert(b, &txd->instr);
   nir_def_rewrite_uses(&tex->def, &txd->def);
   nir_instr_remove(&tex->instr);
   return txd;
}

/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
static nir_tex_instr *
lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);

   txl->op = nir_texop_txl;
   txl->sampler_dim = tex->sampler_dim;
   txl->dest_type = tex->dest_type;
   txl->coord_components = tex->coord_components;
   txl->texture_index = tex->texture_index;
   txl->sampler_index = tex->sampler_index;
   txl->is_array = tex->is_array;
   txl->is_shadow = tex->is_shadow;
   txl->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse all but bias src */
   for (int i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_bias) {
         txl->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
         txl->src[i].src_type = tex->src[i].src_type;
      }
   }
   nir_def *lod = nir_get_texture_lod(b, tex);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   assert(bias_idx >= 0);
   lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa);
   txl->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_lod, lod);

   nir_def_init(&txl->instr, &txl->def,
                tex->def.num_components,
                tex->def.bit_size);
   nir_builder_instr_insert(b, &txl->instr);
   nir_def_rewrite_uses(&tex->def, &txl->def);
   nir_instr_remove(&tex->instr);
   return txl;
}

static nir_tex_instr *
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   if (tex->op == nir_texop_tex)
      tex = lower_tex_to_txd(b, tex);
   else if (tex->op == nir_texop_txb)
      tex = lower_txb_to_txl(b, tex);

   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *src =
         tex->src[coord_index].src.ssa;

      /* split src into components: */
      nir_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_src_rewrite(&tex->src[coord_index].src, src);
   }
   return tex;
}

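/* Builds the constant vector requested by a swizzle value of 4 (constant 0)
 * or 5 (constant 1); values 0-3 select a channel and are handled directly in
 * swizzle_result() below.
 */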
static nir_def *
get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
{
   nir_const_value v[4];

   memset(&v, 0, sizeof(v));

   if (swizzle_val == 4) {
      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
   } else {
      assert(swizzle_val == 5);
      if (type == nir_type_float32)
         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
      else
         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
   }

   return nir_build_imm(b, 4, 32, v);
}

static void
swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   assert(nir_tex_instr_dest_size(tex) == 4);
   unsigned swiz[4] = { 2, 3, 1, 0 };
   nir_def *swizzled = nir_swizzle(b, &tex->def, swiz, 4);

   nir_def_rewrite_uses_after(&tex->def, swizzled,
                              swizzled->parent_instr);
}

static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->def, swiz, 4);
      } else {
         nir_scalar srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_get_scalar(&tex->def, swizzle[i]);
            } else {
               srcs[i] = nir_get_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
            }
         }
         swizzled = nir_vec_scalars(b, srcs, 4);
      }
   }

   nir_def_rewrite_uses_after(&tex->def, swizzled,
                              swizzled->parent_instr);
}

static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   nir_def *rgb =
      nir_format_srgb_to_linear(b, nir_trim_vector(b, &tex->def, 3));

   /* alpha is untouched: */
   nir_def *result = nir_vec4(b,
                              nir_channel(b, rgb, 0),
                              nir_channel(b, rgb, 1),
                              nir_channel(b, rgb, 2),
                              nir_channel(b, &tex->def, 3));

   nir_def_rewrite_uses_after(&tex->def, result,
                              result->parent_instr);
}

/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static bool
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_def *color = &tex->def;

   b->cursor = nir_after_instr(&tex->instr);

   assert(options->lower_tex_packing_cb);
   enum nir_lower_tex_packing packing =
      options->lower_tex_packing_cb(tex, options->lower_tex_packing_data);

   switch (packing) {
   case nir_lower_tex_packing_none:
      return false;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = { 16, 16, 16, 16 };

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            nir_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            nir_def *rg = nir_channel(b, color, 0);
            nir_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_def_rewrite_uses_after(&tex->def, color,
                              color->parent_instr);
   return true;
}

static bool
sampler_index_lt(nir_tex_instr *tex, unsigned max)
{
   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);

   unsigned sampler_index = tex->sampler_index;

   int sampler_offset_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
   if (sampler_offset_idx >= 0) {
      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
         return false;

      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
   }

   return sampler_index < max;
}

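/* Splits a tg4 with explicit per-texel offsets into four single-offset tg4
 * instructions and recombines channel 3 (the gathered component) of each,
 * ANDing the sparse residency codes together when present.
 */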
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_scalar dest[5] = { 0 };
   nir_def *residency = NULL;
   for (unsigned i = 0; i < 4; ++i) {
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;
      tex_copy->texture_index = tex->texture_index;
      tex_copy->sampler_index = tex->sampler_index;
      tex_copy->backend_flags = tex->backend_flags;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         tex_copy->src[j].src = nir_src_for_ssa(tex->src[j].src.ssa);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      nir_def *offset = nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                      tex->tg4_offsets[i][1]);
      nir_tex_src src = nir_tex_src_for_ssa(nir_tex_src_offset, offset);
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_def_init(&tex_copy->instr, &tex_copy->def,
                   nir_tex_instr_dest_size(tex), 32);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_get_scalar(&tex_copy->def, 3);
      if (tex->is_sparse) {
         nir_def *code = nir_channel(b, &tex_copy->def, 4);
         if (residency)
            residency = nir_sparse_residency_code_and(b, residency, code);
         else
            residency = code;
      }
   }
   dest[4] = nir_get_scalar(residency, 0);

   nir_def *res = nir_vec_scalars(b, dest, tex->def.num_components);
   nir_def_rewrite_uses(&tex->def, res);
   nir_instr_remove(&tex->instr);

   return true;
}

static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_def *lod = tex->src[lod_idx].src.ssa;

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_src_rewrite(&tex->src[lod_idx].src, nir_imm_int(b, 0));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
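   /* For example, a 1024-wide level-0 size queried at LOD 3 yields
    * max(1024 >> 3, 1) = 128.
    */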
   b->cursor = nir_after_instr(&tex->instr);
   nir_def *minified = nir_imin(b, &tex->def,
                                nir_imax(b, nir_ushr(b, &tex->def, lod),
                                         nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      comp[dest_size - 1] = nir_channel(b, &tex->def, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_def_rewrite_uses_after(&tex->def, minified,
                              minified->parent_instr);
   return true;
}

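/* Cube faces are square, so the lowered 2D size uses the height for both
 * dimensions, and the cube layer count is the underlying 2D-array layer
 * count divided by 6.
 */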
static void
nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->def.num_components == 3);
   nir_def *size = &tex->def;
   size = nir_vec3(b, nir_channel(b, size, 1),
                   nir_channel(b, size, 1),
                   nir_idiv(b, nir_channel(b, size, 2),
                            nir_imm_int(b, 6)));

   nir_def_rewrite_uses_after(&tex->def, size, size->parent_instr);
}

/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping.  Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel.  When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate the value 8 in FMASK, which
 * means the physical sample index is unknown.  We can map 8 to any valid
 * sample index, and extracting only 3 bits will map it to 0, which works
 * with all MSAA modes.
 */
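/* A worked example with the illustrative FMASK above: for fmask = 0x11111100
 * and sample_index = 1, ubfe(0x11111100, 4, 3) extracts bits 4..6, which are
 * all 0, so the fetch reads physical sample 0 as described.
 */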
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);

   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index. */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   nir_def *new_sample = nir_ubfe(b, &fmask_fetch->def,
                                  nir_ishl_imm(b, sample.ssa, 2), nir_imm_int(b, 3));

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_src_rewrite(&tex->src[ms_index].src, new_sample);
}

static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   nir_def_rewrite_uses(&tex->def, nir_ieq_imm(b, &fmask_fetch->def, 0));
   nir_instr_remove_v(&tex->instr);
}

static void
nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex)
{
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   b->cursor = nir_after_instr(&tex->instr);

   nir_def *is_zero = nir_imm_true(b);
   for (unsigned i = 0; i < tex->coord_components; i++) {
      nir_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i);

      /* Compute the sum of the absolute values of derivatives. */
      nir_def *dfdx = nir_fddx(b, coord);
      nir_def *dfdy = nir_fddy(b, coord);
      nir_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy));

      /* Check if the sum is 0. */
      is_zero = nir_iand(b, is_zero, nir_feq_imm(b, fwidth, 0.0));
   }

   /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. */
   nir_def *adjusted_lod =
      nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX),
                nir_channel(b, &tex->def, 1));

   nir_def *def =
      nir_vec2(b, nir_channel(b, &tex->def, 0), adjusted_lod);

   nir_def_rewrite_uses_after(&tex->def, def, def->parent_instr);
}

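/* Folds a constant texture/sampler base index into the dynamic offset
 * source, e.g. texture_index = 3 with a dynamic offset x becomes
 * texture_index = 0 with offset x + 3, so backends only have to handle the
 * offset source.
 */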
static bool
lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
{
   bool progress = false;
   b->cursor = nir_before_instr(&tex->instr);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      unsigned *index;
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
         index = &tex->texture_index;
         break;
      case nir_tex_src_sampler_offset:
         index = &tex->sampler_index;
         break;
      default:
         continue;
      }

      /* If there's no base index, there's nothing to lower */
      if ((*index) == 0)
         continue;

      nir_def *sum = nir_iadd_imm(b, tex->src[i].src.ssa, *index);
      nir_src_rewrite(&tex->src[i].src, sum);
      *index = 0;
      progress = true;
   }

   return progress;
}

static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options,
                    const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;
      /* ignore saturate for txf ops: these don't use samplers and can't use
       * GL_CLAMP
       */
      if (nir_tex_instr_need_sampler(tex)) {
         if ((1 << tex->sampler_index) & options->saturate_r)
            sat_mask |= (1 << 2); /* .z */
         if ((1 << tex->sampler_index) & options->saturate_t)
            sat_mask |= (1 << 1); /* .y */
         if ((1 << tex->sampler_index) & options->saturate_s)
            sat_mask |= (1 << 0); /* .x */
      }

      if (options->lower_index_to_offset)
         progress |= lower_index_to_offset(b, tex);

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask ||
          (options->lower_txp_array && tex->is_array)) {
         progress |= project_src(b, tex);
      }

      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset) ||
          (options->lower_offset_filter &&
           options->lower_offset_filter(instr, options->callback_data))) {
         progress = lower_offset(b, tex) || progress;
      }

      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
          tex->op != nir_texop_txf) {
         if (nir_tex_instr_is_query(tex))
            tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
         else if (compiler_options->has_texture_scaling)
            lower_rect_tex_scale(b, tex);
         else
            lower_rect(b, tex);

         progress = true;
      }

      if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D &&
          (options->lower_1d || (tex->is_shadow && options->lower_1d_shadow))) {
         lower_1d(b, tex);
         progress = true;
      }

      unsigned texture_index = tex->texture_index;
      uint32_t texture_mask = 1u << texture_index;
      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
      if (tex_index >= 0) {
         nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
         nir_variable *var = nir_deref_instr_get_variable(deref);
         texture_index = var ? var->data.binding : 0;
         texture_mask = var && texture_index < 32 ? (1u << texture_index) : 0u;
      }

      if (texture_mask & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y_vu_external) {
         lower_y_vu_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yx_xvxu_external) {
         lower_yx_xvxu_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xy_vxux_external) {
         lower_xy_vxux_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yuv_external) {
         lower_yuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
         lower_yu_yv_external(b, tex, options, texture_index);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yv_yu_external) {
         lower_yv_yu_external(b, tex, options, texture_index);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y41x_external) {
         lower_y41x_external(b, tex, options, texture_index);
         progress = true;
      }

      if (sat_mask) {
         tex = saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      if ((texture_mask & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if ((texture_mask & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      if (options->lower_tex_packing_cb &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples) {
         progress |= lower_tex_packing(b, tex, options);
      }

      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_clamp && has_min_lod) ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D) ||
           (options->lower_txd_array && tex->is_array))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      /* TXF, TXS and TXL require a LOD but not everything we implement using
       * those three opcodes provides one.  Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(b, 0));
         progress = true;
         continue;
      }

      /* Only fragment and compute (in some cases) support implicit
       * derivatives.  Lower those opcodes which use implicit derivatives to
       * use an explicit LOD of 0.
       * But don't touch RECT samplers because they don't have mips.
       */
      if (options->lower_invalid_implicit_lod &&
          nir_tex_instr_has_implicit_derivative(tex) &&
          tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
          !nir_shader_supports_implicit_lod(b->shader)) {
         lower_zero_lod(b, tex);
         progress = true;
      }

      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
         progress |= nir_lower_txs_lod(b, tex);
         continue;
      }

      if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
         nir_lower_txs_cube_array(b, tex);
         progress = true;
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
         nir_lower_ms_txf_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
         nir_lower_samples_identical_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_lod_zero_width && tex->op == nir_texop_lod) {
         nir_lower_lod_zero_width(b, tex);
         progress = true;
         continue;
      }
   }

   return progress;
}

static bool
nir_lower_tex_impl(nir_function_impl *impl,
                   const nir_lower_tex_options *options,
                   const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;
   nir_builder builder = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   return progress;
}

bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
   bool progress = false;

   /* lower_tg4_offsets injects new tg4 instructions that won't be lowered
    * if lower_tg4_broadcom_swizzle is also requested, so when both are set
    * we want to run lower_tg4_offsets in a separate pass first.
    */
   if (options->lower_tg4_offsets && options->lower_tg4_broadcom_swizzle) {
      nir_lower_tex_options _options = {
         .lower_tg4_offsets = true,
      };
      progress = nir_lower_tex(shader, &_options);
   }

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_tex_impl(impl, options, shader->options);
   }

   return progress;
}