• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2023 Valve Corporation
3  * Copyright © 2015 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /*
26  * This lowering pass supports (as configured via nir_lower_tex_options)
27  * various texture related conversions:
28  *   + texture projector lowering: converts the coordinate division for
29  *     texture projection to be done in ALU instructions instead of
30  *     asking the texture operation to do so.
31  *   + lowering RECT: converts the un-normalized RECT texture coordinates
32  *     to normalized coordinates with txs plus ALU instructions
33  *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
34  *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
35  *     Note that this automatically triggers texture projector lowering if
36  *     needed, since clamping must happen after projector lowering.
37  *   + YUV-to-RGB conversion: to allow sampling YUV values as RGB values
38  *     according to a specific YUV color space and range.
39  */
40 
41 #include "nir.h"
42 #include "nir_builder.h"
43 #include "nir_builtin_builder.h"
44 #include "nir_format_convert.h"
45 
46 typedef struct nir_const_value_3_4 {
47    nir_const_value v[3][4];
48 } nir_const_value_3_4;
49 
50 static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { {
51    { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
52    { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
53    { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
54 } };
55 static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { {
56    { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
57    { { .f32 = 0.0f }, { .f32 = -0.34413629f }, { .f32 = 1.772f } },
58    { { .f32 = 1.402f }, { .f32 = -0.71413629f }, { .f32 = 0.0f } },
59 } };
60 static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { {
61    { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
62    { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
63    { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } },
64 } };
65 static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { {
66    { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
67    { { .f32 = 0.0f }, { .f32 = -0.18732427f }, { .f32 = 1.8556f } },
68    { { .f32 = 1.5748f }, { .f32 = -0.46812427f }, { .f32 = 0.0f } },
69 } };
70 static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { {
71    { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
72    { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
73    { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f } },
74 } };
75 static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { {
76    { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
77    { { .f32 = 0.0f }, { .f32 = -0.16455313f }, { .f32 = 1.88140000f } },
78    { { .f32 = 1.4747f }, { .f32 = -0.57139187f }, { .f32 = 0.0f } },
79 } };
80 
81 static const float bt601_limited_range_csc_offsets[3] = {
82    -0.874202218f, 0.531667823f, -1.085630789f
83 };
84 static const float bt601_full_range_csc_offsets[3] = {
85    -0.701000000f, 0.529136286f, -0.886000000f
86 };
87 static const float bt709_limited_range_csc_offsets[3] = {
88    -0.972945075f, 0.301482665f, -1.133402218f
89 };
90 static const float bt709_full_range_csc_offsets[3] = {
91    -0.787400000f, 0.327724273f, -0.927800000f
92 };
93 static const float bt2020_limited_range_csc_offsets[3] = {
94    -0.915745075f, 0.347480639f, -1.148145075f
95 };
96 static const float bt2020_full_range_csc_offsets[3] = {
97    -0.737350000f, 0.367972500f, -0.940700000f
98 };
99 
100 static bool
project_src(nir_builder * b,nir_tex_instr * tex)101 project_src(nir_builder *b, nir_tex_instr *tex)
102 {
103    nir_def *proj = nir_steal_tex_src(tex, nir_tex_src_projector);
104    if (!proj)
105       return false;
106 
107    b->cursor = nir_before_instr(&tex->instr);
108    nir_def *inv_proj = nir_frcp(b, proj);
109 
110    /* Walk through the sources projecting the arguments. */
111    for (unsigned i = 0; i < tex->num_srcs; i++) {
112       switch (tex->src[i].src_type) {
113       case nir_tex_src_coord:
114       case nir_tex_src_comparator:
115          break;
116       default:
117          continue;
118       }
119       nir_def *unprojected =
120          tex->src[i].src.ssa;
121       nir_def *projected = nir_fmul(b, unprojected, inv_proj);
122 
123       /* Array indices don't get projected, so make an new vector with the
124        * coordinate's array index untouched.
125        */
126       if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
127          switch (tex->coord_components) {
128          case 4:
129             projected = nir_vec4(b,
130                                  nir_channel(b, projected, 0),
131                                  nir_channel(b, projected, 1),
132                                  nir_channel(b, projected, 2),
133                                  nir_channel(b, unprojected, 3));
134             break;
135          case 3:
136             projected = nir_vec3(b,
137                                  nir_channel(b, projected, 0),
138                                  nir_channel(b, projected, 1),
139                                  nir_channel(b, unprojected, 2));
140             break;
141          case 2:
142             projected = nir_vec2(b,
143                                  nir_channel(b, projected, 0),
144                                  nir_channel(b, unprojected, 1));
145             break;
146          default:
147             unreachable("bad texture coord count for array");
148             break;
149          }
150       }
151 
152       nir_src_rewrite(&tex->src[i].src, projected);
153    }
154 
155    return true;
156 }
157 
158 static bool
lower_offset(nir_builder * b,nir_tex_instr * tex)159 lower_offset(nir_builder *b, nir_tex_instr *tex)
160 {
161    nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
162    if (!offset)
163       return false;
164 
165    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
166    assert(coord_index >= 0);
167 
168    nir_def *coord = tex->src[coord_index].src.ssa;
169 
170    b->cursor = nir_before_instr(&tex->instr);
171 
172    nir_def *offset_coord;
173    if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
174       if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
175          offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
176       } else {
177          nir_def *scale = NULL;
178 
179          if (b->shader->options->has_texture_scaling) {
180             nir_def *idx = nir_imm_int(b, tex->texture_index);
181             scale = nir_load_texture_scale(b, 32, idx);
182          } else {
183             nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
184             scale = nir_frcp(b, txs);
185          }
186 
187          offset_coord = nir_fadd(b, coord,
188                                  nir_fmul(b,
189                                           nir_i2f32(b, offset),
190                                           scale));
191       }
192    } else {
193       offset_coord = nir_iadd(b, coord, offset);
194    }
195 
196    if (tex->is_array) {
197       /* The offset is not applied to the array index */
198       if (tex->coord_components == 2) {
199          offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
200                                  nir_channel(b, coord, 1));
201       } else if (tex->coord_components == 3) {
202          offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
203                                  nir_channel(b, offset_coord, 1),
204                                  nir_channel(b, coord, 2));
205       } else {
206          unreachable("Invalid number of components");
207       }
208    }
209 
210    nir_src_rewrite(&tex->src[coord_index].src, offset_coord);
211 
212    return true;
213 }
214 
215 static void
lower_rect(nir_builder * b,nir_tex_instr * tex)216 lower_rect(nir_builder *b, nir_tex_instr *tex)
217 {
218    /* Set the sampler_dim to 2D here so that get_texture_size picks up the
219     * right dimensionality.
220     */
221    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
222 
223    nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
224    nir_def *scale = nir_frcp(b, txs);
225    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
226 
227    if (coord_index != -1) {
228       nir_def *coords =
229          tex->src[coord_index].src.ssa;
230       nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
231    }
232 }
233 
234 static void
lower_rect_tex_scale(nir_builder * b,nir_tex_instr * tex)235 lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
236 {
237    b->cursor = nir_before_instr(&tex->instr);
238 
239    nir_def *idx = nir_imm_int(b, tex->texture_index);
240    nir_def *scale = nir_load_texture_scale(b, 32, idx);
241    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
242 
243    if (coord_index != -1) {
244       nir_def *coords =
245          tex->src[coord_index].src.ssa;
246       nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
247    }
248 }
249 
250 static void
lower_1d(nir_builder * b,nir_tex_instr * tex)251 lower_1d(nir_builder *b, nir_tex_instr *tex)
252 {
253    b->cursor = nir_before_instr(&tex->instr);
254 
255    nir_def *coords = nir_steal_tex_src(tex, nir_tex_src_coord);
256    nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
257    nir_def *ddx = nir_steal_tex_src(tex, nir_tex_src_ddx);
258    nir_def *ddy = nir_steal_tex_src(tex, nir_tex_src_ddy);
259 
260    /* Add in 2D sources to become a 2D operation */
261    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
262 
263    if (coords) {
264       /* We want to fetch texel 0 along the Y-axis. To do so, we sample at 0.5
265        * to get texel 0 with correct handling of wrap modes.
266        */
267       nir_def *y = nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5,
268                                     coords->bit_size);
269 
270       tex->coord_components++;
271 
272       if (tex->is_array && tex->op != nir_texop_lod) {
273          assert(tex->coord_components == 3);
274 
275          nir_def *x = nir_channel(b, coords, 0);
276          nir_def *idx = nir_channel(b, coords, 1);
277          coords = nir_vec3(b, x, y, idx);
278       } else {
279          assert(tex->coord_components == 2);
280          coords = nir_vec2(b, coords, y);
281       }
282 
283       nir_tex_instr_add_src(tex, nir_tex_src_coord, coords);
284    }
285 
286    if (offset) {
287       nir_tex_instr_add_src(tex, nir_tex_src_offset,
288                             nir_pad_vector_imm_int(b, offset, 0, 2));
289    }
290 
291    if (ddx || ddy) {
292       nir_tex_instr_add_src(tex, nir_tex_src_ddx,
293                             nir_pad_vector_imm_int(b, ddx, 0, 2));
294 
295       nir_tex_instr_add_src(tex, nir_tex_src_ddy,
296                             nir_pad_vector_imm_int(b, ddy, 0, 2));
297    }
298 
299    /* Handle destination component mismatch for txs. */
300    if (tex->op == nir_texop_txs) {
301       b->cursor = nir_after_instr(&tex->instr);
302 
303       nir_def *dst;
304       if (tex->is_array) {
305          assert(tex->def.num_components == 2);
306          tex->def.num_components = 3;
307 
308          /* For array, we take .xz to skip the newly added height */
309          dst = nir_channels(b, &tex->def, (1 << 0) | (1 << 2));
310       } else {
311          assert(tex->def.num_components == 1);
312          tex->def.num_components = 2;
313 
314          dst = nir_channel(b, &tex->def, 0);
315       }
316 
317       nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
318    }
319 }
320 
321 static void
lower_lod(nir_builder * b,nir_tex_instr * tex,nir_def * lod)322 lower_lod(nir_builder *b, nir_tex_instr *tex, nir_def *lod)
323 {
324    assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
325    assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
326    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
327    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
328 
329    /* If we have a bias, add it in */
330    nir_def *bias = nir_steal_tex_src(tex, nir_tex_src_bias);
331    if (bias)
332       lod = nir_fadd(b, lod, bias);
333 
334    /* If we have a minimum LOD, clamp LOD accordingly */
335    nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
336    if (min_lod)
337       lod = nir_fmax(b, lod, min_lod);
338 
339    nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
340    tex->op = nir_texop_txl;
341 }
342 
343 static void
lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)344 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
345 {
346    b->cursor = nir_before_instr(&tex->instr);
347    lower_lod(b, tex, nir_get_texture_lod(b, tex));
348 }
349 
350 static void
lower_zero_lod(nir_builder * b,nir_tex_instr * tex)351 lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
352 {
353    b->cursor = nir_before_instr(&tex->instr);
354 
355    if (tex->op == nir_texop_lod) {
356       nir_def_rewrite_uses(&tex->def, nir_imm_int(b, 0));
357       nir_instr_remove(&tex->instr);
358       return;
359    }
360 
361    lower_lod(b, tex, nir_imm_int(b, 0));
362 }
363 
364 static nir_def *
sample_plane(nir_builder * b,nir_tex_instr * tex,int plane,const nir_lower_tex_options * options)365 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
366              const nir_lower_tex_options *options)
367 {
368    assert(nir_tex_instr_dest_size(tex) == 4);
369    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
370    assert(tex->op == nir_texop_tex);
371    assert(tex->coord_components == 2);
372 
373    nir_tex_instr *plane_tex =
374       nir_tex_instr_create(b->shader, tex->num_srcs + 1);
375    for (unsigned i = 0; i < tex->num_srcs; i++) {
376       plane_tex->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
377       plane_tex->src[i].src_type = tex->src[i].src_type;
378    }
379    plane_tex->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_plane,
380                                                        nir_imm_int(b, plane));
381    plane_tex->op = nir_texop_tex;
382    plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
383    plane_tex->dest_type = nir_type_float | tex->def.bit_size;
384    plane_tex->coord_components = 2;
385 
386    plane_tex->texture_index = tex->texture_index;
387    plane_tex->sampler_index = tex->sampler_index;
388 
389    nir_def_init(&plane_tex->instr, &plane_tex->def, 4,
390                 tex->def.bit_size);
391 
392    nir_builder_instr_insert(b, &plane_tex->instr);
393 
394    /* If scaling_factor is set, return a scaled value. */
395    if (options->scale_factors[tex->texture_index])
396       return nir_fmul_imm(b, &plane_tex->def,
397                           options->scale_factors[tex->texture_index]);
398 
399    return &plane_tex->def;
400 }
401 
402 static void
convert_yuv_to_rgb(nir_builder * b,nir_tex_instr * tex,nir_def * y,nir_def * u,nir_def * v,nir_def * a,const nir_lower_tex_options * options,unsigned texture_index)403 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
404                    nir_def *y, nir_def *u, nir_def *v,
405                    nir_def *a,
406                    const nir_lower_tex_options *options,
407                    unsigned texture_index)
408 {
409 
410    const float *offset_vals;
411    const nir_const_value_3_4 *m;
412    assert((options->bt709_external & options->bt2020_external) == 0);
413    if (options->yuv_full_range_external & (1u << texture_index)) {
414       if (options->bt709_external & (1u << texture_index)) {
415          m = &bt709_full_range_csc_coeffs;
416          offset_vals = bt709_full_range_csc_offsets;
417       } else if (options->bt2020_external & (1u << texture_index)) {
418          m = &bt2020_full_range_csc_coeffs;
419          offset_vals = bt2020_full_range_csc_offsets;
420       } else {
421          m = &bt601_full_range_csc_coeffs;
422          offset_vals = bt601_full_range_csc_offsets;
423       }
424    } else {
425       if (options->bt709_external & (1u << texture_index)) {
426          m = &bt709_limited_range_csc_coeffs;
427          offset_vals = bt709_limited_range_csc_offsets;
428       } else if (options->bt2020_external & (1u << texture_index)) {
429          m = &bt2020_limited_range_csc_coeffs;
430          offset_vals = bt2020_limited_range_csc_offsets;
431       } else {
432          m = &bt601_limited_range_csc_coeffs;
433          offset_vals = bt601_limited_range_csc_offsets;
434       }
435    }
436 
437    unsigned bit_size = tex->def.bit_size;
438 
439    nir_def *offset =
440       nir_vec4(b,
441                nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
442                nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
443                nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
444                a);
445 
446    offset = nir_f2fN(b, offset, bit_size);
447 
448    nir_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
449    nir_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
450    nir_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);
451 
452    nir_def *result =
453       nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));
454 
455    nir_def_rewrite_uses(&tex->def, result);
456 }
457 
458 static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)459 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
460                     const nir_lower_tex_options *options,
461                     unsigned texture_index)
462 {
463    b->cursor = nir_after_instr(&tex->instr);
464 
465    nir_def *y = sample_plane(b, tex, 0, options);
466    nir_def *uv = sample_plane(b, tex, 1, options);
467 
468    convert_yuv_to_rgb(b, tex,
469                       nir_channel(b, y, 0),
470                       nir_channel(b, uv, 0),
471                       nir_channel(b, uv, 1),
472                       nir_imm_float(b, 1.0f),
473                       options,
474                       texture_index);
475 }
476 
477 static void
lower_y_vu_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)478 lower_y_vu_external(nir_builder *b, nir_tex_instr *tex,
479                     const nir_lower_tex_options *options,
480                     unsigned texture_index)
481 {
482    b->cursor = nir_after_instr(&tex->instr);
483 
484    nir_def *y = sample_plane(b, tex, 0, options);
485    nir_def *vu = sample_plane(b, tex, 1, options);
486 
487    convert_yuv_to_rgb(b, tex,
488                       nir_channel(b, y, 0),
489                       nir_channel(b, vu, 1),
490                       nir_channel(b, vu, 0),
491                       nir_imm_float(b, 1.0f),
492                       options,
493                       texture_index);
494 }
495 
496 static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)497 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
498                      const nir_lower_tex_options *options,
499                      unsigned texture_index)
500 {
501    b->cursor = nir_after_instr(&tex->instr);
502 
503    nir_def *y = sample_plane(b, tex, 0, options);
504    nir_def *u = sample_plane(b, tex, 1, options);
505    nir_def *v = sample_plane(b, tex, 2, options);
506 
507    convert_yuv_to_rgb(b, tex,
508                       nir_channel(b, y, 0),
509                       nir_channel(b, u, 0),
510                       nir_channel(b, v, 0),
511                       nir_imm_float(b, 1.0f),
512                       options,
513                       texture_index);
514 }
515 
516 static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)517 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
518                        const nir_lower_tex_options *options,
519                        unsigned texture_index)
520 {
521    b->cursor = nir_after_instr(&tex->instr);
522 
523    nir_def *y = sample_plane(b, tex, 0, options);
524    nir_def *xuxv = sample_plane(b, tex, 1, options);
525 
526    convert_yuv_to_rgb(b, tex,
527                       nir_channel(b, y, 0),
528                       nir_channel(b, xuxv, 1),
529                       nir_channel(b, xuxv, 3),
530                       nir_imm_float(b, 1.0f),
531                       options,
532                       texture_index);
533 }
534 
535 static void
lower_yx_xvxu_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)536 lower_yx_xvxu_external(nir_builder *b, nir_tex_instr *tex,
537                        const nir_lower_tex_options *options,
538                        unsigned texture_index)
539 {
540    b->cursor = nir_after_instr(&tex->instr);
541 
542    nir_def *y = sample_plane(b, tex, 0, options);
543    nir_def *xvxu = sample_plane(b, tex, 1, options);
544 
545    convert_yuv_to_rgb(b, tex,
546                       nir_channel(b, y, 0),
547                       nir_channel(b, xvxu, 3),
548                       nir_channel(b, xvxu, 1),
549                       nir_imm_float(b, 1.0f),
550                       options,
551                       texture_index);
552 }
553 
554 static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)555 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
556                        const nir_lower_tex_options *options,
557                        unsigned texture_index)
558 {
559    b->cursor = nir_after_instr(&tex->instr);
560 
561    nir_def *y = sample_plane(b, tex, 0, options);
562    nir_def *uxvx = sample_plane(b, tex, 1, options);
563 
564    convert_yuv_to_rgb(b, tex,
565                       nir_channel(b, y, 1),
566                       nir_channel(b, uxvx, 0),
567                       nir_channel(b, uxvx, 2),
568                       nir_imm_float(b, 1.0f),
569                       options,
570                       texture_index);
571 }
572 
573 static void
lower_xy_vxux_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)574 lower_xy_vxux_external(nir_builder *b, nir_tex_instr *tex,
575                        const nir_lower_tex_options *options,
576                        unsigned texture_index)
577 {
578    b->cursor = nir_after_instr(&tex->instr);
579 
580    nir_def *y = sample_plane(b, tex, 0, options);
581    nir_def *vxux = sample_plane(b, tex, 1, options);
582 
583    convert_yuv_to_rgb(b, tex,
584                       nir_channel(b, y, 1),
585                       nir_channel(b, vxux, 2),
586                       nir_channel(b, vxux, 0),
587                       nir_imm_float(b, 1.0f),
588                       options,
589                       texture_index);
590 }
591 
592 static void
lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)593 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
594                     const nir_lower_tex_options *options,
595                     unsigned texture_index)
596 {
597    b->cursor = nir_after_instr(&tex->instr);
598 
599    nir_def *ayuv = sample_plane(b, tex, 0, options);
600 
601    convert_yuv_to_rgb(b, tex,
602                       nir_channel(b, ayuv, 2),
603                       nir_channel(b, ayuv, 1),
604                       nir_channel(b, ayuv, 0),
605                       nir_channel(b, ayuv, 3),
606                       options,
607                       texture_index);
608 }
609 
610 static void
lower_y41x_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)611 lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
612                     const nir_lower_tex_options *options,
613                     unsigned texture_index)
614 {
615    b->cursor = nir_after_instr(&tex->instr);
616 
617    nir_def *y41x = sample_plane(b, tex, 0, options);
618 
619    convert_yuv_to_rgb(b, tex,
620                       nir_channel(b, y41x, 1),
621                       nir_channel(b, y41x, 0),
622                       nir_channel(b, y41x, 2),
623                       nir_channel(b, y41x, 3),
624                       options,
625                       texture_index);
626 }
627 
628 static void
lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)629 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
630                     const nir_lower_tex_options *options,
631                     unsigned texture_index)
632 {
633    b->cursor = nir_after_instr(&tex->instr);
634 
635    nir_def *xyuv = sample_plane(b, tex, 0, options);
636 
637    convert_yuv_to_rgb(b, tex,
638                       nir_channel(b, xyuv, 2),
639                       nir_channel(b, xyuv, 1),
640                       nir_channel(b, xyuv, 0),
641                       nir_imm_float(b, 1.0f),
642                       options,
643                       texture_index);
644 }
645 
646 static void
lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)647 lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
648                    const nir_lower_tex_options *options,
649                    unsigned texture_index)
650 {
651    b->cursor = nir_after_instr(&tex->instr);
652 
653    nir_def *yuv = sample_plane(b, tex, 0, options);
654 
655    convert_yuv_to_rgb(b, tex,
656                       nir_channel(b, yuv, 0),
657                       nir_channel(b, yuv, 1),
658                       nir_channel(b, yuv, 2),
659                       nir_imm_float(b, 1.0f),
660                       options,
661                       texture_index);
662 }
663 
664 static void
lower_yu_yv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)665 lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
666                      const nir_lower_tex_options *options,
667                      unsigned texture_index)
668 {
669    b->cursor = nir_after_instr(&tex->instr);
670 
671    nir_def *yuv = sample_plane(b, tex, 0, options);
672 
673    convert_yuv_to_rgb(b, tex,
674                       nir_channel(b, yuv, 1),
675                       nir_channel(b, yuv, 2),
676                       nir_channel(b, yuv, 0),
677                       nir_imm_float(b, 1.0f),
678                       options,
679                       texture_index);
680 }
681 
682 static void
lower_yv_yu_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)683 lower_yv_yu_external(nir_builder *b, nir_tex_instr *tex,
684                      const nir_lower_tex_options *options,
685                      unsigned texture_index)
686 {
687    b->cursor = nir_after_instr(&tex->instr);
688 
689    nir_def *yuv = sample_plane(b, tex, 0, options);
690 
691    convert_yuv_to_rgb(b, tex,
692                       nir_channel(b, yuv, 2),
693                       nir_channel(b, yuv, 1),
694                       nir_channel(b, yuv, 0),
695                       nir_imm_float(b, 1.0f),
696                       options,
697                       texture_index);
698 }
699 
700 /*
701  * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
702  * computed from the gradients.
703  */
704 static void
replace_gradient_with_lod(nir_builder * b,nir_def * lod,nir_tex_instr * tex)705 replace_gradient_with_lod(nir_builder *b, nir_def *lod, nir_tex_instr *tex)
706 {
707    assert(tex->op == nir_texop_txd);
708 
709    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
710    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
711 
712    /* If we have a minimum LOD, clamp LOD accordingly */
713    nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
714    if (min_lod)
715       lod = nir_fmax(b, lod, min_lod);
716 
717    nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
718    tex->op = nir_texop_txl;
719 }
720 
721 static void
lower_gradient_cube_map(nir_builder * b,nir_tex_instr * tex)722 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
723 {
724    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
725    assert(tex->op == nir_texop_txd);
726 
727    /* Use textureSize() to get the width and height of LOD 0 */
728    nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
729 
730    /* Cubemap texture lookups first generate a texture coordinate normalized
731     * to [-1, 1] on the appropiate face. The appropiate face is determined
732     * by which component has largest magnitude and its sign. The texture
733     * coordinate is the quotient of the remaining texture coordinates against
734     * that absolute value of the component of largest magnitude. This
735     * division requires that the computing of the derivative of the texel
736     * coordinate must use the quotient rule. The high level GLSL code is as
737     * follows:
738     *
739     * Step 1: selection
740     *
741     * vec3 abs_p, Q, dQdx, dQdy;
742     * abs_p = abs(ir->coordinate);
743     * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
744     *    Q = ir->coordinate.yzx;
745     *    dQdx = ir->lod_info.grad.dPdx.yzx;
746     *    dQdy = ir->lod_info.grad.dPdy.yzx;
747     * }
748     * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
749     *    Q = ir->coordinate.xzy;
750     *    dQdx = ir->lod_info.grad.dPdx.xzy;
751     *    dQdy = ir->lod_info.grad.dPdy.xzy;
752     * }
753     * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
754     *    Q = ir->coordinate;
755     *    dQdx = ir->lod_info.grad.dPdx;
756     *    dQdy = ir->lod_info.grad.dPdy;
757     * }
758     *
759     * Step 2: use quotient rule to compute derivative. The normalized to
760     * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
761     * only concerned with the magnitudes of the derivatives whose values are
762     * not affected by the sign. We drop the sign from the computation.
763     *
764     * vec2 dx, dy;
765     * float recip;
766     *
767     * recip = 1.0 / Q.z;
768     * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
769     * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
770     *
771     * Step 3: compute LOD. At this point we have the derivatives of the
772     * texture coordinates normalized to [-1,1]. We take the LOD to be
773     *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
774     *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
775     *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
776     *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
777     *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
778     * where L is the dimension of the cubemap. The code is:
779     *
780     * float M, result;
781     * M = max(dot(dx, dx), dot(dy, dy));
782     * L = textureSize(sampler, 0).x;
783     * result = -1.0 + 0.5 * log2(L * L * M);
784     */
785 
786    /* coordinate */
787    nir_def *p =
788       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
789 
790    /* unmodified dPdx, dPdy values */
791    nir_def *dPdx =
792       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
793    nir_def *dPdy =
794       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
795 
796    nir_def *abs_p = nir_fabs(b, p);
797    nir_def *abs_p_x = nir_channel(b, abs_p, 0);
798    nir_def *abs_p_y = nir_channel(b, abs_p, 1);
799    nir_def *abs_p_z = nir_channel(b, abs_p, 2);
800 
801    /* 1. compute selector */
802    nir_def *Q, *dQdx, *dQdy;
803 
804    nir_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
805    nir_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
806 
807    unsigned yzx[3] = { 1, 2, 0 };
808    unsigned xzy[3] = { 0, 2, 1 };
809 
810    Q = nir_bcsel(b, cond_z,
811                  p,
812                  nir_bcsel(b, cond_y,
813                            nir_swizzle(b, p, xzy, 3),
814                            nir_swizzle(b, p, yzx, 3)));
815 
816    dQdx = nir_bcsel(b, cond_z,
817                     dPdx,
818                     nir_bcsel(b, cond_y,
819                               nir_swizzle(b, dPdx, xzy, 3),
820                               nir_swizzle(b, dPdx, yzx, 3)));
821 
822    dQdy = nir_bcsel(b, cond_z,
823                     dPdy,
824                     nir_bcsel(b, cond_y,
825                               nir_swizzle(b, dPdy, xzy, 3),
826                               nir_swizzle(b, dPdy, yzx, 3)));
827 
828    /* 2. quotient rule */
829 
830    /* tmp = Q.xy * recip;
831     * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
832     * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
833     */
834    nir_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
835 
836    nir_def *Q_xy = nir_trim_vector(b, Q, 2);
837    nir_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
838 
839    nir_def *dQdx_xy = nir_trim_vector(b, dQdx, 2);
840    nir_def *dQdx_z = nir_channel(b, dQdx, 2);
841    nir_def *dx =
842       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
843 
844    nir_def *dQdy_xy = nir_trim_vector(b, dQdy, 2);
845    nir_def *dQdy_z = nir_channel(b, dQdy, 2);
846    nir_def *dy =
847       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
848 
849    /* M = max(dot(dx, dx), dot(dy, dy)); */
850    nir_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
851 
852    /* size has textureSize() of LOD 0 */
853    nir_def *L = nir_channel(b, size, 0);
854 
855    /* lod = -1.0 + 0.5 * log2(L * L * M); */
856    nir_def *lod =
857       nir_fadd(b,
858                nir_imm_float(b, -1.0f),
859                nir_fmul(b,
860                         nir_imm_float(b, 0.5f),
861                         nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
862 
863    /* 3. Replace the gradient instruction with an equivalent lod instruction */
864    replace_gradient_with_lod(b, lod, tex);
865 }
866 
867 static void
lower_gradient(nir_builder * b,nir_tex_instr * tex)868 lower_gradient(nir_builder *b, nir_tex_instr *tex)
869 {
870    /* Cubes are more complicated and have their own function */
871    if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
872       lower_gradient_cube_map(b, tex);
873       return;
874    }
875 
876    assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
877    assert(tex->op == nir_texop_txd);
878 
879    /* Use textureSize() to get the width and height of LOD 0 */
880    unsigned component_mask;
881    switch (tex->sampler_dim) {
882    case GLSL_SAMPLER_DIM_3D:
883       component_mask = 7;
884       break;
885    case GLSL_SAMPLER_DIM_1D:
886       component_mask = 1;
887       break;
888    default:
889       component_mask = 3;
890       break;
891    }
892 
893    nir_def *size =
894       nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
895                    component_mask);
896 
897    /* Scale the gradients by width and height.  Effectively, the incoming
898     * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
899     * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
900     */
901    nir_def *ddx =
902       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
903    nir_def *ddy =
904       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
905 
906    nir_def *dPdx = nir_fmul(b, ddx, size);
907    nir_def *dPdy = nir_fmul(b, ddy, size);
908 
909    nir_def *rho;
910    if (dPdx->num_components == 1) {
911       rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
912    } else {
913       rho = nir_fmax(b,
914                      nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
915                      nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
916    }
917 
918    /* lod = log2(rho).  We're ignoring GL state biases for now. */
919    nir_def *lod = nir_flog2(b, rho);
920 
921    /* Replace the gradient instruction with an equivalent lod instruction */
922    replace_gradient_with_lod(b, lod, tex);
923 }
924 
/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord))
 *
 * Replaces an implicit-derivative tex with an explicit-derivative txd whose
 * gradients are computed with fddx/fddy.  Returns the new txd instruction;
 * the original tex is removed and all its uses are redirected.
 */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   /* Two extra sources for the ddx/ddy appended below. */
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;
   txd->is_array = tex->is_array;
   txd->is_shadow = tex->is_shadow;
   txd->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      txd->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);
   nir_def *coord = tex->src[coord_idx].src.ssa;
   /* don't take the derivative of the array index */
   if (tex->is_array)
      coord = nir_channels(b, coord, nir_component_mask(coord->num_components - 1));
   nir_def *dfdx = nir_fddx(b, coord);
   nir_def *dfdy = nir_fddy(b, coord);
   txd->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_ddx, dfdx);
   txd->src[tex->num_srcs + 1] = nir_tex_src_for_ssa(nir_tex_src_ddy, dfdy);

   nir_def_init(&txd->instr, &txd->def,
                tex->def.num_components,
                tex->def.bit_size);
   nir_builder_instr_insert(b, &txd->instr);
   /* Point all users at the new txd, then drop the original tex. */
   nir_def_rewrite_uses(&tex->def, &txd->def);
   nir_instr_remove(&tex->instr);
   return txd;
}
966 
967 /* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
968 static nir_tex_instr *
lower_txb_to_txl(nir_builder * b,nir_tex_instr * tex)969 lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
970 {
971    b->cursor = nir_after_instr(&tex->instr);
972    nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);
973 
974    txl->op = nir_texop_txl;
975    txl->sampler_dim = tex->sampler_dim;
976    txl->dest_type = tex->dest_type;
977    txl->coord_components = tex->coord_components;
978    txl->texture_index = tex->texture_index;
979    txl->sampler_index = tex->sampler_index;
980    txl->is_array = tex->is_array;
981    txl->is_shadow = tex->is_shadow;
982    txl->is_new_style_shadow = tex->is_new_style_shadow;
983 
984    /* reuse all but bias src */
985    for (int i = 0; i < tex->num_srcs; i++) {
986       if (tex->src[i].src_type != nir_tex_src_bias) {
987          txl->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
988          txl->src[i].src_type = tex->src[i].src_type;
989       }
990    }
991    nir_def *lod = nir_get_texture_lod(b, tex);
992 
993    int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
994    assert(bias_idx >= 0);
995    lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa);
996    txl->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_lod, lod);
997 
998    nir_def_init(&txl->instr, &txl->def,
999                 tex->def.num_components,
1000                 tex->def.bit_size);
1001    nir_builder_instr_insert(b, &txl->instr);
1002    nir_def_rewrite_uses(&tex->def, &txl->def);
1003    nir_instr_remove(&tex->instr);
1004    return txl;
1005 }
1006 
/* Clamps the coordinate components selected by sat_mask, emulating
 * clamp-style wrap modes (see the file header).  Implicit-LOD ops (tex/txb)
 * are first rewritten to explicit-derivative/LOD forms so that clamping the
 * coordinate cannot perturb the implicit derivative computation.  Returns
 * the (possibly replaced) texture instruction.
 */
static nir_tex_instr *
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   if (tex->op == nir_texop_tex)
      tex = lower_tex_to_txd(b, tex);
   else if (tex->op == nir_texop_txb)
      tex = lower_txb_to_txl(b, tex);

   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *src =
         tex->src[coord_index].src.ssa;

      /* split src into components: */
      nir_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_src_rewrite(&tex->src[coord_index].src, src);
   }
   return tex;
}
1057 
1058 static nir_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)1059 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
1060 {
1061    nir_const_value v[4];
1062 
1063    memset(&v, 0, sizeof(v));
1064 
1065    if (swizzle_val == 4) {
1066       v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
1067    } else {
1068       assert(swizzle_val == 5);
1069       if (type == nir_type_float32)
1070          v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
1071       else
1072          v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
1073    }
1074 
1075    return nir_build_imm(b, 4, 32, v);
1076 }
1077 
1078 static void
swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)1079 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
1080 {
1081    b->cursor = nir_after_instr(&tex->instr);
1082 
1083    assert(nir_tex_instr_dest_size(tex) == 4);
1084    unsigned swiz[4] = { 2, 3, 1, 0 };
1085    nir_def *swizzled = nir_swizzle(b, &tex->def, swiz, 4);
1086 
1087    nir_def_rewrite_uses_after(&tex->def, swizzled,
1088                               swizzled->parent_instr);
1089 }
1090 
1091 static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])1092 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
1093 {
1094    b->cursor = nir_after_instr(&tex->instr);
1095 
1096    nir_def *swizzled;
1097    if (tex->op == nir_texop_tg4) {
1098       if (swizzle[tex->component] < 4) {
1099          /* This one's easy */
1100          tex->component = swizzle[tex->component];
1101          return;
1102       } else {
1103          swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
1104       }
1105    } else {
1106       assert(nir_tex_instr_dest_size(tex) == 4);
1107       if (swizzle[0] < 4 && swizzle[1] < 4 &&
1108           swizzle[2] < 4 && swizzle[3] < 4) {
1109          unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
1110          /* We have no 0s or 1s, just emit a swizzling MOV */
1111          swizzled = nir_swizzle(b, &tex->def, swiz, 4);
1112       } else {
1113          nir_scalar srcs[4];
1114          for (unsigned i = 0; i < 4; i++) {
1115             if (swizzle[i] < 4) {
1116                srcs[i] = nir_get_scalar(&tex->def, swizzle[i]);
1117             } else {
1118                srcs[i] = nir_get_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
1119             }
1120          }
1121          swizzled = nir_vec_scalars(b, srcs, 4);
1122       }
1123    }
1124 
1125    nir_def_rewrite_uses_after(&tex->def, swizzled,
1126                               swizzled->parent_instr);
1127 }
1128 
1129 static void
linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)1130 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
1131 {
1132    assert(nir_tex_instr_dest_size(tex) == 4);
1133    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
1134 
1135    b->cursor = nir_after_instr(&tex->instr);
1136 
1137    nir_def *rgb =
1138       nir_format_srgb_to_linear(b, nir_trim_vector(b, &tex->def, 3));
1139 
1140    /* alpha is untouched: */
1141    nir_def *result = nir_vec4(b,
1142                               nir_channel(b, rgb, 0),
1143                               nir_channel(b, rgb, 1),
1144                               nir_channel(b, rgb, 2),
1145                               nir_channel(b, &tex->def, 3));
1146 
1147    nir_def_rewrite_uses_after(&tex->def, result,
1148                               result->parent_instr);
1149 }
1150 
/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 *
 * Returns true if the instruction's result was repacked, false if the
 * driver callback requested no packing for it.
 */
static bool
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_def *color = &tex->def;

   b->cursor = nir_after_instr(&tex->instr);

   /* The driver callback decides per-instruction how the result is packed. */
   assert(options->lower_tex_packing_cb);
   enum nir_lower_tex_packing packing =
      options->lower_tex_packing_cb(tex, options->lower_tex_packing_data);

   switch (packing) {
   case nir_lower_tex_packing_none:
      return false;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = { 16, 16, 16, 16 };

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            /* Single-channel shadow result: unpack the low f16 half. */
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            /* Channel 0 holds both f16 components packed together. */
            nir_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            /* Channels 0 and 1 hold rg and ba as packed f16 pairs. */
            nir_def *rg = nir_channel(b, color, 0);
            nir_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      /* One unorm4x8 word in channel 0 expands to the full float vec4. */
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_def_rewrite_uses_after(&tex->def, color,
                              color->parent_instr);
   return true;
}
1232 
1233 static bool
sampler_index_lt(nir_tex_instr * tex,unsigned max)1234 sampler_index_lt(nir_tex_instr *tex, unsigned max)
1235 {
1236    assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
1237 
1238    unsigned sampler_index = tex->sampler_index;
1239 
1240    int sampler_offset_idx =
1241       nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
1242    if (sampler_offset_idx >= 0) {
1243       if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
1244          return false;
1245 
1246       sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
1247    }
1248 
1249    return sampler_index < max;
1250 }
1251 
1252 static bool
lower_tg4_offsets(nir_builder * b,nir_tex_instr * tex)1253 lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
1254 {
1255    assert(tex->op == nir_texop_tg4);
1256    assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
1257    assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);
1258 
1259    b->cursor = nir_after_instr(&tex->instr);
1260 
1261    nir_scalar dest[5] = { 0 };
1262    nir_def *residency = NULL;
1263    for (unsigned i = 0; i < 4; ++i) {
1264       nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
1265       tex_copy->op = tex->op;
1266       tex_copy->coord_components = tex->coord_components;
1267       tex_copy->sampler_dim = tex->sampler_dim;
1268       tex_copy->is_array = tex->is_array;
1269       tex_copy->is_shadow = tex->is_shadow;
1270       tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
1271       tex_copy->is_sparse = tex->is_sparse;
1272       tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod;
1273       tex_copy->component = tex->component;
1274       tex_copy->dest_type = tex->dest_type;
1275       tex_copy->texture_index = tex->texture_index;
1276       tex_copy->sampler_index = tex->sampler_index;
1277       tex_copy->backend_flags = tex->backend_flags;
1278 
1279       for (unsigned j = 0; j < tex->num_srcs; ++j) {
1280          tex_copy->src[j].src = nir_src_for_ssa(tex->src[j].src.ssa);
1281          tex_copy->src[j].src_type = tex->src[j].src_type;
1282       }
1283 
1284       nir_def *offset = nir_imm_ivec2(b, tex->tg4_offsets[i][0],
1285                                       tex->tg4_offsets[i][1]);
1286       nir_tex_src src = nir_tex_src_for_ssa(nir_tex_src_offset, offset);
1287       tex_copy->src[tex_copy->num_srcs - 1] = src;
1288 
1289       nir_def_init(&tex_copy->instr, &tex_copy->def,
1290                    nir_tex_instr_dest_size(tex), 32);
1291 
1292       nir_builder_instr_insert(b, &tex_copy->instr);
1293 
1294       dest[i] = nir_get_scalar(&tex_copy->def, 3);
1295       if (tex->is_sparse) {
1296          nir_def *code = nir_channel(b, &tex_copy->def, 4);
1297          if (residency)
1298             residency = nir_sparse_residency_code_and(b, residency, code);
1299          else
1300             residency = code;
1301       }
1302    }
1303    dest[4] = nir_get_scalar(residency, 0);
1304 
1305    nir_def *res = nir_vec_scalars(b, dest, tex->def.num_components);
1306    nir_def_rewrite_uses(&tex->def, res);
1307    nir_instr_remove(&tex->instr);
1308 
1309    return true;
1310 }
1311 
/* Lowers a txs with a (possibly non-constant) non-zero LOD source into a
 * 0-LOD txs followed by ALU minification.  Returns true if the instruction
 * was changed, false if it already queried LOD 0.
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   /* Nothing to do without an LOD source, or with a constant LOD of 0. */
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_def *lod = tex->src[lod_idx].src.ssa;

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_src_rewrite(&tex->src[lod_idx].src, nir_imm_int(b, 0));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_def *minified = nir_imin(b, &tex->def,
                                nir_imax(b, nir_ushr(b, &tex->def, lod),
                                         nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      /* Last component comes straight from the 0-LOD txs result. */
      comp[dest_size - 1] = nir_channel(b, &tex->def, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_def_rewrite_uses_after(&tex->def, minified,
                              minified->parent_instr);
   return true;
}
1356 
/* Lowers txs on a cube array to a 2D txs: the layer count (6 faces per
 * cube) is divided by 6 to recover the number of cubes.
 */
static void
nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->def.num_components == 3);
   nir_def *size = &tex->def;
   /* NOTE(review): channel 1 is used for both the width and height slots;
    * presumably that relies on cube faces being square (width == height) —
    * confirm, as channel 0 would be the more obvious width source.
    */
   size = nir_vec3(b, nir_channel(b, size, 1),
                   nir_channel(b, size, 1),
                   nir_idiv(b, nir_channel(b, size, 2),
                            nir_imm_int(b, 6)));

   nir_def_rewrite_uses_after(&tex->def, size, size->parent_instr);
}
1374 
/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping. Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel. When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate number 8 in FMASK, which
 * means the physical sample index is unknown. We can map 8 to any valid
 * sample index, and extracting only 3 bits will map it to 0, which works
 * with all MSAA modes.
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   /* Lower away any offset source first (see lower_offset earlier in this
    * file).
    */
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   /* One source fewer: the ms_index source is dropped from the FMASK fetch. */
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);

   /* Copy every source except the multisample index. */
   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index. */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   /* sample_index = ubfe(fmask, sample_index * 4, 3) — shift by 2 is the
    * multiply by 4; only 3 bits are extracted (see comment above).
    */
   nir_def *new_sample = nir_ubfe(b, &fmask_fetch->def,
                                  nir_ishl_imm(b, sample.ssa, 2), nir_imm_int(b, 3));

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_src_rewrite(&tex->src[ms_index].src, new_sample);
}
1434 
/* Lowers samples_identical to an AMD fragment-mask fetch compared with 0. */
static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   /* Clone the instruction and retarget the clone to fetch the fragment
    * mask (FMASK) for the same coordinates.
    */
   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* samples_identical := (fmask == 0); an all-zero FMASK maps every sample
    * to physical sample 0, i.e. all samples share the same data.
    */
   nir_def_rewrite_uses(&tex->def, nir_ieq_imm(b, &fmask_fetch->def, 0));
   nir_instr_remove_v(&tex->instr);
}
1449 
1450 static void
nir_lower_lod_zero_width(nir_builder * b,nir_tex_instr * tex)1451 nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex)
1452 {
1453    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1454    assert(coord_index >= 0);
1455 
1456    b->cursor = nir_after_instr(&tex->instr);
1457 
1458    nir_def *is_zero = nir_imm_true(b);
1459    for (unsigned i = 0; i < tex->coord_components; i++) {
1460       nir_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i);
1461 
1462       /* Compute the sum of the absolute values of derivatives. */
1463       nir_def *dfdx = nir_fddx(b, coord);
1464       nir_def *dfdy = nir_fddy(b, coord);
1465       nir_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy));
1466 
1467       /* Check if the sum is 0. */
1468       is_zero = nir_iand(b, is_zero, nir_feq_imm(b, fwidth, 0.0));
1469    }
1470 
1471    /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. */
1472    nir_def *adjusted_lod =
1473       nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX),
1474                 nir_channel(b, &tex->def, 1));
1475 
1476    nir_def *def =
1477       nir_vec2(b, nir_channel(b, &tex->def, 0), adjusted_lod);
1478 
1479    nir_def_rewrite_uses_after(&tex->def, def, def->parent_instr);
1480 }
1481 
1482 static bool
lower_index_to_offset(nir_builder * b,nir_tex_instr * tex)1483 lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
1484 {
1485    bool progress = false;
1486    b->cursor = nir_before_instr(&tex->instr);
1487 
1488    for (unsigned i = 0; i < tex->num_srcs; i++) {
1489       unsigned *index;
1490       switch (tex->src[i].src_type) {
1491       case nir_tex_src_texture_offset:
1492          index = &tex->texture_index;
1493          break;
1494       case nir_tex_src_sampler_offset:
1495          index = &tex->sampler_index;
1496          break;
1497       default:
1498          continue;
1499       }
1500 
1501       /* If there's no base index, there's nothing to lower */
1502       if ((*index) == 0)
1503          continue;
1504 
1505       nir_def *sum = nir_iadd_imm(b, tex->src[i].src.ssa, *index);
1506       nir_src_rewrite(&tex->src[i].src, sum);
1507       *index = 0;
1508       progress = true;
1509    }
1510 
1511    return progress;
1512 }
1513 
1514 static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1515 nir_lower_tex_block(nir_block *block, nir_builder *b,
1516                     const nir_lower_tex_options *options,
1517                     const struct nir_shader_compiler_options *compiler_options)
1518 {
1519    bool progress = false;
1520 
1521    nir_foreach_instr_safe(instr, block) {
1522       if (instr->type != nir_instr_type_tex)
1523          continue;
1524 
1525       nir_tex_instr *tex = nir_instr_as_tex(instr);
1526       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1527 
1528       /* mask of src coords to saturate (clamp): */
1529       unsigned sat_mask = 0;
1530       /* ignore saturate for txf ops: these don't use samplers and can't GL_CLAMP */
1531       if (nir_tex_instr_need_sampler(tex)) {
1532          if ((1 << tex->sampler_index) & options->saturate_r)
1533             sat_mask |= (1 << 2); /* .z */
1534          if ((1 << tex->sampler_index) & options->saturate_t)
1535             sat_mask |= (1 << 1); /* .y */
1536          if ((1 << tex->sampler_index) & options->saturate_s)
1537             sat_mask |= (1 << 0); /* .x */
1538       }
1539 
1540       if (options->lower_index_to_offset)
1541          progress |= lower_index_to_offset(b, tex);
1542 
1543       /* If we are clamping any coords, we must lower projector first
1544        * as clamping happens *after* projection:
1545        */
1546       if (lower_txp || sat_mask ||
1547           (options->lower_txp_array && tex->is_array)) {
1548          progress |= project_src(b, tex);
1549       }
1550 
1551       if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1552           (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1553           (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1554            options->lower_rect_offset) ||
1555           (options->lower_offset_filter &&
1556            options->lower_offset_filter(instr, options->callback_data))) {
1557          progress = lower_offset(b, tex) || progress;
1558       }
1559 
1560       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1561           tex->op != nir_texop_txf) {
1562          if (nir_tex_instr_is_query(tex))
1563             tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1564          else if (compiler_options->has_texture_scaling)
1565             lower_rect_tex_scale(b, tex);
1566          else
1567             lower_rect(b, tex);
1568 
1569          progress = true;
1570       }
1571 
1572       if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D &&
1573           (options->lower_1d || (tex->is_shadow && options->lower_1d_shadow))) {
1574          lower_1d(b, tex);
1575          progress = true;
1576       }
1577 
1578       unsigned texture_index = tex->texture_index;
1579       uint32_t texture_mask = 1u << texture_index;
1580       int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1581       if (tex_index >= 0) {
1582          nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
1583          nir_variable *var = nir_deref_instr_get_variable(deref);
1584          texture_index = var ? var->data.binding : 0;
1585          texture_mask = var && texture_index < 32 ? (1u << texture_index) : 0u;
1586       }
1587 
1588       if (texture_mask & options->lower_y_uv_external) {
1589          lower_y_uv_external(b, tex, options, texture_index);
1590          progress = true;
1591       }
1592 
1593       if (texture_mask & options->lower_y_vu_external) {
1594          lower_y_vu_external(b, tex, options, texture_index);
1595          progress = true;
1596       }
1597 
1598       if (texture_mask & options->lower_y_u_v_external) {
1599          lower_y_u_v_external(b, tex, options, texture_index);
1600          progress = true;
1601       }
1602 
1603       if (texture_mask & options->lower_yx_xuxv_external) {
1604          lower_yx_xuxv_external(b, tex, options, texture_index);
1605          progress = true;
1606       }
1607 
1608       if (texture_mask & options->lower_yx_xvxu_external) {
1609          lower_yx_xvxu_external(b, tex, options, texture_index);
1610          progress = true;
1611       }
1612 
1613       if (texture_mask & options->lower_xy_uxvx_external) {
1614          lower_xy_uxvx_external(b, tex, options, texture_index);
1615          progress = true;
1616       }
1617 
1618       if (texture_mask & options->lower_xy_vxux_external) {
1619          lower_xy_vxux_external(b, tex, options, texture_index);
1620          progress = true;
1621       }
1622 
1623       if (texture_mask & options->lower_ayuv_external) {
1624          lower_ayuv_external(b, tex, options, texture_index);
1625          progress = true;
1626       }
1627 
1628       if (texture_mask & options->lower_xyuv_external) {
1629          lower_xyuv_external(b, tex, options, texture_index);
1630          progress = true;
1631       }
1632 
1633       if (texture_mask & options->lower_yuv_external) {
1634          lower_yuv_external(b, tex, options, texture_index);
1635          progress = true;
1636       }
1637 
1638       if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
1639          lower_yu_yv_external(b, tex, options, texture_index);
1640          progress = true;
1641       }
1642 
1643       if ((1 << tex->texture_index) & options->lower_yv_yu_external) {
1644          lower_yv_yu_external(b, tex, options, texture_index);
1645          progress = true;
1646       }
1647 
1648       if ((1 << tex->texture_index) & options->lower_y41x_external) {
1649          lower_y41x_external(b, tex, options, texture_index);
1650          progress = true;
1651       }
1652 
1653       if (sat_mask) {
1654          tex = saturate_src(b, tex, sat_mask);
1655          progress = true;
1656       }
1657 
1658       if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1659          swizzle_tg4_broadcom(b, tex);
1660          progress = true;
1661       }
1662 
1663       if ((texture_mask & options->swizzle_result) &&
1664           !nir_tex_instr_is_query(tex) &&
1665           !(tex->is_shadow && tex->is_new_style_shadow)) {
1666          swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1667          progress = true;
1668       }
1669 
1670       /* should be after swizzle so we know which channels are rgb: */
1671       if ((texture_mask & options->lower_srgb) &&
1672           !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1673          linearize_srgb_result(b, tex);
1674          progress = true;
1675       }
1676 
1677       const bool has_min_lod =
1678          nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1679       const bool has_offset =
1680          nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1681 
1682       if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1683           options->lower_txb_shadow_clamp) {
1684          lower_implicit_lod(b, tex);
1685          progress = true;
1686       }
1687 
1688       if (options->lower_tex_packing_cb &&
1689           tex->op != nir_texop_txs &&
1690           tex->op != nir_texop_query_levels &&
1691           tex->op != nir_texop_texture_samples) {
1692          progress |= lower_tex_packing(b, tex, options);
1693       }
1694 
1695       if (tex->op == nir_texop_txd &&
1696           (options->lower_txd ||
1697            (options->lower_txd_clamp && has_min_lod) ||
1698            (options->lower_txd_shadow && tex->is_shadow) ||
1699            (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1700            (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1701            (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1702             nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1703            (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1704             has_min_lod && !sampler_index_lt(tex, 16)) ||
1705            (options->lower_txd_cube_map &&
1706             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1707            (options->lower_txd_3d &&
1708             tex->sampler_dim == GLSL_SAMPLER_DIM_3D) ||
1709            (options->lower_txd_array && tex->is_array))) {
1710          lower_gradient(b, tex);
1711          progress = true;
1712          continue;
1713       }
1714 
1715       /* TXF, TXS and TXL require a LOD but not everything we implement using those
1716        * three opcodes provides one.  Provide a default LOD of 0.
1717        */
1718       if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1719           (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1720            tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
1721          b->cursor = nir_before_instr(&tex->instr);
1722          nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(b, 0));
1723          progress = true;
1724          continue;
1725       }
1726 
1727       /* Only fragment and compute (in some cases) support implicit
1728        * derivatives.  Lower those opcodes which use implicit derivatives to
1729        * use an explicit LOD of 0.
1730        * But don't touch RECT samplers because they don't have mips.
1731        */
1732       if (options->lower_invalid_implicit_lod &&
1733           nir_tex_instr_has_implicit_derivative(tex) &&
1734           tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
1735           !nir_shader_supports_implicit_lod(b->shader)) {
1736          lower_zero_lod(b, tex);
1737          progress = true;
1738       }
1739 
1740       if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1741          progress |= nir_lower_txs_lod(b, tex);
1742          continue;
1743       }
1744 
1745       if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
1746           tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
1747          nir_lower_txs_cube_array(b, tex);
1748          progress = true;
1749          continue;
1750       }
1751 
1752       /* has to happen after all the other lowerings as the original tg4 gets
1753        * replaced by 4 tg4 instructions.
1754        */
1755       if (tex->op == nir_texop_tg4 &&
1756           nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1757           options->lower_tg4_offsets) {
1758          progress |= lower_tg4_offsets(b, tex);
1759          continue;
1760       }
1761 
1762       if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
1763          nir_lower_ms_txf_to_fragment_fetch(b, tex);
1764          progress = true;
1765          continue;
1766       }
1767 
1768       if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
1769          nir_lower_samples_identical_to_fragment_fetch(b, tex);
1770          progress = true;
1771          continue;
1772       }
1773 
1774       if (options->lower_lod_zero_width && tex->op == nir_texop_lod) {
1775          nir_lower_lod_zero_width(b, tex);
1776          progress = true;
1777          continue;
1778       }
1779    }
1780 
1781    return progress;
1782 }
1783 
1784 static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1785 nir_lower_tex_impl(nir_function_impl *impl,
1786                    const nir_lower_tex_options *options,
1787                    const struct nir_shader_compiler_options *compiler_options)
1788 {
1789    bool progress = false;
1790    nir_builder builder = nir_builder_create(impl);
1791 
1792    nir_foreach_block(block, impl) {
1793       progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
1794    }
1795 
1796    nir_metadata_preserve(impl, nir_metadata_block_index |
1797                                   nir_metadata_dominance);
1798    return progress;
1799 }
1800 
1801 bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1802 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1803 {
1804    bool progress = false;
1805 
1806    /* lower_tg4_offsets injects new tg4 instructions that won't be lowered
1807     * if lower_tg4_broadcom_swizzle is also requested so when both are set
1808     * we want to run lower_tg4_offsets in a separate pass first.
1809     */
1810    if (options->lower_tg4_offsets && options->lower_tg4_broadcom_swizzle) {
1811       nir_lower_tex_options _options = {
1812          .lower_tg4_offsets = true,
1813       };
1814       progress = nir_lower_tex(shader, &_options);
1815    }
1816 
1817    nir_foreach_function_impl(impl, shader) {
1818       progress |= nir_lower_tex_impl(impl, options, shader->options);
1819    }
1820 
1821    return progress;
1822 }
1823