• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /*
25  * This lowering pass supports (as configured via nir_lower_tex_options)
26  * various texture related conversions:
27  *   + texture projector lowering: converts the coordinate division for
28  *     texture projection to be done in ALU instructions instead of
29  *     asking the texture operation to do so.
30  *   + lowering RECT: converts the un-normalized RECT texture coordinates
31  *     to normalized coordinates with txs plus ALU instructions
32  *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33  *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
34  *     Note that this automatically triggers texture projector lowering if
35  *     needed, since clamping must happen after projector lowering.
36  */
37 
38 #include "nir.h"
39 #include "nir_builder.h"
40 #include "nir_builtin_builder.h"
41 #include "nir_format_convert.h"
42 
/* Immediate storage for a 3x4 matrix: three columns of four
 * nir_const_values each.  Used below for the YUV->RGB conversion
 * coefficients consumed by convert_yuv_to_rgb().
 */
typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;

/* YUV->RGB color-space-conversion matrices, one per encoding standard
 * (ITU-R BT.601, BT.709 and BT.2020).  v[0]/v[1]/v[2] are the columns
 * that scale the Y, U and V samples respectively; the unwritten fourth
 * element of each column is zero-initialized, which lets alpha pass
 * through the ffma chain in convert_yuv_to_rgb() unchanged.
 */
static const nir_const_value_3_4 bt601_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt709_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt2020_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67867411f }, { .f32 = -0.65042432f }, { .f32 = 0.0f        } },
} };

/* Per-standard constant offset vectors added after the matrix multiply
 * (presumably precomputed from the matrix and the fixed-range Y/CbCr
 * biases -- TODO confirm against the derivation).
 */
static const float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
72 
/* Lowers a texture projector: divides the coordinate (and shadow
 * comparator) srcs by the projector src using ALU instructions, then
 * removes the projector src from the instruction.
 *
 * Returns true if the instruction had a projector and was modified.
 */
static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   /* Compute 1/proj once and multiply, instead of dividing each src. */
   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         /* Only coords and the comparator are subject to projection. */
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
   return true;
}
136 
/* Lowers a texel-offset src by folding it into the coordinate: the
 * offset is converted to coordinate space (scaled by 1/size for
 * normalized float coords) and added, then the offset src is removed.
 *
 * Returns true if the instruction had an offset src and was modified.
 */
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* RECT coords are unnormalized, so the texel offset applies
          * directly.
          */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         /* Normalized coords: scale the texel offset by 1/texture-size
          * before adding it.
          */
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      /* Integer coordinates: a plain integer add suffices. */
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, offset_coord, 1),
                                    nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
192 
193 static void
lower_rect(nir_builder * b,nir_tex_instr * tex)194 lower_rect(nir_builder *b, nir_tex_instr *tex)
195 {
196    /* Set the sampler_dim to 2D here so that get_texture_size picks up the
197     * right dimensionality.
198     */
199    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
200 
201    nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
202    nir_ssa_def *scale = nir_frcp(b, txs);
203    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
204 
205    if (coord_index != -1) {
206       nir_ssa_def *coords =
207          nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
208       nir_instr_rewrite_src(&tex->instr,
209                             &tex->src[coord_index].src,
210                             nir_src_for_ssa(nir_fmul(b, coords, scale)));
211    }
212 }
213 
214 static void
lower_rect_tex_scale(nir_builder * b,nir_tex_instr * tex)215 lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
216 {
217    b->cursor = nir_before_instr(&tex->instr);
218 
219    nir_ssa_def *idx = nir_imm_int(b, tex->texture_index);
220    nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx);
221    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
222 
223    if (coord_index != -1) {
224       nir_ssa_def *coords =
225          nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
226       nir_instr_rewrite_src(&tex->instr,
227                             &tex->src[coord_index].src,
228                             nir_src_for_ssa(nir_fmul(b, coords, scale)));
229    }
230 }
231 
232 static void
lower_lod(nir_builder * b,nir_tex_instr * tex,nir_ssa_def * lod)233 lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod)
234 {
235    assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
236    assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
237    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
238    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
239 
240    int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
241    if (bias_idx >= 0) {
242       /* If we have a bias, add it in */
243       lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
244       nir_tex_instr_remove_src(tex, bias_idx);
245    }
246 
247    int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
248    if (min_lod_idx >= 0) {
249       /* If we have a minimum LOD, clamp LOD accordingly */
250       lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
251       nir_tex_instr_remove_src(tex, min_lod_idx);
252    }
253 
254    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
255    tex->op = nir_texop_txl;
256 }
257 
258 static void
lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)259 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
260 {
261    b->cursor = nir_before_instr(&tex->instr);
262    lower_lod(b, tex, nir_get_texture_lod(b, tex));
263 }
264 
265 static void
lower_zero_lod(nir_builder * b,nir_tex_instr * tex)266 lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
267 {
268    b->cursor = nir_before_instr(&tex->instr);
269 
270    if (tex->op == nir_texop_lod) {
271       nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0));
272       nir_instr_remove(&tex->instr);
273       return;
274    }
275 
276    lower_lod(b, tex, nir_imm_int(b, 0));
277 }
278 
/* Emits a copy of the given 2D float nir_texop_tex with an additional
 * nir_tex_src_plane src selecting which plane of a multi-planar (YUV)
 * texture to sample.  Returns the new instruction's 4-component result,
 * multiplied by options->scale_factors[] for this texture if non-zero.
 */
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   /* Clone all of the original srcs, then append the plane selector. */
   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest);
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
         nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If a scale factor is configured for this texture, return a scaled
    * value.
    */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}
317 
/* Emits the YUV->RGB color-space conversion for the given y/u/v samples
 * and rewrites all uses of the tex destination with the converted value.
 * The matrix and offsets are chosen per texture from the options
 * bitmasks (BT.709 or BT.2020 external; BT.601 otherwise).  The alpha
 * value 'a' is carried through unchanged in the .w component.
 */
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{

   const float *offset_vals;
   const nir_const_value_3_4 *m;
   /* A texture may be flagged as at most one of BT.709/BT.2020. */
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->bt709_external & (1u << texture_index)) {
      m = &bt709_csc_coeffs;
      offset_vals = bt709_csc_offsets;
   } else if (options->bt2020_external & (1u << texture_index)) {
      m = &bt2020_csc_coeffs;
      offset_vals = bt2020_csc_offsets;
   } else {
      m = &bt601_csc_coeffs;
      offset_vals = bt601_csc_offsets;
   }

   unsigned bit_size = nir_dest_bit_size(tex->dest);

   /* Constant offset vector with the caller's alpha in .w, so the ffma
    * chain below leaves alpha in that component.
    */
   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   /* Matrix columns scaling y, u and v respectively; their .w lanes are
    * zero so alpha is untouched by the multiplies.
    */
   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   /* result = m0*y + m1*u + m2*v + offset */
   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
}
360 
361 static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)362 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
363                     const nir_lower_tex_options *options,
364                     unsigned texture_index)
365 {
366    b->cursor = nir_after_instr(&tex->instr);
367 
368    nir_ssa_def *y = sample_plane(b, tex, 0, options);
369    nir_ssa_def *uv = sample_plane(b, tex, 1, options);
370 
371    convert_yuv_to_rgb(b, tex,
372                       nir_channel(b, y, 0),
373                       nir_channel(b, uv, 0),
374                       nir_channel(b, uv, 1),
375                       nir_imm_float(b, 1.0f),
376                       options,
377                       texture_index);
378 }
379 
380 static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)381 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
382                      const nir_lower_tex_options *options,
383                      unsigned texture_index)
384 {
385    b->cursor = nir_after_instr(&tex->instr);
386 
387    nir_ssa_def *y = sample_plane(b, tex, 0, options);
388    nir_ssa_def *u = sample_plane(b, tex, 1, options);
389    nir_ssa_def *v = sample_plane(b, tex, 2, options);
390 
391    convert_yuv_to_rgb(b, tex,
392                       nir_channel(b, y, 0),
393                       nir_channel(b, u, 0),
394                       nir_channel(b, v, 0),
395                       nir_imm_float(b, 1.0f),
396                       options,
397                       texture_index);
398 }
399 
400 static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)401 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
402                        const nir_lower_tex_options *options,
403                        unsigned texture_index)
404 {
405    b->cursor = nir_after_instr(&tex->instr);
406 
407    nir_ssa_def *y = sample_plane(b, tex, 0, options);
408    nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
409 
410    convert_yuv_to_rgb(b, tex,
411                       nir_channel(b, y, 0),
412                       nir_channel(b, xuxv, 1),
413                       nir_channel(b, xuxv, 3),
414                       nir_imm_float(b, 1.0f),
415                       options,
416                       texture_index);
417 }
418 
419 static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)420 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
421                        const nir_lower_tex_options *options,
422                        unsigned texture_index)
423 {
424   b->cursor = nir_after_instr(&tex->instr);
425 
426   nir_ssa_def *y = sample_plane(b, tex, 0, options);
427   nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
428 
429   convert_yuv_to_rgb(b, tex,
430                      nir_channel(b, y, 1),
431                      nir_channel(b, uxvx, 0),
432                      nir_channel(b, uxvx, 2),
433                      nir_imm_float(b, 1.0f),
434                      options,
435                      texture_index);
436 }
437 
438 static void
lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)439 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
440                     const nir_lower_tex_options *options,
441                     unsigned texture_index)
442 {
443   b->cursor = nir_after_instr(&tex->instr);
444 
445   nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
446 
447   convert_yuv_to_rgb(b, tex,
448                      nir_channel(b, ayuv, 2),
449                      nir_channel(b, ayuv, 1),
450                      nir_channel(b, ayuv, 0),
451                      nir_channel(b, ayuv, 3),
452                      options,
453                      texture_index);
454 }
455 
456 static void
lower_y41x_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)457 lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
458                     const nir_lower_tex_options *options,
459                     unsigned texture_index)
460 {
461   b->cursor = nir_after_instr(&tex->instr);
462 
463   nir_ssa_def *y41x = sample_plane(b, tex, 0, options);
464 
465   convert_yuv_to_rgb(b, tex,
466                      nir_channel(b, y41x, 1),
467                      nir_channel(b, y41x, 0),
468                      nir_channel(b, y41x, 2),
469                      nir_channel(b, y41x, 3),
470                      options,
471                      texture_index);
472 }
473 
474 static void
lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)475 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
476                     const nir_lower_tex_options *options,
477                     unsigned texture_index)
478 {
479   b->cursor = nir_after_instr(&tex->instr);
480 
481   nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
482 
483   convert_yuv_to_rgb(b, tex,
484                      nir_channel(b, xyuv, 2),
485                      nir_channel(b, xyuv, 1),
486                      nir_channel(b, xyuv, 0),
487                      nir_imm_float(b, 1.0f),
488                      options,
489                      texture_index);
490 }
491 
492 static void
lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)493 lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
494                    const nir_lower_tex_options *options,
495                    unsigned texture_index)
496 {
497   b->cursor = nir_after_instr(&tex->instr);
498 
499   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
500 
501   convert_yuv_to_rgb(b, tex,
502                      nir_channel(b, yuv, 0),
503                      nir_channel(b, yuv, 1),
504                      nir_channel(b, yuv, 2),
505                      nir_imm_float(b, 1.0f),
506                      options,
507                      texture_index);
508 }
509 
510 static void
lower_yu_yv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)511 lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
512                      const nir_lower_tex_options *options,
513                      unsigned texture_index)
514 {
515   b->cursor = nir_after_instr(&tex->instr);
516 
517   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
518 
519   convert_yuv_to_rgb(b, tex,
520                      nir_channel(b, yuv, 1),
521                      nir_channel(b, yuv, 2),
522                      nir_channel(b, yuv, 0),
523                      nir_imm_float(b, 1.0f),
524                      options,
525                      texture_index);
526 }
527 
528 /*
529  * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
530  * computed from the gradients.
531  */
532 static void
replace_gradient_with_lod(nir_builder * b,nir_ssa_def * lod,nir_tex_instr * tex)533 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
534 {
535    assert(tex->op == nir_texop_txd);
536 
537    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
538    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
539 
540    int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
541    if (min_lod_idx >= 0) {
542       /* If we have a minimum LOD, clamp LOD accordingly */
543       lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
544       nir_tex_instr_remove_src(tex, min_lod_idx);
545    }
546 
547    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
548    tex->op = nir_texop_txl;
549 }
550 
/* Lowers txd on cube maps: computes an explicit LOD from the gradients
 * (derivation in the long comment below) and hands it to
 * replace_gradient_with_lod().
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * that absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   /* Note: the z condition wins ties, matching the cascaded ifs above. */
   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
697 
698 static void
lower_gradient(nir_builder * b,nir_tex_instr * tex)699 lower_gradient(nir_builder *b, nir_tex_instr *tex)
700 {
701    /* Cubes are more complicated and have their own function */
702    if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
703       lower_gradient_cube_map(b, tex);
704       return;
705    }
706 
707    assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
708    assert(tex->op == nir_texop_txd);
709    assert(tex->dest.is_ssa);
710 
711    /* Use textureSize() to get the width and height of LOD 0 */
712    unsigned component_mask;
713    switch (tex->sampler_dim) {
714    case GLSL_SAMPLER_DIM_3D:
715       component_mask = 7;
716       break;
717    case GLSL_SAMPLER_DIM_1D:
718       component_mask = 1;
719       break;
720    default:
721       component_mask = 3;
722       break;
723    }
724 
725    nir_ssa_def *size =
726       nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
727                       component_mask);
728 
729    /* Scale the gradients by width and height.  Effectively, the incoming
730     * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
731     * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
732     */
733    nir_ssa_def *ddx =
734       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
735    nir_ssa_def *ddy =
736       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
737 
738    nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
739    nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
740 
741    nir_ssa_def *rho;
742    if (dPdx->num_components == 1) {
743       rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
744    } else {
745       rho = nir_fmax(b,
746                      nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
747                      nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
748    }
749 
750    /* lod = log2(rho).  We're ignoring GL state biases for now. */
751    nir_ssa_def *lod = nir_flog2(b, rho);
752 
753    /* Replace the gradient instruction with an equivalent lod instruction */
754    replace_gradient_with_lod(b, lod, tex);
755 }
756 
/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord))
 *
 * Builds a replacement txd with the coordinate's screen-space
 * derivatives as explicit gradients, rewrites all uses, removes the
 * original tex, and returns the new instruction.
 */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   /* Two extra srcs hold the ddx/ddy gradients. */
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&txd->src[i].src, &tex->src[i].src);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord >= 0);
   /* Implicit gradients: derivatives of the coordinate itself. */
   nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa);
   nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa);
   txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx);
   txd->src[tex->num_srcs].src_type = nir_tex_src_ddx;
   txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy);
   txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy;

   /* Insert the txd, point all users at it, and drop the original tex. */
   nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest),
                     nir_dest_bit_size(tex->dest), NULL);
   nir_builder_instr_insert(b, &txd->instr);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa);
   nir_instr_remove(&tex->instr);
   return txd;
}
792 
793 /* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
794 static nir_tex_instr *
lower_txb_to_txl(nir_builder * b,nir_tex_instr * tex)795 lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
796 {
797    b->cursor = nir_after_instr(&tex->instr);
798    nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);
799 
800    txl->op = nir_texop_txl;
801    txl->sampler_dim = tex->sampler_dim;
802    txl->dest_type = tex->dest_type;
803    txl->coord_components = tex->coord_components;
804    txl->texture_index = tex->texture_index;
805    txl->sampler_index = tex->sampler_index;
806 
807    /* reuse all but bias src */
808    for (int i = 0; i < 2; i++) {
809       if (tex->src[i].src_type != nir_tex_src_bias) {
810          nir_src_copy(&txl->src[i].src, &tex->src[i].src);
811          txl->src[i].src_type = tex->src[i].src_type;
812       }
813    }
814    nir_ssa_def *lod = nir_get_texture_lod(b, txl);
815 
816    int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
817    assert(bias_idx >= 0);
818    lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
819    txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod);
820    txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod;
821 
822    nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest),
823                      nir_dest_bit_size(tex->dest), NULL);
824    nir_builder_instr_insert(b, &txl->instr);
825    nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa);
826    nir_instr_remove(&tex->instr);
827    return txl;
828 }
829 
830 static nir_tex_instr *
saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)831 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
832 {
833    if (tex->op == nir_texop_tex)
834       tex = lower_tex_to_txd(b, tex);
835    else if (tex->op == nir_texop_txb)
836       tex = lower_txb_to_txl(b, tex);
837 
838    b->cursor = nir_before_instr(&tex->instr);
839    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
840 
841    if (coord_index != -1) {
842       nir_ssa_def *src =
843          nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
844 
845       /* split src into components: */
846       nir_ssa_def *comp[4];
847 
848       assume(tex->coord_components >= 1);
849 
850       for (unsigned j = 0; j < tex->coord_components; j++)
851          comp[j] = nir_channel(b, src, j);
852 
853       /* clamp requested components, array index does not get clamped: */
854       unsigned ncomp = tex->coord_components;
855       if (tex->is_array)
856          ncomp--;
857 
858       for (unsigned j = 0; j < ncomp; j++) {
859          if ((1 << j) & sat_mask) {
860             if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
861                /* non-normalized texture coords, so clamp to texture
862                 * size rather than [0.0, 1.0]
863                 */
864                nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
865                comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
866                comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
867             } else {
868                comp[j] = nir_fsat(b, comp[j]);
869             }
870          }
871       }
872 
873       /* and move the result back into a single vecN: */
874       src = nir_vec(b, comp, tex->coord_components);
875 
876       nir_instr_rewrite_src(&tex->instr,
877                             &tex->src[coord_index].src,
878                             nir_src_for_ssa(src));
879    }
880    return tex;
881 }
882 
883 static nir_ssa_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)884 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
885 {
886    nir_const_value v[4];
887 
888    memset(&v, 0, sizeof(v));
889 
890    if (swizzle_val == 4) {
891       v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
892    } else {
893       assert(swizzle_val == 5);
894       if (type == nir_type_float32)
895          v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
896       else
897          v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
898    }
899 
900    return nir_build_imm(b, 4, 32, v);
901 }
902 
903 static void
swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)904 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
905 {
906    assert(tex->dest.is_ssa);
907 
908    b->cursor = nir_after_instr(&tex->instr);
909 
910    assert(nir_tex_instr_dest_size(tex) == 4);
911    unsigned swiz[4] = { 2, 3, 1, 0 };
912    nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
913 
914    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
915                                   swizzled->parent_instr);
916 }
917 
918 static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])919 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
920 {
921    assert(tex->dest.is_ssa);
922 
923    b->cursor = nir_after_instr(&tex->instr);
924 
925    nir_ssa_def *swizzled;
926    if (tex->op == nir_texop_tg4) {
927       if (swizzle[tex->component] < 4) {
928          /* This one's easy */
929          tex->component = swizzle[tex->component];
930          return;
931       } else {
932          swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
933       }
934    } else {
935       assert(nir_tex_instr_dest_size(tex) == 4);
936       if (swizzle[0] < 4 && swizzle[1] < 4 &&
937           swizzle[2] < 4 && swizzle[3] < 4) {
938          unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
939          /* We have no 0s or 1s, just emit a swizzling MOV */
940          swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
941       } else {
942          nir_ssa_def *srcs[4];
943          for (unsigned i = 0; i < 4; i++) {
944             if (swizzle[i] < 4) {
945                srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
946             } else {
947                srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
948             }
949          }
950          swizzled = nir_vec(b, srcs, 4);
951       }
952    }
953 
954    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
955                                   swizzled->parent_instr);
956 }
957 
/* Converts an sRGB-encoded texture result to linear by decoding the RGB
 * channels in place after the instruction; alpha is not sRGB-encoded and
 * passes through unchanged.
 */
static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   /* Mask 0x7 selects .xyz — only the color channels get decoded. */
   nir_ssa_def *rgb =
      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));

   /* alpha is untouched: */
   nir_ssa_def *result = nir_vec4(b,
                                  nir_channel(b, rgb, 0),
                                  nir_channel(b, rgb, 1),
                                  nir_channel(b, rgb, 2),
                                  nir_channel(b, &tex->dest.ssa, 3));

   /* Redirect all later uses to the linearized value. */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
                                  result->parent_instr);
}
980 
/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   /* Packing mode is configured per sampler index. */
   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            /* Single shadow-comparison result packed in the low half of .x. */
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            /* Two f16 values packed in channel 0: unpack into .xy. */
            nir_ssa_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            /* Four f16 values packed pairwise: rg in channel 0, ba in 1. */
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         /* Sign-extend four packed 16-bit signed values. */
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         /* Zero-extend four packed 16-bit unsigned values. */
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      /* Whole result packed as unorm4x8 in channel 0; floats only. */
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color,
                                  color->parent_instr);
}
1057 
1058 static bool
sampler_index_lt(nir_tex_instr * tex,unsigned max)1059 sampler_index_lt(nir_tex_instr *tex, unsigned max)
1060 {
1061    assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
1062 
1063    unsigned sampler_index = tex->sampler_index;
1064 
1065    int sampler_offset_idx =
1066       nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
1067    if (sampler_offset_idx >= 0) {
1068       if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
1069          return false;
1070 
1071       sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
1072    }
1073 
1074    return sampler_index < max;
1075 }
1076 
/* Lowers a tg4 carrying explicit per-texel offsets (textureGatherOffsets)
 * into four separate single-offset tg4 instructions and recombines their
 * results into one vector.
 */
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   /* dest[0..3] hold one gathered texel each; dest[4] accumulates the
    * residency code for sparse gathers. */
   nir_ssa_def *dest[5] = {NULL};
   for (unsigned i = 0; i < 4; ++i) {
      /* Clone the instruction with room for one extra (offset) src. */
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      /* Append this texel's offset as a regular constant offset src. */
      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                                 tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      /* Channel 3 of each offset gather is taken as that offset's texel. */
      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
      if (tex->is_sparse) {
         /* Combine residency codes so the result reads resident only when
          * all four fetches were resident. */
         nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
         dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code;
      }
   }

   nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
   nir_instr_remove(&tex->instr);

   return true;
}
1128 
/* Lowers txs with a (potentially) non-zero LOD into a 0-LOD txs followed by
 * ALU minification.  Returns false when the LOD src is absent or constant 0
 * (nothing to do).
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   /* Capture the original LOD value before rewriting the src. */
   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa,
                                    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                             nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      /* Last component (layer count) comes from the 0-LOD txs untouched. */
      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   /* All uses after the txs see the minified size. */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified,
                                  minified->parent_instr);
   return true;
}
1174 
1175 static void
nir_lower_txs_cube_array(nir_builder * b,nir_tex_instr * tex)1176 nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
1177 {
1178    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
1179    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1180 
1181    b->cursor = nir_after_instr(&tex->instr);
1182 
1183    assert(tex->dest.is_ssa);
1184    assert(tex->dest.ssa.num_components == 3);
1185    nir_ssa_def *size = &tex->dest.ssa;
1186    size = nir_vec3(b, nir_channel(b, size, 0),
1187                       nir_channel(b, size, 1),
1188                       nir_idiv(b, nir_channel(b, size, 2),
1189                                   nir_imm_int(b, 6)));
1190 
1191    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr);
1192 }
1193 
/* Lowers a multisample txf into an AMD FMASK fetch plus a fragment fetch:
 * the FMASK word remaps the API sample index to the physical fragment slot
 * (4 bits per sample index, per the extraction below).
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   /* Fold any offset src into the coordinate first. */
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);

   /* Copy every src except the sample index, which the FMASK fetch
    * does not take. */
   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index. */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   nir_ssa_def *new_sample = NULL;
   if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) {
      /* Sample 0 and sample 7 sit at the two ends of the 32-bit FMASK
       * word, so a plain mask or shift extracts them more cheaply than a
       * bitfield-extract. */
      if (nir_src_as_uint(sample) == 7)
         new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28));
      else
         new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf);
   } else {
      /* General case: extract the 4-bit field at bit offset sample * 4. */
      new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa,
                                         nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4));
   }

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample);
}
1242 
/* Lowers samples_identical to an AMD FMASK fetch: a zero FMASK word is
 * taken to mean all samples map to the same fragment, i.e. identical.
 */
static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   /* Clone the instruction (keeping coords etc.) and retarget the clone
    * to fetch the fragment mask instead. */
   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* The boolean result is (fmask == 0). */
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0));
   nir_instr_remove_v(&tex->instr);
}
1257 
1258 static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1259 nir_lower_tex_block(nir_block *block, nir_builder *b,
1260                     const nir_lower_tex_options *options,
1261                     const struct nir_shader_compiler_options *compiler_options)
1262 {
1263    bool progress = false;
1264 
1265    nir_foreach_instr_safe(instr, block) {
1266       if (instr->type != nir_instr_type_tex)
1267          continue;
1268 
1269       nir_tex_instr *tex = nir_instr_as_tex(instr);
1270       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1271 
1272       /* mask of src coords to saturate (clamp): */
1273       unsigned sat_mask = 0;
1274 
1275       if ((1 << tex->sampler_index) & options->saturate_r)
1276          sat_mask |= (1 << 2);    /* .z */
1277       if ((1 << tex->sampler_index) & options->saturate_t)
1278          sat_mask |= (1 << 1);    /* .y */
1279       if ((1 << tex->sampler_index) & options->saturate_s)
1280          sat_mask |= (1 << 0);    /* .x */
1281 
1282       /* If we are clamping any coords, we must lower projector first
1283        * as clamping happens *after* projection:
1284        */
1285       if (lower_txp || sat_mask) {
1286          progress |= project_src(b, tex);
1287       }
1288 
1289       if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1290           (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1291           (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1292            options->lower_rect_offset)) {
1293          progress = lower_offset(b, tex) || progress;
1294       }
1295 
1296       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1297           tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
1298 
1299          if (compiler_options->has_txs)
1300             lower_rect(b, tex);
1301          else
1302             lower_rect_tex_scale(b, tex);
1303 
1304          progress = true;
1305       }
1306 
1307       unsigned texture_index = tex->texture_index;
1308       uint32_t texture_mask = 1u << texture_index;
1309       int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1310       if (tex_index >= 0) {
1311          nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
1312          nir_variable *var = nir_deref_instr_get_variable(deref);
1313          texture_index = var ? var->data.binding : 0;
1314          texture_mask = var ? (1u << texture_index) : 0u;
1315       }
1316 
1317       if (texture_mask & options->lower_y_uv_external) {
1318          lower_y_uv_external(b, tex, options, texture_index);
1319          progress = true;
1320       }
1321 
1322       if (texture_mask & options->lower_y_u_v_external) {
1323          lower_y_u_v_external(b, tex, options, texture_index);
1324          progress = true;
1325       }
1326 
1327       if (texture_mask & options->lower_yx_xuxv_external) {
1328          lower_yx_xuxv_external(b, tex, options, texture_index);
1329          progress = true;
1330       }
1331 
1332       if (texture_mask & options->lower_xy_uxvx_external) {
1333          lower_xy_uxvx_external(b, tex, options, texture_index);
1334          progress = true;
1335       }
1336 
1337       if (texture_mask & options->lower_ayuv_external) {
1338          lower_ayuv_external(b, tex, options, texture_index);
1339          progress = true;
1340       }
1341 
1342       if (texture_mask & options->lower_xyuv_external) {
1343          lower_xyuv_external(b, tex, options, texture_index);
1344          progress = true;
1345       }
1346 
1347       if (texture_mask & options->lower_yuv_external) {
1348          lower_yuv_external(b, tex, options, texture_index);
1349          progress = true;
1350       }
1351 
1352       if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
1353          lower_yu_yv_external(b, tex, options, texture_index);
1354          progress = true;
1355       }
1356 
1357       if ((1 << tex->texture_index) & options->lower_y41x_external) {
1358          lower_y41x_external(b, tex, options, texture_index);
1359          progress = true;
1360       }
1361 
1362       if (sat_mask) {
1363          tex = saturate_src(b, tex, sat_mask);
1364          progress = true;
1365       }
1366 
1367       if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1368          swizzle_tg4_broadcom(b, tex);
1369          progress = true;
1370       }
1371 
1372       if ((texture_mask & options->swizzle_result) &&
1373           !nir_tex_instr_is_query(tex) &&
1374           !(tex->is_shadow && tex->is_new_style_shadow)) {
1375          swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1376          progress = true;
1377       }
1378 
1379       /* should be after swizzle so we know which channels are rgb: */
1380       if ((texture_mask & options->lower_srgb) &&
1381           !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1382          linearize_srgb_result(b, tex);
1383          progress = true;
1384       }
1385 
1386       const bool has_min_lod =
1387          nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1388       const bool has_offset =
1389          nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1390 
1391       if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1392           options->lower_txb_shadow_clamp) {
1393          lower_implicit_lod(b, tex);
1394          progress = true;
1395       }
1396 
1397       if (options->lower_tex_packing[tex->sampler_index] !=
1398           nir_lower_tex_packing_none &&
1399           tex->op != nir_texop_txs &&
1400           tex->op != nir_texop_query_levels &&
1401           tex->op != nir_texop_texture_samples) {
1402          lower_tex_packing(b, tex, options);
1403          progress = true;
1404       }
1405 
1406       if (tex->op == nir_texop_txd &&
1407           (options->lower_txd ||
1408            (options->lower_txd_shadow && tex->is_shadow) ||
1409            (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1410            (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1411            (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1412             nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1413            (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1414             has_min_lod && !sampler_index_lt(tex, 16)) ||
1415            (options->lower_txd_cube_map &&
1416             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1417            (options->lower_txd_3d &&
1418             tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1419          lower_gradient(b, tex);
1420          progress = true;
1421          continue;
1422       }
1423 
1424       /* TXF, TXS and TXL require a LOD but not everything we implement using those
1425        * three opcodes provides one.  Provide a default LOD of 0.
1426        */
1427       if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1428           (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1429            tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
1430          b->cursor = nir_before_instr(&tex->instr);
1431          nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1432          progress = true;
1433          continue;
1434       }
1435 
1436       /* Only fragment and compute (in some cases) support implicit
1437        * derivatives.  Lower those opcodes which use implicit derivatives to
1438        * use an explicit LOD of 0.
1439        */
1440       if (nir_tex_instr_has_implicit_derivative(tex) &&
1441           !nir_shader_supports_implicit_lod(b->shader)) {
1442          lower_zero_lod(b, tex);
1443          progress = true;
1444       }
1445 
1446       if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1447          progress |= nir_lower_txs_lod(b, tex);
1448          continue;
1449       }
1450 
1451       if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
1452           tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
1453          nir_lower_txs_cube_array(b, tex);
1454          progress = true;
1455          continue;
1456       }
1457 
1458       /* has to happen after all the other lowerings as the original tg4 gets
1459        * replaced by 4 tg4 instructions.
1460        */
1461       if (tex->op == nir_texop_tg4 &&
1462           nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1463           options->lower_tg4_offsets) {
1464          progress |= lower_tg4_offsets(b, tex);
1465          continue;
1466       }
1467 
1468       if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
1469          nir_lower_ms_txf_to_fragment_fetch(b, tex);
1470          progress = true;
1471          continue;
1472       }
1473 
1474       if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
1475          nir_lower_samples_identical_to_fragment_fetch(b, tex);
1476          progress = true;
1477          continue;
1478       }
1479    }
1480 
1481    return progress;
1482 }
1483 
1484 static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1485 nir_lower_tex_impl(nir_function_impl *impl,
1486                    const nir_lower_tex_options *options,
1487                    const struct nir_shader_compiler_options *compiler_options)
1488 {
1489    bool progress = false;
1490    nir_builder builder;
1491    nir_builder_init(&builder, impl);
1492 
1493    nir_foreach_block(block, impl) {
1494       progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
1495    }
1496 
1497    nir_metadata_preserve(impl, nir_metadata_block_index |
1498                                nir_metadata_dominance);
1499    return progress;
1500 }
1501 
1502 bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1503 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1504 {
1505    bool progress = false;
1506 
1507    nir_foreach_function(function, shader) {
1508       if (function->impl)
1509          progress |= nir_lower_tex_impl(function->impl, options, shader->options);
1510    }
1511 
1512    return progress;
1513 }
1514