• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23  
/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 */
37  
38  #include "nir.h"
39  #include "nir_builder.h"
40  #include "nir_builtin_builder.h"
41  #include "nir_format_convert.h"
42  
/* A 3x4 block of immediate constants: three vec4 rows holding a YCbCr->RGB
 * color-space-conversion matrix.  The 4th entry of each row is
 * zero-initialized, which lets convert_yuv_to_rgb() pass the alpha channel
 * through the matrix math untouched.
 */
typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;

/* CSC coefficient matrices, fed to nir_build_imm() one row at a time in
 * convert_yuv_to_rgb(): row 0 multiplies y, row 1 multiplies u, row 2
 * multiplies v.
 */
static const nir_const_value_3_4 bt601_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt709_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt2020_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67867411f }, { .f32 = -0.65042432f }, { .f32 = 0.0f        } },
} };

/* Per-standard constant offsets added after the matrix multiply;
 * convert_yuv_to_rgb() packs these three values into a vec4 with the
 * alpha value in .w.
 */
static const float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
72  
/* Lowers texture projection: divides the coordinate (and shadow comparator)
 * sources by the projector using ALU instructions, then removes the
 * projector source.  Returns true if the instruction was changed.
 */
static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   /* Multiply by the reciprocal once instead of dividing each source. */
   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         /* Only the coordinate and comparator get projected. */
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
   return true;
}
136  
/* Folds a nir_tex_src_offset source into the coordinate.  Integer coords get
 * the offset added directly; float coords get the offset scaled into
 * normalized space via the texture size (RECT coords are un-normalized, so
 * the offset is added as-is).  Returns true if an offset was lowered.
 */
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* RECT coordinates are already in texel units. */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         /* Scale the texel-unit offset into normalized coordinate space. */
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, offset_coord, 1),
                                    nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
192  
193  static void
lower_rect(nir_builder * b,nir_tex_instr * tex)194  lower_rect(nir_builder *b, nir_tex_instr *tex)
195  {
196     /* Set the sampler_dim to 2D here so that get_texture_size picks up the
197      * right dimensionality.
198      */
199     tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
200  
201     nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
202     nir_ssa_def *scale = nir_frcp(b, txs);
203     int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
204  
205     if (coord_index != -1) {
206        nir_ssa_def *coords =
207           nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
208        nir_instr_rewrite_src(&tex->instr,
209                              &tex->src[coord_index].src,
210                              nir_src_for_ssa(nir_fmul(b, coords, scale)));
211     }
212  }
213  
214  static void
lower_rect_tex_scale(nir_builder * b,nir_tex_instr * tex)215  lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
216  {
217     b->cursor = nir_before_instr(&tex->instr);
218  
219     nir_ssa_def *idx = nir_imm_int(b, tex->texture_index);
220     nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx);
221     int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
222  
223     if (coord_index != -1) {
224        nir_ssa_def *coords =
225           nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
226        nir_instr_rewrite_src(&tex->instr,
227                              &tex->src[coord_index].src,
228                              nir_src_for_ssa(nir_fmul(b, coords, scale)));
229     }
230  }
231  
/* Rewrites a tex/txb instruction into txl with the given explicit LOD,
 * folding any bias and min_lod sources into the LOD value.
 */
static void
lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
257  
258  static void
lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)259  lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
260  {
261     b->cursor = nir_before_instr(&tex->instr);
262     lower_lod(b, tex, nir_get_texture_lod(b, tex));
263  }
264  
265  static void
lower_zero_lod(nir_builder * b,nir_tex_instr * tex)266  lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
267  {
268     b->cursor = nir_before_instr(&tex->instr);
269  
270     if (tex->op == nir_texop_lod) {
271        nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0));
272        nir_instr_remove(&tex->instr);
273        return;
274     }
275  
276     lower_lod(b, tex, nir_imm_int(b, 0));
277  }
278  
/* Emits a copy of @tex with an extra nir_tex_src_plane source selecting
 * @plane and returns its result, multiplied by the per-texture
 * options->scale_factors entry when that is non-zero.  Used to fetch the
 * individual planes of multi-planar YUV external images.
 */
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   /* Copy all of the original sources, then append the plane selector. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest);
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
         nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If scaling_factor is set, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}
317  
/* Converts the y/u/v samples to RGB using the CSC matrix selected per
 * texture by @options (BT.709 or BT.2020 when the texture's bit is set in
 * the corresponding mask, BT.601 otherwise) and rewrites all uses of @tex's
 * result with the converted color.  @a supplies the output alpha channel.
 */
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{

   const float *offset_vals;
   const nir_const_value_3_4 *m;
   /* A texture may be flagged as at most one of BT.709 / BT.2020. */
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->bt709_external & (1u << texture_index)) {
      m = &bt709_csc_coeffs;
      offset_vals = bt709_csc_offsets;
   } else if (options->bt2020_external & (1u << texture_index)) {
      m = &bt2020_csc_coeffs;
      offset_vals = bt2020_csc_offsets;
   } else {
      m = &bt601_csc_coeffs;
      offset_vals = bt601_csc_offsets;
   }

   unsigned bit_size = nir_dest_bit_size(tex->dest);

   /* Alpha rides in the offset's .w: the matrix rows' 4th entries are
    * zero-initialized, so the ffma chain leaves .w == a.
    */
   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   /* Convert the 32-bit immediate matrix rows to the dest bit size. */
   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   /* result = y * m0 + u * m1 + v * m2 + offset */
   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
}
360  
361  static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)362  lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
363                      const nir_lower_tex_options *options,
364                      unsigned texture_index)
365  {
366     b->cursor = nir_after_instr(&tex->instr);
367  
368     nir_ssa_def *y = sample_plane(b, tex, 0, options);
369     nir_ssa_def *uv = sample_plane(b, tex, 1, options);
370  
371     convert_yuv_to_rgb(b, tex,
372                        nir_channel(b, y, 0),
373                        nir_channel(b, uv, 0),
374                        nir_channel(b, uv, 1),
375                        nir_imm_float(b, 1.0f),
376                        options,
377                        texture_index);
378  }
379  
380  static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)381  lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
382                       const nir_lower_tex_options *options,
383                       unsigned texture_index)
384  {
385     b->cursor = nir_after_instr(&tex->instr);
386  
387     nir_ssa_def *y = sample_plane(b, tex, 0, options);
388     nir_ssa_def *u = sample_plane(b, tex, 1, options);
389     nir_ssa_def *v = sample_plane(b, tex, 2, options);
390  
391     convert_yuv_to_rgb(b, tex,
392                        nir_channel(b, y, 0),
393                        nir_channel(b, u, 0),
394                        nir_channel(b, v, 0),
395                        nir_imm_float(b, 1.0f),
396                        options,
397                        texture_index);
398  }
399  
400  static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)401  lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
402                         const nir_lower_tex_options *options,
403                         unsigned texture_index)
404  {
405     b->cursor = nir_after_instr(&tex->instr);
406  
407     nir_ssa_def *y = sample_plane(b, tex, 0, options);
408     nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
409  
410     convert_yuv_to_rgb(b, tex,
411                        nir_channel(b, y, 0),
412                        nir_channel(b, xuxv, 1),
413                        nir_channel(b, xuxv, 3),
414                        nir_imm_float(b, 1.0f),
415                        options,
416                        texture_index);
417  }
418  
419  static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)420  lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
421                         const nir_lower_tex_options *options,
422                         unsigned texture_index)
423  {
424    b->cursor = nir_after_instr(&tex->instr);
425  
426    nir_ssa_def *y = sample_plane(b, tex, 0, options);
427    nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
428  
429    convert_yuv_to_rgb(b, tex,
430                       nir_channel(b, y, 1),
431                       nir_channel(b, uxvx, 0),
432                       nir_channel(b, uxvx, 2),
433                       nir_imm_float(b, 1.0f),
434                       options,
435                       texture_index);
436  }
437  
438  static void
lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)439  lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
440                      const nir_lower_tex_options *options,
441                      unsigned texture_index)
442  {
443    b->cursor = nir_after_instr(&tex->instr);
444  
445    nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
446  
447    convert_yuv_to_rgb(b, tex,
448                       nir_channel(b, ayuv, 2),
449                       nir_channel(b, ayuv, 1),
450                       nir_channel(b, ayuv, 0),
451                       nir_channel(b, ayuv, 3),
452                       options,
453                       texture_index);
454  }
455  
456  static void
lower_y41x_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)457  lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
458                      const nir_lower_tex_options *options,
459                      unsigned texture_index)
460  {
461    b->cursor = nir_after_instr(&tex->instr);
462  
463    nir_ssa_def *y41x = sample_plane(b, tex, 0, options);
464  
465    convert_yuv_to_rgb(b, tex,
466                       nir_channel(b, y41x, 1),
467                       nir_channel(b, y41x, 0),
468                       nir_channel(b, y41x, 2),
469                       nir_channel(b, y41x, 3),
470                       options,
471                       texture_index);
472  }
473  
474  static void
lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)475  lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
476                      const nir_lower_tex_options *options,
477                      unsigned texture_index)
478  {
479    b->cursor = nir_after_instr(&tex->instr);
480  
481    nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
482  
483    convert_yuv_to_rgb(b, tex,
484                       nir_channel(b, xyuv, 2),
485                       nir_channel(b, xyuv, 1),
486                       nir_channel(b, xyuv, 0),
487                       nir_imm_float(b, 1.0f),
488                       options,
489                       texture_index);
490  }
491  
492  static void
lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)493  lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
494                     const nir_lower_tex_options *options,
495                     unsigned texture_index)
496  {
497    b->cursor = nir_after_instr(&tex->instr);
498  
499    nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
500  
501    convert_yuv_to_rgb(b, tex,
502                       nir_channel(b, yuv, 0),
503                       nir_channel(b, yuv, 1),
504                       nir_channel(b, yuv, 2),
505                       nir_imm_float(b, 1.0f),
506                       options,
507                       texture_index);
508  }
509  
510  static void
lower_yu_yv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)511  lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
512                       const nir_lower_tex_options *options,
513                       unsigned texture_index)
514  {
515    b->cursor = nir_after_instr(&tex->instr);
516  
517    nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
518  
519    convert_yuv_to_rgb(b, tex,
520                       nir_channel(b, yuv, 1),
521                       nir_channel(b, yuv, 2),
522                       nir_channel(b, yuv, 0),
523                       nir_imm_float(b, 1.0f),
524                       options,
525                       texture_index);
526  }
527  
/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   /* The gradients were consumed by the LOD computation; drop them. */
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
550  
/* Lowers txd on cube maps: computes an explicit LOD from the gradients
 * (projecting onto the dominant face and applying the quotient rule, see
 * the derivation below) and rewrites the instruction to txl via
 * replace_gradient_with_lod().
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropiate face. The appropiate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * that absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   /* Swizzle so the dominant axis ends up in .z (x-major -> yzx,
    * y-major -> xzy, z-major -> unchanged).
    */
   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
697  
/* Lowers txd on non-cube samplers: computes lod = log2(rho) from the
 * size-scaled gradients and rewrites the instruction to txl via
 * replace_gradient_with_lod().
 */
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   /* Keep only as many size components as the sampler has coordinate
    * dimensions: xyz for 3D, x for 1D, xy otherwise.
    */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                      component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   /* rho: the larger of the two gradient vector magnitudes (absolute value
    * suffices in the 1D single-component case).
    */
   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
756  
757  /* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
758  static nir_tex_instr *
lower_tex_to_txd(nir_builder * b,nir_tex_instr * tex)759  lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
760  {
761     b->cursor = nir_after_instr(&tex->instr);
762     nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);
763  
764     txd->op = nir_texop_txd;
765     txd->sampler_dim = tex->sampler_dim;
766     txd->dest_type = tex->dest_type;
767     txd->coord_components = tex->coord_components;
768     txd->texture_index = tex->texture_index;
769     txd->sampler_index = tex->sampler_index;
770  
771     /* reuse existing srcs */
772     for (unsigned i = 0; i < tex->num_srcs; i++) {
773        nir_src_copy(&txd->src[i].src, &tex->src[i].src);
774        txd->src[i].src_type = tex->src[i].src_type;
775     }
776     int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord);
777     assert(coord >= 0);
778     nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa);
779     nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa);
780     txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx);
781     txd->src[tex->num_srcs].src_type = nir_tex_src_ddx;
782     txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy);
783     txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy;
784  
785     nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest),
786                       nir_dest_bit_size(tex->dest), NULL);
787     nir_builder_instr_insert(b, &txd->instr);
788     nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa);
789     nir_instr_remove(&tex->instr);
790     return txd;
791  }
792  
793  /* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
794  static nir_tex_instr *
lower_txb_to_txl(nir_builder * b,nir_tex_instr * tex)795  lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
796  {
797     b->cursor = nir_after_instr(&tex->instr);
798     nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);
799  
800     txl->op = nir_texop_txl;
801     txl->sampler_dim = tex->sampler_dim;
802     txl->dest_type = tex->dest_type;
803     txl->coord_components = tex->coord_components;
804     txl->texture_index = tex->texture_index;
805     txl->sampler_index = tex->sampler_index;
806  
807     /* reuse all but bias src */
808     for (int i = 0; i < 2; i++) {
809        if (tex->src[i].src_type != nir_tex_src_bias) {
810           nir_src_copy(&txl->src[i].src, &tex->src[i].src);
811           txl->src[i].src_type = tex->src[i].src_type;
812        }
813     }
814     nir_ssa_def *lod = nir_get_texture_lod(b, txl);
815  
816     int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
817     assert(bias_idx >= 0);
818     lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
819     txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod);
820     txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod;
821  
822     nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest),
823                       nir_dest_bit_size(tex->dest), NULL);
824     nir_builder_instr_insert(b, &txl->instr);
825     nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa);
826     nir_instr_remove(&tex->instr);
827     return txl;
828  }
829  
830  static nir_tex_instr *
saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)831  saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
832  {
833     if (tex->op == nir_texop_tex)
834        tex = lower_tex_to_txd(b, tex);
835     else if (tex->op == nir_texop_txb)
836        tex = lower_txb_to_txl(b, tex);
837  
838     b->cursor = nir_before_instr(&tex->instr);
839     int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
840  
841     if (coord_index != -1) {
842        nir_ssa_def *src =
843           nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
844  
845        /* split src into components: */
846        nir_ssa_def *comp[4];
847  
848        assume(tex->coord_components >= 1);
849  
850        for (unsigned j = 0; j < tex->coord_components; j++)
851           comp[j] = nir_channel(b, src, j);
852  
853        /* clamp requested components, array index does not get clamped: */
854        unsigned ncomp = tex->coord_components;
855        if (tex->is_array)
856           ncomp--;
857  
858        for (unsigned j = 0; j < ncomp; j++) {
859           if ((1 << j) & sat_mask) {
860              if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
861                 /* non-normalized texture coords, so clamp to texture
862                  * size rather than [0.0, 1.0]
863                  */
864                 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
865                 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
866                 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
867              } else {
868                 comp[j] = nir_fsat(b, comp[j]);
869              }
870           }
871        }
872  
873        /* and move the result back into a single vecN: */
874        src = nir_vec(b, comp, tex->coord_components);
875  
876        nir_instr_rewrite_src(&tex->instr,
877                              &tex->src[coord_index].src,
878                              nir_src_for_ssa(src));
879     }
880     return tex;
881  }
882  
883  static nir_ssa_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)884  get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
885  {
886     nir_const_value v[4];
887  
888     memset(&v, 0, sizeof(v));
889  
890     if (swizzle_val == 4) {
891        v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
892     } else {
893        assert(swizzle_val == 5);
894        if (type == nir_type_float32)
895           v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
896        else
897           v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
898     }
899  
900     return nir_build_imm(b, 4, 32, v);
901  }
902  
903  static void
swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)904  swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
905  {
906     assert(tex->dest.is_ssa);
907  
908     b->cursor = nir_after_instr(&tex->instr);
909  
910     assert(nir_tex_instr_dest_size(tex) == 4);
911     unsigned swiz[4] = { 2, 3, 1, 0 };
912     nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
913  
914     nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
915                                    swizzled->parent_instr);
916  }
917  
918  static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])919  swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
920  {
921     assert(tex->dest.is_ssa);
922  
923     b->cursor = nir_after_instr(&tex->instr);
924  
925     nir_ssa_def *swizzled;
926     if (tex->op == nir_texop_tg4) {
927        if (swizzle[tex->component] < 4) {
928           /* This one's easy */
929           tex->component = swizzle[tex->component];
930           return;
931        } else {
932           swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
933        }
934     } else {
935        assert(nir_tex_instr_dest_size(tex) == 4);
936        if (swizzle[0] < 4 && swizzle[1] < 4 &&
937            swizzle[2] < 4 && swizzle[3] < 4) {
938           unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
939           /* We have no 0s or 1s, just emit a swizzling MOV */
940           swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
941        } else {
942           nir_ssa_def *srcs[4];
943           for (unsigned i = 0; i < 4; i++) {
944              if (swizzle[i] < 4) {
945                 srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
946              } else {
947                 srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
948              }
949           }
950           swizzled = nir_vec(b, srcs, 4);
951        }
952     }
953  
954     nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
955                                    swizzled->parent_instr);
956  }
957  
958  static void
linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)959  linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
960  {
961     assert(tex->dest.is_ssa);
962     assert(nir_tex_instr_dest_size(tex) == 4);
963     assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
964  
965     b->cursor = nir_after_instr(&tex->instr);
966  
967     nir_ssa_def *rgb =
968        nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
969  
970     /* alpha is untouched: */
971     nir_ssa_def *result = nir_vec4(b,
972                                    nir_channel(b, rgb, 0),
973                                    nir_channel(b, rgb, 1),
974                                    nir_channel(b, rgb, 2),
975                                    nir_channel(b, &tex->dest.ssa, 3));
976  
977     nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
978                                    result->parent_instr);
979  }
980  
981  /**
982   * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
983   * i16, or u16, or a single unorm4x8 value.
984   *
985   * Note that we don't change the destination num_components, because
986   * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
987   * to not store the other channels, given that nothing at the NIR level will
988   * read them.
989   */
990  static void
lower_tex_packing(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)991  lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
992                    const nir_lower_tex_options *options)
993  {
994     nir_ssa_def *color = &tex->dest.ssa;
995  
996     b->cursor = nir_after_instr(&tex->instr);
997  
998     switch (options->lower_tex_packing[tex->sampler_index]) {
999     case nir_lower_tex_packing_none:
1000        return;
1001  
1002     case nir_lower_tex_packing_16: {
1003        static const unsigned bits[4] = {16, 16, 16, 16};
1004  
1005        switch (nir_alu_type_get_base_type(tex->dest_type)) {
1006        case nir_type_float:
1007           switch (nir_tex_instr_dest_size(tex)) {
1008           case 1:
1009              assert(tex->is_shadow && tex->is_new_style_shadow);
1010              color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
1011              break;
1012           case 2: {
1013              nir_ssa_def *rg = nir_channel(b, color, 0);
1014              color = nir_vec2(b,
1015                               nir_unpack_half_2x16_split_x(b, rg),
1016                               nir_unpack_half_2x16_split_y(b, rg));
1017              break;
1018           }
1019           case 4: {
1020              nir_ssa_def *rg = nir_channel(b, color, 0);
1021              nir_ssa_def *ba = nir_channel(b, color, 1);
1022              color = nir_vec4(b,
1023                               nir_unpack_half_2x16_split_x(b, rg),
1024                               nir_unpack_half_2x16_split_y(b, rg),
1025                               nir_unpack_half_2x16_split_x(b, ba),
1026                               nir_unpack_half_2x16_split_y(b, ba));
1027              break;
1028           }
1029           default:
1030              unreachable("wrong dest_size");
1031           }
1032           break;
1033  
1034        case nir_type_int:
1035           color = nir_format_unpack_sint(b, color, bits, 4);
1036           break;
1037  
1038        case nir_type_uint:
1039           color = nir_format_unpack_uint(b, color, bits, 4);
1040           break;
1041  
1042        default:
1043           unreachable("unknown base type");
1044        }
1045        break;
1046     }
1047  
1048     case nir_lower_tex_packing_8:
1049        assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
1050        color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
1051        break;
1052     }
1053  
1054     nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color,
1055                                    color->parent_instr);
1056  }
1057  
1058  static bool
sampler_index_lt(nir_tex_instr * tex,unsigned max)1059  sampler_index_lt(nir_tex_instr *tex, unsigned max)
1060  {
1061     assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
1062  
1063     unsigned sampler_index = tex->sampler_index;
1064  
1065     int sampler_offset_idx =
1066        nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
1067     if (sampler_offset_idx >= 0) {
1068        if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
1069           return false;
1070  
1071        sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
1072     }
1073  
1074     return sampler_index < max;
1075  }
1076  
/* Lowers a tg4 that carries explicit per-texel offsets (tg4_offsets[])
 * into four single-offset tg4 instructions — one per gathered texel —
 * and recombines channel 3 of each into the final gather result.
 */
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   /* The explicit offsets must not coexist with a regular offset src. */
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   /* dest[0..3] collect the four texels; dest[4] accumulates the sparse
    * residency code when the instruction is sparse.
    */
   nir_ssa_def *dest[5] = {NULL};
   for (unsigned i = 0; i < 4; ++i) {
      /* One extra src slot for the per-copy offset appended below. */
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      /* Turn the i-th explicit offset into an ordinary offset src. */
      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                                 tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      /* Channel 3 of each single-offset gather is the texel for that
       * offset (NOTE(review): relies on gather channel ordering — the
       * other three channels are discarded).
       */
      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
      if (tex->is_sparse) {
         /* AND the residency codes: resident only if all fetches were. */
         nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
         dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code;
      }
   }

   /* Reassemble (including the residency channel when sparse) and replace
    * the original tg4.
    */
   nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
   nir_instr_remove(&tex->instr);

   return true;
}
1128  
/* Lowers txs with a non-zero (or non-constant) LOD into txs(0) plus ALU
 * minification of the result.  Returns true if the instruction changed.
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   /* Nothing to do when there is no LOD src or it is a constant zero. */
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   /* Snapshot the original LOD value before it is overwritten below. */
   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa,
                                    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                             nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      /* Last component is the (unminified) layer count from txs(0). */
      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   /* Redirect readers of the raw txs(0) result to the minified value. */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified,
                                  minified->parent_instr);
   return true;
}
1174  
1175  static void
nir_lower_txs_cube_array(nir_builder * b,nir_tex_instr * tex)1176  nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
1177  {
1178     assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
1179     tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1180  
1181     b->cursor = nir_after_instr(&tex->instr);
1182  
1183     assert(tex->dest.is_ssa);
1184     assert(tex->dest.ssa.num_components == 3);
1185     nir_ssa_def *size = &tex->dest.ssa;
1186     size = nir_vec3(b, nir_channel(b, size, 0),
1187                        nir_channel(b, size, 1),
1188                        nir_idiv(b, nir_channel(b, size, 2),
1189                                    nir_imm_int(b, 6)));
1190  
1191     nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr);
1192  }
1193  
/* Lowers a multisample txf into an AMD fragment-mask fetch (which maps the
 * logical sample index to a physical fragment slot) followed by a
 * fragment fetch using the remapped index.
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   /* Fold any offset src into the coordinate first. */
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   /* One src fewer: the ms_index src is dropped for the mask fetch. */
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);

   /* Reuse every src except the sample index. */
   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index: each 4-bit nibble of the fmask word holds
    * the physical slot for one logical sample.
    */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   nir_ssa_def *new_sample = NULL;
   if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) {
      /* The first and last nibbles can be extracted with cheaper ops. */
      if (nir_src_as_uint(sample) == 7)
         new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28));
      else
         new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf);
   } else {
      /* General case: extract nibble sample*4..sample*4+3. */
      new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa,
                                         nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4));
   }

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample);
}
1242  
/* Lowers samples_identical by cloning the instruction as an AMD
 * fragment-mask fetch and testing the mask against zero (presumably a
 * zero FMASK means every sample maps to the same fragment — confirm
 * against the hardware docs).
 */
static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   /* Clone keeps all srcs/flags; only op and dest type change. */
   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0));
   nir_instr_remove_v(&tex->instr);
}
1257  
1258  static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1259  nir_lower_tex_block(nir_block *block, nir_builder *b,
1260                      const nir_lower_tex_options *options,
1261                      const struct nir_shader_compiler_options *compiler_options)
1262  {
1263     bool progress = false;
1264  
1265     nir_foreach_instr_safe(instr, block) {
1266        if (instr->type != nir_instr_type_tex)
1267           continue;
1268  
1269        nir_tex_instr *tex = nir_instr_as_tex(instr);
1270        bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1271  
1272        /* mask of src coords to saturate (clamp): */
1273        unsigned sat_mask = 0;
1274  
1275        if ((1 << tex->sampler_index) & options->saturate_r)
1276           sat_mask |= (1 << 2);    /* .z */
1277        if ((1 << tex->sampler_index) & options->saturate_t)
1278           sat_mask |= (1 << 1);    /* .y */
1279        if ((1 << tex->sampler_index) & options->saturate_s)
1280           sat_mask |= (1 << 0);    /* .x */
1281  
1282        /* If we are clamping any coords, we must lower projector first
1283         * as clamping happens *after* projection:
1284         */
1285        if (lower_txp || sat_mask) {
1286           progress |= project_src(b, tex);
1287        }
1288  
1289        if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1290            (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1291            (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1292             options->lower_rect_offset)) {
1293           progress = lower_offset(b, tex) || progress;
1294        }
1295  
1296        if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1297            tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {
1298  
1299           if (compiler_options->has_txs)
1300              lower_rect(b, tex);
1301           else
1302              lower_rect_tex_scale(b, tex);
1303  
1304           progress = true;
1305        }
1306  
1307        unsigned texture_index = tex->texture_index;
1308        uint32_t texture_mask = 1u << texture_index;
1309        int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1310        if (tex_index >= 0) {
1311           nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
1312           nir_variable *var = nir_deref_instr_get_variable(deref);
1313           texture_index = var ? var->data.binding : 0;
1314           texture_mask = var ? (1u << texture_index) : 0u;
1315        }
1316  
1317        if (texture_mask & options->lower_y_uv_external) {
1318           lower_y_uv_external(b, tex, options, texture_index);
1319           progress = true;
1320        }
1321  
1322        if (texture_mask & options->lower_y_u_v_external) {
1323           lower_y_u_v_external(b, tex, options, texture_index);
1324           progress = true;
1325        }
1326  
1327        if (texture_mask & options->lower_yx_xuxv_external) {
1328           lower_yx_xuxv_external(b, tex, options, texture_index);
1329           progress = true;
1330        }
1331  
1332        if (texture_mask & options->lower_xy_uxvx_external) {
1333           lower_xy_uxvx_external(b, tex, options, texture_index);
1334           progress = true;
1335        }
1336  
1337        if (texture_mask & options->lower_ayuv_external) {
1338           lower_ayuv_external(b, tex, options, texture_index);
1339           progress = true;
1340        }
1341  
1342        if (texture_mask & options->lower_xyuv_external) {
1343           lower_xyuv_external(b, tex, options, texture_index);
1344           progress = true;
1345        }
1346  
1347        if (texture_mask & options->lower_yuv_external) {
1348           lower_yuv_external(b, tex, options, texture_index);
1349           progress = true;
1350        }
1351  
1352        if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
1353           lower_yu_yv_external(b, tex, options, texture_index);
1354           progress = true;
1355        }
1356  
1357        if ((1 << tex->texture_index) & options->lower_y41x_external) {
1358           lower_y41x_external(b, tex, options, texture_index);
1359           progress = true;
1360        }
1361  
1362        if (sat_mask) {
1363           tex = saturate_src(b, tex, sat_mask);
1364           progress = true;
1365        }
1366  
1367        if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1368           swizzle_tg4_broadcom(b, tex);
1369           progress = true;
1370        }
1371  
1372        if ((texture_mask & options->swizzle_result) &&
1373            !nir_tex_instr_is_query(tex) &&
1374            !(tex->is_shadow && tex->is_new_style_shadow)) {
1375           swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1376           progress = true;
1377        }
1378  
1379        /* should be after swizzle so we know which channels are rgb: */
1380        if ((texture_mask & options->lower_srgb) &&
1381            !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1382           linearize_srgb_result(b, tex);
1383           progress = true;
1384        }
1385  
1386        const bool has_min_lod =
1387           nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1388        const bool has_offset =
1389           nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1390  
1391        if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1392            options->lower_txb_shadow_clamp) {
1393           lower_implicit_lod(b, tex);
1394           progress = true;
1395        }
1396  
1397        if (options->lower_tex_packing[tex->sampler_index] !=
1398            nir_lower_tex_packing_none &&
1399            tex->op != nir_texop_txs &&
1400            tex->op != nir_texop_query_levels &&
1401            tex->op != nir_texop_texture_samples) {
1402           lower_tex_packing(b, tex, options);
1403           progress = true;
1404        }
1405  
1406        if (tex->op == nir_texop_txd &&
1407            (options->lower_txd ||
1408             (options->lower_txd_shadow && tex->is_shadow) ||
1409             (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1410             (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1411             (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1412              nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1413             (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1414              has_min_lod && !sampler_index_lt(tex, 16)) ||
1415             (options->lower_txd_cube_map &&
1416              tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1417             (options->lower_txd_3d &&
1418              tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
1419           lower_gradient(b, tex);
1420           progress = true;
1421           continue;
1422        }
1423  
1424        /* TXF, TXS and TXL require a LOD but not everything we implement using those
1425         * three opcodes provides one.  Provide a default LOD of 0.
1426         */
1427        if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1428            (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1429             tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
1430           b->cursor = nir_before_instr(&tex->instr);
1431           nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1432           progress = true;
1433           continue;
1434        }
1435  
1436        /* Only fragment and compute (in some cases) support implicit
1437         * derivatives.  Lower those opcodes which use implicit derivatives to
1438         * use an explicit LOD of 0.
1439         */
1440        if (nir_tex_instr_has_implicit_derivative(tex) &&
1441            !nir_shader_supports_implicit_lod(b->shader)) {
1442           lower_zero_lod(b, tex);
1443           progress = true;
1444        }
1445  
1446        if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1447           progress |= nir_lower_txs_lod(b, tex);
1448           continue;
1449        }
1450  
1451        if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
1452            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
1453           nir_lower_txs_cube_array(b, tex);
1454           progress = true;
1455           continue;
1456        }
1457  
1458        /* has to happen after all the other lowerings as the original tg4 gets
1459         * replaced by 4 tg4 instructions.
1460         */
1461        if (tex->op == nir_texop_tg4 &&
1462            nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1463            options->lower_tg4_offsets) {
1464           progress |= lower_tg4_offsets(b, tex);
1465           continue;
1466        }
1467  
1468        if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
1469           nir_lower_ms_txf_to_fragment_fetch(b, tex);
1470           progress = true;
1471           continue;
1472        }
1473  
1474        if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
1475           nir_lower_samples_identical_to_fragment_fetch(b, tex);
1476           progress = true;
1477           continue;
1478        }
1479     }
1480  
1481     return progress;
1482  }
1483  
1484  static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1485  nir_lower_tex_impl(nir_function_impl *impl,
1486                     const nir_lower_tex_options *options,
1487                     const struct nir_shader_compiler_options *compiler_options)
1488  {
1489     bool progress = false;
1490     nir_builder builder;
1491     nir_builder_init(&builder, impl);
1492  
1493     nir_foreach_block(block, impl) {
1494        progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
1495     }
1496  
1497     nir_metadata_preserve(impl, nir_metadata_block_index |
1498                                 nir_metadata_dominance);
1499     return progress;
1500  }
1501  
1502  bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1503  nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1504  {
1505     bool progress = false;
1506  
1507     nir_foreach_function(function, shader) {
1508        if (function->impl)
1509           progress |= nir_lower_tex_impl(function->impl, options, shader->options);
1510     }
1511  
1512     return progress;
1513  }
1514