• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /*
25  * This lowering pass supports (as configured via nir_lower_tex_options)
26  * various texture related conversions:
27  *   + texture projector lowering: converts the coordinate division for
28  *     texture projection to be done in ALU instructions instead of
29  *     asking the texture operation to do so.
30  *   + lowering RECT: converts the un-normalized RECT texture coordinates
31  *     to normalized coordinates with txs plus ALU instructions
32  *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33  *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
34  *     Note that this automatically triggers texture projector lowering if
35  *     needed, since clamping must happen after projector lowering.
36  */
37 
38 #include "nir.h"
39 #include "nir_builder.h"
40 
41 static void
project_src(nir_builder * b,nir_tex_instr * tex)42 project_src(nir_builder *b, nir_tex_instr *tex)
43 {
44    /* Find the projector in the srcs list, if present. */
45    int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
46    if (proj_index < 0)
47       return;
48 
49    b->cursor = nir_before_instr(&tex->instr);
50 
51    nir_ssa_def *inv_proj =
52       nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
53 
54    /* Walk through the sources projecting the arguments. */
55    for (unsigned i = 0; i < tex->num_srcs; i++) {
56       switch (tex->src[i].src_type) {
57       case nir_tex_src_coord:
58       case nir_tex_src_comparator:
59          break;
60       default:
61          continue;
62       }
63       nir_ssa_def *unprojected =
64          nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
65       nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
66 
67       /* Array indices don't get projected, so make an new vector with the
68        * coordinate's array index untouched.
69        */
70       if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
71          switch (tex->coord_components) {
72          case 4:
73             projected = nir_vec4(b,
74                                  nir_channel(b, projected, 0),
75                                  nir_channel(b, projected, 1),
76                                  nir_channel(b, projected, 2),
77                                  nir_channel(b, unprojected, 3));
78             break;
79          case 3:
80             projected = nir_vec3(b,
81                                  nir_channel(b, projected, 0),
82                                  nir_channel(b, projected, 1),
83                                  nir_channel(b, unprojected, 2));
84             break;
85          case 2:
86             projected = nir_vec2(b,
87                                  nir_channel(b, projected, 0),
88                                  nir_channel(b, unprojected, 1));
89             break;
90          default:
91             unreachable("bad texture coord count for array");
92             break;
93          }
94       }
95 
96       nir_instr_rewrite_src(&tex->instr,
97                             &tex->src[i].src,
98                             nir_src_for_ssa(projected));
99    }
100 
101    nir_tex_instr_remove_src(tex, proj_index);
102 }
103 
104 static nir_ssa_def *
get_texture_size(nir_builder * b,nir_tex_instr * tex)105 get_texture_size(nir_builder *b, nir_tex_instr *tex)
106 {
107    b->cursor = nir_before_instr(&tex->instr);
108 
109    nir_tex_instr *txs;
110 
111    txs = nir_tex_instr_create(b->shader, 1);
112    txs->op = nir_texop_txs;
113    txs->sampler_dim = tex->sampler_dim;
114    txs->is_array = tex->is_array;
115    txs->is_shadow = tex->is_shadow;
116    txs->is_new_style_shadow = tex->is_new_style_shadow;
117    txs->texture_index = tex->texture_index;
118    txs->texture = nir_deref_var_clone(tex->texture, txs);
119    txs->sampler_index = tex->sampler_index;
120    txs->sampler = nir_deref_var_clone(tex->sampler, txs);
121    txs->dest_type = nir_type_int;
122 
123    /* only single src, the lod: */
124    txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
125    txs->src[0].src_type = nir_tex_src_lod;
126 
127    nir_ssa_dest_init(&txs->instr, &txs->dest,
128                      nir_tex_instr_dest_size(txs), 32, NULL);
129    nir_builder_instr_insert(b, &txs->instr);
130 
131    return nir_i2f32(b, &txs->dest.ssa);
132 }
133 
134 static bool
lower_offset(nir_builder * b,nir_tex_instr * tex)135 lower_offset(nir_builder *b, nir_tex_instr *tex)
136 {
137    int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
138    if (offset_index < 0)
139       return false;
140 
141    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
142    assert(coord_index >= 0);
143 
144    assert(tex->src[offset_index].src.is_ssa);
145    assert(tex->src[coord_index].src.is_ssa);
146    nir_ssa_def *offset = tex->src[offset_index].src.ssa;
147    nir_ssa_def *coord = tex->src[coord_index].src.ssa;
148 
149    b->cursor = nir_before_instr(&tex->instr);
150 
151    nir_ssa_def *offset_coord;
152    if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
153       if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
154          offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
155       } else {
156          nir_ssa_def *txs = get_texture_size(b, tex);
157          nir_ssa_def *scale = nir_frcp(b, txs);
158 
159          offset_coord = nir_fadd(b, coord,
160                                  nir_fmul(b,
161                                           nir_i2f32(b, offset),
162                                           scale));
163       }
164    } else {
165       offset_coord = nir_iadd(b, coord, offset);
166    }
167 
168    if (tex->is_array) {
169       /* The offset is not applied to the array index */
170       if (tex->coord_components == 2) {
171          offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
172                                     nir_channel(b, coord, 1));
173       } else if (tex->coord_components == 3) {
174          offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
175                                     nir_channel(b, offset_coord, 1),
176                                     nir_channel(b, coord, 2));
177       } else {
178          unreachable("Invalid number of components");
179       }
180    }
181 
182    nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
183                          nir_src_for_ssa(offset_coord));
184 
185    nir_tex_instr_remove_src(tex, offset_index);
186 
187    return true;
188 }
189 
190 static void
lower_rect(nir_builder * b,nir_tex_instr * tex)191 lower_rect(nir_builder *b, nir_tex_instr *tex)
192 {
193    nir_ssa_def *txs = get_texture_size(b, tex);
194    nir_ssa_def *scale = nir_frcp(b, txs);
195 
196    /* Walk through the sources normalizing the requested arguments. */
197    for (unsigned i = 0; i < tex->num_srcs; i++) {
198       if (tex->src[i].src_type != nir_tex_src_coord)
199          continue;
200 
201       nir_ssa_def *coords =
202          nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
203       nir_instr_rewrite_src(&tex->instr,
204                             &tex->src[i].src,
205                             nir_src_for_ssa(nir_fmul(b, coords, scale)));
206    }
207 
208    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
209 }
210 
211 static nir_ssa_def *
sample_plane(nir_builder * b,nir_tex_instr * tex,int plane)212 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane)
213 {
214    assert(tex->dest.is_ssa);
215    assert(nir_tex_instr_dest_size(tex) == 4);
216    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
217    assert(tex->op == nir_texop_tex);
218    assert(tex->coord_components == 2);
219 
220    nir_tex_instr *plane_tex = nir_tex_instr_create(b->shader, 2);
221    nir_src_copy(&plane_tex->src[0].src, &tex->src[0].src, plane_tex);
222    plane_tex->src[0].src_type = nir_tex_src_coord;
223    plane_tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, plane));
224    plane_tex->src[1].src_type = nir_tex_src_plane;
225    plane_tex->op = nir_texop_tex;
226    plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
227    plane_tex->dest_type = nir_type_float;
228    plane_tex->coord_components = 2;
229 
230    plane_tex->texture_index = tex->texture_index;
231    plane_tex->texture = nir_deref_var_clone(tex->texture, plane_tex);
232    plane_tex->sampler_index = tex->sampler_index;
233    plane_tex->sampler = nir_deref_var_clone(tex->sampler, plane_tex);
234 
235    nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);
236 
237    nir_builder_instr_insert(b, &plane_tex->instr);
238 
239    return &plane_tex->dest.ssa;
240 }
241 
242 static void
convert_yuv_to_rgb(nir_builder * b,nir_tex_instr * tex,nir_ssa_def * y,nir_ssa_def * u,nir_ssa_def * v)243 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
244                    nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v)
245 {
246    nir_const_value m[3] = {
247       { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },
248       { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
249       { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }
250    };
251 
252    nir_ssa_def *yuv =
253       nir_vec4(b,
254                nir_fmul(b, nir_imm_float(b, 1.16438356f),
255                         nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
256                nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
257                nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
258                nir_imm_float(b, 0.0));
259 
260    nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
261    nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
262    nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));
263 
264    nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f));
265 
266    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
267 }
268 
269 static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex)270 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
271 {
272    b->cursor = nir_after_instr(&tex->instr);
273 
274    nir_ssa_def *y = sample_plane(b, tex, 0);
275    nir_ssa_def *uv = sample_plane(b, tex, 1);
276 
277    convert_yuv_to_rgb(b, tex,
278                       nir_channel(b, y, 0),
279                       nir_channel(b, uv, 0),
280                       nir_channel(b, uv, 1));
281 }
282 
283 static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex)284 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex)
285 {
286    b->cursor = nir_after_instr(&tex->instr);
287 
288    nir_ssa_def *y = sample_plane(b, tex, 0);
289    nir_ssa_def *u = sample_plane(b, tex, 1);
290    nir_ssa_def *v = sample_plane(b, tex, 2);
291 
292    convert_yuv_to_rgb(b, tex,
293                       nir_channel(b, y, 0),
294                       nir_channel(b, u, 0),
295                       nir_channel(b, v, 0));
296 }
297 
298 static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex)299 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
300 {
301    b->cursor = nir_after_instr(&tex->instr);
302 
303    nir_ssa_def *y = sample_plane(b, tex, 0);
304    nir_ssa_def *xuxv = sample_plane(b, tex, 1);
305 
306    convert_yuv_to_rgb(b, tex,
307                       nir_channel(b, y, 0),
308                       nir_channel(b, xuxv, 1),
309                       nir_channel(b, xuxv, 3));
310 }
311 
312 static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex)313 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex)
314 {
315   b->cursor = nir_after_instr(&tex->instr);
316 
317   nir_ssa_def *y = sample_plane(b, tex, 0);
318   nir_ssa_def *uxvx = sample_plane(b, tex, 1);
319 
320   convert_yuv_to_rgb(b, tex,
321                      nir_channel(b, y, 1),
322                      nir_channel(b, uxvx, 0),
323                      nir_channel(b, uxvx, 2));
324 }
325 
326 /*
327  * Emits a textureLod operation used to replace an existing
328  * textureGrad instruction.
329  */
330 static void
replace_gradient_with_lod(nir_builder * b,nir_ssa_def * lod,nir_tex_instr * tex)331 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
332 {
333    /* We are going to emit a textureLod() with the same parameters except that
334     * we replace ddx/ddy with lod.
335     */
336    int num_srcs = tex->num_srcs - 1;
337    nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);
338 
339    txl->op = nir_texop_txl;
340    txl->sampler_dim = tex->sampler_dim;
341    txl->texture_index = tex->texture_index;
342    txl->dest_type = tex->dest_type;
343    txl->is_array = tex->is_array;
344    txl->is_shadow = tex->is_shadow;
345    txl->is_new_style_shadow = tex->is_new_style_shadow;
346    txl->sampler_index = tex->sampler_index;
347    txl->texture = nir_deref_var_clone(tex->texture, txl);
348    txl->sampler = nir_deref_var_clone(tex->sampler, txl);
349    txl->coord_components = tex->coord_components;
350 
351    nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL);
352 
353    int src_num = 0;
354    for (int i = 0; i < tex->num_srcs; i++) {
355       if (tex->src[i].src_type == nir_tex_src_ddx ||
356           tex->src[i].src_type == nir_tex_src_ddy)
357          continue;
358       nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl);
359       txl->src[src_num].src_type = tex->src[i].src_type;
360       src_num++;
361    }
362 
363    txl->src[src_num].src = nir_src_for_ssa(lod);
364    txl->src[src_num].src_type = nir_tex_src_lod;
365    src_num++;
366 
367    assert(src_num == num_srcs);
368 
369    nir_ssa_dest_init(&txl->instr, &txl->dest,
370                      tex->dest.ssa.num_components, 32, NULL);
371    nir_builder_instr_insert(b, &txl->instr);
372 
373    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa));
374 
375    nir_instr_remove(&tex->instr);
376 }
377 
378 static void
lower_gradient_cube_map(nir_builder * b,nir_tex_instr * tex)379 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
380 {
381    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
382    assert(tex->op == nir_texop_txd);
383    assert(tex->dest.is_ssa);
384 
385    /* Use textureSize() to get the width and height of LOD 0 */
386    nir_ssa_def *size = get_texture_size(b, tex);
387 
388    /* Cubemap texture lookups first generate a texture coordinate normalized
389     * to [-1, 1] on the appropiate face. The appropiate face is determined
390     * by which component has largest magnitude and its sign. The texture
391     * coordinate is the quotient of the remaining texture coordinates against
392     * that absolute value of the component of largest magnitude. This
393     * division requires that the computing of the derivative of the texel
394     * coordinate must use the quotient rule. The high level GLSL code is as
395     * follows:
396     *
397     * Step 1: selection
398     *
399     * vec3 abs_p, Q, dQdx, dQdy;
400     * abs_p = abs(ir->coordinate);
401     * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
402     *    Q = ir->coordinate.yzx;
403     *    dQdx = ir->lod_info.grad.dPdx.yzx;
404     *    dQdy = ir->lod_info.grad.dPdy.yzx;
405     * }
406     * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
407     *    Q = ir->coordinate.xzy;
408     *    dQdx = ir->lod_info.grad.dPdx.xzy;
409     *    dQdy = ir->lod_info.grad.dPdy.xzy;
410     * }
411     * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
412     *    Q = ir->coordinate;
413     *    dQdx = ir->lod_info.grad.dPdx;
414     *    dQdy = ir->lod_info.grad.dPdy;
415     * }
416     *
417     * Step 2: use quotient rule to compute derivative. The normalized to
418     * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
419     * only concerned with the magnitudes of the derivatives whose values are
420     * not affected by the sign. We drop the sign from the computation.
421     *
422     * vec2 dx, dy;
423     * float recip;
424     *
425     * recip = 1.0 / Q.z;
426     * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
427     * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
428     *
429     * Step 3: compute LOD. At this point we have the derivatives of the
430     * texture coordinates normalized to [-1,1]. We take the LOD to be
431     *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
432     *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
433     *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
434     *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
435     *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
436     * where L is the dimension of the cubemap. The code is:
437     *
438     * float M, result;
439     * M = max(dot(dx, dx), dot(dy, dy));
440     * L = textureSize(sampler, 0).x;
441     * result = -1.0 + 0.5 * log2(L * L * M);
442     */
443 
444    /* coordinate */
445    nir_ssa_def *p =
446       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
447 
448    /* unmodified dPdx, dPdy values */
449    nir_ssa_def *dPdx =
450       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
451    nir_ssa_def *dPdy =
452       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
453 
454    nir_ssa_def *abs_p = nir_fabs(b, p);
455    nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
456    nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
457    nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
458 
459    /* 1. compute selector */
460    nir_ssa_def *Q, *dQdx, *dQdy;
461 
462    nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
463    nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
464 
465    unsigned yzx[4] = { 1, 2, 0, 0 };
466    unsigned xzy[4] = { 0, 2, 1, 0 };
467 
468    Q = nir_bcsel(b, cond_z,
469                  p,
470                  nir_bcsel(b, cond_y,
471                            nir_swizzle(b, p, xzy, 3, false),
472                            nir_swizzle(b, p, yzx, 3, false)));
473 
474    dQdx = nir_bcsel(b, cond_z,
475                     dPdx,
476                     nir_bcsel(b, cond_y,
477                               nir_swizzle(b, dPdx, xzy, 3, false),
478                               nir_swizzle(b, dPdx, yzx, 3, false)));
479 
480    dQdy = nir_bcsel(b, cond_z,
481                     dPdy,
482                     nir_bcsel(b, cond_y,
483                               nir_swizzle(b, dPdy, xzy, 3, false),
484                               nir_swizzle(b, dPdy, yzx, 3, false)));
485 
486    /* 2. quotient rule */
487 
488    /* tmp = Q.xy * recip;
489     * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
490     * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
491     */
492    nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
493 
494    unsigned xy[4] = { 0, 1, 0, 0 };
495    nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false);
496    nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
497 
498    nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false);
499    nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
500    nir_ssa_def *dx =
501       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
502 
503    nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false);
504    nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
505    nir_ssa_def *dy =
506       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
507 
508    /* M = max(dot(dx, dx), dot(dy, dy)); */
509    nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
510 
511    /* size has textureSize() of LOD 0 */
512    nir_ssa_def *L = nir_channel(b, size, 0);
513 
514    /* lod = -1.0 + 0.5 * log2(L * L * M); */
515    nir_ssa_def *lod =
516       nir_fadd(b,
517                nir_imm_float(b, -1.0f),
518                nir_fmul(b,
519                         nir_imm_float(b, 0.5f),
520                         nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
521 
522    /* 3. Replace the gradient instruction with an equivalent lod instruction */
523    replace_gradient_with_lod(b, lod, tex);
524 }
525 
526 static void
lower_gradient(nir_builder * b,nir_tex_instr * tex)527 lower_gradient(nir_builder *b, nir_tex_instr *tex)
528 {
529    assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
530    assert(tex->op == nir_texop_txd);
531    assert(tex->dest.is_ssa);
532 
533    /* Use textureSize() to get the width and height of LOD 0 */
534    unsigned component_mask;
535    switch (tex->sampler_dim) {
536    case GLSL_SAMPLER_DIM_3D:
537       component_mask = 7;
538       break;
539    case GLSL_SAMPLER_DIM_1D:
540       component_mask = 1;
541       break;
542    default:
543       component_mask = 3;
544       break;
545    }
546 
547    nir_ssa_def *size =
548       nir_channels(b, get_texture_size(b, tex), component_mask);
549 
550    /* Scale the gradients by width and height.  Effectively, the incoming
551     * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
552     * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
553     */
554    nir_ssa_def *ddx =
555       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
556    nir_ssa_def *ddy =
557       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
558 
559    nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
560    nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
561 
562    nir_ssa_def *rho;
563    if (dPdx->num_components == 1) {
564       rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
565    } else {
566       rho = nir_fmax(b,
567                      nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
568                      nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
569    }
570 
571    /* lod = log2(rho).  We're ignoring GL state biases for now. */
572    nir_ssa_def *lod = nir_flog2(b, rho);
573 
574    /* Replace the gradient instruction with an equivalent lod instruction */
575    replace_gradient_with_lod(b, lod, tex);
576 }
577 
578 static void
saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)579 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
580 {
581    b->cursor = nir_before_instr(&tex->instr);
582 
583    /* Walk through the sources saturating the requested arguments. */
584    for (unsigned i = 0; i < tex->num_srcs; i++) {
585       if (tex->src[i].src_type != nir_tex_src_coord)
586          continue;
587 
588       nir_ssa_def *src =
589          nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
590 
591       /* split src into components: */
592       nir_ssa_def *comp[4];
593 
594       assume(tex->coord_components >= 1);
595 
596       for (unsigned j = 0; j < tex->coord_components; j++)
597          comp[j] = nir_channel(b, src, j);
598 
599       /* clamp requested components, array index does not get clamped: */
600       unsigned ncomp = tex->coord_components;
601       if (tex->is_array)
602          ncomp--;
603 
604       for (unsigned j = 0; j < ncomp; j++) {
605          if ((1 << j) & sat_mask) {
606             if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
607                /* non-normalized texture coords, so clamp to texture
608                 * size rather than [0.0, 1.0]
609                 */
610                nir_ssa_def *txs = get_texture_size(b, tex);
611                comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
612                comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
613             } else {
614                comp[j] = nir_fsat(b, comp[j]);
615             }
616          }
617       }
618 
619       /* and move the result back into a single vecN: */
620       src = nir_vec(b, comp, tex->coord_components);
621 
622       nir_instr_rewrite_src(&tex->instr,
623                             &tex->src[i].src,
624                             nir_src_for_ssa(src));
625    }
626 }
627 
628 static nir_ssa_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)629 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
630 {
631    nir_const_value v;
632 
633    memset(&v, 0, sizeof(v));
634 
635    if (swizzle_val == 4) {
636       v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
637    } else {
638       assert(swizzle_val == 5);
639       if (type == nir_type_float)
640          v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
641       else
642          v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
643    }
644 
645    return nir_build_imm(b, 4, 32, v);
646 }
647 
648 static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])649 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
650 {
651    assert(tex->dest.is_ssa);
652 
653    b->cursor = nir_after_instr(&tex->instr);
654 
655    nir_ssa_def *swizzled;
656    if (tex->op == nir_texop_tg4) {
657       if (swizzle[tex->component] < 4) {
658          /* This one's easy */
659          tex->component = swizzle[tex->component];
660          return;
661       } else {
662          swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
663       }
664    } else {
665       assert(nir_tex_instr_dest_size(tex) == 4);
666       if (swizzle[0] < 4 && swizzle[1] < 4 &&
667           swizzle[2] < 4 && swizzle[3] < 4) {
668          unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
669          /* We have no 0s or 1s, just emit a swizzling MOV */
670          swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
671       } else {
672          nir_ssa_def *srcs[4];
673          for (unsigned i = 0; i < 4; i++) {
674             if (swizzle[i] < 4) {
675                srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
676             } else {
677                srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
678             }
679          }
680          swizzled = nir_vec(b, srcs, 4);
681       }
682    }
683 
684    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
685                                   swizzled->parent_instr);
686 }
687 
688 static void
linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)689 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
690 {
691    assert(tex->dest.is_ssa);
692    assert(nir_tex_instr_dest_size(tex) == 4);
693    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
694 
695    b->cursor = nir_after_instr(&tex->instr);
696 
697    static const unsigned swiz[4] = {0, 1, 2, 0};
698    nir_ssa_def *comp = nir_swizzle(b, &tex->dest.ssa, swiz, 3, true);
699 
700    /* Formula is:
701     *    (comp <= 0.04045) ?
702     *          (comp / 12.92) :
703     *          pow((comp + 0.055) / 1.055, 2.4)
704     */
705    nir_ssa_def *low  = nir_fmul(b, comp, nir_imm_float(b, 1.0 / 12.92));
706    nir_ssa_def *high = nir_fpow(b,
707                                 nir_fmul(b,
708                                          nir_fadd(b,
709                                                   comp,
710                                                   nir_imm_float(b, 0.055)),
711                                          nir_imm_float(b, 1.0 / 1.055)),
712                                 nir_imm_float(b, 2.4));
713    nir_ssa_def *cond = nir_fge(b, nir_imm_float(b, 0.04045), comp);
714    nir_ssa_def *rgb  = nir_bcsel(b, cond, low, high);
715 
716    /* alpha is untouched: */
717    nir_ssa_def *result = nir_vec4(b,
718                                   nir_channel(b, rgb, 0),
719                                   nir_channel(b, rgb, 1),
720                                   nir_channel(b, rgb, 2),
721                                   nir_channel(b, &tex->dest.ssa, 3));
722 
723    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
724                                   result->parent_instr);
725 }
726 
727 static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options)728 nir_lower_tex_block(nir_block *block, nir_builder *b,
729                     const nir_lower_tex_options *options)
730 {
731    bool progress = false;
732 
733    nir_foreach_instr_safe(instr, block) {
734       if (instr->type != nir_instr_type_tex)
735          continue;
736 
737       nir_tex_instr *tex = nir_instr_as_tex(instr);
738       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
739 
740       /* mask of src coords to saturate (clamp): */
741       unsigned sat_mask = 0;
742 
743       if ((1 << tex->sampler_index) & options->saturate_r)
744          sat_mask |= (1 << 2);    /* .z */
745       if ((1 << tex->sampler_index) & options->saturate_t)
746          sat_mask |= (1 << 1);    /* .y */
747       if ((1 << tex->sampler_index) & options->saturate_s)
748          sat_mask |= (1 << 0);    /* .x */
749 
750       /* If we are clamping any coords, we must lower projector first
751        * as clamping happens *after* projection:
752        */
753       if (lower_txp || sat_mask) {
754          project_src(b, tex);
755          progress = true;
756       }
757 
758       if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
759           (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
760           (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
761            options->lower_rect_offset)) {
762          progress = lower_offset(b, tex) || progress;
763       }
764 
765       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
766          lower_rect(b, tex);
767          progress = true;
768       }
769 
770       if ((1 << tex->texture_index) & options->lower_y_uv_external) {
771          lower_y_uv_external(b, tex);
772          progress = true;
773       }
774 
775       if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
776          lower_y_u_v_external(b, tex);
777          progress = true;
778       }
779 
780       if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
781          lower_yx_xuxv_external(b, tex);
782          progress = true;
783       }
784 
785       if ((1 << tex->texture_index) & options->lower_xy_uxvx_external) {
786          lower_xy_uxvx_external(b, tex);
787          progress = true;
788       }
789 
790       if (sat_mask) {
791          saturate_src(b, tex, sat_mask);
792          progress = true;
793       }
794 
795       if (((1 << tex->texture_index) & options->swizzle_result) &&
796           !nir_tex_instr_is_query(tex) &&
797           !(tex->is_shadow && tex->is_new_style_shadow)) {
798          swizzle_result(b, tex, options->swizzles[tex->texture_index]);
799          progress = true;
800       }
801 
802       /* should be after swizzle so we know which channels are rgb: */
803       if (((1 << tex->texture_index) & options->lower_srgb) &&
804           !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
805          linearize_srgb_result(b, tex);
806          progress = true;
807       }
808 
809       if (tex->op == nir_texop_txd &&
810           tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
811           (options->lower_txd ||
812            options->lower_txd_cube_map ||
813            (tex->is_shadow && options->lower_txd_shadow))) {
814          lower_gradient_cube_map(b, tex);
815          progress = true;
816          continue;
817       }
818 
819       if (tex->op == nir_texop_txd &&
820           (options->lower_txd ||
821            (options->lower_txd_shadow &&
822             tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE))) {
823          lower_gradient(b, tex);
824          progress = true;
825          continue;
826       }
827 
828       /* TXF, TXS and TXL require a LOD but not everything we implement using those
829        * three opcodes provides one.  Provide a default LOD of 0.
830        */
831       if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
832           (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
833            tex->op == nir_texop_txl || tex->op == nir_texop_query_levels ||
834            (tex->op == nir_texop_tex &&
835             b->shader->info.stage != MESA_SHADER_FRAGMENT))) {
836          b->cursor = nir_before_instr(&tex->instr);
837          nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
838          progress = true;
839          continue;
840       }
841    }
842 
843    return progress;
844 }
845 
846 static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options)847 nir_lower_tex_impl(nir_function_impl *impl,
848                    const nir_lower_tex_options *options)
849 {
850    bool progress = false;
851    nir_builder builder;
852    nir_builder_init(&builder, impl);
853 
854    nir_foreach_block(block, impl) {
855       progress |= nir_lower_tex_block(block, &builder, options);
856    }
857 
858    nir_metadata_preserve(impl, nir_metadata_block_index |
859                                nir_metadata_dominance);
860    return progress;
861 }
862 
863 bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)864 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
865 {
866    bool progress = false;
867 
868    nir_foreach_function(function, shader) {
869       if (function->impl)
870          progress |= nir_lower_tex_impl(function->impl, options);
871    }
872 
873    return progress;
874 }
875