/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture-related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the unnormalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions.
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_format_convert.h"

typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;

static const nir_const_value_3_4 bt601_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67867411f }, { .f32 = -0.65042432f }, { .f32 = 0.0f } },
} };

static const float bt601_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt709_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt2020_csc_offsets[3] = {
   -0.915687932f, 0.347458499f, -1.148145075f
};
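
/* The offsets above fold the video-range input bias into the matrix multiply
 * done by convert_yuv_to_rgb(): luma is biased by 16/255 and chroma by
 * 128/255, so (up to rounding of the coefficients)
 *
 *    offsets[i] = -(coeffs[0][i] * 16.0 +
 *                   coeffs[1][i] * 128.0 +
 *                   coeffs[2][i] * 128.0) / 255.0
 *
 * e.g. for the BT.601 red channel:
 *    -(1.16438356 * 16 + 0.0 * 128 + 1.59602678 * 128) / 255 ≈ -0.874202218
 */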

static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
   return true;
}
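
/* In GLSL terms, the lowering above rewrites, e.g.,
 *
 *    textureProj(s, vec3(sx, sy, q))
 *
 * as the equivalent
 *
 *    texture(s, vec2(sx, sy) * (1.0 / q))
 *
 * with the reciprocal computed once and also applied to the shadow
 * comparator, if any.
 */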

static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
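
/* For a normalized-coordinate sampler, the fold above computes, per
 * component,
 *
 *    coord' = coord + offset / textureSize(s, 0)
 *
 * e.g. a texel offset of 1 on a 256-wide texture shifts s by 1.0/256.
 * RECT samplers use unnormalized coordinates, so the offset is added as-is,
 * and integer-coordinate fetches just use an integer add.
 */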

static void
lower_rect(nir_builder *b, nir_tex_instr *tex)
{
   /* Set the sampler_dim to 2D here so that get_texture_size picks up the
    * right dimensionality.
    */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *scale = nir_frcp(b, txs);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *coords =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
   }
}
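
/* E.g. a RECT lookup at (w/2, h/2) on a w x h texture becomes a normalized
 * 2D lookup at (0.5, 0.5): the coordinate is simply scaled by 1/txs.
 */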

static void
lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *idx = nir_imm_int(b, tex->texture_index);
   nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *coords =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
   }
}

static void
lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
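
/* The explicit LOD that replaces the implicit one above is, in effect,
 *
 *    lod' = max(lod + bias, min_lod)
 *
 * i.e. the bias is applied before the per-lookup minimum-LOD clamp.
 */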

static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   lower_lod(b, tex, nir_get_texture_lod(b, tex));
}

static void
lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   if (tex->op == nir_texop_lod) {
      nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0));
      nir_instr_remove(&tex->instr);
      return;
   }

   lower_lod(b, tex, nir_imm_int(b, 0));
}

static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest);
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
                     nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If a scale factor is set for this texture, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}

static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{

   const float *offset_vals;
   const nir_const_value_3_4 *m;
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->bt709_external & (1u << texture_index)) {
      m = &bt709_csc_coeffs;
      offset_vals = bt709_csc_offsets;
   } else if (options->bt2020_external & (1u << texture_index)) {
      m = &bt2020_csc_coeffs;
      offset_vals = bt2020_csc_offsets;
   } else {
      m = &bt601_csc_coeffs;
      offset_vals = bt601_csc_offsets;
   }

   unsigned bit_size = nir_dest_bit_size(tex->dest);

   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
}
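
/* Sanity check for the tables above: with the BT.601 coefficients,
 * video-range black (Y, U, V) = (16, 128, 128) / 255 yields
 *
 *    R = 1.16438356 * 16/255 + 1.59602678 * 128/255 - 0.874202218 = 0.0
 *
 * and video-range white (235, 128, 128) / 255 yields R = G = B = 1.0.
 */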

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *uv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, uv, 0),
                      nir_channel(b, uv, 1),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *u = sample_plane(b, tex, 1, options);
   nir_ssa_def *v = sample_plane(b, tex, 2, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, u, 0),
                      nir_channel(b, v, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xuxv, 1),
                      nir_channel(b, xuxv, 3),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, uxvx, 0),
                      nir_channel(b, uxvx, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, ayuv, 2),
                      nir_channel(b, ayuv, 1),
                      nir_channel(b, ayuv, 0),
                      nir_channel(b, ayuv, 3),
                      options,
                      texture_index);
}

static void
lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y41x = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y41x, 1),
                      nir_channel(b, y41x, 0),
                      nir_channel(b, y41x, 2),
                      nir_channel(b, y41x, 3),
                      options,
                      texture_index);
}

static void
lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, xyuv, 2),
                      nir_channel(b, xyuv, 1),
                      nir_channel(b, xyuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 0),
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_channel(b, yuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}

static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has the largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining two coordinates and the
    * absolute value of the component of largest magnitude. Because of this
    * division, computing the derivative of the texel coordinate requires the
    * quotient rule. The high-level GLSL code is as follows:
    *
    * Step 1: selection
    *
    *    vec3 abs_p, Q, dQdx, dQdy;
    *    abs_p = abs(ir->coordinate);
    *    if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *       Q = ir->coordinate.yzx;
    *       dQdx = ir->lod_info.grad.dPdx.yzx;
    *       dQdy = ir->lod_info.grad.dPdy.yzx;
    *    }
    *    if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *       Q = ir->coordinate.xzy;
    *       dQdx = ir->lod_info.grad.dPdx.xzy;
    *       dQdy = ir->lod_info.grad.dPdy.xzy;
    *    }
    *    if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *       Q = ir->coordinate;
    *       dQdx = ir->lod_info.grad.dPdx;
    *       dQdy = ir->lod_info.grad.dPdy;
    *    }
    *
    * Step 2: use the quotient rule to compute the derivatives. The texel
    * coordinate normalized to [-1, 1] is given by Q.xy / (sign(Q.z) * Q.z).
    * We are only concerned with the magnitudes of the derivatives, which are
    * not affected by the sign, so we drop the sign from the computation.
    *
    *    vec2 dx, dy;
    *    float recip;
    *
    *    recip = 1.0 / Q.z;
    *    dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    *    dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1, 1]. We take the LOD to be
    *    result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *           = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *           = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy, dy))) * L)
    *           = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy, dy))))
    *           = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy, dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    *    float M, result;
    *    M = max(dot(dx, dx), dot(dy, dy));
    *    L = textureSize(sampler, 0).x;
    *    result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                   component_mask);

   /* Scale the gradients by width and height. Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho). We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
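
/* Worked example for the non-cube path: sampling a 256x256 texture with a
 * 1:1 texel-to-pixel mapping gives ddx = (1/256, 0) and ddy = (0, 1/256),
 * so dPdx = (1, 0), dPdy = (0, 1), rho = 1, and lod = log2(1) = 0, i.e. the
 * base level, as expected.
 */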

/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&txd->src[i].src, &tex->src[i].src);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord >= 0);
   nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa);
   nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa);
   txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx);
   txd->src[tex->num_srcs].src_type = nir_tex_src_ddx;
   txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy);
   txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy;

   nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest),
                     nir_dest_bit_size(tex->dest), NULL);
   nir_builder_instr_insert(b, &txd->instr);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa);
   nir_instr_remove(&tex->instr);
   return txd;
}

/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
static nir_tex_instr *
lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);

   txl->op = nir_texop_txl;
   txl->sampler_dim = tex->sampler_dim;
   txl->dest_type = tex->dest_type;
   txl->coord_components = tex->coord_components;
   txl->texture_index = tex->texture_index;
   txl->sampler_index = tex->sampler_index;

   /* reuse all but bias src */
   for (int i = 0; i < 2; i++) {
      if (tex->src[i].src_type != nir_tex_src_bias) {
         nir_src_copy(&txl->src[i].src, &tex->src[i].src);
         txl->src[i].src_type = tex->src[i].src_type;
      }
   }
   nir_ssa_def *lod = nir_get_texture_lod(b, txl);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   assert(bias_idx >= 0);
   lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
   txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod);
   txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest),
                     nir_dest_bit_size(tex->dest), NULL);
   nir_builder_instr_insert(b, &txl->instr);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa);
   nir_instr_remove(&tex->instr);
   return txl;
}

static nir_tex_instr *
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   if (tex->op == nir_texop_tex)
      tex = lower_tex_to_txd(b, tex);
   else if (tex->op == nir_texop_txb)
      tex = lower_txb_to_txl(b, tex);

   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(src));
   }
   return tex;
}

static nir_ssa_def *
get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
{
   nir_const_value v[4];

   memset(&v, 0, sizeof(v));

   if (swizzle_val == 4) {
      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
   } else {
      assert(swizzle_val == 5);
      if (type == nir_type_float32)
         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
      else
         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
   }

   return nir_build_imm(b, 4, 32, v);
}

static void
swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   assert(nir_tex_instr_dest_size(tex) == 4);
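   /* The hardware appears to return gather results in a different channel
    * order than the API's (x, y, z, w); remap them: component i of the
    * result is taken from channel swiz[i], so the output below is
    * (chan 2, chan 3, chan 1, chan 0) of the raw tg4 destination.
    */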
   unsigned swiz[4] = { 2, 3, 1, 0 };
   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
                                  swizzled->parent_instr);
}

static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
      } else {
         nir_ssa_def *srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
            }
         }
         swizzled = nir_vec(b, srcs, 4);
      }
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
                                  swizzled->parent_instr);
}

static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *rgb =
      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));

   /* alpha is untouched: */
   nir_ssa_def *result = nir_vec4(b,
                                  nir_channel(b, rgb, 0),
                                  nir_channel(b, rgb, 1),
                                  nir_channel(b, rgb, 2),
                                  nir_channel(b, &tex->dest.ssa, 3));

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
                                  result->parent_instr);
}

/**
 * Lowers texture instructions from returning a vec4 to returning a vec2 of
 * f16, i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
 * not to store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color,
                                  color->parent_instr);
}

static bool
sampler_index_lt(nir_tex_instr *tex, unsigned max)
{
   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);

   unsigned sampler_index = tex->sampler_index;

   int sampler_offset_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
   if (sampler_offset_idx >= 0) {
      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
         return false;

      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
   }

   return sampler_index < max;
}

static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *dest[5] = {NULL};
   for (unsigned i = 0; i < 4; ++i) {
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                              tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
      if (tex->is_sparse) {
         nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
         dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code;
      }
   }

   nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
   nir_instr_remove(&tex->instr);

   return true;
}

static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-zero LOD in the initial TXS operation with a LOD of 0. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
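   /* E.g. for a 16x16 texture queried at LOD 2, TXS(0) = 16 and
    * max(16 >> 2, 1) = 4; at LOD 5 the shift yields 0 and the max clamps the
    * result to 1, while a null surface's TXS(0) = 0 keeps the min at 0.
    */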
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa,
                                    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                             nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified,
                                  minified->parent_instr);
   return true;
}

static void
nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->dest.is_ssa);
   assert(tex->dest.ssa.num_components == 3);
   nir_ssa_def *size = &tex->dest.ssa;
   size = nir_vec3(b, nir_channel(b, size, 0),
                   nir_channel(b, size, 1),
                   nir_idiv(b, nir_channel(b, size, 2),
                            nir_imm_int(b, 6)));

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr);
}

static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);

   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index. */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
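   /* The fetched mask packs one 4-bit slot per sample, so sample i's
    * remapped index lives in bits [4*i, 4*i + 4). Constant samples 0 and 7
    * get a cheaper mask/shift instead of a full bitfield extract.
    */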
   nir_ssa_def *new_sample = NULL;
   if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) {
      if (nir_src_as_uint(sample) == 7)
         new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28));
      else
         new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf);
   } else {
      new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa,
                                         nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4));
   }

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample);
}

static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0));
   nir_instr_remove_v(&tex->instr);
}

static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options,
                    const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;

      if ((1 << tex->sampler_index) & options->saturate_r)
         sat_mask |= (1 << 2); /* .z */
      if ((1 << tex->sampler_index) & options->saturate_t)
         sat_mask |= (1 << 1); /* .y */
      if ((1 << tex->sampler_index) & options->saturate_s)
         sat_mask |= (1 << 0); /* .x */

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask) {
         progress |= project_src(b, tex);
      }

      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset)) {
         progress = lower_offset(b, tex) || progress;
      }

      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
          tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) {

         if (compiler_options->has_txs)
            lower_rect(b, tex);
         else
            lower_rect_tex_scale(b, tex);

         progress = true;
      }

      unsigned texture_index = tex->texture_index;
      uint32_t texture_mask = 1u << texture_index;
      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
      if (tex_index >= 0) {
         nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
         nir_variable *var = nir_deref_instr_get_variable(deref);
         texture_index = var ? var->data.binding : 0;
         texture_mask = var ? (1u << texture_index) : 0u;
      }

      if (texture_mask & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yuv_external) {
         lower_yuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yu_yv_external) {
         lower_yu_yv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y41x_external) {
         lower_y41x_external(b, tex, options, texture_index);
         progress = true;
      }

      if (sat_mask) {
         tex = saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      if ((texture_mask & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if ((texture_mask & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      if (options->lower_tex_packing[tex->sampler_index] !=
          nir_lower_tex_packing_none &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples) {
         lower_tex_packing(b, tex, options);
         progress = true;
      }

      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      /* TXF, TXS and TXL require a LOD but not everything we implement using
       * those three opcodes provides one. Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
         progress = true;
         continue;
      }

      /* Only fragment and compute (in some cases) support implicit
       * derivatives. Lower those opcodes which use implicit derivatives to
       * use an explicit LOD of 0.
       */
      if (nir_tex_instr_has_implicit_derivative(tex) &&
          !nir_shader_supports_implicit_lod(b->shader)) {
         lower_zero_lod(b, tex);
         progress = true;
      }

      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
         progress |= nir_lower_txs_lod(b, tex);
         continue;
      }

      if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
         nir_lower_txs_cube_array(b, tex);
         progress = true;
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
         nir_lower_ms_txf_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
         nir_lower_samples_identical_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }
   }

   return progress;
}

static bool
nir_lower_tex_impl(nir_function_impl *impl,
                   const nir_lower_tex_options *options,
                   const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;
   nir_builder builder;
   nir_builder_init(&builder, impl);

   nir_foreach_block(block, impl) {
      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_lower_tex_impl(function->impl, options, shader->options);
   }

   return progress;
}