/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
36 */ 37 38 #include "nir.h" 39 #include "nir_builder.h" 40 #include "nir_builtin_builder.h" 41 #include "nir_format_convert.h" 42 43 typedef struct nir_const_value_3_4 { 44 nir_const_value v[3][4]; 45 } nir_const_value_3_4; 46 47 static const nir_const_value_3_4 bt601_csc_coeffs = { { 48 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 49 { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } }, 50 { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } }, 51 } }; 52 static const nir_const_value_3_4 bt709_csc_coeffs = { { 53 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 54 { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } }, 55 { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } }, 56 } }; 57 static const nir_const_value_3_4 bt2020_csc_coeffs = { { 58 { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 59 { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } }, 60 { { .f32 = 1.67867411f }, { .f32 = -0.65042432f }, { .f32 = 0.0f } }, 61 } }; 62 63 static const float bt601_csc_offsets[3] = { 64 -0.874202218f, 0.531667823f, -1.085630789f 65 }; 66 static const float bt709_csc_offsets[3] = { 67 -0.972945075f, 0.301482665f, -1.133402218f 68 }; 69 static const float bt2020_csc_offsets[3] = { 70 -0.915687932f, 0.347458499f, -1.148145075f 71 }; 72 73 static bool project_src(nir_builder * b,nir_tex_instr * tex)74 project_src(nir_builder *b, nir_tex_instr *tex) 75 { 76 /* Find the projector in the srcs list, if present. */ 77 int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector); 78 if (proj_index < 0) 79 return false; 80 81 b->cursor = nir_before_instr(&tex->instr); 82 83 nir_ssa_def *inv_proj = 84 nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1)); 85 86 /* Walk through the sources projecting the arguments. 
*/ 87 for (unsigned i = 0; i < tex->num_srcs; i++) { 88 switch (tex->src[i].src_type) { 89 case nir_tex_src_coord: 90 case nir_tex_src_comparator: 91 break; 92 default: 93 continue; 94 } 95 nir_ssa_def *unprojected = 96 nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); 97 nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj); 98 99 /* Array indices don't get projected, so make an new vector with the 100 * coordinate's array index untouched. 101 */ 102 if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { 103 switch (tex->coord_components) { 104 case 4: 105 projected = nir_vec4(b, 106 nir_channel(b, projected, 0), 107 nir_channel(b, projected, 1), 108 nir_channel(b, projected, 2), 109 nir_channel(b, unprojected, 3)); 110 break; 111 case 3: 112 projected = nir_vec3(b, 113 nir_channel(b, projected, 0), 114 nir_channel(b, projected, 1), 115 nir_channel(b, unprojected, 2)); 116 break; 117 case 2: 118 projected = nir_vec2(b, 119 nir_channel(b, projected, 0), 120 nir_channel(b, unprojected, 1)); 121 break; 122 default: 123 unreachable("bad texture coord count for array"); 124 break; 125 } 126 } 127 128 nir_instr_rewrite_src(&tex->instr, 129 &tex->src[i].src, 130 nir_src_for_ssa(projected)); 131 } 132 133 nir_tex_instr_remove_src(tex, proj_index); 134 return true; 135 } 136 137 static bool lower_offset(nir_builder * b,nir_tex_instr * tex)138 lower_offset(nir_builder *b, nir_tex_instr *tex) 139 { 140 int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); 141 if (offset_index < 0) 142 return false; 143 144 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 145 assert(coord_index >= 0); 146 147 assert(tex->src[offset_index].src.is_ssa); 148 assert(tex->src[coord_index].src.is_ssa); 149 nir_ssa_def *offset = tex->src[offset_index].src.ssa; 150 nir_ssa_def *coord = tex->src[coord_index].src.ssa; 151 152 b->cursor = nir_before_instr(&tex->instr); 153 154 nir_ssa_def *offset_coord; 155 if 
(nir_tex_instr_src_type(tex, coord_index) == nir_type_float) { 156 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 157 offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset)); 158 } else { 159 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 160 nir_ssa_def *scale = nir_frcp(b, txs); 161 162 offset_coord = nir_fadd(b, coord, 163 nir_fmul(b, 164 nir_i2f32(b, offset), 165 scale)); 166 } 167 } else { 168 offset_coord = nir_iadd(b, coord, offset); 169 } 170 171 if (tex->is_array) { 172 /* The offset is not applied to the array index */ 173 if (tex->coord_components == 2) { 174 offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0), 175 nir_channel(b, coord, 1)); 176 } else if (tex->coord_components == 3) { 177 offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0), 178 nir_channel(b, offset_coord, 1), 179 nir_channel(b, coord, 2)); 180 } else { 181 unreachable("Invalid number of components"); 182 } 183 } 184 185 nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, 186 nir_src_for_ssa(offset_coord)); 187 188 nir_tex_instr_remove_src(tex, offset_index); 189 190 return true; 191 } 192 193 static void lower_rect(nir_builder * b,nir_tex_instr * tex)194 lower_rect(nir_builder *b, nir_tex_instr *tex) 195 { 196 /* Set the sampler_dim to 2D here so that get_texture_size picks up the 197 * right dimensionality. 
198 */ 199 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 200 201 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 202 nir_ssa_def *scale = nir_frcp(b, txs); 203 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 204 205 if (coord_index != -1) { 206 nir_ssa_def *coords = 207 nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components); 208 nir_instr_rewrite_src(&tex->instr, 209 &tex->src[coord_index].src, 210 nir_src_for_ssa(nir_fmul(b, coords, scale))); 211 } 212 } 213 214 static void lower_rect_tex_scale(nir_builder * b,nir_tex_instr * tex)215 lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex) 216 { 217 b->cursor = nir_before_instr(&tex->instr); 218 219 nir_ssa_def *idx = nir_imm_int(b, tex->texture_index); 220 nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx); 221 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 222 223 if (coord_index != -1) { 224 nir_ssa_def *coords = 225 nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components); 226 nir_instr_rewrite_src(&tex->instr, 227 &tex->src[coord_index].src, 228 nir_src_for_ssa(nir_fmul(b, coords, scale))); 229 } 230 } 231 232 static void lower_lod(nir_builder * b,nir_tex_instr * tex,nir_ssa_def * lod)233 lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod) 234 { 235 assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb); 236 assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0); 237 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); 238 assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); 239 240 int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); 241 if (bias_idx >= 0) { 242 /* If we have a bias, add it in */ 243 lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); 244 nir_tex_instr_remove_src(tex, bias_idx); 245 } 246 247 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); 248 if (min_lod_idx >= 0) { 249 /* If we have a minimum LOD, clamp LOD 
accordingly */ 250 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); 251 nir_tex_instr_remove_src(tex, min_lod_idx); 252 } 253 254 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); 255 tex->op = nir_texop_txl; 256 } 257 258 static void lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)259 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex) 260 { 261 b->cursor = nir_before_instr(&tex->instr); 262 lower_lod(b, tex, nir_get_texture_lod(b, tex)); 263 } 264 265 static void lower_zero_lod(nir_builder * b,nir_tex_instr * tex)266 lower_zero_lod(nir_builder *b, nir_tex_instr *tex) 267 { 268 b->cursor = nir_before_instr(&tex->instr); 269 270 if (tex->op == nir_texop_lod) { 271 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0)); 272 nir_instr_remove(&tex->instr); 273 return; 274 } 275 276 lower_lod(b, tex, nir_imm_int(b, 0)); 277 } 278 279 static nir_ssa_def * sample_plane(nir_builder * b,nir_tex_instr * tex,int plane,const nir_lower_tex_options * options)280 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane, 281 const nir_lower_tex_options *options) 282 { 283 assert(tex->dest.is_ssa); 284 assert(nir_tex_instr_dest_size(tex) == 4); 285 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 286 assert(tex->op == nir_texop_tex); 287 assert(tex->coord_components == 2); 288 289 nir_tex_instr *plane_tex = 290 nir_tex_instr_create(b->shader, tex->num_srcs + 1); 291 for (unsigned i = 0; i < tex->num_srcs; i++) { 292 nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src); 293 plane_tex->src[i].src_type = tex->src[i].src_type; 294 } 295 plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane)); 296 plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane; 297 plane_tex->op = nir_texop_tex; 298 plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 299 plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest); 300 plane_tex->coord_components = 2; 301 302 plane_tex->texture_index = 
tex->texture_index; 303 plane_tex->sampler_index = tex->sampler_index; 304 305 nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 306 nir_dest_bit_size(tex->dest), NULL); 307 308 nir_builder_instr_insert(b, &plane_tex->instr); 309 310 /* If scaling_factor is set, return a scaled value. */ 311 if (options->scale_factors[tex->texture_index]) 312 return nir_fmul_imm(b, &plane_tex->dest.ssa, 313 options->scale_factors[tex->texture_index]); 314 315 return &plane_tex->dest.ssa; 316 } 317 318 static void convert_yuv_to_rgb(nir_builder * b,nir_tex_instr * tex,nir_ssa_def * y,nir_ssa_def * u,nir_ssa_def * v,nir_ssa_def * a,const nir_lower_tex_options * options,unsigned texture_index)319 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, 320 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, 321 nir_ssa_def *a, 322 const nir_lower_tex_options *options, 323 unsigned texture_index) 324 { 325 326 const float *offset_vals; 327 const nir_const_value_3_4 *m; 328 assert((options->bt709_external & options->bt2020_external) == 0); 329 if (options->bt709_external & (1u << texture_index)) { 330 m = &bt709_csc_coeffs; 331 offset_vals = bt709_csc_offsets; 332 } else if (options->bt2020_external & (1u << texture_index)) { 333 m = &bt2020_csc_coeffs; 334 offset_vals = bt2020_csc_offsets; 335 } else { 336 m = &bt601_csc_coeffs; 337 offset_vals = bt601_csc_offsets; 338 } 339 340 unsigned bit_size = nir_dest_bit_size(tex->dest); 341 342 nir_ssa_def *offset = 343 nir_vec4(b, 344 nir_imm_floatN_t(b, offset_vals[0], a->bit_size), 345 nir_imm_floatN_t(b, offset_vals[1], a->bit_size), 346 nir_imm_floatN_t(b, offset_vals[2], a->bit_size), 347 a); 348 349 offset = nir_f2fN(b, offset, bit_size); 350 351 nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size); 352 nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size); 353 nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size); 354 355 nir_ssa_def *result = 356 nir_ffma(b, y, m0, 
nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset))); 357 358 nir_ssa_def_rewrite_uses(&tex->dest.ssa, result); 359 } 360 361 static void lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)362 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex, 363 const nir_lower_tex_options *options, 364 unsigned texture_index) 365 { 366 b->cursor = nir_after_instr(&tex->instr); 367 368 nir_ssa_def *y = sample_plane(b, tex, 0, options); 369 nir_ssa_def *uv = sample_plane(b, tex, 1, options); 370 371 convert_yuv_to_rgb(b, tex, 372 nir_channel(b, y, 0), 373 nir_channel(b, uv, 0), 374 nir_channel(b, uv, 1), 375 nir_imm_float(b, 1.0f), 376 options, 377 texture_index); 378 } 379 380 static void lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)381 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex, 382 const nir_lower_tex_options *options, 383 unsigned texture_index) 384 { 385 b->cursor = nir_after_instr(&tex->instr); 386 387 nir_ssa_def *y = sample_plane(b, tex, 0, options); 388 nir_ssa_def *u = sample_plane(b, tex, 1, options); 389 nir_ssa_def *v = sample_plane(b, tex, 2, options); 390 391 convert_yuv_to_rgb(b, tex, 392 nir_channel(b, y, 0), 393 nir_channel(b, u, 0), 394 nir_channel(b, v, 0), 395 nir_imm_float(b, 1.0f), 396 options, 397 texture_index); 398 } 399 400 static void lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)401 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex, 402 const nir_lower_tex_options *options, 403 unsigned texture_index) 404 { 405 b->cursor = nir_after_instr(&tex->instr); 406 407 nir_ssa_def *y = sample_plane(b, tex, 0, options); 408 nir_ssa_def *xuxv = sample_plane(b, tex, 1, options); 409 410 convert_yuv_to_rgb(b, tex, 411 nir_channel(b, y, 0), 412 nir_channel(b, xuxv, 1), 413 nir_channel(b, xuxv, 3), 414 nir_imm_float(b, 1.0f), 415 
options, 416 texture_index); 417 } 418 419 static void lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)420 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex, 421 const nir_lower_tex_options *options, 422 unsigned texture_index) 423 { 424 b->cursor = nir_after_instr(&tex->instr); 425 426 nir_ssa_def *y = sample_plane(b, tex, 0, options); 427 nir_ssa_def *uxvx = sample_plane(b, tex, 1, options); 428 429 convert_yuv_to_rgb(b, tex, 430 nir_channel(b, y, 1), 431 nir_channel(b, uxvx, 0), 432 nir_channel(b, uxvx, 2), 433 nir_imm_float(b, 1.0f), 434 options, 435 texture_index); 436 } 437 438 static void lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)439 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex, 440 const nir_lower_tex_options *options, 441 unsigned texture_index) 442 { 443 b->cursor = nir_after_instr(&tex->instr); 444 445 nir_ssa_def *ayuv = sample_plane(b, tex, 0, options); 446 447 convert_yuv_to_rgb(b, tex, 448 nir_channel(b, ayuv, 2), 449 nir_channel(b, ayuv, 1), 450 nir_channel(b, ayuv, 0), 451 nir_channel(b, ayuv, 3), 452 options, 453 texture_index); 454 } 455 456 static void lower_y41x_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)457 lower_y41x_external(nir_builder *b, nir_tex_instr *tex, 458 const nir_lower_tex_options *options, 459 unsigned texture_index) 460 { 461 b->cursor = nir_after_instr(&tex->instr); 462 463 nir_ssa_def *y41x = sample_plane(b, tex, 0, options); 464 465 convert_yuv_to_rgb(b, tex, 466 nir_channel(b, y41x, 1), 467 nir_channel(b, y41x, 0), 468 nir_channel(b, y41x, 2), 469 nir_channel(b, y41x, 3), 470 options, 471 texture_index); 472 } 473 474 static void lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)475 lower_xyuv_external(nir_builder *b, nir_tex_instr 
*tex, 476 const nir_lower_tex_options *options, 477 unsigned texture_index) 478 { 479 b->cursor = nir_after_instr(&tex->instr); 480 481 nir_ssa_def *xyuv = sample_plane(b, tex, 0, options); 482 483 convert_yuv_to_rgb(b, tex, 484 nir_channel(b, xyuv, 2), 485 nir_channel(b, xyuv, 1), 486 nir_channel(b, xyuv, 0), 487 nir_imm_float(b, 1.0f), 488 options, 489 texture_index); 490 } 491 492 static void lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)493 lower_yuv_external(nir_builder *b, nir_tex_instr *tex, 494 const nir_lower_tex_options *options, 495 unsigned texture_index) 496 { 497 b->cursor = nir_after_instr(&tex->instr); 498 499 nir_ssa_def *yuv = sample_plane(b, tex, 0, options); 500 501 convert_yuv_to_rgb(b, tex, 502 nir_channel(b, yuv, 0), 503 nir_channel(b, yuv, 1), 504 nir_channel(b, yuv, 2), 505 nir_imm_float(b, 1.0f), 506 options, 507 texture_index); 508 } 509 510 static void lower_yu_yv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)511 lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex, 512 const nir_lower_tex_options *options, 513 unsigned texture_index) 514 { 515 b->cursor = nir_after_instr(&tex->instr); 516 517 nir_ssa_def *yuv = sample_plane(b, tex, 0, options); 518 519 convert_yuv_to_rgb(b, tex, 520 nir_channel(b, yuv, 1), 521 nir_channel(b, yuv, 2), 522 nir_channel(b, yuv, 0), 523 nir_imm_float(b, 1.0f), 524 options, 525 texture_index); 526 } 527 528 /* 529 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod 530 * computed from the gradients. 
531 */ 532 static void replace_gradient_with_lod(nir_builder * b,nir_ssa_def * lod,nir_tex_instr * tex)533 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex) 534 { 535 assert(tex->op == nir_texop_txd); 536 537 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx)); 538 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy)); 539 540 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); 541 if (min_lod_idx >= 0) { 542 /* If we have a minimum LOD, clamp LOD accordingly */ 543 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); 544 nir_tex_instr_remove_src(tex, min_lod_idx); 545 } 546 547 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); 548 tex->op = nir_texop_txl; 549 } 550 551 static void lower_gradient_cube_map(nir_builder * b,nir_tex_instr * tex)552 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) 553 { 554 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); 555 assert(tex->op == nir_texop_txd); 556 assert(tex->dest.is_ssa); 557 558 /* Use textureSize() to get the width and height of LOD 0 */ 559 nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); 560 561 /* Cubemap texture lookups first generate a texture coordinate normalized 562 * to [-1, 1] on the appropiate face. The appropiate face is determined 563 * by which component has largest magnitude and its sign. The texture 564 * coordinate is the quotient of the remaining texture coordinates against 565 * that absolute value of the component of largest magnitude. This 566 * division requires that the computing of the derivative of the texel 567 * coordinate must use the quotient rule. 
The high level GLSL code is as 568 * follows: 569 * 570 * Step 1: selection 571 * 572 * vec3 abs_p, Q, dQdx, dQdy; 573 * abs_p = abs(ir->coordinate); 574 * if (abs_p.x >= max(abs_p.y, abs_p.z)) { 575 * Q = ir->coordinate.yzx; 576 * dQdx = ir->lod_info.grad.dPdx.yzx; 577 * dQdy = ir->lod_info.grad.dPdy.yzx; 578 * } 579 * if (abs_p.y >= max(abs_p.x, abs_p.z)) { 580 * Q = ir->coordinate.xzy; 581 * dQdx = ir->lod_info.grad.dPdx.xzy; 582 * dQdy = ir->lod_info.grad.dPdy.xzy; 583 * } 584 * if (abs_p.z >= max(abs_p.x, abs_p.y)) { 585 * Q = ir->coordinate; 586 * dQdx = ir->lod_info.grad.dPdx; 587 * dQdy = ir->lod_info.grad.dPdy; 588 * } 589 * 590 * Step 2: use quotient rule to compute derivative. The normalized to 591 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are 592 * only concerned with the magnitudes of the derivatives whose values are 593 * not affected by the sign. We drop the sign from the computation. 594 * 595 * vec2 dx, dy; 596 * float recip; 597 * 598 * recip = 1.0 / Q.z; 599 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) ); 600 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) ); 601 * 602 * Step 3: compute LOD. At this point we have the derivatives of the 603 * texture coordinates normalized to [-1,1]. We take the LOD to be 604 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L) 605 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L) 606 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L) 607 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy)))) 608 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy))) 609 * where L is the dimension of the cubemap. 
The code is: 610 * 611 * float M, result; 612 * M = max(dot(dx, dx), dot(dy, dy)); 613 * L = textureSize(sampler, 0).x; 614 * result = -1.0 + 0.5 * log2(L * L * M); 615 */ 616 617 /* coordinate */ 618 nir_ssa_def *p = 619 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa; 620 621 /* unmodified dPdx, dPdy values */ 622 nir_ssa_def *dPdx = 623 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 624 nir_ssa_def *dPdy = 625 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 626 627 nir_ssa_def *abs_p = nir_fabs(b, p); 628 nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0); 629 nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1); 630 nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2); 631 632 /* 1. compute selector */ 633 nir_ssa_def *Q, *dQdx, *dQdy; 634 635 nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y)); 636 nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z)); 637 638 unsigned yzx[3] = { 1, 2, 0 }; 639 unsigned xzy[3] = { 0, 2, 1 }; 640 641 Q = nir_bcsel(b, cond_z, 642 p, 643 nir_bcsel(b, cond_y, 644 nir_swizzle(b, p, xzy, 3), 645 nir_swizzle(b, p, yzx, 3))); 646 647 dQdx = nir_bcsel(b, cond_z, 648 dPdx, 649 nir_bcsel(b, cond_y, 650 nir_swizzle(b, dPdx, xzy, 3), 651 nir_swizzle(b, dPdx, yzx, 3))); 652 653 dQdy = nir_bcsel(b, cond_z, 654 dPdy, 655 nir_bcsel(b, cond_y, 656 nir_swizzle(b, dPdy, xzy, 3), 657 nir_swizzle(b, dPdy, yzx, 3))); 658 659 /* 2. 
quotient rule */ 660 661 /* tmp = Q.xy * recip; 662 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) ); 663 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) ); 664 */ 665 nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2)); 666 667 nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3); 668 nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z); 669 670 nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3); 671 nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2); 672 nir_ssa_def *dx = 673 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z))); 674 675 nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3); 676 nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2); 677 nir_ssa_def *dy = 678 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z))); 679 680 /* M = max(dot(dx, dx), dot(dy, dy)); */ 681 nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy)); 682 683 /* size has textureSize() of LOD 0 */ 684 nir_ssa_def *L = nir_channel(b, size, 0); 685 686 /* lod = -1.0 + 0.5 * log2(L * L * M); */ 687 nir_ssa_def *lod = 688 nir_fadd(b, 689 nir_imm_float(b, -1.0f), 690 nir_fmul(b, 691 nir_imm_float(b, 0.5f), 692 nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M))))); 693 694 /* 3. 
Replace the gradient instruction with an equivalent lod instruction */ 695 replace_gradient_with_lod(b, lod, tex); 696 } 697 698 static void lower_gradient(nir_builder * b,nir_tex_instr * tex)699 lower_gradient(nir_builder *b, nir_tex_instr *tex) 700 { 701 /* Cubes are more complicated and have their own function */ 702 if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 703 lower_gradient_cube_map(b, tex); 704 return; 705 } 706 707 assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE); 708 assert(tex->op == nir_texop_txd); 709 assert(tex->dest.is_ssa); 710 711 /* Use textureSize() to get the width and height of LOD 0 */ 712 unsigned component_mask; 713 switch (tex->sampler_dim) { 714 case GLSL_SAMPLER_DIM_3D: 715 component_mask = 7; 716 break; 717 case GLSL_SAMPLER_DIM_1D: 718 component_mask = 1; 719 break; 720 default: 721 component_mask = 3; 722 break; 723 } 724 725 nir_ssa_def *size = 726 nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)), 727 component_mask); 728 729 /* Scale the gradients by width and height. Effectively, the incoming 730 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the 731 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y). 732 */ 733 nir_ssa_def *ddx = 734 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 735 nir_ssa_def *ddy = 736 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 737 738 nir_ssa_def *dPdx = nir_fmul(b, ddx, size); 739 nir_ssa_def *dPdy = nir_fmul(b, ddy, size); 740 741 nir_ssa_def *rho; 742 if (dPdx->num_components == 1) { 743 rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy)); 744 } else { 745 rho = nir_fmax(b, 746 nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)), 747 nir_fsqrt(b, nir_fdot(b, dPdy, dPdy))); 748 } 749 750 /* lod = log2(rho). We're ignoring GL state biases for now. 
*/ 751 nir_ssa_def *lod = nir_flog2(b, rho); 752 753 /* Replace the gradient instruction with an equivalent lod instruction */ 754 replace_gradient_with_lod(b, lod, tex); 755 } 756 757 /* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */ 758 static nir_tex_instr * lower_tex_to_txd(nir_builder * b,nir_tex_instr * tex)759 lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex) 760 { 761 b->cursor = nir_after_instr(&tex->instr); 762 nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2); 763 764 txd->op = nir_texop_txd; 765 txd->sampler_dim = tex->sampler_dim; 766 txd->dest_type = tex->dest_type; 767 txd->coord_components = tex->coord_components; 768 txd->texture_index = tex->texture_index; 769 txd->sampler_index = tex->sampler_index; 770 771 /* reuse existing srcs */ 772 for (unsigned i = 0; i < tex->num_srcs; i++) { 773 nir_src_copy(&txd->src[i].src, &tex->src[i].src); 774 txd->src[i].src_type = tex->src[i].src_type; 775 } 776 int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord); 777 assert(coord >= 0); 778 nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa); 779 nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa); 780 txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx); 781 txd->src[tex->num_srcs].src_type = nir_tex_src_ddx; 782 txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy); 783 txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy; 784 785 nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest), 786 nir_dest_bit_size(tex->dest), NULL); 787 nir_builder_instr_insert(b, &txd->instr); 788 nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa); 789 nir_instr_remove(&tex->instr); 790 return txd; 791 } 792 793 /* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */ 794 static nir_tex_instr * lower_txb_to_txl(nir_builder * b,nir_tex_instr * tex)795 lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex) 796 { 797 b->cursor = nir_after_instr(&tex->instr); 798 nir_tex_instr *txl = 
nir_tex_instr_create(b->shader, tex->num_srcs); 799 800 txl->op = nir_texop_txl; 801 txl->sampler_dim = tex->sampler_dim; 802 txl->dest_type = tex->dest_type; 803 txl->coord_components = tex->coord_components; 804 txl->texture_index = tex->texture_index; 805 txl->sampler_index = tex->sampler_index; 806 807 /* reuse all but bias src */ 808 for (int i = 0; i < 2; i++) { 809 if (tex->src[i].src_type != nir_tex_src_bias) { 810 nir_src_copy(&txl->src[i].src, &tex->src[i].src); 811 txl->src[i].src_type = tex->src[i].src_type; 812 } 813 } 814 nir_ssa_def *lod = nir_get_texture_lod(b, txl); 815 816 int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); 817 assert(bias_idx >= 0); 818 lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); 819 txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod); 820 txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod; 821 822 nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest), 823 nir_dest_bit_size(tex->dest), NULL); 824 nir_builder_instr_insert(b, &txl->instr); 825 nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa); 826 nir_instr_remove(&tex->instr); 827 return txl; 828 } 829 830 static nir_tex_instr * saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)831 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) 832 { 833 if (tex->op == nir_texop_tex) 834 tex = lower_tex_to_txd(b, tex); 835 else if (tex->op == nir_texop_txb) 836 tex = lower_txb_to_txl(b, tex); 837 838 b->cursor = nir_before_instr(&tex->instr); 839 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 840 841 if (coord_index != -1) { 842 nir_ssa_def *src = 843 nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components); 844 845 /* split src into components: */ 846 nir_ssa_def *comp[4]; 847 848 assume(tex->coord_components >= 1); 849 850 for (unsigned j = 0; j < tex->coord_components; j++) 851 comp[j] = nir_channel(b, src, j); 852 853 /* clamp 
requested components, array index does not get clamped: */ 854 unsigned ncomp = tex->coord_components; 855 if (tex->is_array) 856 ncomp--; 857 858 for (unsigned j = 0; j < ncomp; j++) { 859 if ((1 << j) & sat_mask) { 860 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 861 /* non-normalized texture coords, so clamp to texture 862 * size rather than [0.0, 1.0] 863 */ 864 nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 865 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); 866 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); 867 } else { 868 comp[j] = nir_fsat(b, comp[j]); 869 } 870 } 871 } 872 873 /* and move the result back into a single vecN: */ 874 src = nir_vec(b, comp, tex->coord_components); 875 876 nir_instr_rewrite_src(&tex->instr, 877 &tex->src[coord_index].src, 878 nir_src_for_ssa(src)); 879 } 880 return tex; 881 } 882 883 static nir_ssa_def * get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)884 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) 885 { 886 nir_const_value v[4]; 887 888 memset(&v, 0, sizeof(v)); 889 890 if (swizzle_val == 4) { 891 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0; 892 } else { 893 assert(swizzle_val == 5); 894 if (type == nir_type_float32) 895 v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0; 896 else 897 v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1; 898 } 899 900 return nir_build_imm(b, 4, 32, v); 901 } 902 903 static void swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)904 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex) 905 { 906 assert(tex->dest.is_ssa); 907 908 b->cursor = nir_after_instr(&tex->instr); 909 910 assert(nir_tex_instr_dest_size(tex) == 4); 911 unsigned swiz[4] = { 2, 3, 1, 0 }; 912 nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4); 913 914 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled, 915 swizzled->parent_instr); 916 } 917 918 static void swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t 
swizzle[4])919 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) 920 { 921 assert(tex->dest.is_ssa); 922 923 b->cursor = nir_after_instr(&tex->instr); 924 925 nir_ssa_def *swizzled; 926 if (tex->op == nir_texop_tg4) { 927 if (swizzle[tex->component] < 4) { 928 /* This one's easy */ 929 tex->component = swizzle[tex->component]; 930 return; 931 } else { 932 swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); 933 } 934 } else { 935 assert(nir_tex_instr_dest_size(tex) == 4); 936 if (swizzle[0] < 4 && swizzle[1] < 4 && 937 swizzle[2] < 4 && swizzle[3] < 4) { 938 unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; 939 /* We have no 0s or 1s, just emit a swizzling MOV */ 940 swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4); 941 } else { 942 nir_ssa_def *srcs[4]; 943 for (unsigned i = 0; i < 4; i++) { 944 if (swizzle[i] < 4) { 945 srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); 946 } else { 947 srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); 948 } 949 } 950 swizzled = nir_vec(b, srcs, 4); 951 } 952 } 953 954 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled, 955 swizzled->parent_instr); 956 } 957 958 static void linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)959 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex) 960 { 961 assert(tex->dest.is_ssa); 962 assert(nir_tex_instr_dest_size(tex) == 4); 963 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 964 965 b->cursor = nir_after_instr(&tex->instr); 966 967 nir_ssa_def *rgb = 968 nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7)); 969 970 /* alpha is untouched: */ 971 nir_ssa_def *result = nir_vec4(b, 972 nir_channel(b, rgb, 0), 973 nir_channel(b, rgb, 1), 974 nir_channel(b, rgb, 2), 975 nir_channel(b, &tex->dest.ssa, 3)); 976 977 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result, 978 result->parent_instr); 979 } 980 981 /** 982 * Lowers texture instructions from giving a vec4 
result to a vec2 of f16, 983 * i16, or u16, or a single unorm4x8 value. 984 * 985 * Note that we don't change the destination num_components, because 986 * nir_tex_instr_dest_size() will still return 4. The driver is just expected 987 * to not store the other channels, given that nothing at the NIR level will 988 * read them. 989 */ 990 static void lower_tex_packing(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options)991 lower_tex_packing(nir_builder *b, nir_tex_instr *tex, 992 const nir_lower_tex_options *options) 993 { 994 nir_ssa_def *color = &tex->dest.ssa; 995 996 b->cursor = nir_after_instr(&tex->instr); 997 998 switch (options->lower_tex_packing[tex->sampler_index]) { 999 case nir_lower_tex_packing_none: 1000 return; 1001 1002 case nir_lower_tex_packing_16: { 1003 static const unsigned bits[4] = {16, 16, 16, 16}; 1004 1005 switch (nir_alu_type_get_base_type(tex->dest_type)) { 1006 case nir_type_float: 1007 switch (nir_tex_instr_dest_size(tex)) { 1008 case 1: 1009 assert(tex->is_shadow && tex->is_new_style_shadow); 1010 color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0)); 1011 break; 1012 case 2: { 1013 nir_ssa_def *rg = nir_channel(b, color, 0); 1014 color = nir_vec2(b, 1015 nir_unpack_half_2x16_split_x(b, rg), 1016 nir_unpack_half_2x16_split_y(b, rg)); 1017 break; 1018 } 1019 case 4: { 1020 nir_ssa_def *rg = nir_channel(b, color, 0); 1021 nir_ssa_def *ba = nir_channel(b, color, 1); 1022 color = nir_vec4(b, 1023 nir_unpack_half_2x16_split_x(b, rg), 1024 nir_unpack_half_2x16_split_y(b, rg), 1025 nir_unpack_half_2x16_split_x(b, ba), 1026 nir_unpack_half_2x16_split_y(b, ba)); 1027 break; 1028 } 1029 default: 1030 unreachable("wrong dest_size"); 1031 } 1032 break; 1033 1034 case nir_type_int: 1035 color = nir_format_unpack_sint(b, color, bits, 4); 1036 break; 1037 1038 case nir_type_uint: 1039 color = nir_format_unpack_uint(b, color, bits, 4); 1040 break; 1041 1042 default: 1043 unreachable("unknown base type"); 1044 } 1045 
break; 1046 } 1047 1048 case nir_lower_tex_packing_8: 1049 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 1050 color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0)); 1051 break; 1052 } 1053 1054 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color, 1055 color->parent_instr); 1056 } 1057 1058 static bool sampler_index_lt(nir_tex_instr * tex,unsigned max)1059 sampler_index_lt(nir_tex_instr *tex, unsigned max) 1060 { 1061 assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1); 1062 1063 unsigned sampler_index = tex->sampler_index; 1064 1065 int sampler_offset_idx = 1066 nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset); 1067 if (sampler_offset_idx >= 0) { 1068 if (!nir_src_is_const(tex->src[sampler_offset_idx].src)) 1069 return false; 1070 1071 sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src); 1072 } 1073 1074 return sampler_index < max; 1075 } 1076 1077 static bool lower_tg4_offsets(nir_builder * b,nir_tex_instr * tex)1078 lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex) 1079 { 1080 assert(tex->op == nir_texop_tg4); 1081 assert(nir_tex_instr_has_explicit_tg4_offsets(tex)); 1082 assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1); 1083 1084 b->cursor = nir_after_instr(&tex->instr); 1085 1086 nir_ssa_def *dest[5] = {NULL}; 1087 for (unsigned i = 0; i < 4; ++i) { 1088 nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1); 1089 tex_copy->op = tex->op; 1090 tex_copy->coord_components = tex->coord_components; 1091 tex_copy->sampler_dim = tex->sampler_dim; 1092 tex_copy->is_array = tex->is_array; 1093 tex_copy->is_shadow = tex->is_shadow; 1094 tex_copy->is_new_style_shadow = tex->is_new_style_shadow; 1095 tex_copy->is_sparse = tex->is_sparse; 1096 tex_copy->component = tex->component; 1097 tex_copy->dest_type = tex->dest_type; 1098 1099 for (unsigned j = 0; j < tex->num_srcs; ++j) { 1100 nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src); 1101 
tex_copy->src[j].src_type = tex->src[j].src_type; 1102 } 1103 1104 nir_tex_src src; 1105 src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0], 1106 tex->tg4_offsets[i][1])); 1107 src.src_type = nir_tex_src_offset; 1108 tex_copy->src[tex_copy->num_srcs - 1] = src; 1109 1110 nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest, 1111 nir_tex_instr_dest_size(tex), 32, NULL); 1112 1113 nir_builder_instr_insert(b, &tex_copy->instr); 1114 1115 dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3); 1116 if (tex->is_sparse) { 1117 nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4); 1118 dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code; 1119 } 1120 } 1121 1122 nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components); 1123 nir_ssa_def_rewrite_uses(&tex->dest.ssa, res); 1124 nir_instr_remove(&tex->instr); 1125 1126 return true; 1127 } 1128 1129 static bool nir_lower_txs_lod(nir_builder * b,nir_tex_instr * tex)1130 nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex) 1131 { 1132 int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); 1133 if (lod_idx < 0 || 1134 (nir_src_is_const(tex->src[lod_idx].src) && 1135 nir_src_as_int(tex->src[lod_idx].src) == 0)) 1136 return false; 1137 1138 unsigned dest_size = nir_tex_instr_dest_size(tex); 1139 1140 b->cursor = nir_before_instr(&tex->instr); 1141 nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1); 1142 1143 /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */ 1144 nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src, 1145 nir_src_for_ssa(nir_imm_int(b, 0))); 1146 1147 /* TXS(LOD) = max(TXS(0) >> LOD, 1) 1148 * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface, 1149 * which should return 0, not 1. 
1150 */ 1151 b->cursor = nir_after_instr(&tex->instr); 1152 nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa, 1153 nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod), 1154 nir_imm_int(b, 1))); 1155 1156 /* Make sure the component encoding the array size (if any) is not 1157 * minified. 1158 */ 1159 if (tex->is_array) { 1160 nir_ssa_def *comp[3]; 1161 1162 assert(dest_size <= ARRAY_SIZE(comp)); 1163 for (unsigned i = 0; i < dest_size - 1; i++) 1164 comp[i] = nir_channel(b, minified, i); 1165 1166 comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1); 1167 minified = nir_vec(b, comp, dest_size); 1168 } 1169 1170 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified, 1171 minified->parent_instr); 1172 return true; 1173 } 1174 1175 static void nir_lower_txs_cube_array(nir_builder * b,nir_tex_instr * tex)1176 nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex) 1177 { 1178 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array); 1179 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 1180 1181 b->cursor = nir_after_instr(&tex->instr); 1182 1183 assert(tex->dest.is_ssa); 1184 assert(tex->dest.ssa.num_components == 3); 1185 nir_ssa_def *size = &tex->dest.ssa; 1186 size = nir_vec3(b, nir_channel(b, size, 0), 1187 nir_channel(b, size, 1), 1188 nir_idiv(b, nir_channel(b, size, 2), 1189 nir_imm_int(b, 6))); 1190 1191 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr); 1192 } 1193 1194 static void nir_lower_ms_txf_to_fragment_fetch(nir_builder * b,nir_tex_instr * tex)1195 nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex) 1196 { 1197 lower_offset(b, tex); 1198 1199 b->cursor = nir_before_instr(&tex->instr); 1200 1201 /* Create FMASK fetch. 
*/ 1202 assert(tex->texture_index == 0); 1203 nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1); 1204 fmask_fetch->op = nir_texop_fragment_mask_fetch_amd; 1205 fmask_fetch->coord_components = tex->coord_components; 1206 fmask_fetch->sampler_dim = tex->sampler_dim; 1207 fmask_fetch->is_array = tex->is_array; 1208 fmask_fetch->texture_non_uniform = tex->texture_non_uniform; 1209 fmask_fetch->dest_type = nir_type_uint32; 1210 nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL); 1211 1212 fmask_fetch->num_srcs = 0; 1213 for (unsigned i = 0; i < tex->num_srcs; i++) { 1214 if (tex->src[i].src_type == nir_tex_src_ms_index) 1215 continue; 1216 nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++]; 1217 src->src = nir_src_for_ssa(tex->src[i].src.ssa); 1218 src->src_type = tex->src[i].src_type; 1219 } 1220 1221 nir_builder_instr_insert(b, &fmask_fetch->instr); 1222 1223 /* Obtain new sample index. */ 1224 int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index); 1225 assert(ms_index >= 0); 1226 nir_src sample = tex->src[ms_index].src; 1227 nir_ssa_def *new_sample = NULL; 1228 if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) { 1229 if (nir_src_as_uint(sample) == 7) 1230 new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28)); 1231 else 1232 new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf); 1233 } else { 1234 new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa, 1235 nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4)); 1236 } 1237 1238 /* Update instruction. 
*/ 1239 tex->op = nir_texop_fragment_fetch_amd; 1240 nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample); 1241 } 1242 1243 static void nir_lower_samples_identical_to_fragment_fetch(nir_builder * b,nir_tex_instr * tex)1244 nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex) 1245 { 1246 b->cursor = nir_after_instr(&tex->instr); 1247 1248 nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr)); 1249 fmask_fetch->op = nir_texop_fragment_mask_fetch_amd; 1250 fmask_fetch->dest_type = nir_type_uint32; 1251 nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL); 1252 nir_builder_instr_insert(b, &fmask_fetch->instr); 1253 1254 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0)); 1255 nir_instr_remove_v(&tex->instr); 1256 } 1257 1258 static bool nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1259 nir_lower_tex_block(nir_block *block, nir_builder *b, 1260 const nir_lower_tex_options *options, 1261 const struct nir_shader_compiler_options *compiler_options) 1262 { 1263 bool progress = false; 1264 1265 nir_foreach_instr_safe(instr, block) { 1266 if (instr->type != nir_instr_type_tex) 1267 continue; 1268 1269 nir_tex_instr *tex = nir_instr_as_tex(instr); 1270 bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim)); 1271 1272 /* mask of src coords to saturate (clamp): */ 1273 unsigned sat_mask = 0; 1274 1275 if ((1 << tex->sampler_index) & options->saturate_r) 1276 sat_mask |= (1 << 2); /* .z */ 1277 if ((1 << tex->sampler_index) & options->saturate_t) 1278 sat_mask |= (1 << 1); /* .y */ 1279 if ((1 << tex->sampler_index) & options->saturate_s) 1280 sat_mask |= (1 << 0); /* .x */ 1281 1282 /* If we are clamping any coords, we must lower projector first 1283 * as clamping happens *after* projection: 1284 */ 1285 if (lower_txp || 
sat_mask) { 1286 progress |= project_src(b, tex); 1287 } 1288 1289 if ((tex->op == nir_texop_txf && options->lower_txf_offset) || 1290 (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) || 1291 (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT && 1292 options->lower_rect_offset)) { 1293 progress = lower_offset(b, tex) || progress; 1294 } 1295 1296 if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect && 1297 tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) { 1298 1299 if (compiler_options->has_txs) 1300 lower_rect(b, tex); 1301 else 1302 lower_rect_tex_scale(b, tex); 1303 1304 progress = true; 1305 } 1306 1307 unsigned texture_index = tex->texture_index; 1308 uint32_t texture_mask = 1u << texture_index; 1309 int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); 1310 if (tex_index >= 0) { 1311 nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src); 1312 nir_variable *var = nir_deref_instr_get_variable(deref); 1313 texture_index = var ? var->data.binding : 0; 1314 texture_mask = var ? 
(1u << texture_index) : 0u; 1315 } 1316 1317 if (texture_mask & options->lower_y_uv_external) { 1318 lower_y_uv_external(b, tex, options, texture_index); 1319 progress = true; 1320 } 1321 1322 if (texture_mask & options->lower_y_u_v_external) { 1323 lower_y_u_v_external(b, tex, options, texture_index); 1324 progress = true; 1325 } 1326 1327 if (texture_mask & options->lower_yx_xuxv_external) { 1328 lower_yx_xuxv_external(b, tex, options, texture_index); 1329 progress = true; 1330 } 1331 1332 if (texture_mask & options->lower_xy_uxvx_external) { 1333 lower_xy_uxvx_external(b, tex, options, texture_index); 1334 progress = true; 1335 } 1336 1337 if (texture_mask & options->lower_ayuv_external) { 1338 lower_ayuv_external(b, tex, options, texture_index); 1339 progress = true; 1340 } 1341 1342 if (texture_mask & options->lower_xyuv_external) { 1343 lower_xyuv_external(b, tex, options, texture_index); 1344 progress = true; 1345 } 1346 1347 if (texture_mask & options->lower_yuv_external) { 1348 lower_yuv_external(b, tex, options, texture_index); 1349 progress = true; 1350 } 1351 1352 if ((1 << tex->texture_index) & options->lower_yu_yv_external) { 1353 lower_yu_yv_external(b, tex, options, texture_index); 1354 progress = true; 1355 } 1356 1357 if ((1 << tex->texture_index) & options->lower_y41x_external) { 1358 lower_y41x_external(b, tex, options, texture_index); 1359 progress = true; 1360 } 1361 1362 if (sat_mask) { 1363 tex = saturate_src(b, tex, sat_mask); 1364 progress = true; 1365 } 1366 1367 if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) { 1368 swizzle_tg4_broadcom(b, tex); 1369 progress = true; 1370 } 1371 1372 if ((texture_mask & options->swizzle_result) && 1373 !nir_tex_instr_is_query(tex) && 1374 !(tex->is_shadow && tex->is_new_style_shadow)) { 1375 swizzle_result(b, tex, options->swizzles[tex->texture_index]); 1376 progress = true; 1377 } 1378 1379 /* should be after swizzle so we know which channels are rgb: */ 1380 if ((texture_mask & 
options->lower_srgb) && 1381 !nir_tex_instr_is_query(tex) && !tex->is_shadow) { 1382 linearize_srgb_result(b, tex); 1383 progress = true; 1384 } 1385 1386 const bool has_min_lod = 1387 nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0; 1388 const bool has_offset = 1389 nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0; 1390 1391 if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod && 1392 options->lower_txb_shadow_clamp) { 1393 lower_implicit_lod(b, tex); 1394 progress = true; 1395 } 1396 1397 if (options->lower_tex_packing[tex->sampler_index] != 1398 nir_lower_tex_packing_none && 1399 tex->op != nir_texop_txs && 1400 tex->op != nir_texop_query_levels && 1401 tex->op != nir_texop_texture_samples) { 1402 lower_tex_packing(b, tex, options); 1403 progress = true; 1404 } 1405 1406 if (tex->op == nir_texop_txd && 1407 (options->lower_txd || 1408 (options->lower_txd_shadow && tex->is_shadow) || 1409 (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) || 1410 (options->lower_txd_offset_clamp && has_offset && has_min_lod) || 1411 (options->lower_txd_clamp_bindless_sampler && has_min_lod && 1412 nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) || 1413 (options->lower_txd_clamp_if_sampler_index_not_lt_16 && 1414 has_min_lod && !sampler_index_lt(tex, 16)) || 1415 (options->lower_txd_cube_map && 1416 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) || 1417 (options->lower_txd_3d && 1418 tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) { 1419 lower_gradient(b, tex); 1420 progress = true; 1421 continue; 1422 } 1423 1424 /* TXF, TXS and TXL require a LOD but not everything we implement using those 1425 * three opcodes provides one. Provide a default LOD of 0. 
1426 */ 1427 if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) && 1428 (tex->op == nir_texop_txf || tex->op == nir_texop_txs || 1429 tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) { 1430 b->cursor = nir_before_instr(&tex->instr); 1431 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0))); 1432 progress = true; 1433 continue; 1434 } 1435 1436 /* Only fragment and compute (in some cases) support implicit 1437 * derivatives. Lower those opcodes which use implicit derivatives to 1438 * use an explicit LOD of 0. 1439 */ 1440 if (nir_tex_instr_has_implicit_derivative(tex) && 1441 !nir_shader_supports_implicit_lod(b->shader)) { 1442 lower_zero_lod(b, tex); 1443 progress = true; 1444 } 1445 1446 if (options->lower_txs_lod && tex->op == nir_texop_txs) { 1447 progress |= nir_lower_txs_lod(b, tex); 1448 continue; 1449 } 1450 1451 if (options->lower_txs_cube_array && tex->op == nir_texop_txs && 1452 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) { 1453 nir_lower_txs_cube_array(b, tex); 1454 progress = true; 1455 continue; 1456 } 1457 1458 /* has to happen after all the other lowerings as the original tg4 gets 1459 * replaced by 4 tg4 instructions. 
1460 */ 1461 if (tex->op == nir_texop_tg4 && 1462 nir_tex_instr_has_explicit_tg4_offsets(tex) && 1463 options->lower_tg4_offsets) { 1464 progress |= lower_tg4_offsets(b, tex); 1465 continue; 1466 } 1467 1468 if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) { 1469 nir_lower_ms_txf_to_fragment_fetch(b, tex); 1470 progress = true; 1471 continue; 1472 } 1473 1474 if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) { 1475 nir_lower_samples_identical_to_fragment_fetch(b, tex); 1476 progress = true; 1477 continue; 1478 } 1479 } 1480 1481 return progress; 1482 } 1483 1484 static bool nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1485 nir_lower_tex_impl(nir_function_impl *impl, 1486 const nir_lower_tex_options *options, 1487 const struct nir_shader_compiler_options *compiler_options) 1488 { 1489 bool progress = false; 1490 nir_builder builder; 1491 nir_builder_init(&builder, impl); 1492 1493 nir_foreach_block(block, impl) { 1494 progress |= nir_lower_tex_block(block, &builder, options, compiler_options); 1495 } 1496 1497 nir_metadata_preserve(impl, nir_metadata_block_index | 1498 nir_metadata_dominance); 1499 return progress; 1500 } 1501 1502 bool nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1503 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) 1504 { 1505 bool progress = false; 1506 1507 nir_foreach_function(function, shader) { 1508 if (function->impl) 1509 progress |= nir_lower_tex_impl(function->impl, options, shader->options); 1510 } 1511 1512 return progress; 1513 } 1514