1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_tex.h"
28
29 #include "nir_builder.h"
30 #include "sfn_debug.h"
31 #include "sfn_instr_alu.h"
32 #include "sfn_instr_fetch.h"
33 #include "sfn_nir.h"
34
35 namespace r600 {
36
37 using std::string;
38
TexInstr(Opcode op,const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,const RegisterVec4 & src,unsigned resource_id,PRegister resource_offs,int sampler_id,PRegister sampler_offset)39 TexInstr::TexInstr(Opcode op,
40 const RegisterVec4& dest,
41 const RegisterVec4::Swizzle& dest_swizzle,
42 const RegisterVec4& src,
43 unsigned resource_id,
44 PRegister resource_offs,
45 int sampler_id, PRegister sampler_offset):
46 InstrWithVectorResult(dest, dest_swizzle, resource_id, resource_offs),
47 m_opcode(op),
48 m_src(src),
49 m_inst_mode(0),
50 m_sampler(this, sampler_id, sampler_offset)
51 {
52 memset(m_coord_offset, 0, sizeof(m_coord_offset));
53 m_src.add_use(this);
54 }
55
56 void
accept(ConstInstrVisitor & visitor) const57 TexInstr::accept(ConstInstrVisitor& visitor) const
58 {
59 visitor.visit(*this);
60 }
61
62 void
accept(InstrVisitor & visitor)63 TexInstr::accept(InstrVisitor& visitor)
64 {
65 visitor.visit(this);
66 }
67
68 void
set_offset(unsigned index,int32_t val)69 TexInstr::set_offset(unsigned index, int32_t val)
70 {
71 assert(index < 3);
72 m_coord_offset[index] = val;
73 }
74
75 int
get_offset(unsigned index) const76 TexInstr::get_offset(unsigned index) const
77 {
78 assert(index < 3);
79 return m_coord_offset[index] << 1;
80 }
81
82 void
set_gather_comp(int cmp)83 TexInstr::set_gather_comp(int cmp)
84 {
85 m_inst_mode = cmp;
86 }
87
88 bool
is_equal_to(const TexInstr & lhs) const89 TexInstr::is_equal_to(const TexInstr& lhs) const
90 {
91 if (m_opcode != lhs.m_opcode)
92 return false;
93
94 if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle()))
95 return false;
96
97 if (m_src != lhs.m_src)
98 return false;
99
100 if (resource_offset() && lhs.resource_offset()) {
101 if (!resource_offset()->equal_to(*lhs.resource_offset()))
102 return false;
103 } else if ((resource_offset() && !lhs.resource_offset()) ||
104 (!resource_offset() && lhs.resource_offset()))
105 return false;
106
107 if (sampler_offset() && lhs.sampler_offset()) {
108 if (!sampler_offset()->equal_to(*lhs.sampler_offset()))
109 return false;
110 } else if ((sampler_offset() && !lhs.sampler_offset()) ||
111 (!sampler_offset() && lhs.sampler_offset()))
112 return false;
113
114 if (m_tex_flags != lhs.m_tex_flags)
115 return false;
116
117 for (int i = 0; i < 3; ++i) {
118 if (m_coord_offset[i] != lhs.m_coord_offset[i])
119 return false;
120 }
121
122 return m_inst_mode == lhs.m_inst_mode &&
123 resource_id() == lhs.resource_id() &&
124 resource_index_mode() == lhs.resource_index_mode() &&
125 sampler_id() == lhs.sampler_id() &&
126 sampler_index_mode() == lhs.sampler_index_mode();
127 }
128
129 bool
propagate_death()130 TexInstr::propagate_death()
131 {
132 m_src.del_use(this);
133 return true;
134 }
135
forward_set_blockid(int id,int index)136 void TexInstr::forward_set_blockid(int id, int index)
137 {
138 for (auto p : m_prepare_instr)
139 p->set_blockid(id, index);
140 }
141
142 bool
do_ready() const143 TexInstr::do_ready() const
144 {
145 for (auto p : m_prepare_instr)
146 if (!p->ready())
147 return false;
148
149 for (auto p : required_instr())
150 if (!p->is_scheduled() && !p->is_dead()) {
151 return false;
152 }
153
154 if (resource_offset() && !resource_offset()->ready(block_id(), index()))
155 return false;
156 return m_src.ready(block_id(), index());
157 }
158
159 void
do_print(std::ostream & os) const160 TexInstr::do_print(std::ostream& os) const
161 {
162
163 for (auto& p : prepare_instr()) {
164 os << *p << "\n";
165 }
166
167 os << "TEX " << opname(m_opcode) << " ";
168 print_dest(os);
169
170 os << " : ";
171 m_src.print(os);
172
173 os << " RID:" << resource_id();
174 if (resource_offset())
175 os << " RO:" << *resource_offset();
176
177 os << " SID:" << sampler_id();
178 if (sampler_offset())
179 os << " SO:" << *sampler_offset();
180
181 if (m_coord_offset[0])
182 os << " OX:" << m_coord_offset[0];
183 if (m_coord_offset[1])
184 os << " OY:" << m_coord_offset[1];
185 if (m_coord_offset[2])
186 os << " OZ:" << m_coord_offset[2];
187
188 if (m_inst_mode || is_gather(m_opcode))
189 os << " MODE:" << m_inst_mode;
190
191 os << " ";
192 os << (m_tex_flags.test(x_unnormalized) ? "U" : "N");
193 os << (m_tex_flags.test(y_unnormalized) ? "U" : "N");
194 os << (m_tex_flags.test(z_unnormalized) ? "U" : "N");
195 os << (m_tex_flags.test(w_unnormalized) ? "U" : "N");
196 }
197
198 const char *
opname(Opcode op)199 TexInstr::opname(Opcode op)
200 {
201 switch (op) {
202 case ld:
203 return "LD";
204 case get_resinfo:
205 return "GET_TEXTURE_RESINFO";
206 case get_nsamples:
207 return "GET_NUMBER_OF_SAMPLES";
208 case get_tex_lod:
209 return "GET_LOD";
210 case get_gradient_h:
211 return "GET_GRADIENTS_H";
212 case get_gradient_v:
213 return "GET_GRADIENTS_V";
214 case set_offsets:
215 return "SET_TEXTURE_OFFSETS";
216 case keep_gradients:
217 return "KEEP_GRADIENTS";
218 case set_gradient_h:
219 return "SET_GRADIENTS_H";
220 case set_gradient_v:
221 return "SET_GRADIENTS_V";
222 case sample:
223 return "SAMPLE";
224 case sample_l:
225 return "SAMPLE_L";
226 case sample_lb:
227 return "SAMPLE_LB";
228 case sample_lz:
229 return "SAMPLE_LZ";
230 case sample_g:
231 return "SAMPLE_G";
232 case sample_g_lb:
233 return "SAMPLE_G_L";
234 case gather4:
235 return "GATHER4";
236 case gather4_o:
237 return "GATHER4_O";
238 case sample_c:
239 return "SAMPLE_C";
240 case sample_c_l:
241 return "SAMPLE_C_L";
242 case sample_c_lb:
243 return "SAMPLE_C_LB";
244 case sample_c_lz:
245 return "SAMPLE_C_LZ";
246 case sample_c_g:
247 return "SAMPLE_C_G";
248 case sample_c_g_lb:
249 return "SAMPLE_C_G_L";
250 case gather4_c:
251 return "GATHER4_C";
252 case gather4_c_o:
253 return "OP_GATHER4_C_O";
254 default:
255 return "ERROR";
256 }
257 }
258
259 const std::map<TexInstr::Opcode, std::string> TexInstr::s_opcode_map = {
260 {ld, "LD" },
261 {get_resinfo, "GET_TEXTURE_RESINFO" },
262 {get_nsamples, "GET_NUMBER_OF_SAMPLES"},
263 {get_tex_lod, "GET_LOD" },
264 {get_gradient_h, "GET_GRADIENTS_H" },
265 {get_gradient_v, "GET_GRADIENTS_V" },
266 {set_offsets, "SET_TEXTURE_OFFSETS" },
267 {keep_gradients, "KEEP_GRADIENTS" },
268 {set_gradient_h, "SET_GRADIENTS_H" },
269 {set_gradient_v, "SET_GRADIENTS_V" },
270 {sample, "SAMPLE" },
271 {sample_l, "SAMPLE_L" },
272 {sample_lb, "SAMPLE_LB" },
273 {sample_lz, "SAMPLE_LZ" },
274 {sample_g, "SAMPLE_G" },
275 {sample_g_lb, "SAMPLE_G_L" },
276 {gather4, "GATHER4" },
277 {gather4_o, "GATHER4_O" },
278 {sample_c, "SAMPLE_C" },
279 {sample_c_l, "SAMPLE_C_L" },
280 {sample_c_lb, "SAMPLE_C_LB" },
281 {sample_c_lz, "SAMPLE_C_LZ" },
282 {sample_c_g, "SAMPLE_C_G" },
283 {sample_c_g_lb, "SAMPLE_C_G_L" },
284 {gather4_c, "GATHER4_C" },
285 {gather4_c_o, "OP_GATHER4_C_O" },
286 {unknown, "ERROR" }
287 };
288
289 bool
is_gather(Opcode op)290 TexInstr::is_gather(Opcode op)
291 {
292 return op == gather4 || op == gather4_c || op == gather4_o || op == gather4_c_o;
293 }
294
295 TexInstr::Opcode
op_from_string(const std::string & s)296 TexInstr::op_from_string(const std::string& s)
297 {
298 for (auto& [op, str] : s_opcode_map) {
299 if (s == str)
300 return op;
301 }
302 return unknown;
303 }
304
305 Instr::Pointer
from_string(std::istream & is,ValueFactory & value_fctory)306 TexInstr::from_string(std::istream& is, ValueFactory& value_fctory)
307 {
308 string opstr;
309 string deststr;
310 is >> opstr >> deststr;
311
312 auto opcode = TexInstr::op_from_string(opstr);
313
314 RegisterVec4::Swizzle dest_swz;
315
316 auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group);
317
318 char dummy;
319 is >> dummy;
320 assert(dummy == ':');
321
322 string srcstr;
323 is >> srcstr;
324
325 auto src = value_fctory.src_vec4_from_string(srcstr);
326
327 string res_id_str;
328 string sampler_id_str;
329
330 is >> res_id_str >> sampler_id_str;
331
332 int res_id = int_from_string_with_prefix(res_id_str, "RID:");
333 int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:");
334
335 auto tex = new TexInstr(opcode, dest, dest_swz, src, res_id, nullptr,
336 sampler_id, nullptr);
337
338 while (!is.eof() && is.good()) {
339 std::string next_token;
340 is >> next_token;
341
342 if (next_token.empty())
343 break;
344
345 if (next_token[0] == 'U' || next_token[0] == 'N') {
346 tex->read_tex_coord_normalitazion(next_token);
347 } else {
348 tex->set_tex_param(next_token);
349 }
350 }
351
352 return tex;
353 }
354
355 void
read_tex_coord_normalitazion(const std::string & flags)356 TexInstr::read_tex_coord_normalitazion(const std::string& flags)
357 {
358 assert(flags.length() == 4);
359 if (flags[0] == 'U')
360 set_tex_flag(x_unnormalized);
361 if (flags[1] == 'U')
362 set_tex_flag(y_unnormalized);
363 if (flags[2] == 'U')
364 set_tex_flag(z_unnormalized);
365 if (flags[3] == 'U')
366 set_tex_flag(w_unnormalized);
367 }
368
369 void
set_tex_param(const std::string & token)370 TexInstr::set_tex_param(const std::string& token)
371 {
372 if (token.substr(0, 3) == "OX:")
373 set_offset(0, int_from_string_with_prefix(token, "OX:"));
374 else if (token.substr(0, 3) == "OY:")
375 set_offset(1, int_from_string_with_prefix(token, "OY:"));
376 else if (token.substr(0, 3) == "OZ:")
377 set_offset(2, int_from_string_with_prefix(token, "OZ:"));
378 else if (token.substr(0, 5) == "MODE:")
379 set_inst_mode(int_from_string_with_prefix(token, "MODE:"));
380 else if (token.substr(0, 3) == "SO:")
381 set_sampler_offset(VirtualValue::from_string(token.substr(3))->as_register());
382 else if (token.substr(0, 3) == "RO:")
383 set_resource_offset(VirtualValue::from_string(token.substr(3))->as_register());
384 else {
385 std::cerr << "Token '" << token << "': ";
386 unreachable("Unknown token in tex param");
387 }
388 }
389
390 bool
from_nir(nir_tex_instr * tex,Shader & shader)391 TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
392 {
393 Inputs src(*tex, shader.value_factory());
394
395 if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) != -1)
396 return emit_lowered_tex(tex, src, shader);
397
398 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
399 switch (tex->op) {
400 case nir_texop_txs:
401 return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
402 case nir_texop_txf:
403 return emit_buf_txf(tex, src, shader);
404 default:
405 return false;
406 }
407 } else {
408 switch (tex->op) {
409 case nir_texop_txs:
410 return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
411 case nir_texop_lod:
412 return emit_tex_lod(tex, src, shader);
413 case nir_texop_query_levels:
414 return emit_tex_txs(tex, src, {3, 7, 7, 7}, shader);
415 case nir_texop_texture_samples:
416 return emit_tex_texture_samples(tex, src, shader);
417 default:
418 return false;
419 }
420 }
421 return true;
422 }
423
424 bool
replace_source(PRegister old_src,PVirtualValue new_src)425 TexInstr::replace_source(PRegister old_src, PVirtualValue new_src)
426 {
427 if (old_src->pin() != pin_free)
428 return false;
429
430 if (!new_src->as_register())
431 return false;
432
433 bool success = false;
434 for (int i = 0; i < 4; ++i) {
435 if (m_src[i]->equal_to(*old_src)) {
436 m_src.set_value(i, new_src->as_register());
437 success = true;
438 }
439 }
440 m_src.validate();
441 if (success) {
442 old_src->del_use(this);
443 new_src->as_register()->add_use(this);
444 }
445 return success;
446 }
447
update_indirect_addr(PRegister old_reg,PRegister addr)448 void TexInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
449 {
450 if (resource_offset() && old_reg->equal_to(*resource_offset()))
451 set_resource_offset(addr);
452 else if (sampler_offset() && old_reg->equal_to(*sampler_offset()))
453 set_sampler_offset(addr);
454
455 for (auto& p : m_prepare_instr)
456 p->update_indirect_addr(old_reg, addr);
457 }
458
459 uint8_t
allowed_src_chan_mask() const460 TexInstr::allowed_src_chan_mask() const
461 {
462 return m_src.free_chan_mask();
463 }
464
/* Result of get_sampler_id(): the sampler id and an `indirect` marker.
 *
 * The members carry default initializers so that a default-constructed
 * object never holds indeterminate values; brace initialization with
 * explicit values keeps working as before.
 */
struct SamplerId {
   int id = 0;
   bool indirect = false;
};
469
470 SamplerId
get_sampler_id(int sampler_id,const nir_variable * deref)471 get_sampler_id(int sampler_id, const nir_variable *deref)
472 {
473 SamplerId result = {sampler_id, false};
474
475 if (deref) {
476 assert(glsl_type_is_sampler(deref->type));
477 result.id = deref->data.binding;
478 }
479 return result;
480 }
481
482 void
emit_set_gradients(nir_tex_instr * tex,int texture_id,Inputs & src,TexInstr * irt,Shader & shader)483 TexInstr::emit_set_gradients(
484 nir_tex_instr *tex, int texture_id, Inputs& src, TexInstr *irt, Shader& shader)
485 {
486 TexInstr *grad[2] = {nullptr, nullptr};
487 RegisterVec4 empty_dst(0, false, {0, 0, 0, 0}, pin_group);
488 grad[0] = new TexInstr(set_gradient_h,
489 empty_dst,
490 {7, 7, 7, 7},
491 src.ddx,
492 texture_id,
493 src.texture_offset);
494 grad[0]->set_rect_coordinate_flags(tex);
495 grad[0]->set_always_keep();
496
497 grad[1] = new TexInstr(set_gradient_v,
498 empty_dst,
499 {7, 7, 7, 7},
500 src.ddy,
501 texture_id,
502 src.texture_offset);
503 grad[1]->set_rect_coordinate_flags(tex);
504 grad[1]->set_always_keep();
505 irt->add_prepare_instr(grad[0]);
506 irt->add_prepare_instr(grad[1]);
507 if (shader.last_txd())
508 irt->add_required_instr(shader.last_txd());
509 shader.set_last_txd(irt);
510 }
511
512 void
emit_set_offsets(nir_tex_instr * tex,int texture_id,Inputs & src,TexInstr * irt,Shader & shader)513 TexInstr::emit_set_offsets(nir_tex_instr *tex, int texture_id, Inputs& src, TexInstr *irt, Shader& shader)
514 {
515 RegisterVec4::Swizzle swizzle = {4, 4, 4, 4};
516 int src_components = tex->coord_components;
517 if (tex->is_array)
518 --src_components;
519
520 for (int i = 0; i < src_components; ++i)
521 swizzle[i] = i;
522
523 auto ofs = shader.value_factory().src_vec4(*src.offset, pin_group, swizzle);
524 RegisterVec4 empty_dst(0, false, {0, 0, 0, 0}, pin_group);
525
526 auto set_ofs = new TexInstr(TexInstr::set_offsets,
527 empty_dst,
528 {7, 7, 7, 7},
529 ofs,
530 texture_id + R600_MAX_CONST_BUFFERS,
531 src.texture_offset);
532 set_ofs->set_always_keep();
533 irt->add_prepare_instr(set_ofs);
534 }
535
536 bool
emit_lowered_tex(nir_tex_instr * tex,Inputs & src,Shader & shader)537 TexInstr::emit_lowered_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
538 {
539 assert(src.backend1);
540 assert(src.backend2);
541
542 auto& vf = shader.value_factory();
543 sfn_log << SfnLog::instr << "emit '" << *reinterpret_cast<nir_instr *>(tex) << "' ("
544 << __func__ << ")\n";
545
546 auto params = nir_src_as_const_value(*src.backend2);
547 int32_t coord_mask = params[0].i32;
548 int32_t flags = params[1].i32;
549 int32_t inst_mode = params[2].i32;
550 uint32_t dst_swz_packed = params[3].u32;
551
552 auto dst = vf.dest_vec4(tex->def, pin_group);
553
554 RegisterVec4::Swizzle src_swizzle = {0};
555 for (int i = 0; i < 4; ++i)
556 src_swizzle[i] = (coord_mask & (1 << i)) ? i : 7;
557
558 auto src_coord = vf.src_vec4(*src.backend1, pin_group, src_swizzle);
559
560 RegisterVec4::Swizzle dst_swz = {0, 1, 2, 3};
561 if (dst_swz_packed) {
562 for (int i = 0; i < 4; ++i) {
563 dst_swz[i] = (dst_swz_packed >> (8 * i)) & 0xff;
564 }
565 }
566
567 int texture_id = tex->texture_index + R600_MAX_CONST_BUFFERS;
568 auto irt = new TexInstr(src.opcode,
569 dst,
570 dst_swz,
571 src_coord,
572 texture_id,
573 src.texture_offset,
574 tex->sampler_index,
575 src.sampler_offset);
576
577 if (tex->op == nir_texop_txd)
578 emit_set_gradients(tex, texture_id, src, irt, shader);
579
580 if (!irt->set_coord_offsets(src.offset)) {
581 assert(tex->op == nir_texop_tg4);
582 emit_set_offsets(tex, texture_id, src, irt, shader);
583 }
584
585 for (const auto f : TexFlags) {
586 if (flags & (1 << f))
587 irt->set_tex_flag(f);
588 }
589
590 irt->set_inst_mode(inst_mode);
591
592 shader.emit_instruction(irt);
593 return true;
594 }
595
596 bool
emit_buf_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)597 TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
598 {
599 auto& vf = shader.value_factory();
600 auto dst = vf.dest_vec4(tex->def, pin_group);
601
602 PRegister tex_offset = nullptr;
603 if (src.sampler_offset)
604 tex_offset = shader.emit_load_to_register(src.sampler_offset);
605
606 auto *real_dst = &dst;
607 RegisterVec4 tmp = vf.temp_vec4(pin_group);
608
609 if (shader.chip_class() < ISA_CC_EVERGREEN) {
610 real_dst = &tmp;
611 }
612
613 auto ir = new LoadFromBuffer(*real_dst,
614 {0, 1, 2, 3},
615 src.coord[0],
616 0,
617 tex->texture_index + R600_MAX_CONST_BUFFERS,
618 tex_offset,
619 fmt_invalid);
620 ir->set_fetch_flag(FetchInstr::use_const_field);
621 shader.emit_instruction(ir);
622 shader.set_flag(Shader::sh_uses_tex_buffer);
623
624 if (shader.chip_class() < ISA_CC_EVERGREEN) {
625 auto tmp_w = vf.temp_register();
626 int buf_sel = (512 + R600_BUFFER_INFO_OFFSET / 16) + 2 * tex->texture_index;
627 AluInstr *ir = nullptr;
628 for (int i = 0; i < 4; ++i) {
629 auto d = i < 3 ? dst[i] : tmp_w;
630 ir = new AluInstr(op2_and_int,
631 d,
632 tmp[i],
633 vf.uniform(buf_sel, i, R600_BUFFER_INFO_CONST_BUFFER),
634 AluInstr::write);
635 shader.emit_instruction(ir);
636 }
637
638 ir->set_alu_flag(alu_last_instr);
639 shader.emit_instruction(
640 new AluInstr(op2_or_int,
641 dst[3],
642 tmp_w,
643 vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER),
644 AluInstr::last_write));
645 }
646
647 return true;
648 }
649
650 bool
emit_tex_texture_samples(nir_tex_instr * instr,Inputs & src,Shader & shader)651 TexInstr::emit_tex_texture_samples(nir_tex_instr *instr, Inputs& src, Shader& shader)
652 {
653 RegisterVec4 dest = shader.value_factory().dest_vec4(instr->def, pin_chan);
654 RegisterVec4 help{
655 0, true, {4, 4, 4, 4}
656 };
657
658 int res_id = R600_MAX_CONST_BUFFERS + instr->texture_index;
659
660 // Fishy: should the zero be instr->sampler_index?
661 auto ir =
662 new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help, res_id, src.texture_offset);
663 shader.emit_instruction(ir);
664 return true;
665 }
666
667 bool
emit_tex_txs(nir_tex_instr * tex,Inputs & src,RegisterVec4::Swizzle dest_swz,Shader & shader)668 TexInstr::emit_tex_txs(nir_tex_instr *tex,
669 Inputs& src,
670 RegisterVec4::Swizzle dest_swz,
671 Shader& shader)
672 {
673 auto& vf = shader.value_factory();
674
675 auto dest = vf.dest_vec4(tex->def, pin_group);
676
677 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
678 if (shader.chip_class() >= ISA_CC_EVERGREEN) {
679 shader.emit_instruction(new QueryBufferSizeInstr(
680 dest, {0, 7, 7, 7}, tex->texture_index + R600_MAX_CONST_BUFFERS));
681 } else {
682 int id = 2 * tex->texture_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1;
683 auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER);
684 shader.emit_instruction(
685 new AluInstr(op1_mov, dest[0], src, AluInstr::last_write));
686 shader.set_flag(Shader::sh_uses_tex_buffer);
687 }
688 } else {
689
690 auto src_lod = vf.temp_register();
691 shader.emit_instruction(
692 new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
693
694 RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
695
696 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
697 dest_swz[2] = 7;
698
699 auto ir = new TexInstr(get_resinfo,
700 dest,
701 dest_swz,
702 src_coord,
703 tex->texture_index + R600_MAX_CONST_BUFFERS,
704 src.texture_offset);
705
706 ir->set_dest_swizzle(dest_swz);
707 shader.emit_instruction(ir);
708
709 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
710 auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (tex->texture_index >> 2),
711 tex->texture_index & 3,
712 R600_BUFFER_INFO_CONST_BUFFER);
713
714 auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
715 shader.emit_instruction(alu);
716 shader.set_flag(Shader::sh_txs_cube_array_comp);
717 }
718 }
719
720 return true;
721 }
722
723 auto
prepare_source(nir_tex_instr * tex,const Inputs & inputs,Shader & shader)724 TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader)
725 -> RegisterVec4
726 {
727 RegisterVec4::Swizzle target{7, 7, 7, 7};
728 PVirtualValue src[4]{nullptr, nullptr, nullptr, nullptr};
729
730 for (unsigned i = 0; i < tex->coord_components; ++i) {
731 target[i] = i;
732 src[i] = inputs.coord[i];
733 }
734
735 // array index always goes into z
736 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
737 target[2] = 1;
738 target[1] = 7;
739 src[2] = inputs.coord[1];
740 }
741
742 /* With txl and txb shadow goes into z and lod or bias go into w */
743 if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
744 target[3] = 3;
745 src[3] = tex->op == nir_texop_txl ? inputs.lod : inputs.bias;
746 if (tex->is_shadow) {
747 target[2] = 2;
748 src[2] = inputs.comperator;
749 }
750 } else if (tex->is_shadow) {
751 /* Other ops have shadow in w */
752 target[3] = 3;
753 src[3] = inputs.comperator;
754 }
755
756 auto src_coord = shader.value_factory().temp_vec4(pin_group, target);
757
758 AluInstr *ir = nullptr;
759 for (int i = 0; i < 4; ++i) {
760 if (target[i] > 3)
761 continue;
762
763 auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov;
764
765 ir = new AluInstr(op, src_coord[i], src[i], AluInstr::write);
766 shader.emit_instruction(ir);
767 }
768
769 if (ir)
770 ir->set_alu_flag(alu_last_instr);
771
772 return src_coord;
773 }
774
Inputs(const nir_tex_instr & instr,ValueFactory & vf)775 TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
776 sampler_deref(nullptr),
777 texture_deref(nullptr),
778 bias(nullptr),
779 comperator(nullptr),
780 lod(nullptr),
781 offset(nullptr),
782 gather_comp(nullptr),
783 ms_index(nullptr),
784 texture_offset(nullptr),
785 sampler_offset(nullptr),
786 backend1(nullptr),
787 backend2(nullptr),
788 opcode(ld)
789 {
790 // sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components
791 // << " components\n";
792
793 unsigned grad_components = instr.coord_components;
794 if (instr.is_array && !instr.array_is_lowered_cube)
795 --grad_components;
796
797 for (unsigned i = 0; i < instr.num_srcs; ++i) {
798 switch (instr.src[i].src_type) {
799 case nir_tex_src_bias:
800 bias = vf.src(instr.src[i], 0);
801 break;
802
803 case nir_tex_src_coord: {
804 coord = vf.src_vec4(instr.src[i].src,
805 pin_none,
806 swizzle_from_ncomps(instr.coord_components));
807 } break;
808 case nir_tex_src_comparator:
809 comperator = vf.src(instr.src[i], 0);
810 break;
811 case nir_tex_src_ddx:
812 ddx = vf.src_vec4(instr.src[i].src,
813 pin_group,
814 swizzle_from_ncomps(grad_components));
815 break;
816 case nir_tex_src_ddy:
817 ddy = vf.src_vec4(instr.src[i].src,
818 pin_group,
819 swizzle_from_ncomps(grad_components));
820 break;
821 case nir_tex_src_lod:
822 lod = vf.src(instr.src[i].src, 0);
823 break;
824 case nir_tex_src_offset:
825 offset = &instr.src[i].src;
826 break;
827 /* case nir_tex_src_sampler_deref:
828 sampler_deref = get_deref_location(instr.src[i].src);
829 break;
830 case nir_tex_src_texture_deref:
831 texture_deref = get_deref_location(instr.src[i].src);
832 break;
833 */
834 case nir_tex_src_ms_index:
835 ms_index = vf.src(instr.src[i], 0);
836 break;
837 case nir_tex_src_texture_offset:
838 texture_offset = vf.src(instr.src[i], 0)->as_register();
839 break;
840 case nir_tex_src_sampler_offset:
841 sampler_offset = vf.src(instr.src[i], 0)->as_register();
842 break;
843 case nir_tex_src_backend1:
844 backend1 = &instr.src[i].src;
845 break;
846 case nir_tex_src_backend2:
847 backend2 = &instr.src[i].src;
848 break;
849 case nir_tex_src_plane:
850 case nir_tex_src_projector:
851 case nir_tex_src_min_lod:
852 default:
853 unreachable("unsupported texture input type");
854 }
855 }
856
857 opcode = get_opcode(instr);
858 }
859
860 auto
get_opcode(const nir_tex_instr & instr)861 TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode
862 {
863 switch (instr.op) {
864 case nir_texop_tex:
865 return instr.is_shadow ? sample_c : sample;
866 case nir_texop_txf:
867 return ld;
868 case nir_texop_txb:
869 return instr.is_shadow ? sample_c_lb : sample_lb;
870 case nir_texop_txl:
871 return instr.is_shadow ? sample_c_l : sample_l;
872 case nir_texop_txs:
873 return get_resinfo;
874 case nir_texop_lod:
875 return get_resinfo;
876 case nir_texop_txd:
877 return instr.is_shadow ? sample_c_g : sample_g;
878 case nir_texop_tg4: {
879 auto var_offset = offset && nir_src_as_const_value(*offset) == nullptr;
880 return instr.is_shadow ? (var_offset ? gather4_c_o : gather4_c)
881 : (var_offset ? gather4_o : gather4);
882 }
883 case nir_texop_txf_ms:
884 return ld;
885 case nir_texop_query_levels:
886 return get_resinfo;
887 case nir_texop_texture_samples:
888 return TexInstr::get_nsamples;
889 default:
890 unreachable("unsupported texture input opcode");
891 }
892 }
893
894 bool
emit_tex_lod(nir_tex_instr * tex,Inputs & src,Shader & shader)895 TexInstr::emit_tex_lod(nir_tex_instr *tex, Inputs& src, Shader& shader)
896 {
897 auto& vf = shader.value_factory();
898
899 auto dst = shader.value_factory().dest_vec4(tex->def, pin_group);
900
901 auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
902
903 auto src_coord = vf.temp_vec4(pin_group, swizzle);
904
905 AluInstr *ir = nullptr;
906 for (unsigned i = 0; i < tex->coord_components; ++i) {
907 ir = new AluInstr(op1_mov, src_coord[i], src.coord[i], AluInstr::write);
908 shader.emit_instruction(ir);
909 }
910 if (ir)
911 ir->set_alu_flag(alu_last_instr);
912
913 auto irt = new TexInstr(TexInstr::get_tex_lod,
914 dst,
915 {1, 0, 7, 7},
916 src_coord,
917 tex->texture_index + R600_MAX_CONST_BUFFERS,
918 src.texture_offset);
919
920 shader.emit_instruction(irt);
921 return true;
922 }
923
924 RegisterVec4::Swizzle
swizzle_from_ncomps(int comps) const925 TexInstr::Inputs::swizzle_from_ncomps(int comps) const
926 {
927 RegisterVec4::Swizzle swz;
928 for (int i = 0; i < 4; ++i)
929 swz[i] = i < comps ? i : 7;
930 return swz;
931 }
932
933 bool
set_coord_offsets(nir_src * offset)934 TexInstr::set_coord_offsets(nir_src *offset)
935 {
936 if (!offset)
937 return true;
938
939 auto literal = nir_src_as_const_value(*offset);
940 if (!literal)
941 return false;
942
943 for (int i = 0; i < offset->ssa->num_components; ++i)
944 set_offset(i, literal[i].i32);
945 return true;
946 }
947
948 void
set_rect_coordinate_flags(nir_tex_instr * instr)949 TexInstr::set_rect_coordinate_flags(nir_tex_instr *instr)
950 {
951 if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
952 set_tex_flag(x_unnormalized);
953 set_tex_flag(y_unnormalized);
954 }
955 }
956
957 class LowerTexToBackend : public NirLowerInstruction {
958 public:
959 LowerTexToBackend(amd_gfx_level chip_class);
960
961 private:
962 bool filter(const nir_instr *instr) const override;
963 nir_def *lower(nir_instr *instr) override;
964
965 nir_def *lower_tex(nir_tex_instr *tex);
966 nir_def *lower_txf(nir_tex_instr *tex);
967 nir_def *lower_tg4(nir_tex_instr *tex);
968 nir_def *lower_txf_ms(nir_tex_instr *tex);
969 nir_def *lower_txf_ms_direct(nir_tex_instr *tex);
970
971 nir_def *
972 prepare_coord(nir_tex_instr *tex, int& unnormalized_mask, int& used_coord_mask);
973 int get_src_coords(nir_tex_instr *tex,
974 std::array<nir_def *, 4>& coord,
975 bool round_array_index);
976 nir_def *prep_src(std::array<nir_def *, 4>& coord, int& used_coord_mask);
977 nir_def *
978 finalize(nir_tex_instr *tex, nir_def *backend1, nir_def *backend2);
979
980 nir_def *get_undef();
981
982 amd_gfx_level m_chip_class;
983 nir_def *m_undef {nullptr};
984 };
985
986 bool
r600_nir_lower_tex_to_backend(nir_shader * shader,amd_gfx_level chip_class)987 r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class)
988 {
989 return LowerTexToBackend(chip_class).run(shader);
990 }
991
LowerTexToBackend(amd_gfx_level chip_class)992 LowerTexToBackend::LowerTexToBackend(amd_gfx_level chip_class):
993 m_chip_class(chip_class)
994 {
995 }
996
997 bool
filter(const nir_instr * instr) const998 LowerTexToBackend::filter(const nir_instr *instr) const
999 {
1000 if (instr->type != nir_instr_type_tex)
1001 return false;
1002
1003 auto tex = nir_instr_as_tex(instr);
1004 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
1005 return false;
1006 switch (tex->op) {
1007 case nir_texop_tex:
1008 case nir_texop_txb:
1009 case nir_texop_txl:
1010 case nir_texop_txf:
1011 case nir_texop_txd:
1012 case nir_texop_tg4:
1013 case nir_texop_txf_ms:
1014 break;
1015 default:
1016 return false;
1017 }
1018
1019 return nir_tex_instr_src_index(tex, nir_tex_src_backend1) == -1;
1020 }
1021
get_undef()1022 nir_def *LowerTexToBackend::get_undef()
1023 {
1024 if (!m_undef)
1025 m_undef = nir_undef(b, 1, 32);
1026 return m_undef;
1027 }
1028
1029 nir_def *
lower(nir_instr * instr)1030 LowerTexToBackend::lower(nir_instr *instr)
1031 {
1032 b->cursor = nir_before_instr(instr);
1033
1034 auto tex = nir_instr_as_tex(instr);
1035 switch (tex->op) {
1036 case nir_texop_tex:
1037 case nir_texop_txb:
1038 case nir_texop_txl:
1039 case nir_texop_txd:
1040 return lower_tex(tex);
1041 case nir_texop_txf:
1042 return lower_txf(tex);
1043 case nir_texop_tg4:
1044 return lower_tg4(tex);
1045 case nir_texop_txf_ms:
1046 if (m_chip_class < EVERGREEN)
1047 return lower_txf_ms_direct(tex);
1048 else
1049 return lower_txf_ms(tex);
1050 default:
1051 return nullptr;
1052 }
1053 }
1054
1055 nir_def *
lower_tex(nir_tex_instr * tex)1056 LowerTexToBackend::lower_tex(nir_tex_instr *tex)
1057 {
1058 int unnormalized_mask = 0;
1059 int used_coord_mask = 0;
1060
1061 nir_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
1062
1063 nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, 0, 0);
1064
1065 return finalize(tex, backend1, backend2);
1066 }
1067
1068 nir_def *
lower_txf(nir_tex_instr * tex)1069 LowerTexToBackend::lower_txf(nir_tex_instr *tex)
1070 {
1071 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1072
1073 get_src_coords(tex, new_coord, false);
1074
1075 int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
1076 new_coord[3] = tex->src[lod_idx].src.ssa;
1077
1078 int used_coord_mask = 0;
1079 nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1080 nir_def *backend2 =
1081 nir_imm_ivec4(b, used_coord_mask, tex->is_array ? 0x4 : 0, 0, 0);
1082
1083 return finalize(tex, backend1, backend2);
1084 }
1085
1086 nir_def *
lower_tg4(nir_tex_instr * tex)1087 LowerTexToBackend::lower_tg4(nir_tex_instr *tex)
1088 {
1089 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1090
1091 get_src_coords(tex, new_coord, false);
1092 uint32_t dest_swizzle =
1093 m_chip_class <= EVERGREEN ? 1 | (2 << 8) | (0 << 16) | (3 << 24) : 0;
1094
1095 int used_coord_mask = 0;
1096 int unnormalized_mask = 0;
1097 nir_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
1098
1099 nir_def *backend2 =
1100 nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, tex->component, dest_swizzle);
1101 return finalize(tex, backend1, backend2);
1102 }
1103
1104 nir_def *
lower_txf_ms(nir_tex_instr * tex)1105 LowerTexToBackend::lower_txf_ms(nir_tex_instr *tex)
1106 {
1107 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1108
1109 get_src_coords(tex, new_coord, false);
1110
1111 int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
1112 new_coord[3] = tex->src[ms_index].src.ssa;
1113
1114 int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
1115 if (offset_index >= 0) {
1116 auto offset = tex->src[offset_index].src.ssa;
1117 for (int i = 0; i < offset->num_components; ++i) {
1118 new_coord[i] = nir_iadd(b, new_coord[i], nir_channel(b, offset, i));
1119 }
1120 }
1121
1122 auto fetch_sample = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
1123 nir_def_init(&fetch_sample->instr, &fetch_sample->def, 4, 32);
1124
1125 int used_coord_mask = 0;
1126 nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1127 nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0xf, 1, 0);
1128
1129 nir_builder_instr_insert(b, &fetch_sample->instr);
1130 finalize(fetch_sample, backend1, backend2);
1131
1132 new_coord[3] = nir_iand_imm(b,
1133 nir_ushr(b,
1134 nir_channel(b, &fetch_sample->def, 0),
1135 nir_ishl_imm(b, new_coord[3], 2)),
1136 15);
1137
1138 nir_def *backend1b = prep_src(new_coord, used_coord_mask);
1139 nir_def *backend2b = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
1140 return finalize(tex, backend1b, backend2b);
1141 }
1142
1143 nir_def *
lower_txf_ms_direct(nir_tex_instr * tex)1144 LowerTexToBackend::lower_txf_ms_direct(nir_tex_instr *tex)
1145 {
1146 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1147
1148 get_src_coords(tex, new_coord, false);
1149
1150 int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
1151 new_coord[3] = tex->src[ms_index].src.ssa;
1152
1153 int used_coord_mask = 0;
1154 nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1155 nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
1156
1157 return finalize(tex, backend1, backend2);
1158 }
1159
1160 nir_def *
finalize(nir_tex_instr * tex,nir_def * backend1,nir_def * backend2)1161 LowerTexToBackend::finalize(nir_tex_instr *tex,
1162 nir_def *backend1,
1163 nir_def *backend2)
1164 {
1165 nir_tex_instr_add_src(tex, nir_tex_src_backend1, backend1);
1166 nir_tex_instr_add_src(tex, nir_tex_src_backend2, backend2);
1167
1168 static const nir_tex_src_type cleanup[] = {nir_tex_src_coord,
1169 nir_tex_src_lod,
1170 nir_tex_src_bias,
1171 nir_tex_src_comparator,
1172 nir_tex_src_ms_index};
1173
1174 for (const auto type : cleanup) {
1175 int pos = nir_tex_instr_src_index(tex, type);
1176 if (pos >= 0)
1177 nir_tex_instr_remove_src(tex, pos);
1178 }
1179 return NIR_LOWER_INSTR_PROGRESS;
1180 }
1181
1182 nir_def *
prep_src(std::array<nir_def *,4> & coord,int & used_coord_mask)1183 LowerTexToBackend::prep_src(std::array<nir_def *, 4>& coord, int& used_coord_mask)
1184 {
1185 int max_coord = 0;
1186 for (int i = 0; i < 4; ++i) {
1187 if (coord[i]) {
1188 used_coord_mask |= 1 << i;
1189 max_coord = i;
1190 } else
1191 coord[i] = get_undef();
1192 }
1193
1194 return nir_vec(b, coord.data(), max_coord + 1);
1195 }
1196
1197 nir_def *
prepare_coord(nir_tex_instr * tex,int & unnormalized_mask,int & used_coord_mask)1198 LowerTexToBackend::prepare_coord(nir_tex_instr *tex,
1199 int& unnormalized_mask,
1200 int& used_coord_mask)
1201 {
1202 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1203
1204 unnormalized_mask = get_src_coords(tex, new_coord, true);
1205 used_coord_mask = 0;
1206
1207 int comp_idx =
1208 tex->is_shadow ? nir_tex_instr_src_index(tex, nir_tex_src_comparator) : -1;
1209
1210 if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
1211 int idx = tex->op == nir_texop_txl ? nir_tex_instr_src_index(tex, nir_tex_src_lod)
1212 : nir_tex_instr_src_index(tex, nir_tex_src_bias);
1213 assert(idx != -1);
1214 new_coord[3] = tex->src[idx].src.ssa;
1215
1216 if (comp_idx >= 0)
1217 new_coord[2] = tex->src[comp_idx].src.ssa;
1218 } else if (comp_idx >= 0) {
1219 new_coord[3] = tex->src[comp_idx].src.ssa;
1220 }
1221 return prep_src(new_coord, used_coord_mask);
1222 }
1223
1224 int
get_src_coords(nir_tex_instr * tex,std::array<nir_def *,4> & coord,bool round_array_index)1225 LowerTexToBackend::get_src_coords(nir_tex_instr *tex,
1226 std::array<nir_def *, 4>& coord,
1227 bool round_array_index)
1228 {
1229 int unnormalized_mask = 0;
1230 auto coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1231 assert(coord_idx != -1);
1232 auto old_coord = tex->src[coord_idx];
1233
1234 coord = {nir_channel(b, old_coord.src.ssa, 0), nullptr, nullptr, nullptr};
1235
1236 if (tex->coord_components > 1) {
1237 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D)
1238 coord[2] = nir_channel(b, old_coord.src.ssa, 1);
1239 else
1240 coord[1] = nir_channel(b, old_coord.src.ssa, 1);
1241 }
1242
1243 if (tex->coord_components > 2) {
1244 coord[2] = nir_channel(b, old_coord.src.ssa, 2);
1245 }
1246 if (tex->is_array) {
1247 unnormalized_mask |= 0x4;
1248 if (round_array_index)
1249 coord[2] = nir_fround_even(b, coord[2]);
1250 }
1251
1252 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1253 unnormalized_mask |= 0x3;
1254 }
1255
1256 return unnormalized_mask;
1257 }
1258
1259 } // namespace r600
1260