• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_tex.h"
28 
29 #include "nir_builder.h"
30 #include "sfn_debug.h"
31 #include "sfn_instr_alu.h"
32 #include "sfn_instr_fetch.h"
33 #include "sfn_nir.h"
34 
35 namespace r600 {
36 
37 using std::string;
38 
TexInstr(Opcode op,const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,const RegisterVec4 & src,unsigned resource_id,PRegister resource_offs,int sampler_id,PRegister sampler_offset)39 TexInstr::TexInstr(Opcode op,
40                    const RegisterVec4& dest,
41                    const RegisterVec4::Swizzle& dest_swizzle,
42                    const RegisterVec4& src,
43                    unsigned resource_id,
44                    PRegister resource_offs,
45                    int sampler_id, PRegister sampler_offset):
46     InstrWithVectorResult(dest, dest_swizzle, resource_id, resource_offs),
47     m_opcode(op),
48     m_src(src),
49     m_inst_mode(0),
50     m_sampler(this, sampler_id, sampler_offset)
51 {
52    memset(m_coord_offset, 0, sizeof(m_coord_offset));
53    m_src.add_use(this);
54 }
55 
56 void
accept(ConstInstrVisitor & visitor) const57 TexInstr::accept(ConstInstrVisitor& visitor) const
58 {
59    visitor.visit(*this);
60 }
61 
62 void
accept(InstrVisitor & visitor)63 TexInstr::accept(InstrVisitor& visitor)
64 {
65    visitor.visit(this);
66 }
67 
68 void
set_offset(unsigned index,int32_t val)69 TexInstr::set_offset(unsigned index, int32_t val)
70 {
71    assert(index < 3);
72    m_coord_offset[index] = val;
73 }
74 
75 int
get_offset(unsigned index) const76 TexInstr::get_offset(unsigned index) const
77 {
78    assert(index < 3);
79    return m_coord_offset[index] << 1;
80 }
81 
82 void
set_gather_comp(int cmp)83 TexInstr::set_gather_comp(int cmp)
84 {
85    m_inst_mode = cmp;
86 }
87 
88 bool
is_equal_to(const TexInstr & lhs) const89 TexInstr::is_equal_to(const TexInstr& lhs) const
90 {
91    if (m_opcode != lhs.m_opcode)
92       return false;
93 
94    if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle()))
95       return false;
96 
97    if (m_src != lhs.m_src)
98       return false;
99 
100    if (resource_offset() && lhs.resource_offset()) {
101       if (!resource_offset()->equal_to(*lhs.resource_offset()))
102          return false;
103    } else if ((resource_offset() && !lhs.resource_offset()) ||
104               (!resource_offset() && lhs.resource_offset()))
105       return false;
106 
107    if (sampler_offset() && lhs.sampler_offset()) {
108       if (!sampler_offset()->equal_to(*lhs.sampler_offset()))
109          return false;
110    } else if ((sampler_offset() && !lhs.sampler_offset()) ||
111               (!sampler_offset() && lhs.sampler_offset()))
112       return false;
113 
114    if (m_tex_flags != lhs.m_tex_flags)
115       return false;
116 
117    for (int i = 0; i < 3; ++i) {
118       if (m_coord_offset[i] != lhs.m_coord_offset[i])
119          return false;
120    }
121 
122    return m_inst_mode == lhs.m_inst_mode &&
123          resource_id() == lhs.resource_id() &&
124          resource_index_mode() == lhs.resource_index_mode() &&
125          sampler_id() == lhs.sampler_id() &&
126          sampler_index_mode() == lhs.sampler_index_mode();
127 }
128 
129 bool
propagate_death()130 TexInstr::propagate_death()
131 {
132    m_src.del_use(this);
133    return true;
134 }
135 
forward_set_blockid(int id,int index)136 void TexInstr::forward_set_blockid(int id, int index)
137 {
138    for (auto p : m_prepare_instr)
139       p->set_blockid(id, index);
140 }
141 
142 bool
do_ready() const143 TexInstr::do_ready() const
144 {
145    for (auto p : m_prepare_instr)
146       if (!p->ready())
147          return false;
148 
149    for (auto p : required_instr())
150       if (!p->is_scheduled() && !p->is_dead()) {
151          return false;
152       }
153 
154    if (resource_offset() && !resource_offset()->ready(block_id(), index()))
155       return false;
156    return m_src.ready(block_id(), index());
157 }
158 
159 void
do_print(std::ostream & os) const160 TexInstr::do_print(std::ostream& os) const
161 {
162 
163    for (auto& p : prepare_instr()) {
164       os << *p << "\n";
165    }
166 
167    os << "TEX " << opname(m_opcode) << " ";
168    print_dest(os);
169 
170    os << " : ";
171    m_src.print(os);
172 
173    os << " RID:" << resource_id();
174    if (resource_offset())
175       os << " RO:" << *resource_offset();
176 
177    os << " SID:" << sampler_id();
178    if (sampler_offset())
179       os << " SO:" << *sampler_offset();
180 
181    if (m_coord_offset[0])
182       os << " OX:" << m_coord_offset[0];
183    if (m_coord_offset[1])
184       os << " OY:" << m_coord_offset[1];
185    if (m_coord_offset[2])
186       os << " OZ:" << m_coord_offset[2];
187 
188    if (m_inst_mode || is_gather(m_opcode))
189       os << " MODE:" << m_inst_mode;
190 
191    os << " ";
192    os << (m_tex_flags.test(x_unnormalized) ? "U" : "N");
193    os << (m_tex_flags.test(y_unnormalized) ? "U" : "N");
194    os << (m_tex_flags.test(z_unnormalized) ? "U" : "N");
195    os << (m_tex_flags.test(w_unnormalized) ? "U" : "N");
196 }
197 
198 const char *
opname(Opcode op)199 TexInstr::opname(Opcode op)
200 {
201    switch (op) {
202    case ld:
203       return "LD";
204    case get_resinfo:
205       return "GET_TEXTURE_RESINFO";
206    case get_nsamples:
207       return "GET_NUMBER_OF_SAMPLES";
208    case get_tex_lod:
209       return "GET_LOD";
210    case get_gradient_h:
211       return "GET_GRADIENTS_H";
212    case get_gradient_v:
213       return "GET_GRADIENTS_V";
214    case set_offsets:
215       return "SET_TEXTURE_OFFSETS";
216    case keep_gradients:
217       return "KEEP_GRADIENTS";
218    case set_gradient_h:
219       return "SET_GRADIENTS_H";
220    case set_gradient_v:
221       return "SET_GRADIENTS_V";
222    case sample:
223       return "SAMPLE";
224    case sample_l:
225       return "SAMPLE_L";
226    case sample_lb:
227       return "SAMPLE_LB";
228    case sample_lz:
229       return "SAMPLE_LZ";
230    case sample_g:
231       return "SAMPLE_G";
232    case sample_g_lb:
233       return "SAMPLE_G_L";
234    case gather4:
235       return "GATHER4";
236    case gather4_o:
237       return "GATHER4_O";
238    case sample_c:
239       return "SAMPLE_C";
240    case sample_c_l:
241       return "SAMPLE_C_L";
242    case sample_c_lb:
243       return "SAMPLE_C_LB";
244    case sample_c_lz:
245       return "SAMPLE_C_LZ";
246    case sample_c_g:
247       return "SAMPLE_C_G";
248    case sample_c_g_lb:
249       return "SAMPLE_C_G_L";
250    case gather4_c:
251       return "GATHER4_C";
252    case gather4_c_o:
253       return "OP_GATHER4_C_O";
254    default:
255       return "ERROR";
256    }
257 }
258 
259 const std::map<TexInstr::Opcode, std::string> TexInstr::s_opcode_map = {
260    {ld,             "LD"                   },
261    {get_resinfo,    "GET_TEXTURE_RESINFO"  },
262    {get_nsamples,   "GET_NUMBER_OF_SAMPLES"},
263    {get_tex_lod,    "GET_LOD"              },
264    {get_gradient_h, "GET_GRADIENTS_H"      },
265    {get_gradient_v, "GET_GRADIENTS_V"      },
266    {set_offsets,    "SET_TEXTURE_OFFSETS"  },
267    {keep_gradients, "KEEP_GRADIENTS"       },
268    {set_gradient_h, "SET_GRADIENTS_H"      },
269    {set_gradient_v, "SET_GRADIENTS_V"      },
270    {sample,         "SAMPLE"               },
271    {sample_l,       "SAMPLE_L"             },
272    {sample_lb,      "SAMPLE_LB"            },
273    {sample_lz,      "SAMPLE_LZ"            },
274    {sample_g,       "SAMPLE_G"             },
275    {sample_g_lb,    "SAMPLE_G_L"           },
276    {gather4,        "GATHER4"              },
277    {gather4_o,      "GATHER4_O"            },
278    {sample_c,       "SAMPLE_C"             },
279    {sample_c_l,     "SAMPLE_C_L"           },
280    {sample_c_lb,    "SAMPLE_C_LB"          },
281    {sample_c_lz,    "SAMPLE_C_LZ"          },
282    {sample_c_g,     "SAMPLE_C_G"           },
283    {sample_c_g_lb,  "SAMPLE_C_G_L"         },
284    {gather4_c,      "GATHER4_C"            },
285    {gather4_c_o,    "OP_GATHER4_C_O"       },
286    {unknown,        "ERROR"                }
287 };
288 
289 bool
is_gather(Opcode op)290 TexInstr::is_gather(Opcode op)
291 {
292    return op == gather4 || op == gather4_c || op == gather4_o || op == gather4_c_o;
293 }
294 
295 TexInstr::Opcode
op_from_string(const std::string & s)296 TexInstr::op_from_string(const std::string& s)
297 {
298    for (auto& [op, str] : s_opcode_map) {
299       if (s == str)
300          return op;
301    }
302    return unknown;
303 }
304 
305 Instr::Pointer
from_string(std::istream & is,ValueFactory & value_fctory)306 TexInstr::from_string(std::istream& is, ValueFactory& value_fctory)
307 {
308    string opstr;
309    string deststr;
310    is >> opstr >> deststr;
311 
312    auto opcode = TexInstr::op_from_string(opstr);
313 
314    RegisterVec4::Swizzle dest_swz;
315 
316    auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group);
317 
318    char dummy;
319    is >> dummy;
320    assert(dummy == ':');
321 
322    string srcstr;
323    is >> srcstr;
324 
325    auto src = value_fctory.src_vec4_from_string(srcstr);
326 
327    string res_id_str;
328    string sampler_id_str;
329 
330    is >> res_id_str >> sampler_id_str;
331 
332    int res_id = int_from_string_with_prefix(res_id_str, "RID:");
333    int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:");
334 
335    auto tex = new TexInstr(opcode, dest, dest_swz, src, res_id, nullptr,
336                            sampler_id, nullptr);
337 
338    while (!is.eof() && is.good()) {
339       std::string next_token;
340       is >> next_token;
341 
342       if (next_token.empty())
343          break;
344 
345       if (next_token[0] == 'U' || next_token[0] == 'N') {
346          tex->read_tex_coord_normalitazion(next_token);
347       } else {
348          tex->set_tex_param(next_token);
349       }
350    }
351 
352    return tex;
353 }
354 
355 void
read_tex_coord_normalitazion(const std::string & flags)356 TexInstr::read_tex_coord_normalitazion(const std::string& flags)
357 {
358    assert(flags.length() == 4);
359    if (flags[0] == 'U')
360       set_tex_flag(x_unnormalized);
361    if (flags[1] == 'U')
362       set_tex_flag(y_unnormalized);
363    if (flags[2] == 'U')
364       set_tex_flag(z_unnormalized);
365    if (flags[3] == 'U')
366       set_tex_flag(w_unnormalized);
367 }
368 
369 void
set_tex_param(const std::string & token)370 TexInstr::set_tex_param(const std::string& token)
371 {
372    if (token.substr(0, 3) == "OX:")
373       set_offset(0, int_from_string_with_prefix(token, "OX:"));
374    else if (token.substr(0, 3) == "OY:")
375       set_offset(1, int_from_string_with_prefix(token, "OY:"));
376    else if (token.substr(0, 3) == "OZ:")
377       set_offset(2, int_from_string_with_prefix(token, "OZ:"));
378    else if (token.substr(0, 5) == "MODE:")
379       set_inst_mode(int_from_string_with_prefix(token, "MODE:"));
380    else if (token.substr(0, 3) == "SO:")
381       set_sampler_offset(VirtualValue::from_string(token.substr(3))->as_register());
382    else if (token.substr(0, 3) == "RO:")
383       set_resource_offset(VirtualValue::from_string(token.substr(3))->as_register());
384    else {
385       std::cerr << "Token '" << token << "': ";
386       unreachable("Unknown token in tex param");
387    }
388 }
389 
390 bool
from_nir(nir_tex_instr * tex,Shader & shader)391 TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
392 {
393    Inputs src(*tex, shader.value_factory());
394 
395    if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) != -1)
396       return emit_lowered_tex(tex, src, shader);
397 
398    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
399       switch (tex->op) {
400       case nir_texop_txs:
401          return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
402       case nir_texop_txf:
403          return emit_buf_txf(tex, src, shader);
404       default:
405          return false;
406       }
407    } else {
408       switch (tex->op) {
409       case nir_texop_txs:
410          return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
411       case nir_texop_lod:
412          return emit_tex_lod(tex, src, shader);
413       case nir_texop_query_levels:
414          return emit_tex_txs(tex, src, {3, 7, 7, 7}, shader);
415       case nir_texop_texture_samples:
416          return emit_tex_texture_samples(tex, src, shader);
417       default:
418          return false;
419       }
420    }
421    return true;
422 }
423 
424 bool
replace_source(PRegister old_src,PVirtualValue new_src)425 TexInstr::replace_source(PRegister old_src, PVirtualValue new_src)
426 {
427    if (old_src->pin() != pin_free)
428       return false;
429 
430    if (!new_src->as_register())
431       return false;
432 
433    bool success = false;
434    for (int i = 0; i < 4; ++i) {
435       if (m_src[i]->equal_to(*old_src)) {
436          m_src.set_value(i, new_src->as_register());
437          success = true;
438       }
439    }
440    m_src.validate();
441    if (success) {
442       old_src->del_use(this);
443       new_src->as_register()->add_use(this);
444    }
445    return success;
446 }
447 
update_indirect_addr(PRegister old_reg,PRegister addr)448 void TexInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
449 {
450    if (resource_offset() && old_reg->equal_to(*resource_offset()))
451       set_resource_offset(addr);
452    else if (sampler_offset() && old_reg->equal_to(*sampler_offset()))
453       set_sampler_offset(addr);
454 
455    for (auto& p : m_prepare_instr)
456       p->update_indirect_addr(old_reg, addr);
457 }
458 
459 uint8_t
allowed_src_chan_mask() const460 TexInstr::allowed_src_chan_mask() const
461 {
462    return m_src.free_chan_mask();
463 }
464 
/* Result of get_sampler_id(). */
struct SamplerId {
   /* Sampler id: the caller supplied id, or the binding of the variable. */
   int id;
   /* get_sampler_id() always initialises this to false. */
   bool indirect;
};
469 
470 SamplerId
get_sampler_id(int sampler_id,const nir_variable * deref)471 get_sampler_id(int sampler_id, const nir_variable *deref)
472 {
473    SamplerId result = {sampler_id, false};
474 
475    if (deref) {
476       assert(glsl_type_is_sampler(deref->type));
477       result.id = deref->data.binding;
478    }
479    return result;
480 }
481 
482 void
emit_set_gradients(nir_tex_instr * tex,int texture_id,Inputs & src,TexInstr * irt,Shader & shader)483 TexInstr::emit_set_gradients(
484    nir_tex_instr *tex, int texture_id, Inputs& src, TexInstr *irt, Shader& shader)
485 {
486    TexInstr *grad[2] = {nullptr, nullptr};
487    RegisterVec4 empty_dst(0, false, {0, 0, 0, 0}, pin_group);
488    grad[0] = new TexInstr(set_gradient_h,
489                           empty_dst,
490                           {7, 7, 7, 7},
491                           src.ddx,
492                           texture_id,
493                           src.texture_offset);
494    grad[0]->set_rect_coordinate_flags(tex);
495    grad[0]->set_always_keep();
496 
497    grad[1] = new TexInstr(set_gradient_v,
498                           empty_dst,
499                           {7, 7, 7, 7},
500                           src.ddy,
501                           texture_id,
502                           src.texture_offset);
503    grad[1]->set_rect_coordinate_flags(tex);
504    grad[1]->set_always_keep();
505    irt->add_prepare_instr(grad[0]);
506    irt->add_prepare_instr(grad[1]);
507    if (shader.last_txd())
508       irt->add_required_instr(shader.last_txd());
509    shader.set_last_txd(irt);
510 }
511 
512 void
emit_set_offsets(nir_tex_instr * tex,int texture_id,Inputs & src,TexInstr * irt,Shader & shader)513 TexInstr::emit_set_offsets(nir_tex_instr *tex, int texture_id, Inputs& src, TexInstr *irt, Shader& shader)
514 {
515    RegisterVec4::Swizzle swizzle = {4, 4, 4, 4};
516    int src_components = tex->coord_components;
517    if (tex->is_array)
518       --src_components;
519 
520    for (int i = 0; i < src_components; ++i)
521       swizzle[i] = i;
522 
523    auto ofs = shader.value_factory().src_vec4(*src.offset, pin_group, swizzle);
524    RegisterVec4 empty_dst(0, false, {0, 0, 0, 0}, pin_group);
525 
526    auto set_ofs = new TexInstr(TexInstr::set_offsets,
527                                empty_dst,
528                                {7, 7, 7, 7},
529                                ofs,
530                                texture_id + R600_MAX_CONST_BUFFERS,
531                                src.texture_offset);
532    set_ofs->set_always_keep();
533    irt->add_prepare_instr(set_ofs);
534 }
535 
536 bool
emit_lowered_tex(nir_tex_instr * tex,Inputs & src,Shader & shader)537 TexInstr::emit_lowered_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
538 {
539    assert(src.backend1);
540    assert(src.backend2);
541 
542    auto& vf = shader.value_factory();
543    sfn_log << SfnLog::instr << "emit '" << *reinterpret_cast<nir_instr *>(tex) << "' ("
544            << __func__ << ")\n";
545 
546    auto params = nir_src_as_const_value(*src.backend2);
547    int32_t coord_mask = params[0].i32;
548    int32_t flags = params[1].i32;
549    int32_t inst_mode = params[2].i32;
550    uint32_t dst_swz_packed = params[3].u32;
551 
552    auto dst = vf.dest_vec4(tex->def, pin_group);
553 
554    RegisterVec4::Swizzle src_swizzle = {0};
555    for (int i = 0; i < 4; ++i)
556       src_swizzle[i] = (coord_mask & (1 << i)) ? i : 7;
557 
558    auto src_coord = vf.src_vec4(*src.backend1, pin_group, src_swizzle);
559 
560    RegisterVec4::Swizzle dst_swz = {0, 1, 2, 3};
561    if (dst_swz_packed) {
562       for (int i = 0; i < 4; ++i) {
563          dst_swz[i] = (dst_swz_packed >> (8 * i)) & 0xff;
564       }
565    }
566 
567    int texture_id = tex->texture_index + R600_MAX_CONST_BUFFERS;
568    auto irt = new TexInstr(src.opcode,
569                            dst,
570                            dst_swz,
571                            src_coord,
572                            texture_id,
573                            src.texture_offset,
574                            tex->sampler_index,
575                            src.sampler_offset);
576 
577    if (tex->op == nir_texop_txd)
578       emit_set_gradients(tex, texture_id, src, irt, shader);
579 
580    if (!irt->set_coord_offsets(src.offset)) {
581       assert(tex->op == nir_texop_tg4);
582       emit_set_offsets(tex, texture_id, src, irt, shader);
583    }
584 
585    for (const auto f : TexFlags) {
586       if (flags & (1 << f))
587          irt->set_tex_flag(f);
588    }
589 
590    irt->set_inst_mode(inst_mode);
591 
592    shader.emit_instruction(irt);
593    return true;
594 }
595 
596 bool
emit_buf_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)597 TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
598 {
599    auto& vf = shader.value_factory();
600    auto dst = vf.dest_vec4(tex->def, pin_group);
601 
602    PRegister tex_offset = nullptr;
603    if (src.sampler_offset)
604       tex_offset = shader.emit_load_to_register(src.sampler_offset);
605 
606    auto *real_dst = &dst;
607    RegisterVec4 tmp = vf.temp_vec4(pin_group);
608 
609    if (shader.chip_class() < ISA_CC_EVERGREEN) {
610       real_dst = &tmp;
611    }
612 
613    auto ir = new LoadFromBuffer(*real_dst,
614                                 {0, 1, 2, 3},
615                                 src.coord[0],
616                                 0,
617                                 tex->texture_index + R600_MAX_CONST_BUFFERS,
618                                 tex_offset,
619                                 fmt_invalid);
620    ir->set_fetch_flag(FetchInstr::use_const_field);
621    shader.emit_instruction(ir);
622    shader.set_flag(Shader::sh_uses_tex_buffer);
623 
624    if (shader.chip_class() < ISA_CC_EVERGREEN) {
625       auto tmp_w = vf.temp_register();
626       int buf_sel = (512 + R600_BUFFER_INFO_OFFSET / 16) + 2 * tex->texture_index;
627       AluInstr *ir = nullptr;
628       for (int i = 0; i < 4; ++i) {
629          auto d = i < 3 ? dst[i] : tmp_w;
630          ir = new AluInstr(op2_and_int,
631                            d,
632                            tmp[i],
633                            vf.uniform(buf_sel, i, R600_BUFFER_INFO_CONST_BUFFER),
634                            AluInstr::write);
635          shader.emit_instruction(ir);
636       }
637 
638       ir->set_alu_flag(alu_last_instr);
639       shader.emit_instruction(
640          new AluInstr(op2_or_int,
641                       dst[3],
642                       tmp_w,
643                       vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER),
644                       AluInstr::last_write));
645    }
646 
647    return true;
648 }
649 
650 bool
emit_tex_texture_samples(nir_tex_instr * instr,Inputs & src,Shader & shader)651 TexInstr::emit_tex_texture_samples(nir_tex_instr *instr, Inputs& src, Shader& shader)
652 {
653    RegisterVec4 dest = shader.value_factory().dest_vec4(instr->def, pin_chan);
654    RegisterVec4 help{
655       0, true, {4, 4, 4, 4}
656    };
657 
658    int res_id = R600_MAX_CONST_BUFFERS + instr->texture_index;
659 
660    // Fishy: should the zero be instr->sampler_index?
661    auto ir =
662       new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help, res_id, src.texture_offset);
663    shader.emit_instruction(ir);
664    return true;
665 }
666 
667 bool
emit_tex_txs(nir_tex_instr * tex,Inputs & src,RegisterVec4::Swizzle dest_swz,Shader & shader)668 TexInstr::emit_tex_txs(nir_tex_instr *tex,
669                        Inputs& src,
670                        RegisterVec4::Swizzle dest_swz,
671                        Shader& shader)
672 {
673    auto& vf = shader.value_factory();
674 
675    auto dest = vf.dest_vec4(tex->def, pin_group);
676 
677    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
678       if (shader.chip_class() >= ISA_CC_EVERGREEN) {
679          shader.emit_instruction(new QueryBufferSizeInstr(
680             dest, {0, 7, 7, 7}, tex->texture_index + R600_MAX_CONST_BUFFERS));
681       } else {
682          int id = 2 * tex->texture_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1;
683          auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER);
684          shader.emit_instruction(
685             new AluInstr(op1_mov, dest[0], src, AluInstr::last_write));
686          shader.set_flag(Shader::sh_uses_tex_buffer);
687       }
688    } else {
689 
690       auto src_lod = vf.temp_register();
691       shader.emit_instruction(
692          new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
693 
694       RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
695 
696       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
697          dest_swz[2] = 7;
698 
699       auto ir = new TexInstr(get_resinfo,
700                              dest,
701                              dest_swz,
702                              src_coord,
703                              tex->texture_index + R600_MAX_CONST_BUFFERS,
704                              src.texture_offset);
705 
706       ir->set_dest_swizzle(dest_swz);
707       shader.emit_instruction(ir);
708 
709       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
710          auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (tex->texture_index >> 2),
711                                    tex->texture_index & 3,
712                                    R600_BUFFER_INFO_CONST_BUFFER);
713 
714          auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
715          shader.emit_instruction(alu);
716          shader.set_flag(Shader::sh_txs_cube_array_comp);
717       }
718    }
719 
720    return true;
721 }
722 
723 auto
prepare_source(nir_tex_instr * tex,const Inputs & inputs,Shader & shader)724 TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader)
725    -> RegisterVec4
726 {
727    RegisterVec4::Swizzle target{7, 7, 7, 7};
728    PVirtualValue src[4]{nullptr, nullptr, nullptr, nullptr};
729 
730    for (unsigned i = 0; i < tex->coord_components; ++i) {
731       target[i] = i;
732       src[i] = inputs.coord[i];
733    }
734 
735    // array index always goes into z
736    if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
737       target[2] = 1;
738       target[1] = 7;
739       src[2] = inputs.coord[1];
740    }
741 
742    /* With txl and txb shadow goes into z and lod or bias go into w */
743    if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
744       target[3] = 3;
745       src[3] = tex->op == nir_texop_txl ? inputs.lod : inputs.bias;
746       if (tex->is_shadow) {
747          target[2] = 2;
748          src[2] = inputs.comperator;
749       }
750    } else if (tex->is_shadow) {
751       /* Other ops have shadow in w */
752       target[3] = 3;
753       src[3] = inputs.comperator;
754    }
755 
756    auto src_coord = shader.value_factory().temp_vec4(pin_group, target);
757 
758    AluInstr *ir = nullptr;
759    for (int i = 0; i < 4; ++i) {
760       if (target[i] > 3)
761          continue;
762 
763       auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov;
764 
765       ir = new AluInstr(op, src_coord[i], src[i], AluInstr::write);
766       shader.emit_instruction(ir);
767    }
768 
769    if (ir)
770       ir->set_alu_flag(alu_last_instr);
771 
772    return src_coord;
773 }
774 
Inputs(const nir_tex_instr & instr,ValueFactory & vf)775 TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
776     sampler_deref(nullptr),
777     texture_deref(nullptr),
778     bias(nullptr),
779     comperator(nullptr),
780     lod(nullptr),
781     offset(nullptr),
782     gather_comp(nullptr),
783     ms_index(nullptr),
784     texture_offset(nullptr),
785     sampler_offset(nullptr),
786     backend1(nullptr),
787     backend2(nullptr),
788     opcode(ld)
789 {
790    // sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components
791    // << " components\n";
792 
793    unsigned grad_components = instr.coord_components;
794    if (instr.is_array && !instr.array_is_lowered_cube)
795       --grad_components;
796 
797    for (unsigned i = 0; i < instr.num_srcs; ++i) {
798       switch (instr.src[i].src_type) {
799       case nir_tex_src_bias:
800          bias = vf.src(instr.src[i], 0);
801          break;
802 
803       case nir_tex_src_coord: {
804          coord = vf.src_vec4(instr.src[i].src,
805                              pin_none,
806                              swizzle_from_ncomps(instr.coord_components));
807       } break;
808       case nir_tex_src_comparator:
809          comperator = vf.src(instr.src[i], 0);
810          break;
811       case nir_tex_src_ddx:
812          ddx = vf.src_vec4(instr.src[i].src,
813                            pin_group,
814                            swizzle_from_ncomps(grad_components));
815          break;
816       case nir_tex_src_ddy:
817          ddy = vf.src_vec4(instr.src[i].src,
818                            pin_group,
819                            swizzle_from_ncomps(grad_components));
820          break;
821       case nir_tex_src_lod:
822          lod = vf.src(instr.src[i].src, 0);
823          break;
824       case nir_tex_src_offset:
825          offset = &instr.src[i].src;
826          break;
827          /* case nir_tex_src_sampler_deref:
828          sampler_deref = get_deref_location(instr.src[i].src);
829          break;
830       case nir_tex_src_texture_deref:
831          texture_deref = get_deref_location(instr.src[i].src);
832          break;
833       */
834       case nir_tex_src_ms_index:
835          ms_index = vf.src(instr.src[i], 0);
836          break;
837       case nir_tex_src_texture_offset:
838          texture_offset = vf.src(instr.src[i], 0)->as_register();
839          break;
840       case nir_tex_src_sampler_offset:
841          sampler_offset = vf.src(instr.src[i], 0)->as_register();
842          break;
843       case nir_tex_src_backend1:
844          backend1 = &instr.src[i].src;
845          break;
846       case nir_tex_src_backend2:
847          backend2 = &instr.src[i].src;
848          break;
849       case nir_tex_src_plane:
850       case nir_tex_src_projector:
851       case nir_tex_src_min_lod:
852       default:
853          unreachable("unsupported texture input type");
854       }
855    }
856 
857    opcode = get_opcode(instr);
858 }
859 
860 auto
get_opcode(const nir_tex_instr & instr)861 TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode
862 {
863    switch (instr.op) {
864    case nir_texop_tex:
865       return instr.is_shadow ? sample_c : sample;
866    case nir_texop_txf:
867       return ld;
868    case nir_texop_txb:
869       return instr.is_shadow ? sample_c_lb : sample_lb;
870    case nir_texop_txl:
871       return instr.is_shadow ? sample_c_l : sample_l;
872    case nir_texop_txs:
873       return get_resinfo;
874    case nir_texop_lod:
875       return get_resinfo;
876    case nir_texop_txd:
877       return instr.is_shadow ? sample_c_g : sample_g;
878    case nir_texop_tg4: {
879       auto var_offset = offset && nir_src_as_const_value(*offset) == nullptr;
880       return instr.is_shadow ? (var_offset ? gather4_c_o : gather4_c)
881                              : (var_offset ? gather4_o : gather4);
882    }
883    case nir_texop_txf_ms:
884       return ld;
885    case nir_texop_query_levels:
886       return get_resinfo;
887    case nir_texop_texture_samples:
888       return TexInstr::get_nsamples;
889    default:
890       unreachable("unsupported texture input opcode");
891    }
892 }
893 
894 bool
emit_tex_lod(nir_tex_instr * tex,Inputs & src,Shader & shader)895 TexInstr::emit_tex_lod(nir_tex_instr *tex, Inputs& src, Shader& shader)
896 {
897    auto& vf = shader.value_factory();
898 
899    auto dst = shader.value_factory().dest_vec4(tex->def, pin_group);
900 
901    auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
902 
903    auto src_coord = vf.temp_vec4(pin_group, swizzle);
904 
905    AluInstr *ir = nullptr;
906    for (unsigned i = 0; i < tex->coord_components; ++i) {
907       ir = new AluInstr(op1_mov, src_coord[i], src.coord[i], AluInstr::write);
908       shader.emit_instruction(ir);
909    }
910    if (ir)
911       ir->set_alu_flag(alu_last_instr);
912 
913    auto irt = new TexInstr(TexInstr::get_tex_lod,
914                            dst,
915                            {1, 0, 7, 7},
916                            src_coord,
917                            tex->texture_index + R600_MAX_CONST_BUFFERS,
918                            src.texture_offset);
919 
920    shader.emit_instruction(irt);
921    return true;
922 }
923 
924 RegisterVec4::Swizzle
swizzle_from_ncomps(int comps) const925 TexInstr::Inputs::swizzle_from_ncomps(int comps) const
926 {
927    RegisterVec4::Swizzle swz;
928    for (int i = 0; i < 4; ++i)
929       swz[i] = i < comps ? i : 7;
930    return swz;
931 }
932 
933 bool
set_coord_offsets(nir_src * offset)934 TexInstr::set_coord_offsets(nir_src *offset)
935 {
936    if (!offset)
937       return true;
938 
939    auto literal = nir_src_as_const_value(*offset);
940    if (!literal)
941       return false;
942 
943    for (int i = 0; i < offset->ssa->num_components; ++i)
944       set_offset(i, literal[i].i32);
945    return true;
946 }
947 
948 void
set_rect_coordinate_flags(nir_tex_instr * instr)949 TexInstr::set_rect_coordinate_flags(nir_tex_instr *instr)
950 {
951    if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
952       set_tex_flag(x_unnormalized);
953       set_tex_flag(y_unnormalized);
954    }
955 }
956 
957 class LowerTexToBackend : public NirLowerInstruction {
958 public:
959    LowerTexToBackend(amd_gfx_level chip_class);
960 
961 private:
962    bool filter(const nir_instr *instr) const override;
963    nir_def *lower(nir_instr *instr) override;
964 
965    nir_def *lower_tex(nir_tex_instr *tex);
966    nir_def *lower_txf(nir_tex_instr *tex);
967    nir_def *lower_tg4(nir_tex_instr *tex);
968    nir_def *lower_txf_ms(nir_tex_instr *tex);
969    nir_def *lower_txf_ms_direct(nir_tex_instr *tex);
970 
971    nir_def *
972    prepare_coord(nir_tex_instr *tex, int& unnormalized_mask, int& used_coord_mask);
973    int get_src_coords(nir_tex_instr *tex,
974                       std::array<nir_def *, 4>& coord,
975                       bool round_array_index);
976    nir_def *prep_src(std::array<nir_def *, 4>& coord, int& used_coord_mask);
977    nir_def *
978    finalize(nir_tex_instr *tex, nir_def *backend1, nir_def *backend2);
979 
980    nir_def *get_undef();
981 
982    amd_gfx_level m_chip_class;
983    nir_def *m_undef {nullptr};
984 };
985 
986 bool
r600_nir_lower_tex_to_backend(nir_shader * shader,amd_gfx_level chip_class)987 r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class)
988 {
989    return LowerTexToBackend(chip_class).run(shader);
990 }
991 
LowerTexToBackend(amd_gfx_level chip_class)992 LowerTexToBackend::LowerTexToBackend(amd_gfx_level chip_class):
993     m_chip_class(chip_class)
994 {
995 }
996 
997 bool
filter(const nir_instr * instr) const998 LowerTexToBackend::filter(const nir_instr *instr) const
999 {
1000    if (instr->type != nir_instr_type_tex)
1001       return false;
1002 
1003    auto tex = nir_instr_as_tex(instr);
1004    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
1005       return false;
1006    switch (tex->op) {
1007    case nir_texop_tex:
1008    case nir_texop_txb:
1009    case nir_texop_txl:
1010    case nir_texop_txf:
1011    case nir_texop_txd:
1012    case nir_texop_tg4:
1013    case nir_texop_txf_ms:
1014       break;
1015    default:
1016       return false;
1017    }
1018 
1019    return nir_tex_instr_src_index(tex, nir_tex_src_backend1) == -1;
1020 }
1021 
get_undef()1022 nir_def *LowerTexToBackend::get_undef()
1023 {
1024    if (!m_undef)
1025       m_undef = nir_undef(b, 1, 32);
1026    return m_undef;
1027 }
1028 
1029 nir_def *
lower(nir_instr * instr)1030 LowerTexToBackend::lower(nir_instr *instr)
1031 {
1032    b->cursor = nir_before_instr(instr);
1033 
1034    auto tex = nir_instr_as_tex(instr);
1035    switch (tex->op) {
1036    case nir_texop_tex:
1037    case nir_texop_txb:
1038    case nir_texop_txl:
1039    case nir_texop_txd:
1040       return lower_tex(tex);
1041    case nir_texop_txf:
1042       return lower_txf(tex);
1043    case nir_texop_tg4:
1044       return lower_tg4(tex);
1045    case nir_texop_txf_ms:
1046       if (m_chip_class < EVERGREEN)
1047          return lower_txf_ms_direct(tex);
1048       else
1049          return lower_txf_ms(tex);
1050    default:
1051       return nullptr;
1052    }
1053 }
1054 
1055 nir_def *
lower_tex(nir_tex_instr * tex)1056 LowerTexToBackend::lower_tex(nir_tex_instr *tex)
1057 {
1058    int unnormalized_mask = 0;
1059    int used_coord_mask = 0;
1060 
1061    nir_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
1062 
1063    nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, 0, 0);
1064 
1065    return finalize(tex, backend1, backend2);
1066 }
1067 
1068 nir_def *
lower_txf(nir_tex_instr * tex)1069 LowerTexToBackend::lower_txf(nir_tex_instr *tex)
1070 {
1071    std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1072 
1073    get_src_coords(tex, new_coord, false);
1074 
1075    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
1076    new_coord[3] = tex->src[lod_idx].src.ssa;
1077 
1078    int used_coord_mask = 0;
1079    nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1080    nir_def *backend2 =
1081       nir_imm_ivec4(b, used_coord_mask, tex->is_array ? 0x4 : 0, 0, 0);
1082 
1083    return finalize(tex, backend1, backend2);
1084 }
1085 
1086 nir_def *
lower_tg4(nir_tex_instr * tex)1087 LowerTexToBackend::lower_tg4(nir_tex_instr *tex)
1088 {
1089    std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1090 
1091    get_src_coords(tex, new_coord, false);
1092    uint32_t dest_swizzle =
1093       m_chip_class <= EVERGREEN ? 1 | (2 << 8) | (0 << 16) | (3 << 24) : 0;
1094 
1095    int used_coord_mask = 0;
1096    int unnormalized_mask = 0;
1097    nir_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
1098 
1099    nir_def *backend2 =
1100       nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, tex->component, dest_swizzle);
1101    return finalize(tex, backend1, backend2);
1102 }
1103 
1104 nir_def *
lower_txf_ms(nir_tex_instr * tex)1105 LowerTexToBackend::lower_txf_ms(nir_tex_instr *tex)
1106 {
1107    std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1108 
1109    get_src_coords(tex, new_coord, false);
1110 
1111    int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
1112    new_coord[3] = tex->src[ms_index].src.ssa;
1113 
1114    int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
1115    if (offset_index >= 0) {
1116       auto offset = tex->src[offset_index].src.ssa;
1117       for (int i = 0; i < offset->num_components; ++i) {
1118          new_coord[i] = nir_iadd(b, new_coord[i], nir_channel(b, offset, i));
1119       }
1120    }
1121 
1122    auto fetch_sample = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
1123    nir_def_init(&fetch_sample->instr, &fetch_sample->def, 4, 32);
1124 
1125    int used_coord_mask = 0;
1126    nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1127    nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0xf, 1, 0);
1128 
1129    nir_builder_instr_insert(b, &fetch_sample->instr);
1130    finalize(fetch_sample, backend1, backend2);
1131 
1132    new_coord[3] = nir_iand_imm(b,
1133                                nir_ushr(b,
1134                                         nir_channel(b, &fetch_sample->def, 0),
1135                                         nir_ishl_imm(b, new_coord[3], 2)),
1136                                15);
1137 
1138    nir_def *backend1b = prep_src(new_coord, used_coord_mask);
1139    nir_def *backend2b = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
1140    return finalize(tex, backend1b, backend2b);
1141 }
1142 
1143 nir_def *
lower_txf_ms_direct(nir_tex_instr * tex)1144 LowerTexToBackend::lower_txf_ms_direct(nir_tex_instr *tex)
1145 {
1146    std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1147 
1148    get_src_coords(tex, new_coord, false);
1149 
1150    int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
1151    new_coord[3] = tex->src[ms_index].src.ssa;
1152 
1153    int used_coord_mask = 0;
1154    nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1155    nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
1156 
1157    return finalize(tex, backend1, backend2);
1158 }
1159 
1160 nir_def *
finalize(nir_tex_instr * tex,nir_def * backend1,nir_def * backend2)1161 LowerTexToBackend::finalize(nir_tex_instr *tex,
1162                             nir_def *backend1,
1163                             nir_def *backend2)
1164 {
1165    nir_tex_instr_add_src(tex, nir_tex_src_backend1, backend1);
1166    nir_tex_instr_add_src(tex, nir_tex_src_backend2, backend2);
1167 
1168    static const nir_tex_src_type cleanup[] = {nir_tex_src_coord,
1169                                               nir_tex_src_lod,
1170                                               nir_tex_src_bias,
1171                                               nir_tex_src_comparator,
1172                                               nir_tex_src_ms_index};
1173 
1174    for (const auto type : cleanup) {
1175       int pos = nir_tex_instr_src_index(tex, type);
1176       if (pos >= 0)
1177          nir_tex_instr_remove_src(tex, pos);
1178    }
1179    return NIR_LOWER_INSTR_PROGRESS;
1180 }
1181 
1182 nir_def *
prep_src(std::array<nir_def *,4> & coord,int & used_coord_mask)1183 LowerTexToBackend::prep_src(std::array<nir_def *, 4>& coord, int& used_coord_mask)
1184 {
1185    int max_coord = 0;
1186    for (int i = 0; i < 4; ++i) {
1187       if (coord[i]) {
1188          used_coord_mask |= 1 << i;
1189          max_coord = i;
1190       } else
1191          coord[i] = get_undef();
1192    }
1193 
1194    return nir_vec(b, coord.data(), max_coord + 1);
1195 }
1196 
1197 nir_def *
prepare_coord(nir_tex_instr * tex,int & unnormalized_mask,int & used_coord_mask)1198 LowerTexToBackend::prepare_coord(nir_tex_instr *tex,
1199                                  int& unnormalized_mask,
1200                                  int& used_coord_mask)
1201 {
1202    std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1203 
1204    unnormalized_mask = get_src_coords(tex, new_coord, true);
1205    used_coord_mask = 0;
1206 
1207    int comp_idx =
1208       tex->is_shadow ? nir_tex_instr_src_index(tex, nir_tex_src_comparator) : -1;
1209 
1210    if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
1211       int idx = tex->op == nir_texop_txl ? nir_tex_instr_src_index(tex, nir_tex_src_lod)
1212                                          : nir_tex_instr_src_index(tex, nir_tex_src_bias);
1213       assert(idx != -1);
1214       new_coord[3] = tex->src[idx].src.ssa;
1215 
1216       if (comp_idx >= 0)
1217          new_coord[2] = tex->src[comp_idx].src.ssa;
1218    } else if (comp_idx >= 0) {
1219       new_coord[3] = tex->src[comp_idx].src.ssa;
1220    }
1221    return prep_src(new_coord, used_coord_mask);
1222 }
1223 
1224 int
get_src_coords(nir_tex_instr * tex,std::array<nir_def *,4> & coord,bool round_array_index)1225 LowerTexToBackend::get_src_coords(nir_tex_instr *tex,
1226                                   std::array<nir_def *, 4>& coord,
1227                                   bool round_array_index)
1228 {
1229    int unnormalized_mask = 0;
1230    auto coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1231    assert(coord_idx != -1);
1232    auto old_coord = tex->src[coord_idx];
1233 
1234    coord = {nir_channel(b, old_coord.src.ssa, 0), nullptr, nullptr, nullptr};
1235 
1236    if (tex->coord_components > 1) {
1237       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D)
1238          coord[2] = nir_channel(b, old_coord.src.ssa, 1);
1239       else
1240          coord[1] = nir_channel(b, old_coord.src.ssa, 1);
1241    }
1242 
1243    if (tex->coord_components > 2) {
1244       coord[2] = nir_channel(b, old_coord.src.ssa, 2);
1245    }
1246    if (tex->is_array) {
1247       unnormalized_mask |= 0x4;
1248       if (round_array_index)
1249          coord[2] = nir_fround_even(b, coord[2]);
1250    }
1251 
1252    if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1253       unnormalized_mask |= 0x3;
1254    }
1255 
1256    return unnormalized_mask;
1257 }
1258 
1259 } // namespace r600
1260