• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_tex.h"
28 #include "sfn_instr_alu.h"
29 #include "sfn_instr_fetch.h"
30 #include "sfn_debug.h"
31 
32 namespace r600 {
33 
34 using std::string;
35 
TexInstr(Opcode op,const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,const RegisterVec4 & src,unsigned sid,unsigned rid,PVirtualValue sampler_offs)36 TexInstr::TexInstr(Opcode op, const RegisterVec4& dest,
37                    const RegisterVec4::Swizzle& dest_swizzle,
38                    const RegisterVec4& src, unsigned sid, unsigned rid,
39                    PVirtualValue sampler_offs):
40    InstrWithVectorResult(dest, dest_swizzle),
41    m_opcode(op),
42    m_src(src),
43    m_sampler_offset(sampler_offs),
44    m_inst_mode(0),
45    m_sampler_id(sid),
46    m_resource_id(rid)
47 {
48    memset(m_offset, 0, sizeof(m_offset));
49    m_src.add_use(this);
50 
51    if (m_sampler_offset && m_sampler_offset->as_register())
52       m_sampler_offset->as_register()->add_use(this);
53 }
54 
accept(ConstInstrVisitor & visitor) const55 void TexInstr::accept(ConstInstrVisitor& visitor) const
56 {
57    visitor.visit(*this);
58 }
59 
accept(InstrVisitor & visitor)60 void TexInstr::accept(InstrVisitor& visitor)
61 {
62    visitor.visit(this);
63 }
64 
set_offset(unsigned index,int32_t val)65 void TexInstr::set_offset(unsigned index, int32_t val)
66 {
67    assert(index < 3);
68    m_offset[index] = val;
69 }
70 
get_offset(unsigned index) const71 int TexInstr::get_offset(unsigned index) const
72 {
73    assert(index < 3);
74    return m_offset[index] << 1;
75 }
76 
set_gather_comp(int cmp)77 void TexInstr::set_gather_comp(int cmp)
78 {
79    m_inst_mode = cmp;
80 }
81 
is_equal_to(const TexInstr & lhs) const82 bool TexInstr::is_equal_to(const TexInstr& lhs) const
83 {
84    if (m_opcode != lhs.m_opcode)
85       return false;
86 
87    if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle()))
88       return false;
89 
90    if (m_src != lhs.m_src)
91       return false;
92 
93    if (m_sampler_offset && lhs.m_sampler_offset) {
94       if (!m_sampler_offset->equal_to(*lhs.m_sampler_offset))
95          return false;
96    } else if ((m_sampler_offset && !lhs.m_sampler_offset) ||
97               (!m_sampler_offset && lhs.m_sampler_offset))
98       return false;
99 
100    if (m_tex_flags != lhs.m_tex_flags)
101       return false;
102 
103    for(int i = 0; i < 3; ++i) {
104       if (m_offset[i] != lhs.m_offset[i])
105          return false;
106    }
107    return m_inst_mode == lhs.m_inst_mode &&
108          m_sampler_id == lhs.m_sampler_id &&
109          m_resource_id == lhs.m_resource_id;
110 }
111 
propagate_death()112 bool TexInstr::propagate_death()
113 {
114    m_src.del_use(this);
115    return true;
116 }
117 
do_ready() const118 bool TexInstr::do_ready() const
119 {
120    for (auto p : m_prepare_instr)
121       if (!p->ready())
122          return false;
123 
124    for (auto p :required_instr())
125       if (!p->is_scheduled() && !p->is_dead()) {
126          return false;
127       }
128 
129    if (m_sampler_offset && m_sampler_offset->as_register() &&
130        !m_sampler_offset->as_register()->ready(block_id(), index()))
131        return false;
132    return m_src.ready(block_id(), index());
133 }
134 
do_print(std::ostream & os) const135 void TexInstr::do_print(std::ostream& os) const
136 {
137 
138    os << "TEX " << opname(m_opcode) << " ";
139    print_dest(os);
140 
141    os << " : ";
142    m_src.print(os);
143 
144    os << " RID:" << m_resource_id
145       << " SID:" << m_sampler_id;
146 
147    if (m_sampler_offset)
148       os << " SO:" << *m_sampler_offset;
149 
150    if (m_offset[0])
151       os << " OX:" << m_offset[0];
152    if (m_offset[1])
153       os << " OY:" << m_offset[1];
154    if (m_offset[2])
155       os << " OZ:" << m_offset[2];
156 
157    if (m_inst_mode || is_gather(m_opcode))
158       os << " MODE:" << m_inst_mode;
159 
160    os << " ";
161    os << (m_tex_flags.test(x_unnormalized) ? "U" : "N");
162    os << (m_tex_flags.test(y_unnormalized) ? "U" : "N");
163    os << (m_tex_flags.test(z_unnormalized) ? "U" : "N");
164    os << (m_tex_flags.test(w_unnormalized) ? "U" : "N");
165 }
166 
opname(Opcode op)167 const char *TexInstr::opname(Opcode op)
168 {
169    switch (op) {
170    case ld: return "LD";
171    case get_resinfo: return "GET_TEXTURE_RESINFO";
172    case get_nsamples: return "GET_NUMBER_OF_SAMPLES";
173    case get_tex_lod: return "GET_LOD";
174    case get_gradient_h: return "GET_GRADIENTS_H";
175    case get_gradient_v: return "GET_GRADIENTS_V";
176    case set_offsets: return "SET_TEXTURE_OFFSETS";
177    case keep_gradients: return "KEEP_GRADIENTS";
178    case set_gradient_h: return "SET_GRADIENTS_H";
179    case set_gradient_v: return "SET_GRADIENTS_V";
180    case sample: return "SAMPLE";
181    case sample_l: return "SAMPLE_L";
182    case sample_lb: return "SAMPLE_LB";
183    case sample_lz: return "SAMPLE_LZ";
184    case sample_g: return "SAMPLE_G";
185    case sample_g_lb: return "SAMPLE_G_L";
186    case gather4: return "GATHER4";
187    case gather4_o: return "GATHER4_O";
188    case sample_c: return "SAMPLE_C";
189    case sample_c_l: return "SAMPLE_C_L";
190    case sample_c_lb: return "SAMPLE_C_LB";
191    case sample_c_lz: return "SAMPLE_C_LZ";
192    case sample_c_g: return "SAMPLE_C_G";
193    case sample_c_g_lb: return "SAMPLE_C_G_L";
194    case gather4_c: return "GATHER4_C";
195    case gather4_c_o: return "OP_GATHER4_C_O";
196    default: return "ERROR";
197    }
198 
199 }
200 
201 const std::map<TexInstr::Opcode, std::string> TexInstr::s_opcode_map = {
202    {ld, "LD"},
203    {get_resinfo,"GET_TEXTURE_RESINFO"},
204    {get_nsamples,"GET_NUMBER_OF_SAMPLES"},
205    {get_tex_lod,"GET_LOD"},
206    {get_gradient_h,"GET_GRADIENTS_H"},
207    {get_gradient_v,"GET_GRADIENTS_V"},
208    {set_offsets,"SET_TEXTURE_OFFSETS"},
209    {keep_gradients,"KEEP_GRADIENTS"},
210    {set_gradient_h,"SET_GRADIENTS_H"},
211    {set_gradient_v,"SET_GRADIENTS_V"},
212    {sample,"SAMPLE"},
213    {sample_l,"SAMPLE_L"},
214    {sample_lb,"SAMPLE_LB"},
215    {sample_lz,"SAMPLE_LZ"},
216    {sample_g,"SAMPLE_G"},
217    {sample_g_lb,"SAMPLE_G_L"},
218    {gather4,"GATHER4"},
219    {gather4_o,"GATHER4_O"},
220    {sample_c,"SAMPLE_C"},
221    {sample_c_l,"SAMPLE_C_L"},
222    {sample_c_lb,"SAMPLE_C_LB"},
223    {sample_c_lz,"SAMPLE_C_LZ"},
224    {sample_c_g,"SAMPLE_C_G"},
225    {sample_c_g_lb,"SAMPLE_C_G_L"},
226    {gather4_c,"GATHER4_C"},
227    {gather4_c_o,"OP_GATHER4_C_O"},
228    {unknown, "ERROR"}
229 };
230 
is_gather(Opcode op)231 bool TexInstr::is_gather(Opcode op)
232 {
233    return op == gather4 || op == gather4_c ||
234          op == gather4_o || op == gather4_c_o;
235 }
236 
op_from_string(const std::string & s)237 TexInstr::Opcode TexInstr::op_from_string(const std::string& s)
238 {
239    for (auto& [op, str] : s_opcode_map) {
240       if (s == str)
241          return op;
242    }
243    return unknown;
244 }
245 
from_string(std::istream & is,ValueFactory & value_fctory)246 Instr::Pointer TexInstr::from_string(std::istream& is, ValueFactory& value_fctory)
247 {
248    string opstr;
249    string deststr;
250    is >> opstr >> deststr;
251 
252    auto opcode = TexInstr::op_from_string(opstr);
253 
254    RegisterVec4::Swizzle dest_swz;
255 
256    auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group);
257 
258    char dummy;
259    is >> dummy;
260    assert(dummy == ':');
261 
262    string srcstr;
263    is >> srcstr;
264 
265    auto src = value_fctory.src_vec4_from_string(srcstr);
266 
267    string res_id_str;
268    string sampler_id_str;
269 
270    is >> res_id_str >> sampler_id_str;
271 
272    int res_id = int_from_string_with_prefix(res_id_str, "RID:");
273    int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:");
274 
275    auto tex = new TexInstr( opcode, dest, dest_swz, src, sampler_id, res_id, nullptr);
276 
277    while (!is.eof() && is.good()) {
278       std::string next_token;
279       is >> next_token;
280 
281       if (next_token.empty())
282          break;
283 
284       if (next_token[0] == 'U' || next_token[0] == 'N') {
285          tex->read_tex_coord_normalitazion(next_token);
286       } else {
287          tex->set_tex_param(next_token);
288       }
289    }
290 
291    return tex;
292 }
293 
read_tex_coord_normalitazion(const std::string & flags)294 void TexInstr::read_tex_coord_normalitazion(const std::string& flags)
295 {
296    assert(flags.length() == 4);
297    if (flags[0] == 'U') set_tex_flag(x_unnormalized);
298    if (flags[1] == 'U') set_tex_flag(y_unnormalized);
299    if (flags[2] == 'U') set_tex_flag(z_unnormalized);
300    if (flags[3] == 'U') set_tex_flag(w_unnormalized);
301 }
302 
set_tex_param(const std::string & token)303 void TexInstr::set_tex_param(const std::string& token)
304 {
305    if (token.substr(0,3) == "OX:")
306       set_offset(0, int_from_string_with_prefix(token, "OX:"));
307    else if (token.substr(0,3) == "OY:")
308       set_offset(1, int_from_string_with_prefix(token, "OY:"));
309    else if (token.substr(0,3) == "OZ:")
310       set_offset(2, int_from_string_with_prefix(token, "OZ:"));
311    else if (token.substr(0,5) == "MODE:")
312       set_inst_mode(int_from_string_with_prefix(token, "MODE:"));
313    else if (token.substr(0,3) == "SO:")
314       set_sampler_offset(VirtualValue::from_string(token.substr(3)));
315    else {
316       std::cerr << "Token '" << token << "': ";
317       unreachable("Unknown token in tex param");
318    }
319 }
320 
from_nir(nir_tex_instr * tex,Shader & shader)321 bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
322 {
323    Inputs src(*tex, shader.value_factory());
324 
325    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
326       switch (tex->op) {
327       case nir_texop_txs:
328          return emit_tex_txs(tex, src, {0,1,2,3}, shader);
329       case nir_texop_txf:
330          return emit_buf_txf(tex, src, shader);
331       default:
332          return false;
333       }
334    } else {
335       switch (tex->op) {
336       case nir_texop_tex:
337          return emit_tex_tex(tex, src, shader);
338       case nir_texop_txf:
339          return emit_tex_txf(tex, src, shader);
340       case nir_texop_txb:
341       case nir_texop_txl:
342          return emit_tex_txl_txb(tex, src, shader);
343       case nir_texop_txs:
344          return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
345       case nir_texop_lod:
346          return emit_tex_lod(tex, src, shader);
347       case nir_texop_query_levels:
348          return emit_tex_txs(tex, src, {3,7,7,7}, shader);
349       case nir_texop_txd:
350           return emit_tex_txd(tex, src, shader);
351       case nir_texop_txf_ms:
352          if (shader.chip_class() < ISA_CC_EVERGREEN)
353             return emit_tex_tex_ms_direct(tex, src, shader);
354          else
355             return emit_tex_tex_ms(tex, src, shader);
356       case nir_texop_tg4:
357          return emit_tex_tg4(tex, src, shader);
358       case nir_texop_texture_samples:
359          return emit_tex_texture_samples(tex, src, shader);
360       default:
361       return false;
362       }
363    }
364    return true;
365 }
366 
367 struct SamplerId {
368    int id;
369    bool indirect;
370 };
371 
372 SamplerId
get_sampler_id(int sampler_id,const nir_variable * deref)373 get_sampler_id(int sampler_id, const nir_variable *deref)
374 {
375    SamplerId result = {sampler_id, false};
376 
377    if (deref) {
378       assert(glsl_type_is_sampler(deref->type));
379       result.id = deref->data.binding;
380    }
381    return result;
382 }
383 
384 
emit_tex_tex(nir_tex_instr * tex,Inputs & src,Shader & shader)385 bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
386 {
387    auto& vf = shader.value_factory();
388 
389    sfn_log << SfnLog::instr << "emit '"
390                  << *reinterpret_cast<nir_instr*>(tex)
391                  << "' (" << __func__ << ")\n";
392 
393    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
394    assert(!sampler.indirect);
395 
396    auto src_coord = prepare_source(tex, src, shader);
397    auto dst = vf.dest_vec4(tex->dest, pin_group);
398 
399    auto irt = new TexInstr(src.opcode, dst, {0,1,2,3},  src_coord, sampler.id,
400                            sampler.id + R600_MAX_CONST_BUFFERS,
401                            src.sampler_offset);
402    if (tex->is_array)
403       irt->set_tex_flag(TexInstr::z_unnormalized);
404 
405    irt->set_rect_coordinate_flags(tex);
406    irt->set_coord_offsets(src.offset);
407 
408    shader.emit_instruction(irt);
409    return true;
410 }
411 
emit_tex_txl_txb(nir_tex_instr * tex,Inputs & src,Shader & shader)412 bool TexInstr::emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader)
413 {
414    auto& vf = shader.value_factory();
415 
416    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
417    assert(!sampler.indirect && "Indirect sampler selection not yet supported");
418 
419    auto src_coord = prepare_source(tex, src, shader);
420 
421    auto dst = vf.dest_vec4(tex->dest, pin_group);
422 
423    auto irt = new TexInstr(src.opcode, dst, {0,1,2,3},  src_coord, sampler.id,
424                            sampler.id + R600_MAX_CONST_BUFFERS,
425                            src.sampler_offset);
426 
427    if (tex->is_array)
428       irt->set_tex_flag(TexInstr::z_unnormalized);
429 
430    irt->set_rect_coordinate_flags(tex);
431    irt->set_coord_offsets(src.offset);
432 
433    shader.emit_instruction(irt);
434    return true;
435 }
436 
437 
emit_tex_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)438 bool TexInstr::emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
439 {
440    auto& vf = shader.value_factory();
441 
442    int sampler = tex->sampler_index;
443 
444    auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
445    swizzle[3] = 3;
446 
447    if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
448       swizzle[2] = 1;
449       swizzle[1] = 7;
450    }
451 
452    auto src_coord = vf.temp_vec4(pin_group, swizzle);
453 
454    for (unsigned i = 0; i < tex->coord_components; i++) {
455       unsigned k = i;
456       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
457          k = 2;
458 
459 
460       if (src.offset) {
461          shader.emit_instruction(new AluInstr(op2_add_int, src_coord[k], src.coord[i],
462                                               vf.src(src.offset[i], i),
463                                               AluInstr::write));
464       } else {
465          shader.emit_instruction(new AluInstr(op1_mov, src_coord[k], src.coord[i],AluInstr::write));
466       }
467    }
468 
469    shader.emit_instruction(new AluInstr(op1_mov, src_coord[3], src.lod, AluInstr::last_write));
470 
471    auto dst = vf.dest_vec4(tex->dest, pin_group);
472 
473    auto tex_ir = new TexInstr(src.opcode, dst, {0, 1, 2, 3}, src_coord,
474                               sampler,
475                               sampler + R600_MAX_CONST_BUFFERS,
476                               src.sampler_offset);
477 
478    if (tex->is_array)
479       tex_ir->set_tex_flag(z_unnormalized);
480 
481    tex_ir->set_rect_coordinate_flags(tex);
482    tex_ir->set_sampler_offset(src.sampler_offset);
483 
484    shader.emit_instruction(tex_ir);
485 
486    return true;
487 }
488 
emit_buf_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)489 bool TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
490 {
491    auto& vf = shader.value_factory();
492    auto dst = vf.dest_vec4(tex->dest, pin_group);
493 
494    PRegister tex_offset = nullptr;
495    if (src.texture_offset)
496       tex_offset = shader.emit_load_to_register(src.texture_offset);
497 
498    auto *real_dst = &dst;
499    RegisterVec4 tmp = vf.temp_vec4(pin_group);
500 
501    if (shader.chip_class() < ISA_CC_EVERGREEN) {
502       real_dst = &tmp;
503    }
504 
505    auto ir = new LoadFromBuffer(*real_dst, {0,1,2,3}, src.coord[0], 0,
506                                 tex->texture_index +  R600_MAX_CONST_BUFFERS,
507                                 tex_offset, fmt_invalid);
508    ir->set_fetch_flag(FetchInstr::use_const_field);
509    shader.emit_instruction(ir);
510    shader.set_flag(Shader::sh_uses_tex_buffer);
511 
512    if (shader.chip_class() < ISA_CC_EVERGREEN) {
513       auto tmp_w = vf.temp_register();
514       int buf_sel = (512 + R600_BUFFER_INFO_OFFSET / 16) + 2 * tex->texture_index;
515       AluInstr *ir = nullptr;
516       for (int i = 0; i < 4; ++i) {
517          auto d = i < 3 ? dst[i] : tmp_w;
518          ir = new AluInstr(op2_and_int,  d, tmp[i],
519                            vf.uniform(buf_sel, i, R600_BUFFER_INFO_CONST_BUFFER),
520                            AluInstr::write);
521          shader.emit_instruction(ir);
522       }
523 
524       ir->set_alu_flag(alu_last_instr);
525       shader.emit_instruction(new AluInstr(op2_or_int, dst[3], tmp_w,
526                               vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER),
527                               AluInstr::last_write));
528    }
529 
530    return true;
531 }
532 
emit_tex_tex_ms_direct(nir_tex_instr * tex,Inputs & src,Shader & shader)533 bool TexInstr::emit_tex_tex_ms_direct(nir_tex_instr *tex, Inputs& src, Shader& shader)
534 {
535    assert(tex->src[0].src.is_ssa);
536    auto& vf = shader.value_factory();
537 
538    r600::sfn_log << SfnLog::instr << "emit '"
539                  << *reinterpret_cast<nir_instr*>(tex)
540                  << "' (" << __func__ << ")\n";
541 
542    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
543    assert(!sampler.indirect && "Indirect sampler selection not yet supported");
544 
545    auto temp2 = vf.temp_vec4(pin_group);
546 
547    for (unsigned i = 0; i < tex->coord_components; ++i) {
548       unsigned k = i;
549       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
550          k = 2;
551 
552       shader.emit_instruction(new AluInstr(op1_mov, temp2[k],
553                                            src.coord[k], AluInstr::write));
554    }
555 
556    shader.emit_instruction(new AluInstr(op1_mov, temp2[3], src.ms_index,
557                            AluInstr::last_write));
558 
559    auto dst = vf.dest_vec4(tex->dest, pin_group);
560 
561    /* txf doesn't need rounding for the array index, but 1D has the array index
562     * in the z component */
563    auto tex_ir = new TexInstr(ld, dst, {0,1,2,3}, temp2,
564                                     sampler.id,
565                                     sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
566 
567    shader.emit_instruction(tex_ir);
568    return true;
569 }
570 
emit_tex_tex_ms(nir_tex_instr * tex,Inputs & src,Shader & shader)571 bool TexInstr::emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader)
572 {
573    assert(tex->src[0].src.is_ssa);
574    auto& vf = shader.value_factory();
575 
576    r600::sfn_log << SfnLog::instr << "emit '"
577                  << *reinterpret_cast<nir_instr*>(tex)
578                  << "' (" << __func__ << ")\n";
579 
580    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
581    assert(!sampler.indirect && "Indirect sampler selection not yet supported");
582 
583    auto sample_id_dest = vf.temp_vec4(pin_group);
584    RegisterVec4::Swizzle dest_swz = {0,7,7,7};
585 
586    auto temp1 = vf.temp_vec4(pin_group);
587    for (unsigned i = 0; i < tex->coord_components; ++i) {
588       unsigned k = i;
589       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
590          k = 2;
591 
592       if (src.offset && i < src.offset->ssa->num_components)
593          shader.emit_instruction(new AluInstr(op2_add_int, temp1[k],
594                                               src.coord[i],
595                                               vf.src(*src.offset, i),
596                                               AluInstr::write));
597       else
598          shader.emit_instruction(new AluInstr(op1_mov, temp1[k],
599                                               src.coord[i], AluInstr::write));
600    }
601 
602    shader.emit_instruction(new AluInstr(op1_mov, temp1[3],
603                                        src.ms_index, AluInstr::last_write));
604 
605    auto tex_sample_id_ir = new TexInstr(ld, sample_id_dest, dest_swz, temp1,
606                                               sampler.id,
607                                               sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
608 
609    tex_sample_id_ir->set_tex_flag(x_unnormalized);
610    tex_sample_id_ir->set_tex_flag(y_unnormalized);
611    tex_sample_id_ir->set_tex_flag(z_unnormalized);
612    tex_sample_id_ir->set_tex_flag(w_unnormalized);
613    tex_sample_id_ir->set_inst_mode(1);
614 
615    shader.emit_instruction(tex_sample_id_ir);
616 
617    Register *sample_id_dest_reg = sample_id_dest[0];
618 
619    if (!src.ms_index->as_inline_const() ||
620        src.ms_index->as_inline_const()->sel() != ALU_SRC_0) {
621 
622       auto help = vf.temp_register();
623 
624       shader.emit_instruction(new AluInstr(op2_lshl_int, help,
625                                            src.ms_index, vf.literal(2),
626                                            AluInstr::last_write));
627 
628       sample_id_dest_reg = vf.temp_register();
629       shader.emit_instruction(new AluInstr(op2_lshr_int, sample_id_dest_reg,
630                                            sample_id_dest[0], help,
631                                            AluInstr::last_write));
632    }
633 
634    auto temp2 = vf.temp_vec4(pin_group);
635 
636    for (unsigned i = 0; i < tex->coord_components; ++i) {
637       unsigned k = i;
638       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
639          k = 2;
640 
641       shader.emit_instruction(new AluInstr(op1_mov, temp2[k],
642                                            temp1[k], AluInstr::write));
643    }
644 
645    shader.emit_instruction(new AluInstr(op2_and_int, temp2[3],
646                                         sample_id_dest_reg, vf.literal(15),
647                                         AluInstr::last_write));
648 
649    auto dst = vf.dest_vec4(tex->dest, pin_group);
650 
651    /* txf doesn't need rounding for the array index, but 1D has the array index
652     * in the z component */
653    auto tex_ir = new TexInstr(ld, dst, {0,1,2,3}, temp2,
654                                     sampler.id,
655                                     sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
656 
657    shader.emit_instruction(tex_ir);
658    return true;
659 }
660 
emit_tex_texture_samples(nir_tex_instr * instr,Inputs & src,Shader & shader)661 bool TexInstr::emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader)
662 {
663    RegisterVec4 dest = shader.value_factory().dest_vec4(instr->dest, pin_chan);
664    RegisterVec4 help{0, true, {4,4,4,4}};
665 
666    int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
667 
668    auto ir = new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help,
669                           0, res_id, src.sampler_offset);
670    shader.emit_instruction(ir);
671    return true;
672 }
673 
674 
emit_tex_txd(nir_tex_instr * tex,Inputs & src,Shader & shader)675 bool TexInstr::emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader)
676 {
677 
678    auto& vf = shader.value_factory();
679 
680    r600::sfn_log << SfnLog::instr << "emit '"
681                  << *reinterpret_cast<nir_instr*>(tex)
682                  << "' (" << __func__ << ")\n";
683 
684    auto dst = vf.dest_vec4(tex->dest, pin_group);
685    RegisterVec4 empty_dst(0, false, {0,0,0,0}, pin_group);
686 
687    auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
688 
689    if (tex->is_shadow)
690       swizzle[3] = 3;
691 
692    unsigned array_coord = 2;
693    if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
694       swizzle[2] = 1;
695       swizzle[1] = 7;
696       array_coord = 1;
697    }
698 
699    auto src_coord = vf.temp_vec4(pin_group, swizzle);
700 
701    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
702    assert(!sampler.indirect && "Indirect sampler selection not yet supported");
703 
704    auto irgh = new TexInstr(set_gradient_h, empty_dst, {7,7,7,7}, src.ddx,
705                             sampler.id,
706                             sampler.id + R600_MAX_CONST_BUFFERS,
707                             src.sampler_offset);
708 
709    auto irgv = new TexInstr(set_gradient_v, empty_dst, {7,7,7,7}, src.ddy,
710                             sampler.id, sampler.id + R600_MAX_CONST_BUFFERS,
711                             src.sampler_offset);
712 
713    auto tir = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
714                           sampler.id + R600_MAX_CONST_BUFFERS,
715                           src.sampler_offset);
716 
717 
718    /* r600_bytecode_add_tex has a hack that will start a new tex CF if
719     * set_gradient_h is emitted, so make sure it is emitted first */
720 
721    AluInstr *ir = nullptr;
722    for (unsigned i = 0; i < tex->coord_components; ++i) {
723       int k = i;
724       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
725          k = 2;
726 
727       ir = new AluInstr(tex->is_array && i == array_coord  ? op1_rndne : op1_mov,
728                         src_coord[k], src.coord[i],
729                         AluInstr::write);
730       shader.emit_instruction(ir);
731    }
732 
733    if (tex->is_shadow)  {
734       ir = new AluInstr(op1_mov, src_coord[3], src.comperator, AluInstr::last_write);
735       shader.emit_instruction(ir);
736    }
737 
738    tir->add_prepare_instr(irgh);
739    tir->add_prepare_instr(irgv);
740 
741    if (tex->is_array)
742       tir->set_tex_flag(TexInstr::z_unnormalized);
743 
744    irgh->set_rect_coordinate_flags(tex);
745    irgv->set_rect_coordinate_flags(tex);
746    irgh->set_always_keep();
747    irgv->set_always_keep();
748 
749    tir->set_rect_coordinate_flags(tex);
750 
751    tir->set_coord_offsets(src.offset);
752 
753    if (shader.last_txd())
754       tir->add_required_instr(shader.last_txd());
755 
756    shader.emit_instruction(tir);
757    shader.set_last_txd(tir);
758 
759    return true;
760 }
761 
emit_tex_txs(nir_tex_instr * tex,Inputs & src,RegisterVec4::Swizzle dest_swz,Shader & shader)762 bool TexInstr::emit_tex_txs(nir_tex_instr *tex, Inputs& src,
763                             RegisterVec4::Swizzle dest_swz, Shader& shader)
764 {
765    auto& vf = shader.value_factory();
766 
767    auto dest = vf.dest_vec4(tex->dest, pin_group);
768 
769    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
770       if (shader.chip_class() >= ISA_CC_EVERGREEN) {
771          shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,7,7,7},
772                                                           tex->sampler_index + R600_MAX_CONST_BUFFERS));
773       } else {
774          int id = 2 * tex->sampler_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1;
775          auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER);
776          shader.emit_instruction(new AluInstr(op1_mov, dest[0], src, AluInstr::last_write));
777          shader.set_flag(Shader::sh_uses_tex_buffer);
778       }
779    } else {
780 
781       auto src_lod = vf.temp_register();
782       shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
783 
784       RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
785 
786       auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
787       assert(!sampler.indirect && "Indirect sampler selection not yet supported");
788 
789       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
790          dest_swz[2] = 7;
791 
792       auto ir = new TexInstr(get_resinfo, dest, dest_swz, src_coord,
793                              sampler.id,
794                              sampler.id + R600_MAX_CONST_BUFFERS,
795                              src.sampler_offset);
796 
797       ir->set_dest_swizzle(dest_swz);
798       shader.emit_instruction(ir);
799 
800       if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
801          auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
802                                    sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER);
803 
804 
805          auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
806          shader.emit_instruction(alu);
807          shader.set_flag(Shader::sh_txs_cube_array_comp);
808       }
809    }
810 
811    return true;
812 }
813 
emit_tex_tg4(nir_tex_instr * tex,Inputs & src,Shader & shader)814 bool TexInstr::emit_tex_tg4(nir_tex_instr* tex, Inputs& src , Shader& shader)
815 {
816    auto& vf = shader.value_factory();
817 
818    r600::sfn_log << SfnLog::instr << "emit '"
819               << *reinterpret_cast<nir_instr*>(tex)
820               << "' (" << __func__ << ")\n";
821 
822    TexInstr *set_ofs = nullptr;
823 
824    auto src_coord = prepare_source(tex, src, shader);
825 
826    r600::sfn_log << SfnLog::instr << "emit '"
827                  << *reinterpret_cast<nir_instr*>(tex)
828                  << "' (" << __func__ << ")\n";
829 
830    auto dst = vf.dest_vec4(tex->dest, pin_group);
831 
832    RegisterVec4 empty_dst(125, false, {7,7,7,7}, pin_group);
833 
834    /* pre CAYMAN needs swizzle */
835    auto dest_swizzle = shader.chip_class() <= ISA_CC_EVERGREEN ?
836             RegisterVec4::Swizzle{1, 2, 0, 3} :
837             RegisterVec4::Swizzle{0, 1, 2, 3};
838 
839    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
840    assert(!sampler.indirect && "Indirect sampler selection not yet supported");
841 
842    bool literal_offset = false;
843    if (src.offset) {
844       literal_offset =  nir_src_as_const_value(*src.offset) != 0;
845       r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
846                        (literal_offset ? "literal" : "varying") <<
847                        "\n";
848 
849       if (!literal_offset) {
850          RegisterVec4::Swizzle swizzle = {4,4,4,4};
851          int src_components = tex->coord_components;
852          if (tex->is_array)
853             --src_components;
854 
855          for (int i = 0; i < src_components; ++i)
856             swizzle[i] = i;
857 
858          int noffsets = tex->coord_components;
859          if (tex->is_array)
860             --noffsets;
861 
862          auto ofs = vf.src_vec4(*src.offset, pin_group, swizzle);
863          RegisterVec4 dummy(0, true, {7,7,7,7});
864 
865          set_ofs = new TexInstr(TexInstr::set_offsets, dummy, {7,7,7,7},
866                                 ofs, sampler.id,
867                                 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
868       } else {
869          src.opcode = src.opcode == gather4_o ? gather4 : gather4_c;
870       }
871    }
872 
873    auto irt = new TexInstr(src.opcode, dst, dest_swizzle, src_coord, sampler.id,
874                            sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
875 
876    irt->set_gather_comp(tex->component);
877 
878    if (tex->is_array)
879       irt->set_tex_flag(z_unnormalized);
880 
881    if (literal_offset) {
882       r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
883       irt->set_coord_offsets(src.offset);
884    }
885 
886    irt->set_rect_coordinate_flags(tex);
887 
888    if (set_ofs) {
889       set_ofs->set_always_keep();
890       irt->add_prepare_instr(set_ofs);
891    }
892 
893    shader.emit_instruction(irt);
894    return true;
895 }
896 
prepare_source(nir_tex_instr * tex,const Inputs & inputs,Shader & shader)897 auto TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader) -> RegisterVec4
898 {
899    RegisterVec4::Swizzle target{7,7,7,7};
900    PVirtualValue src[4]{nullptr,nullptr,nullptr,nullptr};
901 
902 
903    for (unsigned i = 0; i < tex->coord_components; ++i) {
904       target[i] = i;
905       src[i] = inputs.coord[i];
906    }
907 
908    // array index always goes into z
909    if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
910       target[2]  = 1;
911       target[1]  = 7;
912       src[2] = inputs.coord[1];
913    }
914 
915    /* With txl and txb shadow goes into z and lod or bias go into w */
916    if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
917       target[3] = 3;
918       src[3] = tex->op == nir_texop_txl ? inputs.lod : inputs.bias;
919       if (tex->is_shadow){
920          target[2] = 2;
921          src[2] = inputs.comperator;
922       }
923    } else if (tex->is_shadow) {
924       /* Other ops have shadow in w */
925       target[3] = 3;
926       src[3] = inputs.comperator;
927    }
928 
929    auto src_coord = shader.value_factory().temp_vec4(pin_group, target);
930 
931    AluInstr *ir = nullptr;
932    for (int i = 0; i < 4; ++i) {
933       if (target[i] > 3)
934         continue;
935 
936       auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov;
937 
938       ir = new AluInstr(op,  src_coord[i], src[i], AluInstr::write);
939       shader.emit_instruction(ir);
940    }
941 
942    if (ir)
943       ir->set_alu_flag(alu_last_instr);
944 
945    return src_coord;
946 }
947 
Inputs(const nir_tex_instr & instr,ValueFactory & vf)948 TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
949    sampler_deref(nullptr),
950    texture_deref(nullptr),
951    bias(nullptr),
952    comperator(nullptr),
953    lod(nullptr),
954    offset(nullptr),
955    gather_comp(nullptr),
956    ms_index(nullptr),
957    sampler_offset(nullptr),
958    texture_offset(nullptr),
959    opcode(ld)
960 {
961    //sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
962 
963    unsigned grad_components = instr.coord_components;
964    if (instr.is_array && !instr.array_is_lowered_cube)
965       --grad_components;
966 
967    for (unsigned i = 0; i < instr.num_srcs; ++i) {
968       switch (instr.src[i].src_type) {
969       case nir_tex_src_bias:
970          bias = vf.src(instr.src[i], 0);
971       break;
972 
973       case nir_tex_src_coord: {
974          coord = vf.src_vec4(instr.src[i].src, pin_none, swizzle_from_ncomps(instr.coord_components));
975       } break;
976       case nir_tex_src_comparator:
977          comperator = vf.src(instr.src[i], 0);
978       break;
979       case nir_tex_src_ddx:
980          ddx = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components));
981       break;
982       case nir_tex_src_ddy:
983          ddy = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components));
984       break;
985       case nir_tex_src_lod:
986          lod = vf.src(instr.src[i].src, 0);
987       break;
988       case nir_tex_src_offset:
989          offset = &instr.src[i].src;
990       break;
991          /* case nir_tex_src_sampler_deref:
992          sampler_deref = get_deref_location(instr.src[i].src);
993          break;
994       case nir_tex_src_texture_deref:
995          texture_deref = get_deref_location(instr.src[i].src);
996          break;
997       */
998       case nir_tex_src_ms_index:
999          ms_index = vf.src(instr.src[i], 0);
1000       break;
1001       case nir_tex_src_texture_offset:
1002          texture_offset = vf.src(instr.src[i], 0);
1003       break;
1004       case nir_tex_src_sampler_offset:
1005          sampler_offset = vf.src(instr.src[i], 0);
1006       break;
1007       case nir_tex_src_plane:
1008       case nir_tex_src_projector:
1009       case nir_tex_src_min_lod:
1010       default:
1011          unreachable("unsupported texture input type");
1012       }
1013    }
1014 
1015    opcode = get_opcode(instr);
1016 
1017 
1018 }
1019 
get_opcode(const nir_tex_instr & instr)1020 auto TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode
1021 {
1022    switch (instr.op) {
1023    case nir_texop_tex:
1024       return instr.is_shadow ? sample_c : sample;
1025    case nir_texop_txf:
1026       return ld;
1027    case nir_texop_txb:
1028       return instr.is_shadow ? sample_c_lb : sample_lb;
1029    case nir_texop_txl:
1030       return instr.is_shadow ? sample_c_l : sample_l;
1031    case nir_texop_txs:
1032       return get_resinfo;
1033    case nir_texop_lod:
1034       return get_resinfo;
1035    case nir_texop_txd:
1036       return instr.is_shadow ? sample_c_g : sample_g;
1037    case nir_texop_tg4:
1038       return instr.is_shadow ?
1039                (offset ? gather4_c_o : gather4_c) :
1040                (offset ? gather4_o : gather4);
1041 
1042    case nir_texop_txf_ms:
1043       return ld;
1044    case nir_texop_query_levels:
1045       return get_resinfo;
1046    case nir_texop_texture_samples:
1047       return TexInstr::get_nsamples;
1048    default:
1049       unreachable("unsupported texture input opcode");
1050    }
1051 }
1052 
emit_tex_lod(nir_tex_instr * tex,Inputs & src,Shader & shader)1053 bool TexInstr::emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader)
1054 {
1055    auto& vf = shader.value_factory();
1056    auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
1057    assert(!sampler.indirect && "Indirect sampler selection not yet supported");
1058 
1059    auto dst = shader.value_factory().dest_vec4(tex->dest, pin_group);
1060 
1061    auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
1062 
1063    auto src_coord = vf.temp_vec4(pin_group, swizzle);
1064 
1065    AluInstr *ir = nullptr;
1066    for (unsigned i = 0; i < tex->coord_components; ++i) {
1067       ir = new AluInstr(op1_mov,
1068                         src_coord[i], src.coord[i],
1069                         AluInstr::write);
1070       shader.emit_instruction(ir);
1071    }
1072    if (ir)
1073       ir->set_alu_flag(alu_last_instr);
1074 
1075    auto irt = new TexInstr(TexInstr::get_tex_lod, dst, {1,0,7,7}, src_coord,
1076                            sampler.id, sampler.id + R600_MAX_CONST_BUFFERS);
1077 
1078    shader.emit_instruction(irt);
1079    return true;
1080 }
1081 
1082 
swizzle_from_ncomps(int comps) const1083 RegisterVec4::Swizzle TexInstr::Inputs::swizzle_from_ncomps(int comps) const
1084 {
1085    RegisterVec4::Swizzle swz;
1086    for (int i = 0; i < 4; ++i)
1087       swz[i] = i < comps ? i : 7;
1088    return swz;
1089 }
1090 
set_coord_offsets(nir_src * offset)1091 void TexInstr::set_coord_offsets(nir_src *offset)
1092 {
1093    if (!offset)
1094       return;
1095 
1096    assert(offset->is_ssa);
1097    auto literal = nir_src_as_const_value(*offset);
1098    assert(literal);
1099 
1100    for (int i = 0; i < offset->ssa->num_components; ++i)
1101       set_offset(i, literal[i].i32);
1102 }
1103 
set_rect_coordinate_flags(nir_tex_instr * instr)1104 void TexInstr::set_rect_coordinate_flags(nir_tex_instr* instr)
1105 {
1106    if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1107       set_tex_flag(x_unnormalized);
1108       set_tex_flag(y_unnormalized);
1109    }
1110 }
1111 
1112 
1113 }
1114