1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_tex.h"
28 #include "sfn_instr_alu.h"
29 #include "sfn_instr_fetch.h"
30 #include "sfn_debug.h"
31
32 namespace r600 {
33
34 using std::string;
35
TexInstr(Opcode op,const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,const RegisterVec4 & src,unsigned sid,unsigned rid,PVirtualValue sampler_offs)36 TexInstr::TexInstr(Opcode op, const RegisterVec4& dest,
37 const RegisterVec4::Swizzle& dest_swizzle,
38 const RegisterVec4& src, unsigned sid, unsigned rid,
39 PVirtualValue sampler_offs):
40 InstrWithVectorResult(dest, dest_swizzle),
41 m_opcode(op),
42 m_src(src),
43 m_sampler_offset(sampler_offs),
44 m_inst_mode(0),
45 m_sampler_id(sid),
46 m_resource_id(rid)
47 {
48 memset(m_offset, 0, sizeof(m_offset));
49 m_src.add_use(this);
50
51 if (m_sampler_offset && m_sampler_offset->as_register())
52 m_sampler_offset->as_register()->add_use(this);
53 }
54
accept(ConstInstrVisitor & visitor) const55 void TexInstr::accept(ConstInstrVisitor& visitor) const
56 {
57 visitor.visit(*this);
58 }
59
accept(InstrVisitor & visitor)60 void TexInstr::accept(InstrVisitor& visitor)
61 {
62 visitor.visit(this);
63 }
64
set_offset(unsigned index,int32_t val)65 void TexInstr::set_offset(unsigned index, int32_t val)
66 {
67 assert(index < 3);
68 m_offset[index] = val;
69 }
70
get_offset(unsigned index) const71 int TexInstr::get_offset(unsigned index) const
72 {
73 assert(index < 3);
74 return m_offset[index] << 1;
75 }
76
set_gather_comp(int cmp)77 void TexInstr::set_gather_comp(int cmp)
78 {
79 m_inst_mode = cmp;
80 }
81
is_equal_to(const TexInstr & lhs) const82 bool TexInstr::is_equal_to(const TexInstr& lhs) const
83 {
84 if (m_opcode != lhs.m_opcode)
85 return false;
86
87 if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle()))
88 return false;
89
90 if (m_src != lhs.m_src)
91 return false;
92
93 if (m_sampler_offset && lhs.m_sampler_offset) {
94 if (!m_sampler_offset->equal_to(*lhs.m_sampler_offset))
95 return false;
96 } else if ((m_sampler_offset && !lhs.m_sampler_offset) ||
97 (!m_sampler_offset && lhs.m_sampler_offset))
98 return false;
99
100 if (m_tex_flags != lhs.m_tex_flags)
101 return false;
102
103 for(int i = 0; i < 3; ++i) {
104 if (m_offset[i] != lhs.m_offset[i])
105 return false;
106 }
107 return m_inst_mode == lhs.m_inst_mode &&
108 m_sampler_id == lhs.m_sampler_id &&
109 m_resource_id == lhs.m_resource_id;
110 }
111
propagate_death()112 bool TexInstr::propagate_death()
113 {
114 m_src.del_use(this);
115 return true;
116 }
117
do_ready() const118 bool TexInstr::do_ready() const
119 {
120 for (auto p : m_prepare_instr)
121 if (!p->ready())
122 return false;
123
124 for (auto p :required_instr())
125 if (!p->is_scheduled() && !p->is_dead()) {
126 return false;
127 }
128
129 if (m_sampler_offset && m_sampler_offset->as_register() &&
130 !m_sampler_offset->as_register()->ready(block_id(), index()))
131 return false;
132 return m_src.ready(block_id(), index());
133 }
134
do_print(std::ostream & os) const135 void TexInstr::do_print(std::ostream& os) const
136 {
137
138 os << "TEX " << opname(m_opcode) << " ";
139 print_dest(os);
140
141 os << " : ";
142 m_src.print(os);
143
144 os << " RID:" << m_resource_id
145 << " SID:" << m_sampler_id;
146
147 if (m_sampler_offset)
148 os << " SO:" << *m_sampler_offset;
149
150 if (m_offset[0])
151 os << " OX:" << m_offset[0];
152 if (m_offset[1])
153 os << " OY:" << m_offset[1];
154 if (m_offset[2])
155 os << " OZ:" << m_offset[2];
156
157 if (m_inst_mode || is_gather(m_opcode))
158 os << " MODE:" << m_inst_mode;
159
160 os << " ";
161 os << (m_tex_flags.test(x_unnormalized) ? "U" : "N");
162 os << (m_tex_flags.test(y_unnormalized) ? "U" : "N");
163 os << (m_tex_flags.test(z_unnormalized) ? "U" : "N");
164 os << (m_tex_flags.test(w_unnormalized) ? "U" : "N");
165 }
166
opname(Opcode op)167 const char *TexInstr::opname(Opcode op)
168 {
169 switch (op) {
170 case ld: return "LD";
171 case get_resinfo: return "GET_TEXTURE_RESINFO";
172 case get_nsamples: return "GET_NUMBER_OF_SAMPLES";
173 case get_tex_lod: return "GET_LOD";
174 case get_gradient_h: return "GET_GRADIENTS_H";
175 case get_gradient_v: return "GET_GRADIENTS_V";
176 case set_offsets: return "SET_TEXTURE_OFFSETS";
177 case keep_gradients: return "KEEP_GRADIENTS";
178 case set_gradient_h: return "SET_GRADIENTS_H";
179 case set_gradient_v: return "SET_GRADIENTS_V";
180 case sample: return "SAMPLE";
181 case sample_l: return "SAMPLE_L";
182 case sample_lb: return "SAMPLE_LB";
183 case sample_lz: return "SAMPLE_LZ";
184 case sample_g: return "SAMPLE_G";
185 case sample_g_lb: return "SAMPLE_G_L";
186 case gather4: return "GATHER4";
187 case gather4_o: return "GATHER4_O";
188 case sample_c: return "SAMPLE_C";
189 case sample_c_l: return "SAMPLE_C_L";
190 case sample_c_lb: return "SAMPLE_C_LB";
191 case sample_c_lz: return "SAMPLE_C_LZ";
192 case sample_c_g: return "SAMPLE_C_G";
193 case sample_c_g_lb: return "SAMPLE_C_G_L";
194 case gather4_c: return "GATHER4_C";
195 case gather4_c_o: return "OP_GATHER4_C_O";
196 default: return "ERROR";
197 }
198
199 }
200
201 const std::map<TexInstr::Opcode, std::string> TexInstr::s_opcode_map = {
202 {ld, "LD"},
203 {get_resinfo,"GET_TEXTURE_RESINFO"},
204 {get_nsamples,"GET_NUMBER_OF_SAMPLES"},
205 {get_tex_lod,"GET_LOD"},
206 {get_gradient_h,"GET_GRADIENTS_H"},
207 {get_gradient_v,"GET_GRADIENTS_V"},
208 {set_offsets,"SET_TEXTURE_OFFSETS"},
209 {keep_gradients,"KEEP_GRADIENTS"},
210 {set_gradient_h,"SET_GRADIENTS_H"},
211 {set_gradient_v,"SET_GRADIENTS_V"},
212 {sample,"SAMPLE"},
213 {sample_l,"SAMPLE_L"},
214 {sample_lb,"SAMPLE_LB"},
215 {sample_lz,"SAMPLE_LZ"},
216 {sample_g,"SAMPLE_G"},
217 {sample_g_lb,"SAMPLE_G_L"},
218 {gather4,"GATHER4"},
219 {gather4_o,"GATHER4_O"},
220 {sample_c,"SAMPLE_C"},
221 {sample_c_l,"SAMPLE_C_L"},
222 {sample_c_lb,"SAMPLE_C_LB"},
223 {sample_c_lz,"SAMPLE_C_LZ"},
224 {sample_c_g,"SAMPLE_C_G"},
225 {sample_c_g_lb,"SAMPLE_C_G_L"},
226 {gather4_c,"GATHER4_C"},
227 {gather4_c_o,"OP_GATHER4_C_O"},
228 {unknown, "ERROR"}
229 };
230
is_gather(Opcode op)231 bool TexInstr::is_gather(Opcode op)
232 {
233 return op == gather4 || op == gather4_c ||
234 op == gather4_o || op == gather4_c_o;
235 }
236
op_from_string(const std::string & s)237 TexInstr::Opcode TexInstr::op_from_string(const std::string& s)
238 {
239 for (auto& [op, str] : s_opcode_map) {
240 if (s == str)
241 return op;
242 }
243 return unknown;
244 }
245
from_string(std::istream & is,ValueFactory & value_fctory)246 Instr::Pointer TexInstr::from_string(std::istream& is, ValueFactory& value_fctory)
247 {
248 string opstr;
249 string deststr;
250 is >> opstr >> deststr;
251
252 auto opcode = TexInstr::op_from_string(opstr);
253
254 RegisterVec4::Swizzle dest_swz;
255
256 auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group);
257
258 char dummy;
259 is >> dummy;
260 assert(dummy == ':');
261
262 string srcstr;
263 is >> srcstr;
264
265 auto src = value_fctory.src_vec4_from_string(srcstr);
266
267 string res_id_str;
268 string sampler_id_str;
269
270 is >> res_id_str >> sampler_id_str;
271
272 int res_id = int_from_string_with_prefix(res_id_str, "RID:");
273 int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:");
274
275 auto tex = new TexInstr( opcode, dest, dest_swz, src, sampler_id, res_id, nullptr);
276
277 while (!is.eof() && is.good()) {
278 std::string next_token;
279 is >> next_token;
280
281 if (next_token.empty())
282 break;
283
284 if (next_token[0] == 'U' || next_token[0] == 'N') {
285 tex->read_tex_coord_normalitazion(next_token);
286 } else {
287 tex->set_tex_param(next_token);
288 }
289 }
290
291 return tex;
292 }
293
read_tex_coord_normalitazion(const std::string & flags)294 void TexInstr::read_tex_coord_normalitazion(const std::string& flags)
295 {
296 assert(flags.length() == 4);
297 if (flags[0] == 'U') set_tex_flag(x_unnormalized);
298 if (flags[1] == 'U') set_tex_flag(y_unnormalized);
299 if (flags[2] == 'U') set_tex_flag(z_unnormalized);
300 if (flags[3] == 'U') set_tex_flag(w_unnormalized);
301 }
302
set_tex_param(const std::string & token)303 void TexInstr::set_tex_param(const std::string& token)
304 {
305 if (token.substr(0,3) == "OX:")
306 set_offset(0, int_from_string_with_prefix(token, "OX:"));
307 else if (token.substr(0,3) == "OY:")
308 set_offset(1, int_from_string_with_prefix(token, "OY:"));
309 else if (token.substr(0,3) == "OZ:")
310 set_offset(2, int_from_string_with_prefix(token, "OZ:"));
311 else if (token.substr(0,5) == "MODE:")
312 set_inst_mode(int_from_string_with_prefix(token, "MODE:"));
313 else if (token.substr(0,3) == "SO:")
314 set_sampler_offset(VirtualValue::from_string(token.substr(3)));
315 else {
316 std::cerr << "Token '" << token << "': ";
317 unreachable("Unknown token in tex param");
318 }
319 }
320
from_nir(nir_tex_instr * tex,Shader & shader)321 bool TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
322 {
323 Inputs src(*tex, shader.value_factory());
324
325 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
326 switch (tex->op) {
327 case nir_texop_txs:
328 return emit_tex_txs(tex, src, {0,1,2,3}, shader);
329 case nir_texop_txf:
330 return emit_buf_txf(tex, src, shader);
331 default:
332 return false;
333 }
334 } else {
335 switch (tex->op) {
336 case nir_texop_tex:
337 return emit_tex_tex(tex, src, shader);
338 case nir_texop_txf:
339 return emit_tex_txf(tex, src, shader);
340 case nir_texop_txb:
341 case nir_texop_txl:
342 return emit_tex_txl_txb(tex, src, shader);
343 case nir_texop_txs:
344 return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
345 case nir_texop_lod:
346 return emit_tex_lod(tex, src, shader);
347 case nir_texop_query_levels:
348 return emit_tex_txs(tex, src, {3,7,7,7}, shader);
349 case nir_texop_txd:
350 return emit_tex_txd(tex, src, shader);
351 case nir_texop_txf_ms:
352 if (shader.chip_class() < ISA_CC_EVERGREEN)
353 return emit_tex_tex_ms_direct(tex, src, shader);
354 else
355 return emit_tex_tex_ms(tex, src, shader);
356 case nir_texop_tg4:
357 return emit_tex_tg4(tex, src, shader);
358 case nir_texop_texture_samples:
359 return emit_tex_texture_samples(tex, src, shader);
360 default:
361 return false;
362 }
363 }
364 return true;
365 }
366
/* Result of get_sampler_id() */
struct SamplerId
{
   int id;        /* sampler index or variable binding */
   bool indirect; /* the emitters assert that this is false */
};
371
372 SamplerId
get_sampler_id(int sampler_id,const nir_variable * deref)373 get_sampler_id(int sampler_id, const nir_variable *deref)
374 {
375 SamplerId result = {sampler_id, false};
376
377 if (deref) {
378 assert(glsl_type_is_sampler(deref->type));
379 result.id = deref->data.binding;
380 }
381 return result;
382 }
383
384
emit_tex_tex(nir_tex_instr * tex,Inputs & src,Shader & shader)385 bool TexInstr::emit_tex_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
386 {
387 auto& vf = shader.value_factory();
388
389 sfn_log << SfnLog::instr << "emit '"
390 << *reinterpret_cast<nir_instr*>(tex)
391 << "' (" << __func__ << ")\n";
392
393 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
394 assert(!sampler.indirect);
395
396 auto src_coord = prepare_source(tex, src, shader);
397 auto dst = vf.dest_vec4(tex->dest, pin_group);
398
399 auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
400 sampler.id + R600_MAX_CONST_BUFFERS,
401 src.sampler_offset);
402 if (tex->is_array)
403 irt->set_tex_flag(TexInstr::z_unnormalized);
404
405 irt->set_rect_coordinate_flags(tex);
406 irt->set_coord_offsets(src.offset);
407
408 shader.emit_instruction(irt);
409 return true;
410 }
411
emit_tex_txl_txb(nir_tex_instr * tex,Inputs & src,Shader & shader)412 bool TexInstr::emit_tex_txl_txb(nir_tex_instr *tex, Inputs& src, Shader& shader)
413 {
414 auto& vf = shader.value_factory();
415
416 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
417 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
418
419 auto src_coord = prepare_source(tex, src, shader);
420
421 auto dst = vf.dest_vec4(tex->dest, pin_group);
422
423 auto irt = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
424 sampler.id + R600_MAX_CONST_BUFFERS,
425 src.sampler_offset);
426
427 if (tex->is_array)
428 irt->set_tex_flag(TexInstr::z_unnormalized);
429
430 irt->set_rect_coordinate_flags(tex);
431 irt->set_coord_offsets(src.offset);
432
433 shader.emit_instruction(irt);
434 return true;
435 }
436
437
emit_tex_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)438 bool TexInstr::emit_tex_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
439 {
440 auto& vf = shader.value_factory();
441
442 int sampler = tex->sampler_index;
443
444 auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
445 swizzle[3] = 3;
446
447 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
448 swizzle[2] = 1;
449 swizzle[1] = 7;
450 }
451
452 auto src_coord = vf.temp_vec4(pin_group, swizzle);
453
454 for (unsigned i = 0; i < tex->coord_components; i++) {
455 unsigned k = i;
456 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
457 k = 2;
458
459
460 if (src.offset) {
461 shader.emit_instruction(new AluInstr(op2_add_int, src_coord[k], src.coord[i],
462 vf.src(src.offset[i], i),
463 AluInstr::write));
464 } else {
465 shader.emit_instruction(new AluInstr(op1_mov, src_coord[k], src.coord[i],AluInstr::write));
466 }
467 }
468
469 shader.emit_instruction(new AluInstr(op1_mov, src_coord[3], src.lod, AluInstr::last_write));
470
471 auto dst = vf.dest_vec4(tex->dest, pin_group);
472
473 auto tex_ir = new TexInstr(src.opcode, dst, {0, 1, 2, 3}, src_coord,
474 sampler,
475 sampler + R600_MAX_CONST_BUFFERS,
476 src.sampler_offset);
477
478 if (tex->is_array)
479 tex_ir->set_tex_flag(z_unnormalized);
480
481 tex_ir->set_rect_coordinate_flags(tex);
482 tex_ir->set_sampler_offset(src.sampler_offset);
483
484 shader.emit_instruction(tex_ir);
485
486 return true;
487 }
488
emit_buf_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)489 bool TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
490 {
491 auto& vf = shader.value_factory();
492 auto dst = vf.dest_vec4(tex->dest, pin_group);
493
494 PRegister tex_offset = nullptr;
495 if (src.texture_offset)
496 tex_offset = shader.emit_load_to_register(src.texture_offset);
497
498 auto *real_dst = &dst;
499 RegisterVec4 tmp = vf.temp_vec4(pin_group);
500
501 if (shader.chip_class() < ISA_CC_EVERGREEN) {
502 real_dst = &tmp;
503 }
504
505 auto ir = new LoadFromBuffer(*real_dst, {0,1,2,3}, src.coord[0], 0,
506 tex->texture_index + R600_MAX_CONST_BUFFERS,
507 tex_offset, fmt_invalid);
508 ir->set_fetch_flag(FetchInstr::use_const_field);
509 shader.emit_instruction(ir);
510 shader.set_flag(Shader::sh_uses_tex_buffer);
511
512 if (shader.chip_class() < ISA_CC_EVERGREEN) {
513 auto tmp_w = vf.temp_register();
514 int buf_sel = (512 + R600_BUFFER_INFO_OFFSET / 16) + 2 * tex->texture_index;
515 AluInstr *ir = nullptr;
516 for (int i = 0; i < 4; ++i) {
517 auto d = i < 3 ? dst[i] : tmp_w;
518 ir = new AluInstr(op2_and_int, d, tmp[i],
519 vf.uniform(buf_sel, i, R600_BUFFER_INFO_CONST_BUFFER),
520 AluInstr::write);
521 shader.emit_instruction(ir);
522 }
523
524 ir->set_alu_flag(alu_last_instr);
525 shader.emit_instruction(new AluInstr(op2_or_int, dst[3], tmp_w,
526 vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER),
527 AluInstr::last_write));
528 }
529
530 return true;
531 }
532
emit_tex_tex_ms_direct(nir_tex_instr * tex,Inputs & src,Shader & shader)533 bool TexInstr::emit_tex_tex_ms_direct(nir_tex_instr *tex, Inputs& src, Shader& shader)
534 {
535 assert(tex->src[0].src.is_ssa);
536 auto& vf = shader.value_factory();
537
538 r600::sfn_log << SfnLog::instr << "emit '"
539 << *reinterpret_cast<nir_instr*>(tex)
540 << "' (" << __func__ << ")\n";
541
542 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
543 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
544
545 auto temp2 = vf.temp_vec4(pin_group);
546
547 for (unsigned i = 0; i < tex->coord_components; ++i) {
548 unsigned k = i;
549 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
550 k = 2;
551
552 shader.emit_instruction(new AluInstr(op1_mov, temp2[k],
553 src.coord[k], AluInstr::write));
554 }
555
556 shader.emit_instruction(new AluInstr(op1_mov, temp2[3], src.ms_index,
557 AluInstr::last_write));
558
559 auto dst = vf.dest_vec4(tex->dest, pin_group);
560
561 /* txf doesn't need rounding for the array index, but 1D has the array index
562 * in the z component */
563 auto tex_ir = new TexInstr(ld, dst, {0,1,2,3}, temp2,
564 sampler.id,
565 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
566
567 shader.emit_instruction(tex_ir);
568 return true;
569 }
570
emit_tex_tex_ms(nir_tex_instr * tex,Inputs & src,Shader & shader)571 bool TexInstr::emit_tex_tex_ms(nir_tex_instr *tex, Inputs& src, Shader& shader)
572 {
573 assert(tex->src[0].src.is_ssa);
574 auto& vf = shader.value_factory();
575
576 r600::sfn_log << SfnLog::instr << "emit '"
577 << *reinterpret_cast<nir_instr*>(tex)
578 << "' (" << __func__ << ")\n";
579
580 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
581 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
582
583 auto sample_id_dest = vf.temp_vec4(pin_group);
584 RegisterVec4::Swizzle dest_swz = {0,7,7,7};
585
586 auto temp1 = vf.temp_vec4(pin_group);
587 for (unsigned i = 0; i < tex->coord_components; ++i) {
588 unsigned k = i;
589 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
590 k = 2;
591
592 if (src.offset && i < src.offset->ssa->num_components)
593 shader.emit_instruction(new AluInstr(op2_add_int, temp1[k],
594 src.coord[i],
595 vf.src(*src.offset, i),
596 AluInstr::write));
597 else
598 shader.emit_instruction(new AluInstr(op1_mov, temp1[k],
599 src.coord[i], AluInstr::write));
600 }
601
602 shader.emit_instruction(new AluInstr(op1_mov, temp1[3],
603 src.ms_index, AluInstr::last_write));
604
605 auto tex_sample_id_ir = new TexInstr(ld, sample_id_dest, dest_swz, temp1,
606 sampler.id,
607 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
608
609 tex_sample_id_ir->set_tex_flag(x_unnormalized);
610 tex_sample_id_ir->set_tex_flag(y_unnormalized);
611 tex_sample_id_ir->set_tex_flag(z_unnormalized);
612 tex_sample_id_ir->set_tex_flag(w_unnormalized);
613 tex_sample_id_ir->set_inst_mode(1);
614
615 shader.emit_instruction(tex_sample_id_ir);
616
617 Register *sample_id_dest_reg = sample_id_dest[0];
618
619 if (!src.ms_index->as_inline_const() ||
620 src.ms_index->as_inline_const()->sel() != ALU_SRC_0) {
621
622 auto help = vf.temp_register();
623
624 shader.emit_instruction(new AluInstr(op2_lshl_int, help,
625 src.ms_index, vf.literal(2),
626 AluInstr::last_write));
627
628 sample_id_dest_reg = vf.temp_register();
629 shader.emit_instruction(new AluInstr(op2_lshr_int, sample_id_dest_reg,
630 sample_id_dest[0], help,
631 AluInstr::last_write));
632 }
633
634 auto temp2 = vf.temp_vec4(pin_group);
635
636 for (unsigned i = 0; i < tex->coord_components; ++i) {
637 unsigned k = i;
638 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
639 k = 2;
640
641 shader.emit_instruction(new AluInstr(op1_mov, temp2[k],
642 temp1[k], AluInstr::write));
643 }
644
645 shader.emit_instruction(new AluInstr(op2_and_int, temp2[3],
646 sample_id_dest_reg, vf.literal(15),
647 AluInstr::last_write));
648
649 auto dst = vf.dest_vec4(tex->dest, pin_group);
650
651 /* txf doesn't need rounding for the array index, but 1D has the array index
652 * in the z component */
653 auto tex_ir = new TexInstr(ld, dst, {0,1,2,3}, temp2,
654 sampler.id,
655 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
656
657 shader.emit_instruction(tex_ir);
658 return true;
659 }
660
emit_tex_texture_samples(nir_tex_instr * instr,Inputs & src,Shader & shader)661 bool TexInstr::emit_tex_texture_samples(nir_tex_instr* instr, Inputs& src, Shader& shader)
662 {
663 RegisterVec4 dest = shader.value_factory().dest_vec4(instr->dest, pin_chan);
664 RegisterVec4 help{0, true, {4,4,4,4}};
665
666 int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
667
668 auto ir = new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help,
669 0, res_id, src.sampler_offset);
670 shader.emit_instruction(ir);
671 return true;
672 }
673
674
emit_tex_txd(nir_tex_instr * tex,Inputs & src,Shader & shader)675 bool TexInstr::emit_tex_txd(nir_tex_instr *tex, Inputs& src, Shader& shader)
676 {
677
678 auto& vf = shader.value_factory();
679
680 r600::sfn_log << SfnLog::instr << "emit '"
681 << *reinterpret_cast<nir_instr*>(tex)
682 << "' (" << __func__ << ")\n";
683
684 auto dst = vf.dest_vec4(tex->dest, pin_group);
685 RegisterVec4 empty_dst(0, false, {0,0,0,0}, pin_group);
686
687 auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
688
689 if (tex->is_shadow)
690 swizzle[3] = 3;
691
692 unsigned array_coord = 2;
693 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
694 swizzle[2] = 1;
695 swizzle[1] = 7;
696 array_coord = 1;
697 }
698
699 auto src_coord = vf.temp_vec4(pin_group, swizzle);
700
701 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
702 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
703
704 auto irgh = new TexInstr(set_gradient_h, empty_dst, {7,7,7,7}, src.ddx,
705 sampler.id,
706 sampler.id + R600_MAX_CONST_BUFFERS,
707 src.sampler_offset);
708
709 auto irgv = new TexInstr(set_gradient_v, empty_dst, {7,7,7,7}, src.ddy,
710 sampler.id, sampler.id + R600_MAX_CONST_BUFFERS,
711 src.sampler_offset);
712
713 auto tir = new TexInstr(src.opcode, dst, {0,1,2,3}, src_coord, sampler.id,
714 sampler.id + R600_MAX_CONST_BUFFERS,
715 src.sampler_offset);
716
717
718 /* r600_bytecode_add_tex has a hack that will start a new tex CF if
719 * set_gradient_h is emitted, so make sure it is emitted first */
720
721 AluInstr *ir = nullptr;
722 for (unsigned i = 0; i < tex->coord_components; ++i) {
723 int k = i;
724 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D && i == 1)
725 k = 2;
726
727 ir = new AluInstr(tex->is_array && i == array_coord ? op1_rndne : op1_mov,
728 src_coord[k], src.coord[i],
729 AluInstr::write);
730 shader.emit_instruction(ir);
731 }
732
733 if (tex->is_shadow) {
734 ir = new AluInstr(op1_mov, src_coord[3], src.comperator, AluInstr::last_write);
735 shader.emit_instruction(ir);
736 }
737
738 tir->add_prepare_instr(irgh);
739 tir->add_prepare_instr(irgv);
740
741 if (tex->is_array)
742 tir->set_tex_flag(TexInstr::z_unnormalized);
743
744 irgh->set_rect_coordinate_flags(tex);
745 irgv->set_rect_coordinate_flags(tex);
746 irgh->set_always_keep();
747 irgv->set_always_keep();
748
749 tir->set_rect_coordinate_flags(tex);
750
751 tir->set_coord_offsets(src.offset);
752
753 if (shader.last_txd())
754 tir->add_required_instr(shader.last_txd());
755
756 shader.emit_instruction(tir);
757 shader.set_last_txd(tir);
758
759 return true;
760 }
761
emit_tex_txs(nir_tex_instr * tex,Inputs & src,RegisterVec4::Swizzle dest_swz,Shader & shader)762 bool TexInstr::emit_tex_txs(nir_tex_instr *tex, Inputs& src,
763 RegisterVec4::Swizzle dest_swz, Shader& shader)
764 {
765 auto& vf = shader.value_factory();
766
767 auto dest = vf.dest_vec4(tex->dest, pin_group);
768
769 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
770 if (shader.chip_class() >= ISA_CC_EVERGREEN) {
771 shader.emit_instruction(new QueryBufferSizeInstr(dest, {0,7,7,7},
772 tex->sampler_index + R600_MAX_CONST_BUFFERS));
773 } else {
774 int id = 2 * tex->sampler_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1;
775 auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER);
776 shader.emit_instruction(new AluInstr(op1_mov, dest[0], src, AluInstr::last_write));
777 shader.set_flag(Shader::sh_uses_tex_buffer);
778 }
779 } else {
780
781 auto src_lod = vf.temp_register();
782 shader.emit_instruction(new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
783
784 RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
785
786 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
787 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
788
789 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
790 dest_swz[2] = 7;
791
792 auto ir = new TexInstr(get_resinfo, dest, dest_swz, src_coord,
793 sampler.id,
794 sampler.id + R600_MAX_CONST_BUFFERS,
795 src.sampler_offset);
796
797 ir->set_dest_swizzle(dest_swz);
798 shader.emit_instruction(ir);
799
800 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
801 auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
802 sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER);
803
804
805 auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
806 shader.emit_instruction(alu);
807 shader.set_flag(Shader::sh_txs_cube_array_comp);
808 }
809 }
810
811 return true;
812 }
813
emit_tex_tg4(nir_tex_instr * tex,Inputs & src,Shader & shader)814 bool TexInstr::emit_tex_tg4(nir_tex_instr* tex, Inputs& src , Shader& shader)
815 {
816 auto& vf = shader.value_factory();
817
818 r600::sfn_log << SfnLog::instr << "emit '"
819 << *reinterpret_cast<nir_instr*>(tex)
820 << "' (" << __func__ << ")\n";
821
822 TexInstr *set_ofs = nullptr;
823
824 auto src_coord = prepare_source(tex, src, shader);
825
826 r600::sfn_log << SfnLog::instr << "emit '"
827 << *reinterpret_cast<nir_instr*>(tex)
828 << "' (" << __func__ << ")\n";
829
830 auto dst = vf.dest_vec4(tex->dest, pin_group);
831
832 RegisterVec4 empty_dst(125, false, {7,7,7,7}, pin_group);
833
834 /* pre CAYMAN needs swizzle */
835 auto dest_swizzle = shader.chip_class() <= ISA_CC_EVERGREEN ?
836 RegisterVec4::Swizzle{1, 2, 0, 3} :
837 RegisterVec4::Swizzle{0, 1, 2, 3};
838
839 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
840 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
841
842 bool literal_offset = false;
843 if (src.offset) {
844 literal_offset = nir_src_as_const_value(*src.offset) != 0;
845 r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
846 (literal_offset ? "literal" : "varying") <<
847 "\n";
848
849 if (!literal_offset) {
850 RegisterVec4::Swizzle swizzle = {4,4,4,4};
851 int src_components = tex->coord_components;
852 if (tex->is_array)
853 --src_components;
854
855 for (int i = 0; i < src_components; ++i)
856 swizzle[i] = i;
857
858 int noffsets = tex->coord_components;
859 if (tex->is_array)
860 --noffsets;
861
862 auto ofs = vf.src_vec4(*src.offset, pin_group, swizzle);
863 RegisterVec4 dummy(0, true, {7,7,7,7});
864
865 set_ofs = new TexInstr(TexInstr::set_offsets, dummy, {7,7,7,7},
866 ofs, sampler.id,
867 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
868 } else {
869 src.opcode = src.opcode == gather4_o ? gather4 : gather4_c;
870 }
871 }
872
873 auto irt = new TexInstr(src.opcode, dst, dest_swizzle, src_coord, sampler.id,
874 sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
875
876 irt->set_gather_comp(tex->component);
877
878 if (tex->is_array)
879 irt->set_tex_flag(z_unnormalized);
880
881 if (literal_offset) {
882 r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
883 irt->set_coord_offsets(src.offset);
884 }
885
886 irt->set_rect_coordinate_flags(tex);
887
888 if (set_ofs) {
889 set_ofs->set_always_keep();
890 irt->add_prepare_instr(set_ofs);
891 }
892
893 shader.emit_instruction(irt);
894 return true;
895 }
896
prepare_source(nir_tex_instr * tex,const Inputs & inputs,Shader & shader)897 auto TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader) -> RegisterVec4
898 {
899 RegisterVec4::Swizzle target{7,7,7,7};
900 PVirtualValue src[4]{nullptr,nullptr,nullptr,nullptr};
901
902
903 for (unsigned i = 0; i < tex->coord_components; ++i) {
904 target[i] = i;
905 src[i] = inputs.coord[i];
906 }
907
908 // array index always goes into z
909 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
910 target[2] = 1;
911 target[1] = 7;
912 src[2] = inputs.coord[1];
913 }
914
915 /* With txl and txb shadow goes into z and lod or bias go into w */
916 if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
917 target[3] = 3;
918 src[3] = tex->op == nir_texop_txl ? inputs.lod : inputs.bias;
919 if (tex->is_shadow){
920 target[2] = 2;
921 src[2] = inputs.comperator;
922 }
923 } else if (tex->is_shadow) {
924 /* Other ops have shadow in w */
925 target[3] = 3;
926 src[3] = inputs.comperator;
927 }
928
929 auto src_coord = shader.value_factory().temp_vec4(pin_group, target);
930
931 AluInstr *ir = nullptr;
932 for (int i = 0; i < 4; ++i) {
933 if (target[i] > 3)
934 continue;
935
936 auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov;
937
938 ir = new AluInstr(op, src_coord[i], src[i], AluInstr::write);
939 shader.emit_instruction(ir);
940 }
941
942 if (ir)
943 ir->set_alu_flag(alu_last_instr);
944
945 return src_coord;
946 }
947
Inputs(const nir_tex_instr & instr,ValueFactory & vf)948 TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
949 sampler_deref(nullptr),
950 texture_deref(nullptr),
951 bias(nullptr),
952 comperator(nullptr),
953 lod(nullptr),
954 offset(nullptr),
955 gather_comp(nullptr),
956 ms_index(nullptr),
957 sampler_offset(nullptr),
958 texture_offset(nullptr),
959 opcode(ld)
960 {
961 //sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
962
963 unsigned grad_components = instr.coord_components;
964 if (instr.is_array && !instr.array_is_lowered_cube)
965 --grad_components;
966
967 for (unsigned i = 0; i < instr.num_srcs; ++i) {
968 switch (instr.src[i].src_type) {
969 case nir_tex_src_bias:
970 bias = vf.src(instr.src[i], 0);
971 break;
972
973 case nir_tex_src_coord: {
974 coord = vf.src_vec4(instr.src[i].src, pin_none, swizzle_from_ncomps(instr.coord_components));
975 } break;
976 case nir_tex_src_comparator:
977 comperator = vf.src(instr.src[i], 0);
978 break;
979 case nir_tex_src_ddx:
980 ddx = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components));
981 break;
982 case nir_tex_src_ddy:
983 ddy = vf.src_vec4(instr.src[i].src, pin_group, swizzle_from_ncomps(grad_components));
984 break;
985 case nir_tex_src_lod:
986 lod = vf.src(instr.src[i].src, 0);
987 break;
988 case nir_tex_src_offset:
989 offset = &instr.src[i].src;
990 break;
991 /* case nir_tex_src_sampler_deref:
992 sampler_deref = get_deref_location(instr.src[i].src);
993 break;
994 case nir_tex_src_texture_deref:
995 texture_deref = get_deref_location(instr.src[i].src);
996 break;
997 */
998 case nir_tex_src_ms_index:
999 ms_index = vf.src(instr.src[i], 0);
1000 break;
1001 case nir_tex_src_texture_offset:
1002 texture_offset = vf.src(instr.src[i], 0);
1003 break;
1004 case nir_tex_src_sampler_offset:
1005 sampler_offset = vf.src(instr.src[i], 0);
1006 break;
1007 case nir_tex_src_plane:
1008 case nir_tex_src_projector:
1009 case nir_tex_src_min_lod:
1010 default:
1011 unreachable("unsupported texture input type");
1012 }
1013 }
1014
1015 opcode = get_opcode(instr);
1016
1017
1018 }
1019
get_opcode(const nir_tex_instr & instr)1020 auto TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode
1021 {
1022 switch (instr.op) {
1023 case nir_texop_tex:
1024 return instr.is_shadow ? sample_c : sample;
1025 case nir_texop_txf:
1026 return ld;
1027 case nir_texop_txb:
1028 return instr.is_shadow ? sample_c_lb : sample_lb;
1029 case nir_texop_txl:
1030 return instr.is_shadow ? sample_c_l : sample_l;
1031 case nir_texop_txs:
1032 return get_resinfo;
1033 case nir_texop_lod:
1034 return get_resinfo;
1035 case nir_texop_txd:
1036 return instr.is_shadow ? sample_c_g : sample_g;
1037 case nir_texop_tg4:
1038 return instr.is_shadow ?
1039 (offset ? gather4_c_o : gather4_c) :
1040 (offset ? gather4_o : gather4);
1041
1042 case nir_texop_txf_ms:
1043 return ld;
1044 case nir_texop_query_levels:
1045 return get_resinfo;
1046 case nir_texop_texture_samples:
1047 return TexInstr::get_nsamples;
1048 default:
1049 unreachable("unsupported texture input opcode");
1050 }
1051 }
1052
emit_tex_lod(nir_tex_instr * tex,Inputs & src,Shader & shader)1053 bool TexInstr::emit_tex_lod(nir_tex_instr* tex, Inputs& src, Shader& shader)
1054 {
1055 auto& vf = shader.value_factory();
1056 auto sampler = get_sampler_id(tex->sampler_index, src.sampler_deref);
1057 assert(!sampler.indirect && "Indirect sampler selection not yet supported");
1058
1059 auto dst = shader.value_factory().dest_vec4(tex->dest, pin_group);
1060
1061 auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
1062
1063 auto src_coord = vf.temp_vec4(pin_group, swizzle);
1064
1065 AluInstr *ir = nullptr;
1066 for (unsigned i = 0; i < tex->coord_components; ++i) {
1067 ir = new AluInstr(op1_mov,
1068 src_coord[i], src.coord[i],
1069 AluInstr::write);
1070 shader.emit_instruction(ir);
1071 }
1072 if (ir)
1073 ir->set_alu_flag(alu_last_instr);
1074
1075 auto irt = new TexInstr(TexInstr::get_tex_lod, dst, {1,0,7,7}, src_coord,
1076 sampler.id, sampler.id + R600_MAX_CONST_BUFFERS);
1077
1078 shader.emit_instruction(irt);
1079 return true;
1080 }
1081
1082
swizzle_from_ncomps(int comps) const1083 RegisterVec4::Swizzle TexInstr::Inputs::swizzle_from_ncomps(int comps) const
1084 {
1085 RegisterVec4::Swizzle swz;
1086 for (int i = 0; i < 4; ++i)
1087 swz[i] = i < comps ? i : 7;
1088 return swz;
1089 }
1090
set_coord_offsets(nir_src * offset)1091 void TexInstr::set_coord_offsets(nir_src *offset)
1092 {
1093 if (!offset)
1094 return;
1095
1096 assert(offset->is_ssa);
1097 auto literal = nir_src_as_const_value(*offset);
1098 assert(literal);
1099
1100 for (int i = 0; i < offset->ssa->num_components; ++i)
1101 set_offset(i, literal[i].i32);
1102 }
1103
set_rect_coordinate_flags(nir_tex_instr * instr)1104 void TexInstr::set_rect_coordinate_flags(nir_tex_instr* instr)
1105 {
1106 if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1107 set_tex_flag(x_unnormalized);
1108 set_tex_flag(y_unnormalized);
1109 }
1110 }
1111
1112
1113 }
1114