1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31
32 namespace r600 {
33
FragmentShaderFromNir(const nir_shader & nir,r600_shader & sh,r600_pipe_shader_selector & sel,const r600_shader_key & key,enum chip_class chip_class)34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35 r600_shader& sh,
36 r600_pipe_shader_selector &sel,
37 const r600_shader_key &key,
38 enum chip_class chip_class):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
40 m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41 m_max_counted_color_exports(0),
42 m_two_sided_color(key.ps.color_two_side),
43 m_last_pixel_export(nullptr),
44 m_nir(nir),
45 m_reserved_registers(0),
46 m_frag_pos_index(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
49 m_depth_exports(0),
50 m_apply_sample_mask(key.ps.apply_sample_id_mask),
51 m_dual_source_blend(key.ps.dual_source_blend),
52 m_pos_input(nullptr)
53 {
54 for (auto& i: m_interpolator) {
55 i.enabled = false;
56 i.ij_index= 0;
57 }
58
59 sh_info().rat_base = key.ps.nr_cbufs;
60 sh_info().atomic_base = key.ps.first_atomic_counter;
61 }
62
barycentric_ij_index(nir_intrinsic_instr * instr)63 unsigned barycentric_ij_index(nir_intrinsic_instr *instr)
64 {
65 unsigned index = 0;
66 switch (instr->intrinsic) {
67 case nir_intrinsic_load_barycentric_sample:
68 index = 0;
69 break;
70 case nir_intrinsic_load_barycentric_at_sample:
71 case nir_intrinsic_load_barycentric_at_offset:
72 case nir_intrinsic_load_barycentric_pixel:
73 index = 1;
74 break;
75 case nir_intrinsic_load_barycentric_centroid:
76 index = 2;
77 break;
78 default:
79 unreachable("Unknown interpolator intrinsic");
80 }
81
82 switch (nir_intrinsic_interp_mode(instr)) {
83 case INTERP_MODE_NONE:
84 case INTERP_MODE_SMOOTH:
85 case INTERP_MODE_COLOR:
86 return index;
87 case INTERP_MODE_NOPERSPECTIVE:
88 return index + 3;
89 case INTERP_MODE_FLAT:
90 case INTERP_MODE_EXPLICIT:
91 default:
92 unreachable("unknown/unsupported mode for load_interpolated");
93 }
94 return 0;
95 }
96
process_load_input(nir_intrinsic_instr * instr,bool interpolated)97 bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr,
98 bool interpolated)
99 {
100 sfn_log << SfnLog::io << "Parse " << instr->instr
101 << "\n";
102
103 auto index = nir_src_as_const_value(instr->src[interpolated ? 1 : 0]);
104 assert(index);
105
106 unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32;
107 auto semantic = r600_get_varying_semantic(location);
108 tgsi_semantic name = (tgsi_semantic)semantic.first;
109 unsigned sid = semantic.second;
110
111
112 if (location == VARYING_SLOT_POS) {
113 m_sv_values.set(es_pos);
114 m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
115 nir_intrinsic_component(instr),
116 nir_dest_num_components(instr->dest),
117 TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER);
118 m_shaderio.add_input(m_pos_input);
119 return true;
120 }
121
122 if (location == VARYING_SLOT_FACE) {
123 m_sv_values.set(es_face);
124 return true;
125 }
126
127
128 tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
129 tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
130
131 bool uses_interpol_at_centroid = false;
132
133 if (interpolated) {
134
135 glsl_interp_mode mode = INTERP_MODE_NONE;
136 auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
137 mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
138 switch (parent->intrinsic) {
139 case nir_intrinsic_load_barycentric_sample:
140 tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
141 break;
142 case nir_intrinsic_load_barycentric_at_sample:
143 case nir_intrinsic_load_barycentric_at_offset:
144 case nir_intrinsic_load_barycentric_pixel:
145 tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
146 break;
147 case nir_intrinsic_load_barycentric_centroid:
148 tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
149 uses_interpol_at_centroid = true;
150 break;
151 default:
152 std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
153 << nir_intrinsic_infos[instr->intrinsic].name
154 << " interpolator?\n";
155 assert(0);
156 }
157
158 switch (mode) {
159 case INTERP_MODE_NONE:
160 if (name == TGSI_SEMANTIC_COLOR) {
161 tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
162 break;
163 }
164 FALLTHROUGH;
165 case INTERP_MODE_SMOOTH:
166 tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
167 break;
168 case INTERP_MODE_NOPERSPECTIVE:
169 tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
170 break;
171 case INTERP_MODE_FLAT:
172 break;
173 case INTERP_MODE_COLOR:
174 tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
175 break;
176 case INTERP_MODE_EXPLICIT:
177 default:
178 assert(0);
179 }
180
181 m_interpolators_used.set(barycentric_ij_index(parent));
182
183 }
184
185 switch (name) {
186 case TGSI_SEMANTIC_COLOR: {
187 auto input = m_shaderio.find_varying(name, sid);
188 if (!input) {
189 m_shaderio.add_input(new ShaderInputColor(name, sid,
190 nir_intrinsic_base(instr) + index->u32,
191 nir_intrinsic_component(instr),
192 nir_dest_num_components(instr->dest),
193 tgsi_interpolate, tgsi_loc));
194 } else {
195 if (uses_interpol_at_centroid)
196 input->set_uses_interpolate_at_centroid();
197
198 auto varying = static_cast<ShaderInputVarying&>(*input);
199 varying.update_mask(nir_dest_num_components(instr->dest),
200 nir_intrinsic_component(instr));
201 }
202
203 m_need_back_color = m_two_sided_color;
204 return true;
205 }
206 case TGSI_SEMANTIC_PRIMID:
207 sh_info().gs_prim_id_input = true;
208 sh_info().ps_prim_id_input = m_shaderio.inputs().size();
209 FALLTHROUGH;
210 case TGSI_SEMANTIC_FOG:
211 case TGSI_SEMANTIC_GENERIC:
212 case TGSI_SEMANTIC_TEXCOORD:
213 case TGSI_SEMANTIC_LAYER:
214 case TGSI_SEMANTIC_PCOORD:
215 case TGSI_SEMANTIC_VIEWPORT_INDEX:
216 case TGSI_SEMANTIC_CLIPDIST: {
217 auto input = m_shaderio.find_varying(name, sid);
218 if (!input) {
219 m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
220 nir_intrinsic_component(instr),
221 nir_dest_num_components(instr->dest),
222 tgsi_interpolate, tgsi_loc));
223 } else {
224 if (uses_interpol_at_centroid)
225 input->set_uses_interpolate_at_centroid();
226
227 auto varying = static_cast<ShaderInputVarying&>(*input);
228 varying.update_mask(nir_dest_num_components(instr->dest),
229 nir_intrinsic_component(instr));
230 }
231
232 return true;
233 }
234 default:
235 return false;
236 }
237 }
238
239
scan_sysvalue_access(nir_instr * instr)240 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
241 {
242 switch (instr->type) {
243 case nir_instr_type_intrinsic: {
244 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
245
246 switch (ii->intrinsic) {
247 case nir_intrinsic_load_front_face:
248 m_sv_values.set(es_face);
249 break;
250 case nir_intrinsic_load_sample_mask_in:
251 m_sv_values.set(es_sample_mask_in);
252 break;
253 case nir_intrinsic_load_sample_pos:
254 m_sv_values.set(es_sample_pos);
255 FALLTHROUGH;
256 case nir_intrinsic_load_sample_id:
257 m_sv_values.set(es_sample_id);
258 break;
259 case nir_intrinsic_load_helper_invocation:
260 m_sv_values.set(es_helper_invocation);
261 sh_info().uses_helper_invocation = true;
262 break;
263 case nir_intrinsic_load_input:
264 return process_load_input(ii, false);
265 case nir_intrinsic_load_interpolated_input: {
266 return process_load_input(ii, true);
267 }
268 case nir_intrinsic_store_output:
269 return process_store_output(ii);
270
271 default:
272 ;
273 }
274 }
275 default:
276 ;
277 }
278 return true;
279 }
280
do_allocate_reserved_registers()281 bool FragmentShaderFromNir::do_allocate_reserved_registers()
282 {
283 assert(!m_reserved_registers);
284
285 int face_reg_index = -1;
286 int sample_id_index = -1;
287 // enabled interpolators based on inputs
288 for (unsigned i = 0; i < s_max_interpolators; ++i) {
289 if (m_interpolators_used.test(i)) {
290 sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
291 m_interpolator[i].enabled = true;
292 }
293 }
294
295 // sort the varying inputs
296 m_shaderio.sort_varying_inputs();
297
298 // handle interpolators
299 int num_baryc = 0;
300 for (int i = 0; i < 6; ++i) {
301 if (m_interpolator[i].enabled) {
302 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
303
304 m_interpolator[i].ij_index = num_baryc;
305
306 unsigned sel = num_baryc / 2;
307 unsigned chan = 2 * (num_baryc % 2);
308
309 auto ip_i = new GPRValue(sel, chan + 1);
310 ip_i->set_as_input();
311 m_interpolator[i].i.reset(ip_i);
312 inject_register(sel, chan + 1, m_interpolator[i].i, false);
313
314 auto ip_j = new GPRValue(sel, chan);
315 ip_j->set_as_input();
316 m_interpolator[i].j.reset(ip_j);
317 inject_register(sel, chan, m_interpolator[i].j, false);
318
319 ++num_baryc;
320 }
321 }
322 m_reserved_registers += (num_baryc + 1) >> 1;
323
324 if (m_sv_values.test(es_pos)) {
325 m_frag_pos_index = m_reserved_registers++;
326 assert(m_pos_input);
327 m_pos_input->set_gpr(m_frag_pos_index);
328 }
329
330 // handle system values
331 if (m_sv_values.test(es_face) || m_need_back_color) {
332 face_reg_index = m_reserved_registers++;
333 m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0);
334 m_front_face_reg->set_as_input();
335 sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
336 inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false);
337
338 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
339 load_front_face();
340 }
341
342 if (m_sv_values.test(es_sample_mask_in)) {
343 if (face_reg_index < 0)
344 face_reg_index = m_reserved_registers++;
345
346 m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2);
347 m_sample_mask_reg->set_as_input();
348 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
349 sh_info().nsys_inputs = 1;
350 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
351 }
352
353 if (m_sv_values.test(es_sample_id) ||
354 m_sv_values.test(es_sample_mask_in)) {
355 if (sample_id_index < 0)
356 sample_id_index = m_reserved_registers++;
357
358 m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
359 m_sample_id_reg->set_as_input();
360 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
361 sh_info().nsys_inputs++;
362 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
363 }
364
365 // The back color handling is not emmited in the code, so we have
366 // to add the inputs here and later we also need to inject the code to set
367 // the right color
368 if (m_need_back_color) {
369 size_t ninputs = m_shaderio.inputs().size();
370 for (size_t k = 0; k < ninputs; ++k) {
371 ShaderInput& i = m_shaderio.input(k);
372
373 if (i.name() != TGSI_SEMANTIC_COLOR)
374 continue;
375
376 ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
377
378 size_t next_pos = m_shaderio.size();
379 auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
380 m_shaderio.add_input(bcol);
381 col.set_back_color(next_pos);
382 }
383 m_shaderio.set_two_sided();
384 }
385
386 m_shaderio.update_lds_pos();
387
388 set_reserved_registers(m_reserved_registers);
389
390 return true;
391 }
392
emit_shader_start()393 void FragmentShaderFromNir::emit_shader_start()
394 {
395 if (m_sv_values.test(es_face))
396 load_front_face();
397
398 if (m_sv_values.test(es_pos)) {
399 for (int i = 0; i < 4; ++i) {
400 auto v = new GPRValue(m_frag_pos_index, i);
401 v->set_as_input();
402 auto reg = PValue(v);
403 m_frag_pos[i] = reg;
404 }
405 }
406
407 if (m_sv_values.test(es_helper_invocation)) {
408 m_helper_invocation = get_temp_register();
409 auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
410 emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
411 GPRVector dst({dummy, dummy, dummy, dummy});
412 std::array<int,4> swz = {7,7,7,7};
413 dst.set_reg_i(m_helper_invocation->chan(), m_helper_invocation);
414 swz[m_helper_invocation->chan()] = 4;
415
416 auto vtx = new FetchInstruction(dst, m_helper_invocation,
417 R600_BUFFER_INFO_CONST_BUFFER, bim_none);
418 vtx->set_flag(vtx_vpm);
419 vtx->set_flag(vtx_use_tc);
420 vtx->set_dest_swizzle(swz);
421 emit_instruction(vtx);
422 }
423 }
424
process_store_output(nir_intrinsic_instr * instr)425 bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
426 {
427
428 auto semantic = nir_intrinsic_io_semantics(instr);
429 unsigned driver_loc = nir_intrinsic_base(instr);
430
431 if (sh_info().noutput <= driver_loc)
432 sh_info().noutput = driver_loc + 1;
433
434 r600_shader_io& io = sh_info().output[driver_loc];
435 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(semantic.location),
436 &io.name, &io.sid);
437
438 unsigned component = nir_intrinsic_component(instr);
439 io.write_mask |= nir_intrinsic_write_mask(instr) << component;
440
441 if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
442 sh_info().fs_write_all = true;
443 }
444
445 if (semantic.location == FRAG_RESULT_COLOR ||
446 (semantic.location >= FRAG_RESULT_DATA0 &&
447 semantic.location <= FRAG_RESULT_DATA7)) {
448 ++m_max_counted_color_exports;
449
450 /* Hack: force dual source output handling if one color output has a
451 * dual_source_blend_index > 0 */
452 if (semantic.location == FRAG_RESULT_COLOR &&
453 semantic.dual_source_blend_index > 0)
454 m_dual_source_blend = true;
455
456 if (m_max_counted_color_exports > 1)
457 sh_info().fs_write_all = false;
458 return true;
459 }
460
461 if (semantic.location == FRAG_RESULT_DEPTH ||
462 semantic.location == FRAG_RESULT_STENCIL ||
463 semantic.location == FRAG_RESULT_SAMPLE_MASK) {
464 io.write_mask = 15;
465 return true;
466 }
467
468 return false;
469
470
471 }
472
emit_load_sample_mask_in(nir_intrinsic_instr * instr)473 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
474 {
475 auto dest = from_nir(instr->dest, 0);
476 assert(m_sample_id_reg);
477 assert(m_sample_mask_reg);
478
479 emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
480 emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
481 return true;
482 }
483
emit_intrinsic_instruction_override(nir_intrinsic_instr * instr)484 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
485 {
486 switch (instr->intrinsic) {
487 case nir_intrinsic_load_sample_mask_in:
488 if (m_apply_sample_mask) {
489 return emit_load_sample_mask_in(instr);
490 } else
491 return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
492 case nir_intrinsic_load_sample_id:
493 return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
494 case nir_intrinsic_load_front_face:
495 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
496 case nir_intrinsic_load_sample_pos:
497 return emit_load_sample_pos(instr);
498 case nir_intrinsic_load_helper_invocation:
499 return load_preloaded_value(instr->dest, 0, m_helper_invocation);
500 case nir_intrinsic_load_input:
501 return emit_load_input(instr);
502 case nir_intrinsic_load_barycentric_sample:
503 case nir_intrinsic_load_barycentric_pixel:
504 case nir_intrinsic_load_barycentric_centroid: {
505 unsigned ij = barycentric_ij_index(instr);
506 return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
507 load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
508 }
509 case nir_intrinsic_load_barycentric_at_offset:
510 return load_barycentric_at_offset(instr);
511 case nir_intrinsic_load_barycentric_at_sample:
512 return load_barycentric_at_sample(instr);
513
514 case nir_intrinsic_load_interpolated_input: {
515 return emit_load_interpolated_input(instr);
516 }
517 case nir_intrinsic_store_output:
518 return emit_store_output(instr);
519
520 default:
521 return false;
522 }
523 }
524
emit_store_output(nir_intrinsic_instr * instr)525 bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
526 {
527 auto location = nir_intrinsic_io_semantics(instr).location;
528
529 if (location == FRAG_RESULT_COLOR)
530 return emit_export_pixel(instr, m_dual_source_blend ? 1 : m_max_color_exports);
531
532 if ((location >= FRAG_RESULT_DATA0 &&
533 location <= FRAG_RESULT_DATA7) ||
534 location == FRAG_RESULT_DEPTH ||
535 location == FRAG_RESULT_STENCIL ||
536 location == FRAG_RESULT_SAMPLE_MASK)
537 return emit_export_pixel(instr, 1);
538
539 sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n";
540 return false;
541
542 }
543
emit_load_interpolated_input(nir_intrinsic_instr * instr)544 bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
545 {
546 unsigned loc = nir_intrinsic_io_semantics(instr).location;
547 switch (loc) {
548 case VARYING_SLOT_POS:
549 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
550 load_preloaded_value(instr->dest, i, m_frag_pos[i]);
551 }
552 return true;
553 case VARYING_SLOT_FACE:
554 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
555 default:
556 ;
557 }
558
559 auto param = nir_src_as_const_value(instr->src[1]);
560 assert(param && "Indirect PS inputs not (yet) supported");
561
562 auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
563 auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
564
565 io.set_gpr(dst.sel());
566
567 Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
568
569
570 if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
571 nir_intrinsic_component(instr)))
572 return false;
573
574 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
575
576 auto & color_input = static_cast<ShaderInputColor&> (io);
577 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
578
579 GPRVector bgcol = get_temp_vec4();
580 bgio.set_gpr(bgcol.sel());
581 load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
582
583 load_front_face();
584
585 AluInstruction *ir = nullptr;
586 for (unsigned i = 0; i < 4 ; ++i) {
587 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
588 emit_instruction(ir);
589 }
590 if (ir)
591 ir->set_flag(alu_last_instr);
592 }
593
594
595 AluInstruction *ir = nullptr;
596 if (nir_intrinsic_component(instr) != 0) {
597 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
598 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
599 emit_instruction(ir);
600 }
601 if (ir)
602 ir->set_flag(alu_last_instr);
603 }
604
605 return true;
606 }
607
load_barycentric_at_offset(nir_intrinsic_instr * instr)608 bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
609 {
610 auto interpolator = m_interpolator[barycentric_ij_index(instr)];
611 PValue dummy(new GPRValue(interpolator.i->sel(), 0));
612
613 GPRVector help = get_temp_vec4();
614 GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
615
616 auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
617 getgradh->set_dest_swizzle({0,1,7,7});
618 getgradh->set_flag(TexInstruction::x_unnormalized);
619 getgradh->set_flag(TexInstruction::y_unnormalized);
620 getgradh->set_flag(TexInstruction::z_unnormalized);
621 getgradh->set_flag(TexInstruction::w_unnormalized);
622 getgradh->set_flag(TexInstruction::grad_fine);
623 emit_instruction(getgradh);
624
625 auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
626 getgradv->set_dest_swizzle({7,7,0,1});
627 getgradv->set_flag(TexInstruction::x_unnormalized);
628 getgradv->set_flag(TexInstruction::y_unnormalized);
629 getgradv->set_flag(TexInstruction::z_unnormalized);
630 getgradv->set_flag(TexInstruction::w_unnormalized);
631 getgradv->set_flag(TexInstruction::grad_fine);
632 emit_instruction(getgradv);
633
634 PValue ofs_x = from_nir(instr->src[0], 0);
635 PValue ofs_y = from_nir(instr->src[0], 1);
636 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
637 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
638 emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
639 emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
640
641 return true;
642 }
643
load_barycentric_at_sample(nir_intrinsic_instr * instr)644 bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
645 {
646 GPRVector slope = get_temp_vec4();
647
648 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
649 from_nir_with_fetch_constant(instr->src[0], 0),
650 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
651 fetch->set_flag(vtx_srf_mode);
652 emit_instruction(fetch);
653
654 GPRVector grad = get_temp_vec4();
655
656 auto interpolator = m_interpolator[barycentric_ij_index(instr)];
657 assert(interpolator.enabled);
658 PValue dummy(new GPRValue(interpolator.i->sel(), 0));
659
660 GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
661
662 auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
663 tex->set_flag(TexInstruction::grad_fine);
664 tex->set_flag(TexInstruction::x_unnormalized);
665 tex->set_flag(TexInstruction::y_unnormalized);
666 tex->set_flag(TexInstruction::z_unnormalized);
667 tex->set_flag(TexInstruction::w_unnormalized);
668 tex->set_dest_swizzle({0,1,7,7});
669 emit_instruction(tex);
670
671 tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
672 tex->set_flag(TexInstruction::x_unnormalized);
673 tex->set_flag(TexInstruction::y_unnormalized);
674 tex->set_flag(TexInstruction::z_unnormalized);
675 tex->set_flag(TexInstruction::w_unnormalized);
676 tex->set_flag(TexInstruction::grad_fine);
677 tex->set_dest_swizzle({7,7,0,1});
678 emit_instruction(tex);
679
680 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
681 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
682
683 emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write}));
684 emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr}));
685
686 return true;
687 }
688
emit_load_input(nir_intrinsic_instr * instr)689 bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr)
690 {
691 unsigned loc = nir_intrinsic_io_semantics(instr).location;
692 auto param = nir_src_as_const_value(instr->src[0]);
693 assert(param && "Indirect PS inputs not (yet) supported");
694
695 auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
696
697 assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
698
699 unsigned num_components = nir_dest_num_components(instr->dest);
700
701 switch (loc) {
702 case VARYING_SLOT_POS:
703 for (unsigned i = 0; i < num_components; ++i) {
704 load_preloaded_value(instr->dest, i, m_frag_pos[i]);
705 }
706 return true;
707 case VARYING_SLOT_FACE:
708 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
709 default:
710 ;
711 }
712
713 auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
714
715 AluInstruction *ir = nullptr;
716 for (unsigned i = 0; i < 4 ; ++i) {
717 ir = new AluInstruction(op1_interp_load_p0, dst[i],
718 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE +
719 io.lds_pos(), i)),
720 EmitInstruction::write);
721 emit_instruction(ir);
722 }
723 ir->set_flag(alu_last_instr);
724
725 /* TODO: back color */
726 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
727 Interpolator ip = {false, 0, NULL, NULL};
728
729 auto & color_input = static_cast<ShaderInputColor&> (io);
730 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
731
732 GPRVector bgcol = get_temp_vec4();
733 bgio.set_gpr(bgcol.sel());
734 load_interpolated(bgcol, bgio, ip, num_components, 0);
735
736 load_front_face();
737
738 AluInstruction *ir = nullptr;
739 for (unsigned i = 0; i < 4 ; ++i) {
740 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
741 emit_instruction(ir);
742 }
743 if (ir)
744 ir->set_flag(alu_last_instr);
745 }
746
747 if (nir_intrinsic_component(instr) != 0) {
748 for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
749 ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
750 emit_instruction(ir);
751 }
752 if (ir)
753 ir->set_flag(alu_last_instr);
754 }
755
756
757 return true;
758 }
759
load_front_face()760 void FragmentShaderFromNir::load_front_face()
761 {
762 assert(m_front_face_reg);
763 if (m_front_face_loaded)
764 return;
765
766 auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
767 Value::zero, {alu_write, alu_last_instr});
768 m_front_face_loaded = true;
769 emit_instruction(ir);
770 }
771
emit_load_sample_pos(nir_intrinsic_instr * instr)772 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
773 {
774 GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
775 auto fetch = new FetchInstruction(vc_fetch,
776 no_index_offset,
777 fmt_32_32_32_32_float,
778 vtx_nf_scaled,
779 vtx_es_none,
780 m_sample_id_reg,
781 dest,
782 0,
783 false,
784 0xf,
785 R600_BUFFER_INFO_CONST_BUFFER,
786 0,
787 bim_none,
788 false,
789 false,
790 0,
791 0,
792 0,
793 PValue(),
794 {0,1,2,3});
795 fetch->set_flag(vtx_srf_mode);
796 emit_instruction(fetch);
797 return true;
798 }
799
load_interpolated(GPRVector & dest,ShaderInput & io,const Interpolator & ip,int num_components,int start_comp)800 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
801 ShaderInput& io, const Interpolator &ip,
802 int num_components, int start_comp)
803 {
804 // replace io with ShaderInputVarying
805 if (io.interpolate() > 0) {
806
807 sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i << ")" << "\n";
808
809 if (num_components == 1) {
810 switch (start_comp) {
811 case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
812 case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
813 case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
814 case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
815 default:
816 assert(0);
817 }
818 }
819
820 if (num_components == 2) {
821 switch (start_comp) {
822 case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
823 case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
824 case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
825 load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
826 default:
827 assert(0);
828 }
829 }
830
831 if (num_components == 3 && start_comp == 0)
832 return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
833 load_interpolated_one_comp(dest, io, ip, op2_interp_z);
834
835 int full_write_mask = ((1 << num_components) - 1) << start_comp;
836
837 bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
838 success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
839 return success;
840
841 } else {
842 AluInstruction *ir = nullptr;
843 for (unsigned i = 0; i < 4 ; ++i) {
844 ir = new AluInstruction(op1_interp_load_p0, dest[i],
845 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
846 EmitInstruction::write);
847 emit_instruction(ir);
848 }
849 ir->set_flag(alu_last_instr);
850 }
851 return true;
852 }
853
load_interpolated_one_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op)854 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
855 ShaderInput& io, const Interpolator& ip, EAluOp op)
856 {
857 for (unsigned i = 0; i < 2 ; ++i) {
858 int chan = i;
859 if (op == op2_interp_z)
860 chan += 2;
861
862
863 auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
864 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
865 i == 0 ? EmitInstruction::write : EmitInstruction::last);
866 dest.pin_to_channel(chan);
867
868 ir->set_bank_swizzle(alu_vec_210);
869 emit_instruction(ir);
870 }
871 return true;
872 }
873
load_interpolated_two_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,int writemask)874 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
875 const Interpolator& ip, EAluOp op, int writemask)
876 {
877 AluInstruction *ir = nullptr;
878 assert(ip.j);
879 assert(ip.i);
880 for (unsigned i = 0; i < 4 ; ++i) {
881 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
882 (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
883 dest.pin_to_channel(i);
884 ir->set_bank_swizzle(alu_vec_210);
885 emit_instruction(ir);
886 }
887 ir->set_flag(alu_last_instr);
888 return true;
889 }
890
load_interpolated_two_comp_for_one(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,UNUSED int start,int comp)891 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
892 ShaderInput& io, const Interpolator& ip,
893 EAluOp op, UNUSED int start, int comp)
894 {
895 AluInstruction *ir = nullptr;
896 for (int i = 0; i < 4 ; ++i) {
897 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
898 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
899 i == comp ? EmitInstruction::write : EmitInstruction::empty);
900 ir->set_bank_swizzle(alu_vec_210);
901 dest.pin_to_channel(i);
902 emit_instruction(ir);
903 }
904 ir->set_flag(alu_last_instr);
905 return true;
906 }
907
908
emit_export_pixel(nir_intrinsic_instr * instr,int outputs)909 bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs)
910 {
911 std::array<uint32_t,4> swizzle;
912 unsigned writemask = nir_intrinsic_write_mask(instr);
913 auto semantics = nir_intrinsic_io_semantics(instr);
914 unsigned driver_location = nir_intrinsic_base(instr);
915
916 switch (semantics.location) {
917 case FRAG_RESULT_DEPTH:
918 writemask = 1;
919 swizzle = {0,7,7,7};
920 break;
921 case FRAG_RESULT_STENCIL:
922 writemask = 2;
923 swizzle = {7,0,7,7};
924 break;
925 case FRAG_RESULT_SAMPLE_MASK:
926 writemask = 4;
927 swizzle = {7,7,0,7};
928 break;
929 default:
930 for (int i = 0; i < 4; ++i) {
931 swizzle[i] = (i < instr->num_components) ? i : 7;
932 }
933 }
934
935 auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle);
936
937 set_output(driver_location, value.sel());
938
939 if (semantics.location == FRAG_RESULT_COLOR ||
940 (semantics.location >= FRAG_RESULT_DATA0 &&
941 semantics.location <= FRAG_RESULT_DATA7)) {
942 for (int k = 0 ; k < outputs; ++k) {
943
944 unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
945 ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
946
947 sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
948
949 if (location >= m_max_color_exports) {
950 sfn_log << SfnLog::io << "Pixel output loc:" << location
951 << " dl:" << driver_location
952 << " skipped because we have only " << m_max_color_exports << " CBs\n";
953 continue;
954 }
955
956 m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
957
958 if (sh_info().ps_export_highest < location)
959 sh_info().ps_export_highest = location;
960
961 sh_info().nr_ps_color_exports++;
962
963 unsigned mask = (0xfu << (location * 4));
964 sh_info().ps_color_export_mask |= mask;
965
966 emit_export_instruction(m_last_pixel_export);
967 };
968 } else if (semantics.location == FRAG_RESULT_DEPTH ||
969 semantics.location == FRAG_RESULT_STENCIL ||
970 semantics.location == FRAG_RESULT_SAMPLE_MASK) {
971 m_depth_exports++;
972 emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
973 } else {
974 return false;
975 }
976 return true;
977 }
978
979
emit_export_pixel(const nir_variable * out_var,nir_intrinsic_instr * instr,int outputs)980 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
981 {
982 std::array<uint32_t,4> swizzle;
983 unsigned writemask = nir_intrinsic_write_mask(instr);
984 switch (out_var->data.location) {
985 case FRAG_RESULT_DEPTH:
986 writemask = 1;
987 swizzle = {0,7,7,7};
988 break;
989 case FRAG_RESULT_STENCIL:
990 writemask = 2;
991 swizzle = {7,0,7,7};
992 break;
993 case FRAG_RESULT_SAMPLE_MASK:
994 writemask = 4;
995 swizzle = {7,7,0,7};
996 break;
997 default:
998 for (int i = 0; i < 4; ++i) {
999 swizzle[i] = (i < instr->num_components) ? i : 7;
1000 }
1001 }
1002
1003 auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
1004
1005 set_output(out_var->data.driver_location, value.sel());
1006
1007 if (out_var->data.location == FRAG_RESULT_COLOR ||
1008 (out_var->data.location >= FRAG_RESULT_DATA0 &&
1009 out_var->data.location <= FRAG_RESULT_DATA7)) {
1010 for (int k = 0 ; k < outputs; ++k) {
1011
1012 unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR)
1013 ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;
1014
1015 sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";
1016
1017 if (location >= m_max_color_exports) {
1018 sfn_log << SfnLog::io << "Pixel output loc:" << location
1019 << " dl:" << out_var->data.location
1020 << " skipped because we have only " << m_max_color_exports << " CBs\n";
1021 continue;
1022 }
1023
1024 m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
1025
1026 if (sh_info().ps_export_highest < location)
1027 sh_info().ps_export_highest = location;
1028
1029 sh_info().nr_ps_color_exports++;
1030
1031 unsigned mask = (0xfu << (location * 4));
1032 sh_info().ps_color_export_mask |= mask;
1033
1034 emit_export_instruction(m_last_pixel_export);
1035 };
1036 } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
1037 out_var->data.location == FRAG_RESULT_STENCIL ||
1038 out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
1039 m_depth_exports++;
1040 emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
1041 } else {
1042 return false;
1043 }
1044 return true;
1045 }
1046
do_finalize()1047 void FragmentShaderFromNir::do_finalize()
1048 {
1049 // update shader io info and set LDS etc.
1050 sh_info().ninput = m_shaderio.inputs().size();
1051
1052 sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
1053 for (size_t i = 0; i < sh_info().ninput; ++i) {
1054 ShaderInput& input = m_shaderio.input(i);
1055 int ij_idx = (input.ij_index() < 6 &&
1056 input.ij_index() >= 0) ? input.ij_index() : 0;
1057 input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
1058 }
1059
1060 sh_info().two_side = m_shaderio.two_sided();
1061 sh_info().nlds = m_shaderio.nlds();
1062
1063 sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
1064
1065 if (sh_info().fs_write_all) {
1066 sh_info().nr_ps_max_color_exports = m_max_color_exports;
1067 }
1068
1069 if (!m_last_pixel_export) {
1070 GPRVector v(0, {7,7,7,7});
1071 m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
1072 sh_info().nr_ps_color_exports++;
1073 sh_info().ps_color_export_mask = 0xf;
1074 emit_export_instruction(m_last_pixel_export);
1075 }
1076
1077 m_last_pixel_export->set_last();
1078
1079 if (sh_info().fs_write_all)
1080 sh_info().nr_ps_max_color_exports = 8;
1081 }
1082
1083 }
1084