1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_debug.h"
29 #include "sfn_shader_fs.h"
30
31 #include "sfn_instr_alugroup.h"
32 #include "sfn_instr_tex.h"
33 #include "sfn_instr_fetch.h"
34 #include "sfn_instr_export.h"
35
36 #include "tgsi/tgsi_from_mesa.h"
37
38 #include <sstream>
39
40 namespace r600 {
41
42 using std::string;
43
FragmentShader(const r600_shader_key & key)44 FragmentShader::FragmentShader(const r600_shader_key& key):
45 Shader("FS"),
46 m_dual_source_blend(key.ps.dual_source_blend),
47 m_max_color_exports(MAX2(key.ps.nr_cbufs, 1)),
48 m_export_highest(0),
49 m_num_color_exports(0),
50 m_color_export_mask(0),
51 m_depth_exports(0),
52 m_last_pixel_export(nullptr),
53 m_pos_input(127, false),
54 m_fs_write_all(false),
55 m_apply_sample_mask(key.ps.apply_sample_id_mask),
56 m_rat_base(key.ps.nr_cbufs)
57 {
58 }
59
do_get_shader_info(r600_shader * sh_info)60 void FragmentShader::do_get_shader_info(r600_shader *sh_info)
61 {
62 sh_info->processor_type = PIPE_SHADER_FRAGMENT;
63
64 sh_info->ps_color_export_mask = m_color_export_mask;
65 sh_info->ps_export_highest = m_export_highest;
66 sh_info->nr_ps_color_exports = m_num_color_exports;
67
68 sh_info->fs_write_all = m_fs_write_all;
69
70 sh_info->rat_base = m_rat_base;
71 sh_info->uses_kill = m_uses_discard;
72 sh_info->gs_prim_id_input = m_gs_prim_id_input;
73 sh_info->ps_prim_id_input = m_ps_prim_id_input &&
74 chip_class() >= ISA_CC_EVERGREEN;
75 sh_info->nsys_inputs = m_nsys_inputs;
76 sh_info->uses_helper_invocation = m_helper_invocation != nullptr;
77 }
78
79
load_input(nir_intrinsic_instr * intr)80 bool FragmentShader::load_input(nir_intrinsic_instr *intr)
81 {
82 auto& vf = value_factory();
83
84 auto location = nir_intrinsic_io_semantics(intr).location;
85 if (location == VARYING_SLOT_POS) {
86 AluInstr *ir = nullptr;
87 for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
88 ir = new AluInstr(op1_mov,
89 vf.dest(intr->dest, i, pin_none),
90 m_pos_input[i],
91 AluInstr::write);
92 emit_instruction(ir);
93 }
94 ir->set_alu_flag(alu_last_instr);
95 return true;
96 }
97
98 if (location == VARYING_SLOT_FACE) {
99 auto ir = new AluInstr(op2_setgt_dx10,
100 vf.dest(intr->dest, 0, pin_none),
101 m_face_input,
102 vf.inline_const(ALU_SRC_0, 0),
103 AluInstr::last_write);
104 emit_instruction(ir);
105 return true;
106 }
107
108 return load_input_hw(intr);
109 }
110
store_output(nir_intrinsic_instr * intr)111 bool FragmentShader::store_output(nir_intrinsic_instr *intr)
112 {
113 auto location = nir_intrinsic_io_semantics(intr).location;
114
115 if (location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
116 m_fs_write_all = true;
117 }
118
119 return emit_export_pixel(*intr);
120 }
121
122 unsigned
barycentric_ij_index(nir_intrinsic_instr * intr)123 barycentric_ij_index(nir_intrinsic_instr *intr)
124 {
125 unsigned index = 0;
126 switch (intr->intrinsic) {
127 case nir_intrinsic_load_barycentric_sample:
128 index = 0;
129 break;
130 case nir_intrinsic_load_barycentric_at_sample:
131 case nir_intrinsic_load_barycentric_at_offset:
132 case nir_intrinsic_load_barycentric_pixel:
133 index = 1;
134 break;
135 case nir_intrinsic_load_barycentric_centroid:
136 index = 2;
137 break;
138 default:
139 unreachable("Unknown interpolator intrinsic");
140 }
141
142 switch (nir_intrinsic_interp_mode(intr)) {
143 case INTERP_MODE_NONE:
144 case INTERP_MODE_SMOOTH:
145 case INTERP_MODE_COLOR:
146 return index;
147 case INTERP_MODE_NOPERSPECTIVE:
148 return index + 3;
149 case INTERP_MODE_FLAT:
150 case INTERP_MODE_EXPLICIT:
151 default:
152 unreachable("unknown/unsupported mode for load_interpolated");
153 }
154 return 0;
155 }
156
process_stage_intrinsic(nir_intrinsic_instr * intr)157 bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
158 {
159 if (process_stage_intrinsic_hw(intr))
160 return true;
161
162 switch (intr->intrinsic) {
163 case nir_intrinsic_load_input:
164 return load_input(intr);
165 case nir_intrinsic_load_interpolated_input:
166 return load_interpolated_input(intr);
167 case nir_intrinsic_discard_if:
168 m_uses_discard = true;
169 emit_instruction(new AluInstr(op2_killne_int, nullptr,
170 value_factory().src(intr->src[0], 0),
171 value_factory().zero(),
172 {AluInstr::last}));
173 start_new_block(0);
174 return true;
175 case nir_intrinsic_discard:
176 m_uses_discard = true;
177 emit_instruction(new AluInstr(op2_kille_int, nullptr,
178 value_factory().zero(),
179 value_factory().zero(),
180 {AluInstr::last}));
181 start_new_block(0);
182 return true;
183 case nir_intrinsic_load_sample_mask_in:
184 if (m_apply_sample_mask) {
185 return emit_load_sample_mask_in(intr);
186 } else
187 return emit_simple_mov(intr->dest, 0, m_sample_mask_reg);
188 case nir_intrinsic_load_sample_id:
189 return emit_simple_mov(intr->dest, 0, m_sample_id_reg);
190 case nir_intrinsic_load_helper_invocation:
191 return emit_load_helper_invocation(intr);
192 case nir_intrinsic_load_sample_pos:
193 return emit_load_sample_pos(intr);
194 default:
195 return false;
196 }
197 }
198
load_interpolated_input(nir_intrinsic_instr * intr)199 bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
200 {
201 auto& vf = value_factory();
202 unsigned loc = nir_intrinsic_io_semantics(intr).location;
203 switch (loc) {
204 case VARYING_SLOT_POS:
205 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
206 vf.inject_value(intr->dest, i, m_pos_input[i]);
207 return true;
208 case VARYING_SLOT_FACE:
209 return false;
210 default:
211 ;
212 }
213
214 return load_interpolated_input_hw(intr);
215 }
216
217
do_allocate_reserved_registers()218 int FragmentShader::do_allocate_reserved_registers()
219 {
220 int next_register = allocate_interpolators_or_inputs();
221
222 if (m_sv_values.test(es_pos)) {
223 set_input_gpr(m_pos_driver_loc, next_register);
224 m_pos_input = value_factory().allocate_pinned_vec4(next_register++, false);
225 for (int i = 0; i < 4; ++i)
226 m_pos_input[i]->pin_live_range(true);
227
228 }
229
230 int face_reg_index = -1;
231 if (m_sv_values.test(es_face)) {
232 set_input_gpr(m_face_driver_loc, next_register);
233 face_reg_index = next_register++;
234 m_face_input = value_factory().allocate_pinned_register(face_reg_index, 0);
235 m_face_input->pin_live_range(true);
236 }
237
238 if (m_sv_values.test(es_sample_mask_in)) {
239 if (face_reg_index < 0)
240 face_reg_index = next_register++;
241 m_sample_mask_reg = value_factory().allocate_pinned_register(face_reg_index, 2);
242 m_sample_mask_reg->pin_live_range(true);
243 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
244 m_nsys_inputs = 1;
245 ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEMASK);
246 input.set_gpr(face_reg_index);
247 add_input(input);
248 }
249
250 if (m_sv_values.test(es_sample_id) ||
251 m_sv_values.test(es_sample_mask_in)) {
252 int sample_id_reg = next_register++;
253 m_sample_id_reg = value_factory().allocate_pinned_register(sample_id_reg, 3);
254 m_sample_id_reg->pin_live_range(true);
255 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
256 m_nsys_inputs++;
257 ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEID);
258 input.set_gpr(sample_id_reg);
259 add_input(input);
260 }
261
262 if (m_sv_values.test(es_helper_invocation)) {
263 m_helper_invocation = value_factory().allocate_pinned_register(next_register++, 0);
264 }
265
266 return next_register;
267 }
268
do_scan_instruction(nir_instr * instr)269 bool FragmentShader::do_scan_instruction(nir_instr *instr)
270 {
271 if (instr->type != nir_instr_type_intrinsic)
272 return false;
273
274 auto intr = nir_instr_as_intrinsic(instr);
275 switch (intr->intrinsic) {
276 case nir_intrinsic_load_barycentric_pixel:
277 case nir_intrinsic_load_barycentric_sample:
278 case nir_intrinsic_load_barycentric_at_sample:
279 case nir_intrinsic_load_barycentric_at_offset:
280 case nir_intrinsic_load_barycentric_centroid:
281 m_interpolators_used.set(barycentric_ij_index(intr));
282 break;
283 case nir_intrinsic_load_front_face:
284 m_sv_values.set(es_face);
285 break;
286 case nir_intrinsic_load_sample_mask_in:
287 m_sv_values.set(es_sample_mask_in);
288 break;
289 case nir_intrinsic_load_sample_pos:
290 m_sv_values.set(es_sample_pos);
291 FALLTHROUGH;
292 case nir_intrinsic_load_sample_id:
293 m_sv_values.set(es_sample_id);
294 break;
295 case nir_intrinsic_load_helper_invocation:
296 m_sv_values.set(es_helper_invocation);
297 break;
298 case nir_intrinsic_load_input:
299 return scan_input(intr, 0);
300 case nir_intrinsic_load_interpolated_input:
301 return scan_input(intr, 1);
302 default:
303 return false;
304 }
305 return true;
306 }
307
/* Emit dest = (1 << sample_id) & sample_mask, i.e. the incoming sample mask
 * restricted to the bit selected by the sample id register. */
bool FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
{
   auto& vf = value_factory();
   auto result = vf.dest(instr->dest, 0, pin_free);
   auto sample_bit = vf.temp_register();
   assert(m_sample_id_reg);
   assert(m_sample_mask_reg);

   auto shift = new AluInstr(op2_lshl_int, sample_bit, vf.one_i(),
                             m_sample_id_reg, AluInstr::last_write);
   emit_instruction(shift);

   auto mask = new AluInstr(op2_and_int, result, sample_bit,
                            m_sample_mask_reg, AluInstr::last_write);
   emit_instruction(mask);
   return true;
}
320
/* Emit the sequence that yields the helper invocation flag.
 *
 * The reserved helper register is first set to the literal -1, then a
 * buffer fetch flagged with vpm/use_tc writes into the same register, and
 * finally the register is copied to the intrinsic's destination. The copy
 * is marked as requiring the fetch. */
bool FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr* instr)
{
   assert(m_helper_invocation);
   auto& vf = value_factory();

   auto preset = new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1),
                              AluInstr::last_write);
   emit_instruction(preset);

   RegisterVec4 fetch_dest{m_helper_invocation, nullptr, nullptr, nullptr, pin_group};
   auto fetch = new LoadFromBuffer(fetch_dest, {4,7,7,7}, m_helper_invocation, 0,
                                   R600_BUFFER_INFO_CONST_BUFFER, nullptr,
                                   fmt_32_32_32_32_float);
   fetch->set_fetch_flag(FetchInstr::vpm);
   fetch->set_fetch_flag(FetchInstr::use_tc);
   fetch->set_always_keep();

   auto result = vf.dest(instr->dest, 0, pin_free);
   auto copy = new AluInstr(op1_mov, result, m_helper_invocation,
                            AluInstr::last_write);
   copy->add_required_instr(fetch);

   emit_instruction(fetch);
   emit_instruction(copy);
   return true;
}
341
/* Register the shader input read by a load_input (index_src_id == 0) or a
 * load_interpolated_input (index_src_id == 1) intrinsic.
 *
 * intr->src[index_src_id] must be a constant offset. Position and face are
 * recorded as system values with their own driver locations; for the other
 * supported semantics a ShaderInput is added (or updated) together with the
 * interpolation mode and location derived from the barycentric parent
 * instruction.
 *
 * Returns false if the input semantic is not handled here. */
bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
{
   auto index = nir_src_as_const_value(intr->src[index_src_id]);
   assert(index);

   const unsigned location_offset = chip_class() < ISA_CC_EVERGREEN ? 32 : 0;

   const unsigned location = nir_intrinsic_io_semantics(intr).location + index->u32;
   const unsigned driver_location = nir_intrinsic_base(intr) + index->u32;

   auto semantic = r600_get_varying_semantic(location);
   const tgsi_semantic name = static_cast<tgsi_semantic>(semantic.first);
   const unsigned sid = semantic.second;

   if (location == VARYING_SLOT_POS) {
      m_sv_values.set(es_pos);
      m_pos_driver_loc = driver_location + location_offset;

      ShaderInput pos_input(m_pos_driver_loc, name);
      pos_input.set_sid(sid);
      pos_input.set_interpolator(TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER, false);
      add_input(pos_input);
      return true;
   }

   if (location == VARYING_SLOT_FACE) {
      m_sv_values.set(es_face);
      m_face_driver_loc = driver_location + location_offset;

      ShaderInput face_input(m_face_driver_loc, name);
      face_input.set_sid(sid);
      add_input(face_input);
      return true;
   }

   /* Defaults used for non-interpolated (load_input) reads. */
   tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
   tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
   bool uses_interpol_at_centroid = false;

   if (index_src_id > 0) {
      /* The first source of an interpolated load is the barycentric
       * intrinsic that defines where and how to interpolate. */
      auto parent = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
      const glsl_interp_mode mode =
         static_cast<glsl_interp_mode>(nir_intrinsic_interp_mode(parent));

      switch (parent->intrinsic) {
      case nir_intrinsic_load_barycentric_sample:
         tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
         break;
      case nir_intrinsic_load_barycentric_at_sample:
      case nir_intrinsic_load_barycentric_at_offset:
      case nir_intrinsic_load_barycentric_pixel:
         tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
         break;
      case nir_intrinsic_load_barycentric_centroid:
         tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
         uses_interpol_at_centroid = true;
         break;
      default:
         std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name
                   << " as parent of "
                   << nir_intrinsic_infos[intr->intrinsic].name
                   << " interpolator?\n";
         assert(0);
      }

      switch (mode) {
      case INTERP_MODE_NONE:
         /* Without an explicit mode, colors use color interpolation and
          * everything else is interpolated with perspective. */
         tgsi_interpolate = (name == TGSI_SEMANTIC_COLOR ||
                             name == TGSI_SEMANTIC_BCOLOR)
                               ? TGSI_INTERPOLATE_COLOR
                               : TGSI_INTERPOLATE_PERSPECTIVE;
         break;
      case INTERP_MODE_SMOOTH:
         tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
         break;
      case INTERP_MODE_NOPERSPECTIVE:
         tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
         break;
      case INTERP_MODE_FLAT:
         break;
      case INTERP_MODE_COLOR:
         tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
         break;
      case INTERP_MODE_EXPLICIT:
      default:
         assert(0);
      }
   }

   if (name == TGSI_SEMANTIC_PRIMID) {
      m_gs_prim_id_input = true;
      m_ps_prim_id_input = ninputs();
   }

   switch (name) {
   case TGSI_SEMANTIC_PRIMID:
   case TGSI_SEMANTIC_COLOR:
   case TGSI_SEMANTIC_BCOLOR:
   case TGSI_SEMANTIC_FOG:
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_LAYER:
   case TGSI_SEMANTIC_PCOORD:
   case TGSI_SEMANTIC_VIEWPORT_INDEX:
   case TGSI_SEMANTIC_CLIPDIST:
      break;
   default:
      return false;
   }

   sfn_log << SfnLog::io << " have IO at " << driver_location << "\n";
   auto iinput = find_input(driver_location);
   if (iinput != input_not_found()) {
      /* Input already known: only the centroid usage may need updating. */
      if (uses_interpol_at_centroid)
         iinput->second.set_uses_interpolate_at_centroid();
      return true;
   }

   ShaderInput input(driver_location, name);
   input.set_sid(sid);
   input.set_need_lds_pos();
   input.set_interpolator(tgsi_interpolate, tgsi_loc, uses_interpol_at_centroid);
   sfn_log << SfnLog::io << "add IO with LDS ID at " << input.location() << "\n";
   add_input(input);
   assert(find_input(input.location()) != input_not_found());
   return true;
}
462
emit_export_pixel(nir_intrinsic_instr & intr)463 bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr)
464 {
465 RegisterVec4::Swizzle swizzle;
466 auto semantics = nir_intrinsic_io_semantics(&intr);
467 unsigned driver_location = nir_intrinsic_base(&intr);
468 unsigned write_mask = nir_intrinsic_write_mask(&intr);
469
470 switch (semantics.location) {
471 case FRAG_RESULT_DEPTH:
472 swizzle = {0,7,7,7};
473 break;
474 case FRAG_RESULT_STENCIL:
475 swizzle = {7,0,7,7};
476 break;
477 case FRAG_RESULT_SAMPLE_MASK:
478 swizzle = {7,7,0,7};
479 break;
480 default:
481 for (int i = 0; i < 4; ++i) {
482 swizzle[i] = (1 << i) & write_mask ? i : 7;
483 }
484 }
485
486 auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle);
487
488 if (semantics.location == FRAG_RESULT_COLOR ||
489 (semantics.location >= FRAG_RESULT_DATA0 &&
490 semantics.location <= FRAG_RESULT_DATA7)) {
491
492 ShaderOutput output(driver_location, TGSI_SEMANTIC_COLOR, write_mask);
493 add_output(output);
494
495 unsigned color_outputs = m_fs_write_all && chip_class() >= ISA_CC_R700 ?
496 m_max_color_exports : 1;
497
498 for (unsigned k = 0; k < color_outputs; ++k) {
499
500 unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
501 ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
502
503 sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
504
505 if (location >= m_max_color_exports) {
506 sfn_log << SfnLog::io << "Pixel output loc:" << location
507 << " dl:" << driver_location
508 << " skipped because we have only " << m_max_color_exports << " CBs\n";
509 return true; ;
510 }
511
512 m_last_pixel_export = new ExportInstr(ExportInstr::pixel, location, value);
513
514 if (m_export_highest < location)
515 m_export_highest = location;
516
517 m_num_color_exports++;
518
519 /* Hack: force dual source output handling if one color output has a
520 * dual_source_blend_index > 0 */
521 if (semantics.location == FRAG_RESULT_COLOR &&
522 semantics.dual_source_blend_index > 0)
523 m_dual_source_blend = true;
524
525 if (m_num_color_exports > 1)
526 m_fs_write_all = false;
527 unsigned mask = (0xfu << (location * 4));
528 m_color_export_mask |= mask;
529
530 emit_instruction(m_last_pixel_export);
531 }
532 } else if (semantics.location == FRAG_RESULT_DEPTH ||
533 semantics.location == FRAG_RESULT_STENCIL ||
534 semantics.location == FRAG_RESULT_SAMPLE_MASK) {
535 m_depth_exports++;
536 emit_instruction(new ExportInstr(ExportInstr::pixel, 61, value));
537 int semantic = TGSI_SEMANTIC_POSITION;
538 if (semantics.location == FRAG_RESULT_STENCIL)
539 semantic = TGSI_SEMANTIC_STENCIL;
540 else if (semantics.location == FRAG_RESULT_SAMPLE_MASK)
541 semantic = TGSI_SEMANTIC_SAMPLEMASK;
542
543 ShaderOutput output(driver_location, semantic, write_mask);
544 add_output(output);
545
546 } else {
547 return false;
548 }
549 return true;
550 }
551
emit_load_sample_pos(nir_intrinsic_instr * instr)552 bool FragmentShader::emit_load_sample_pos(nir_intrinsic_instr* instr)
553 {
554 auto dest = value_factory().dest_vec4(instr->dest, pin_group);
555
556
557 auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, m_sample_id_reg, 0,
558 R600_BUFFER_INFO_CONST_BUFFER,
559 nullptr, fmt_32_32_32_32_float);
560 fetch->set_fetch_flag(FetchInstr::srf_mode);
561 emit_instruction(fetch);
562 return true;
563 }
564
do_finalize()565 void FragmentShader::do_finalize()
566 {
567 if (!m_last_pixel_export) {
568 RegisterVec4 value(0, false, {7,7,7,7});
569 m_last_pixel_export = new ExportInstr(ExportInstr::pixel, 0, value);
570 emit_instruction(m_last_pixel_export);
571 m_num_color_exports++;
572 m_color_export_mask |= 0xf;
573 }
574 m_last_pixel_export->set_is_last_export(true);
575 }
576
read_prop(std::istream & is)577 bool FragmentShader::read_prop(std::istream& is)
578 {
579 string value;
580 is >> value;
581
582 auto splitpos = value.find(':');
583 assert(splitpos != string::npos);
584
585 std::istringstream ival(value);
586 string name;
587 string val;
588
589 std::getline(ival, name, ':');
590
591 if (name == "MAX_COLOR_EXPORTS")
592 ival >> m_max_color_exports;
593 else if (name == "COLOR_EXPORTS")
594 ival >> m_num_color_exports;
595 else if (name == "COLOR_EXPORT_MASK")
596 ival >> m_color_export_mask;
597 else if (name == "WRITE_ALL_COLORS")
598 ival >> m_fs_write_all;
599 else
600 return false;
601 return true;
602 }
603
do_print_properties(std::ostream & os) const604 void FragmentShader::do_print_properties(std::ostream& os) const
605 {
606 os << "PROP MAX_COLOR_EXPORTS:" << m_max_color_exports << "\n";
607 os << "PROP COLOR_EXPORTS:" << m_num_color_exports << "\n";
608 os << "PROP COLOR_EXPORT_MASK:" << m_color_export_mask << "\n";
609 os << "PROP WRITE_ALL_COLORS:" << m_fs_write_all << "\n";
610 }
611
allocate_interpolators_or_inputs()612 int FragmentShaderR600::allocate_interpolators_or_inputs()
613 {
614 int pos = 0;
615 auto& vf = value_factory();
616 for (auto& [index, inp]: inputs()) {
617 if (inp.need_lds_pos()) {
618
619 RegisterVec4 input(vf.allocate_pinned_register(pos, 0),
620 vf.allocate_pinned_register(pos, 1),
621 vf.allocate_pinned_register(pos, 2),
622 vf.allocate_pinned_register(pos, 3), pin_fully);
623 inp.set_gpr(pos++);
624 for (int i = 0; i < 4; ++i) {
625 input[i]->pin_live_range(true);
626 }
627
628 sfn_log << SfnLog::io << "Reseve input register at pos " <<
629 index << " as " << input << " with register " << inp.gpr() << "\n";
630
631 m_interpolated_inputs[index] = input;
632 }
633 }
634 return pos;
635 }
636
load_input_hw(nir_intrinsic_instr * intr)637 bool FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr)
638 {
639 auto& vf = value_factory();
640 AluInstr *ir = nullptr;
641 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
642 sfn_log << SfnLog::io << "Inject register " << *m_interpolated_inputs[nir_intrinsic_base(intr)][i] << "\n";
643 unsigned index = nir_intrinsic_component(intr) + i;
644 assert (index < 4);
645 if (intr->dest.is_ssa) {
646 vf.inject_value(intr->dest, i, m_interpolated_inputs[nir_intrinsic_base(intr)][index]);
647 } else {
648 ir = new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_none),
649 m_interpolated_inputs[nir_intrinsic_base(intr)][index],
650 AluInstr::write);
651 emit_instruction(ir);
652 }
653 }
654 if (ir)
655 ir->set_alu_flag(alu_last_instr);
656 return true;
657 }
658
process_stage_intrinsic_hw(nir_intrinsic_instr * intr)659 bool FragmentShaderR600::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
660 {
661 switch (intr->intrinsic) {
662 case nir_intrinsic_load_barycentric_centroid:
663 case nir_intrinsic_load_barycentric_pixel:
664 case nir_intrinsic_load_barycentric_sample:
665 return true;
666 default:
667 return false;
668 }
669 }
670
load_interpolated_input_hw(nir_intrinsic_instr * intr)671 bool FragmentShaderR600::load_interpolated_input_hw(nir_intrinsic_instr *intr)
672 {
673 return load_input_hw(intr);
674 }
675
load_input_hw(nir_intrinsic_instr * intr)676 bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
677 {
678 auto& vf = value_factory();
679 auto io = input(nir_intrinsic_base(intr));
680 auto comp = nir_intrinsic_component(intr);
681
682 bool need_temp = comp > 0 || !intr->dest.is_ssa;
683 AluInstr *ir = nullptr;
684 for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
685 if (need_temp) {
686 auto tmp = vf.temp_register(comp + i);
687 ir = new AluInstr(op1_interp_load_p0,
688 tmp,
689 new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
690 AluInstr::last_write);
691 emit_instruction(ir);
692 emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
693 } else {
694
695 ir = new AluInstr(op1_interp_load_p0,
696 vf.dest(intr->dest, i, pin_chan),
697 new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
698 AluInstr::write);
699 emit_instruction(ir);
700 }
701
702 }
703 ir->set_alu_flag(alu_last_instr);
704 return true;
705 }
706
allocate_interpolators_or_inputs()707 int FragmentShaderEG::allocate_interpolators_or_inputs()
708 {
709 for (unsigned i = 0; i < s_max_interpolators; ++i) {
710 if (interpolators_used(i)) {
711 sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
712 m_interpolator[i].enabled = true;
713 }
714 }
715
716 int num_baryc = 0;
717 for (int i = 0; i < 6; ++i) {
718 if (m_interpolator[i].enabled) {
719 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
720 unsigned sel = num_baryc / 2;
721 unsigned chan = 2 * (num_baryc % 2);
722
723 m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
724 m_interpolator[i].i->pin_live_range(true, false);
725
726 m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
727 m_interpolator[i].j->pin_live_range(true, false);
728
729 m_interpolator[i].ij_index = num_baryc++;
730 }
731 }
732 return (num_baryc + 1) >> 1;
733 }
734
process_stage_intrinsic_hw(nir_intrinsic_instr * intr)735 bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
736 {
737 auto& vf = value_factory();
738 switch (intr->intrinsic) {
739 case nir_intrinsic_load_barycentric_centroid:
740 case nir_intrinsic_load_barycentric_pixel:
741 case nir_intrinsic_load_barycentric_sample: {
742 unsigned ij = barycentric_ij_index(intr);
743 vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
744 vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
745 return true;
746 }
747 case nir_intrinsic_load_barycentric_at_offset:
748 return load_barycentric_at_offset(intr);
749 case nir_intrinsic_load_barycentric_at_sample:
750 return load_barycentric_at_sample(intr);
751 default:
752 return false;
753 }
754 }
755
load_interpolated_input_hw(nir_intrinsic_instr * intr)756 bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
757 {
758 auto& vf = value_factory();
759 auto param = nir_src_as_const_value(intr->src[1]);
760 assert(param && "Indirect PS inputs not (yet) supported");
761
762 int dest_num_comp = nir_dest_num_components(intr->dest);
763 int start_comp = nir_intrinsic_component(intr);
764 bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
765
766 auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
767
768 InterpolateParams params;
769
770 params.i = vf.src(intr->src[0], 0);
771 params.j = vf.src(intr->src[0], 1);
772 params.base = input(nir_intrinsic_base(intr)).lds_pos();
773
774 if (!load_interpolated(dst, params, dest_num_comp, start_comp))
775 return false;
776
777 if (need_temp) {
778 AluInstr *ir = nullptr;
779 for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
780 auto real_dst = vf.dest(intr->dest, i, pin_chan);
781 ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
782 emit_instruction(ir);
783 }
784 assert(ir);
785 ir->set_alu_flag(alu_last_instr);
786 }
787
788 return true;
789 }
790
load_interpolated(RegisterVec4 & dest,const InterpolateParams & params,int num_dest_comp,int start_comp)791 bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
792 int num_dest_comp, int start_comp)
793 {
794 sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
795
796 if (num_dest_comp == 1) {
797 switch (start_comp) {
798 case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
799 case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
800 case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
801 case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
802 default:
803 assert(0);
804 }
805 }
806
807 if (num_dest_comp == 2) {
808 switch (start_comp) {
809 case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
810 case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
811 case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
812 load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
813 default:
814 assert(0);
815 }
816 }
817
818 if (num_dest_comp == 3 && start_comp == 0)
819 return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
820 load_interpolated_one_comp(dest, params, op2_interp_z);
821
822 int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
823
824 bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
825 success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
826 return success;
827 }
828
829
load_barycentric_at_sample(nir_intrinsic_instr * instr)830 bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr)
831 {
832 auto& vf = value_factory();
833 RegisterVec4 slope = vf.temp_vec4(pin_group);
834 auto src = emit_load_to_register(vf.src(instr->src[0], 0));
835 auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
836 R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
837
838 fetch->set_fetch_flag(FetchInstr::srf_mode);
839 emit_instruction(fetch);
840
841 auto grad = vf.temp_vec4(pin_group);
842
843 auto interpolator = m_interpolator[barycentric_ij_index(instr)];
844 assert(interpolator.enabled);
845
846 RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
847
848 auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
849 tex->set_tex_flag(TexInstr::grad_fine);
850 tex->set_tex_flag(TexInstr::x_unnormalized);
851 tex->set_tex_flag(TexInstr::y_unnormalized);
852 tex->set_tex_flag(TexInstr::z_unnormalized);
853 tex->set_tex_flag(TexInstr::w_unnormalized);
854 emit_instruction(tex);
855
856 tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
857 tex->set_tex_flag(TexInstr::x_unnormalized);
858 tex->set_tex_flag(TexInstr::y_unnormalized);
859 tex->set_tex_flag(TexInstr::z_unnormalized);
860 tex->set_tex_flag(TexInstr::w_unnormalized);
861 tex->set_tex_flag(TexInstr::grad_fine);
862 emit_instruction(tex);
863
864 auto tmp0 = vf.temp_register();
865 auto tmp1 = vf.temp_register();
866
867 emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
868 emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
869
870 emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
871 emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
872
873 return true;
874 }
875
load_barycentric_at_offset(nir_intrinsic_instr * instr)876 bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr)
877 {
878 auto& vf = value_factory();
879 auto interpolator = m_interpolator[barycentric_ij_index(instr)];
880
881 auto help = vf.temp_vec4(pin_group);
882 RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
883
884 auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
885 getgradh->set_tex_flag(TexInstr::x_unnormalized);
886 getgradh->set_tex_flag(TexInstr::y_unnormalized);
887 getgradh->set_tex_flag(TexInstr::z_unnormalized);
888 getgradh->set_tex_flag(TexInstr::w_unnormalized);
889 getgradh->set_tex_flag(TexInstr::grad_fine);
890 emit_instruction(getgradh);
891
892 auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
893 getgradv->set_tex_flag(TexInstr::x_unnormalized);
894 getgradv->set_tex_flag(TexInstr::y_unnormalized);
895 getgradv->set_tex_flag(TexInstr::z_unnormalized);
896 getgradv->set_tex_flag(TexInstr::w_unnormalized);
897 getgradv->set_tex_flag(TexInstr::grad_fine);
898 emit_instruction(getgradv);
899
900 auto ofs_x = vf.src(instr->src[0], 0);
901 auto ofs_y = vf.src(instr->src[0], 1);
902 auto tmp0 = vf.temp_register();
903 auto tmp1 = vf.temp_register();
904 emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
905 emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
906 emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
907 emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
908
909 return true;
910 }
911
load_interpolated_one_comp(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op)912 bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
913 const InterpolateParams& params,
914 EAluOp op)
915 {
916 auto group = new AluGroup();
917 bool success = true;
918
919 AluInstr *ir = nullptr;
920 for (unsigned i = 0; i < 2 && success; ++i) {
921 int chan = i;
922 if (op == op2_interp_z)
923 chan += 2;
924
925
926 ir = new AluInstr(op, dest[chan],
927 i & 1 ? params.j : params.i,
928 new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
929 i == 0 ? AluInstr::write : AluInstr::last);
930
931 ir->set_bank_swizzle(alu_vec_210);
932 success = group->add_instruction(ir);
933 }
934 ir->set_alu_flag(alu_last_instr);
935 if (success)
936 emit_instruction(group);
937 return success;
938 }
939
load_interpolated_two_comp(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op,int writemask)940 bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
941 const InterpolateParams& params,
942 EAluOp op, int writemask)
943 {
944 auto group = new AluGroup();
945 bool success = true;
946
947 AluInstr *ir = nullptr;
948 assert(params.j);
949 assert(params.i);
950 for (unsigned i = 0; i < 4 ; ++i) {
951 ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
952 new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
953 (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
954 ir->set_bank_swizzle(alu_vec_210);
955 success = group->add_instruction(ir);
956 }
957 ir->set_alu_flag(alu_last_instr);
958 if (success)
959 emit_instruction(group);
960 return success;
961 }
962
load_interpolated_two_comp_for_one(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op,int comp)963 bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
964 const InterpolateParams& params, EAluOp op,
965 int comp)
966 {
967 auto group = new AluGroup();
968 bool success = true;
969 AluInstr *ir = nullptr;
970
971 for (int i = 0; i < 4 ; ++i) {
972 ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
973 new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
974 i == comp ? AluInstr::write : AluInstr::empty);
975 ir->set_bank_swizzle(alu_vec_210);
976 success = group->add_instruction(ir);
977 }
978 ir->set_alu_flag(alu_last_instr);
979 if (success)
980 emit_instruction(group);
981
982 return success;
983 }
984
985
Interpolator()986 FragmentShaderEG::Interpolator::Interpolator():
987 enabled(false)
988 {
989 }
990
991 }
992