• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 
28 #include "sfn_debug.h"
29 #include "sfn_shader_fs.h"
30 
31 #include "sfn_instr_alugroup.h"
32 #include "sfn_instr_tex.h"
33 #include "sfn_instr_fetch.h"
34 #include "sfn_instr_export.h"
35 
36 #include "tgsi/tgsi_from_mesa.h"
37 
38 #include <sstream>
39 
40 namespace r600 {
41 
42 using std::string;
43 
FragmentShader(const r600_shader_key & key)44 FragmentShader::FragmentShader(const r600_shader_key& key):
45    Shader("FS"),
46    m_dual_source_blend(key.ps.dual_source_blend),
47    m_max_color_exports(MAX2(key.ps.nr_cbufs, 1)),
48    m_export_highest(0),
49    m_num_color_exports(0),
50    m_color_export_mask(0),
51    m_depth_exports(0),
52    m_last_pixel_export(nullptr),
53    m_pos_input(127, false),
54    m_fs_write_all(false),
55    m_apply_sample_mask(key.ps.apply_sample_id_mask),
56    m_rat_base(key.ps.nr_cbufs)
57 {
58 }
59 
do_get_shader_info(r600_shader * sh_info)60 void FragmentShader::do_get_shader_info(r600_shader *sh_info)
61 {
62    sh_info->processor_type = PIPE_SHADER_FRAGMENT;
63 
64    sh_info->ps_color_export_mask = m_color_export_mask;
65    sh_info->ps_export_highest = m_export_highest;
66    sh_info->nr_ps_color_exports = m_num_color_exports;
67 
68    sh_info->fs_write_all = m_fs_write_all;
69 
70    sh_info->rat_base = m_rat_base;
71    sh_info->uses_kill = m_uses_discard;
72    sh_info->gs_prim_id_input = m_gs_prim_id_input;
73    sh_info->ps_prim_id_input = m_ps_prim_id_input &&
74                                chip_class() >= ISA_CC_EVERGREEN;
75    sh_info->nsys_inputs = m_nsys_inputs;
76    sh_info->uses_helper_invocation = m_helper_invocation != nullptr;
77 }
78 
79 
load_input(nir_intrinsic_instr * intr)80 bool FragmentShader::load_input(nir_intrinsic_instr *intr)
81 {
82    auto& vf = value_factory();
83 
84    auto location = nir_intrinsic_io_semantics(intr).location;
85    if (location == VARYING_SLOT_POS) {
86       AluInstr *ir = nullptr;
87       for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
88          ir = new AluInstr(op1_mov,
89                            vf.dest(intr->dest, i, pin_none),
90                            m_pos_input[i],
91                            AluInstr::write);
92          emit_instruction(ir);
93       }
94       ir->set_alu_flag(alu_last_instr);
95       return true;
96    }
97 
98    if (location == VARYING_SLOT_FACE) {
99       auto ir = new AluInstr(op2_setgt_dx10,
100                              vf.dest(intr->dest, 0, pin_none),
101                              m_face_input,
102                              vf.inline_const(ALU_SRC_0, 0),
103                              AluInstr::last_write);
104       emit_instruction(ir);
105       return true;
106    }
107 
108    return load_input_hw(intr);
109 }
110 
store_output(nir_intrinsic_instr * intr)111 bool FragmentShader::store_output(nir_intrinsic_instr *intr)
112 {
113    auto location = nir_intrinsic_io_semantics(intr).location;
114 
115    if (location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
116          m_fs_write_all = true;
117    }
118 
119    return emit_export_pixel(*intr);
120 }
121 
122 unsigned
barycentric_ij_index(nir_intrinsic_instr * intr)123 barycentric_ij_index(nir_intrinsic_instr *intr)
124 {
125    unsigned index = 0;
126    switch (intr->intrinsic) {
127    case nir_intrinsic_load_barycentric_sample:
128       index = 0;
129    break;
130    case nir_intrinsic_load_barycentric_at_sample:
131    case nir_intrinsic_load_barycentric_at_offset:
132    case nir_intrinsic_load_barycentric_pixel:
133       index = 1;
134    break;
135    case nir_intrinsic_load_barycentric_centroid:
136       index = 2;
137    break;
138    default:
139       unreachable("Unknown interpolator intrinsic");
140    }
141 
142    switch (nir_intrinsic_interp_mode(intr)) {
143    case INTERP_MODE_NONE:
144    case INTERP_MODE_SMOOTH:
145    case INTERP_MODE_COLOR:
146    return index;
147    case INTERP_MODE_NOPERSPECTIVE:
148    return index + 3;
149    case INTERP_MODE_FLAT:
150    case INTERP_MODE_EXPLICIT:
151    default:
152       unreachable("unknown/unsupported mode for load_interpolated");
153    }
154    return 0;
155 }
156 
process_stage_intrinsic(nir_intrinsic_instr * intr)157 bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
158 {
159    if (process_stage_intrinsic_hw(intr))
160       return true;
161 
162    switch (intr->intrinsic) {
163    case nir_intrinsic_load_input:
164       return load_input(intr);
165    case nir_intrinsic_load_interpolated_input:
166       return load_interpolated_input(intr);
167    case nir_intrinsic_discard_if:
168       m_uses_discard = true;
169       emit_instruction(new AluInstr(op2_killne_int, nullptr,
170                                     value_factory().src(intr->src[0], 0),
171                                     value_factory().zero(),
172                                     {AluInstr::last}));
173       start_new_block(0);
174       return true;
175    case nir_intrinsic_discard:
176       m_uses_discard = true;
177       emit_instruction(new AluInstr(op2_kille_int, nullptr,
178                                     value_factory().zero(),
179                                     value_factory().zero(),
180                                     {AluInstr::last}));
181       start_new_block(0);
182       return true;
183    case nir_intrinsic_load_sample_mask_in:
184       if (m_apply_sample_mask) {
185          return emit_load_sample_mask_in(intr);
186       } else
187          return emit_simple_mov(intr->dest, 0, m_sample_mask_reg);
188    case nir_intrinsic_load_sample_id:
189       return emit_simple_mov(intr->dest, 0, m_sample_id_reg);
190    case nir_intrinsic_load_helper_invocation:
191        return emit_load_helper_invocation(intr);
192    case nir_intrinsic_load_sample_pos:
193       return emit_load_sample_pos(intr);
194    default:
195       return false;
196    }
197 }
198 
load_interpolated_input(nir_intrinsic_instr * intr)199 bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
200 {
201    auto& vf = value_factory();
202    unsigned loc = nir_intrinsic_io_semantics(intr).location;
203    switch (loc) {
204    case VARYING_SLOT_POS:
205       for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
206          vf.inject_value(intr->dest, i,  m_pos_input[i]);
207       return true;
208    case VARYING_SLOT_FACE:
209       return false;
210    default:
211       ;
212    }
213 
214    return load_interpolated_input_hw(intr);
215 }
216 
217 
do_allocate_reserved_registers()218 int FragmentShader::do_allocate_reserved_registers()
219 {
220    int next_register = allocate_interpolators_or_inputs();
221 
222    if (m_sv_values.test(es_pos)) {
223       set_input_gpr(m_pos_driver_loc, next_register);
224       m_pos_input = value_factory().allocate_pinned_vec4(next_register++, false);
225       for (int i = 0; i < 4; ++i)
226          m_pos_input[i]->pin_live_range(true);
227 
228    }
229 
230    int face_reg_index = -1;
231    if (m_sv_values.test(es_face)) {
232       set_input_gpr(m_face_driver_loc, next_register);
233       face_reg_index = next_register++;
234       m_face_input = value_factory().allocate_pinned_register(face_reg_index, 0);
235       m_face_input->pin_live_range(true);
236    }
237 
238    if (m_sv_values.test(es_sample_mask_in)) {
239       if (face_reg_index < 0)
240          face_reg_index = next_register++;
241       m_sample_mask_reg = value_factory().allocate_pinned_register(face_reg_index, 2);
242       m_sample_mask_reg->pin_live_range(true);
243       sfn_log << SfnLog::io << "Set sample mask in register to " <<  *m_sample_mask_reg << "\n";
244       m_nsys_inputs = 1;
245       ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEMASK);
246       input.set_gpr(face_reg_index);
247       add_input(input);
248    }
249 
250    if (m_sv_values.test(es_sample_id) ||
251        m_sv_values.test(es_sample_mask_in)) {
252       int sample_id_reg = next_register++;
253       m_sample_id_reg = value_factory().allocate_pinned_register(sample_id_reg, 3);
254       m_sample_id_reg->pin_live_range(true);
255       sfn_log << SfnLog::io << "Set sample id register to " <<  *m_sample_id_reg << "\n";
256       m_nsys_inputs++;
257       ShaderInput input(ninputs(), TGSI_SEMANTIC_SAMPLEID);
258       input.set_gpr(sample_id_reg);
259       add_input(input);
260    }
261 
262    if (m_sv_values.test(es_helper_invocation)) {
263       m_helper_invocation = value_factory().allocate_pinned_register(next_register++, 0);
264    }
265 
266    return next_register;
267 }
268 
do_scan_instruction(nir_instr * instr)269 bool FragmentShader::do_scan_instruction(nir_instr *instr)
270 {
271    if (instr->type != nir_instr_type_intrinsic)
272       return false;
273 
274    auto intr = nir_instr_as_intrinsic(instr);
275    switch (intr->intrinsic) {
276    case nir_intrinsic_load_barycentric_pixel:
277    case nir_intrinsic_load_barycentric_sample:
278    case nir_intrinsic_load_barycentric_at_sample:
279    case nir_intrinsic_load_barycentric_at_offset:
280    case nir_intrinsic_load_barycentric_centroid:
281       m_interpolators_used.set(barycentric_ij_index(intr));
282       break;
283    case nir_intrinsic_load_front_face:
284       m_sv_values.set(es_face);
285       break;
286    case nir_intrinsic_load_sample_mask_in:
287       m_sv_values.set(es_sample_mask_in);
288       break;
289    case nir_intrinsic_load_sample_pos:
290       m_sv_values.set(es_sample_pos);
291       FALLTHROUGH;
292    case nir_intrinsic_load_sample_id:
293       m_sv_values.set(es_sample_id);
294       break;
295    case nir_intrinsic_load_helper_invocation:
296       m_sv_values.set(es_helper_invocation);
297       break;
298    case nir_intrinsic_load_input:
299       return scan_input(intr, 0);
300    case nir_intrinsic_load_interpolated_input:
301       return scan_input(intr, 1);
302    default:
303       return false;
304    }
305    return true;
306 }
307 
emit_load_sample_mask_in(nir_intrinsic_instr * instr)308 bool FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
309 {
310    auto& vf = value_factory();
311    auto dest = vf.dest(instr->dest, 0, pin_free);
312    auto tmp = vf.temp_register();
313    assert(m_sample_id_reg);
314    assert(m_sample_mask_reg);
315 
316    emit_instruction(new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write));
317    emit_instruction(new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write));
318    return true;
319 }
320 
emit_load_helper_invocation(nir_intrinsic_instr * instr)321 bool FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr* instr)
322 {
323    assert(m_helper_invocation);
324    auto& vf = value_factory();
325    emit_instruction(new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write));
326    RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group};
327 
328    auto vtx = new LoadFromBuffer(destvec, {4,7,7,7}, m_helper_invocation, 0,
329                                    R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
330    vtx->set_fetch_flag(FetchInstr::vpm);
331    vtx->set_fetch_flag(FetchInstr::use_tc);
332    vtx->set_always_keep();
333    auto dst = value_factory().dest(instr->dest, 0, pin_free);
334    auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write);
335    ir->add_required_instr(vtx);
336    emit_instruction(vtx);
337    emit_instruction(ir);
338 
339    return true;
340 }
341 
scan_input(nir_intrinsic_instr * intr,int index_src_id)342 bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
343 {
344    auto index = nir_src_as_const_value(intr->src[index_src_id]);
345    assert(index);
346 
347    const unsigned location_offset = chip_class() < ISA_CC_EVERGREEN ? 32 : 0;
348    bool uses_interpol_at_centroid = false;
349 
350    unsigned location = nir_intrinsic_io_semantics(intr).location  + index->u32;
351    unsigned driver_location = nir_intrinsic_base(intr) + index->u32;
352    auto semantic = r600_get_varying_semantic(location);
353    tgsi_semantic name = (tgsi_semantic)semantic.first;
354    unsigned sid = semantic.second;
355 
356    if (location == VARYING_SLOT_POS) {
357       m_sv_values.set(es_pos);
358       m_pos_driver_loc = driver_location + location_offset;
359       ShaderInput pos_input(m_pos_driver_loc, name);
360       pos_input.set_sid(sid);
361       pos_input.set_interpolator(TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER, false);
362       add_input(pos_input);
363       return true;
364    }
365 
366    if (location == VARYING_SLOT_FACE) {
367       m_sv_values.set(es_face);
368       m_face_driver_loc = driver_location + location_offset;
369       ShaderInput face_input(m_face_driver_loc, name);
370       face_input.set_sid(sid);
371       add_input(face_input);
372       return true;
373    }
374 
375    tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
376    tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
377 
378    if (index_src_id > 0) {
379       glsl_interp_mode mode = INTERP_MODE_NONE;
380       auto parent = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
381       mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
382       switch (parent->intrinsic) {
383       case nir_intrinsic_load_barycentric_sample:
384          tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
385       break;
386       case nir_intrinsic_load_barycentric_at_sample:
387       case nir_intrinsic_load_barycentric_at_offset:
388       case nir_intrinsic_load_barycentric_pixel:
389          tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
390       break;
391       case nir_intrinsic_load_barycentric_centroid:
392          tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
393          uses_interpol_at_centroid = true;
394       break;
395       default:
396          std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
397                    << nir_intrinsic_infos[intr->intrinsic].name
398                    << " interpolator?\n";
399          assert(0);
400       }
401 
402       switch (mode) {
403       case INTERP_MODE_NONE:
404          if (name == TGSI_SEMANTIC_COLOR ||
405              name == TGSI_SEMANTIC_BCOLOR) {
406             tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
407             break;
408          }
409          FALLTHROUGH;
410       case INTERP_MODE_SMOOTH:
411          tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
412       break;
413       case INTERP_MODE_NOPERSPECTIVE:
414          tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
415       break;
416       case INTERP_MODE_FLAT:
417       break;
418       case INTERP_MODE_COLOR:
419          tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
420       break;
421       case INTERP_MODE_EXPLICIT:
422       default:
423          assert(0);
424       }
425    }
426 
427    switch (name) {
428    case TGSI_SEMANTIC_PRIMID:
429       m_gs_prim_id_input = true;
430       m_ps_prim_id_input = ninputs();
431       FALLTHROUGH;
432    case TGSI_SEMANTIC_COLOR:
433    case TGSI_SEMANTIC_BCOLOR:
434    case TGSI_SEMANTIC_FOG:
435    case TGSI_SEMANTIC_GENERIC:
436    case TGSI_SEMANTIC_TEXCOORD:
437    case TGSI_SEMANTIC_LAYER:
438    case TGSI_SEMANTIC_PCOORD:
439    case TGSI_SEMANTIC_VIEWPORT_INDEX:
440    case TGSI_SEMANTIC_CLIPDIST: {
441       sfn_log << SfnLog::io <<  " have IO at " << driver_location << "\n";
442       auto iinput = find_input(driver_location);
443       if (iinput == input_not_found()) {
444          ShaderInput input(driver_location, name);
445          input.set_sid(sid);
446          input.set_need_lds_pos();
447          input.set_interpolator(tgsi_interpolate, tgsi_loc, uses_interpol_at_centroid);
448          sfn_log << SfnLog::io <<  "add IO with LDS ID at " << input.location() << "\n";
449          add_input(input);
450          assert(find_input(input.location()) != input_not_found());
451       } else {
452          if (uses_interpol_at_centroid) {
453             iinput->second.set_uses_interpolate_at_centroid();
454          }
455       }
456       return true;
457    }
458    default:
459       return false;
460    }
461 }
462 
emit_export_pixel(nir_intrinsic_instr & intr)463 bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr)
464 {
465    RegisterVec4::Swizzle swizzle;
466    auto semantics = nir_intrinsic_io_semantics(&intr);
467    unsigned driver_location = nir_intrinsic_base(&intr);
468    unsigned write_mask = nir_intrinsic_write_mask(&intr);
469 
470    switch (semantics.location) {
471    case FRAG_RESULT_DEPTH:
472       swizzle = {0,7,7,7};
473    break;
474    case FRAG_RESULT_STENCIL:
475       swizzle = {7,0,7,7};
476    break;
477    case FRAG_RESULT_SAMPLE_MASK:
478       swizzle = {7,7,0,7};
479    break;
480    default:
481       for (int i = 0; i < 4; ++i) {
482          swizzle[i] = (1 << i) & write_mask ? i : 7;
483       }
484    }
485 
486    auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle);
487 
488    if (semantics.location == FRAG_RESULT_COLOR ||
489        (semantics.location >= FRAG_RESULT_DATA0 &&
490         semantics.location <= FRAG_RESULT_DATA7)) {
491 
492       ShaderOutput output(driver_location, TGSI_SEMANTIC_COLOR, write_mask);
493       add_output(output);
494 
495       unsigned color_outputs = m_fs_write_all && chip_class() >= ISA_CC_R700 ?
496                                   m_max_color_exports : 1;
497 
498       for (unsigned k = 0; k < color_outputs; ++k) {
499 
500          unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
501                               ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
502 
503          sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
504 
505          if (location >= m_max_color_exports) {
506             sfn_log << SfnLog::io << "Pixel output loc:" << location
507                     << " dl:" << driver_location
508                     << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
509             return true; ;
510          }
511 
512          m_last_pixel_export = new ExportInstr(ExportInstr::pixel, location, value);
513 
514          if (m_export_highest < location)
515             m_export_highest = location;
516 
517          m_num_color_exports++;
518 
519          /* Hack: force dual source output handling if one color output has a
520           * dual_source_blend_index > 0 */
521          if (semantics.location == FRAG_RESULT_COLOR &&
522              semantics.dual_source_blend_index > 0)
523             m_dual_source_blend = true;
524 
525          if (m_num_color_exports > 1)
526             m_fs_write_all = false;
527          unsigned mask = (0xfu << (location * 4));
528          m_color_export_mask |= mask;
529 
530          emit_instruction(m_last_pixel_export);
531       }
532    } else if (semantics.location == FRAG_RESULT_DEPTH ||
533               semantics.location == FRAG_RESULT_STENCIL ||
534               semantics.location == FRAG_RESULT_SAMPLE_MASK) {
535       m_depth_exports++;
536       emit_instruction(new ExportInstr(ExportInstr::pixel, 61, value));
537       int semantic = TGSI_SEMANTIC_POSITION;
538       if (semantics.location == FRAG_RESULT_STENCIL)
539          semantic = TGSI_SEMANTIC_STENCIL;
540       else if (semantics.location == FRAG_RESULT_SAMPLE_MASK)
541          semantic = TGSI_SEMANTIC_SAMPLEMASK;
542 
543       ShaderOutput output(driver_location, semantic, write_mask);
544       add_output(output);
545 
546    } else {
547       return false;
548    }
549    return true;
550 }
551 
emit_load_sample_pos(nir_intrinsic_instr * instr)552 bool FragmentShader::emit_load_sample_pos(nir_intrinsic_instr* instr)
553 {
554    auto dest = value_factory().dest_vec4(instr->dest, pin_group);
555 
556 
557    auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, m_sample_id_reg, 0,
558                                    R600_BUFFER_INFO_CONST_BUFFER,
559                                    nullptr, fmt_32_32_32_32_float);
560    fetch->set_fetch_flag(FetchInstr::srf_mode);
561    emit_instruction(fetch);
562    return true;
563 }
564 
do_finalize()565 void FragmentShader::do_finalize()
566 {
567    if (!m_last_pixel_export) {
568       RegisterVec4 value(0, false, {7,7,7,7});
569       m_last_pixel_export = new ExportInstr(ExportInstr::pixel, 0, value);
570       emit_instruction(m_last_pixel_export);
571       m_num_color_exports++;
572       m_color_export_mask |= 0xf;
573    }
574    m_last_pixel_export->set_is_last_export(true);
575 }
576 
read_prop(std::istream & is)577 bool FragmentShader::read_prop(std::istream& is)
578 {
579    string value;
580    is >> value;
581 
582    auto splitpos = value.find(':');
583    assert(splitpos != string::npos);
584 
585    std::istringstream ival(value);
586    string name;
587    string val;
588 
589    std::getline(ival, name, ':');
590 
591    if (name == "MAX_COLOR_EXPORTS")
592       ival >> m_max_color_exports;
593    else if (name == "COLOR_EXPORTS")
594       ival >> m_num_color_exports;
595    else if (name == "COLOR_EXPORT_MASK")
596       ival >> m_color_export_mask;
597    else if (name == "WRITE_ALL_COLORS")
598       ival >> m_fs_write_all;
599    else
600       return false;
601    return true;
602 }
603 
do_print_properties(std::ostream & os) const604 void FragmentShader::do_print_properties(std::ostream& os) const
605 {
606    os << "PROP MAX_COLOR_EXPORTS:"  << m_max_color_exports << "\n";
607    os << "PROP COLOR_EXPORTS:"  << m_num_color_exports << "\n";
608    os << "PROP COLOR_EXPORT_MASK:"  << m_color_export_mask << "\n";
609    os << "PROP WRITE_ALL_COLORS:" << m_fs_write_all << "\n";
610 }
611 
allocate_interpolators_or_inputs()612 int FragmentShaderR600::allocate_interpolators_or_inputs()
613 {
614    int pos = 0;
615    auto& vf = value_factory();
616    for (auto& [index, inp]: inputs()) {
617       if (inp.need_lds_pos()) {
618 
619          RegisterVec4 input(vf.allocate_pinned_register(pos, 0),
620                             vf.allocate_pinned_register(pos, 1),
621                             vf.allocate_pinned_register(pos, 2),
622                             vf.allocate_pinned_register(pos, 3), pin_fully);
623          inp.set_gpr(pos++);
624          for (int i = 0; i < 4; ++i) {
625             input[i]->pin_live_range(true);
626          }
627 
628          sfn_log << SfnLog::io << "Reseve input register at pos " <<
629                     index << " as "  << input << " with register " << inp.gpr() << "\n";
630 
631          m_interpolated_inputs[index] = input;
632       }
633    }
634    return pos;
635 }
636 
load_input_hw(nir_intrinsic_instr * intr)637 bool FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr)
638 {
639    auto& vf = value_factory();
640    AluInstr *ir = nullptr;
641    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
642       sfn_log << SfnLog::io << "Inject register "  << *m_interpolated_inputs[nir_intrinsic_base(intr)][i] << "\n";
643       unsigned index = nir_intrinsic_component(intr) + i;
644       assert (index < 4);
645       if (intr->dest.is_ssa) {
646          vf.inject_value(intr->dest, i, m_interpolated_inputs[nir_intrinsic_base(intr)][index]);
647       } else {
648          ir = new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_none),
649                      m_interpolated_inputs[nir_intrinsic_base(intr)][index],
650                      AluInstr::write);
651          emit_instruction(ir);
652       }
653    }
654    if (ir)
655       ir->set_alu_flag(alu_last_instr);
656    return true;
657 }
658 
process_stage_intrinsic_hw(nir_intrinsic_instr * intr)659 bool FragmentShaderR600::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
660 {
661    switch (intr->intrinsic) {
662    case nir_intrinsic_load_barycentric_centroid:
663    case nir_intrinsic_load_barycentric_pixel:
664    case nir_intrinsic_load_barycentric_sample:
665       return true;
666    default:
667       return false;
668    }
669 }
670 
load_interpolated_input_hw(nir_intrinsic_instr * intr)671 bool FragmentShaderR600::load_interpolated_input_hw(nir_intrinsic_instr *intr)
672 {
673    return load_input_hw(intr);
674 }
675 
load_input_hw(nir_intrinsic_instr * intr)676 bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
677 {
678    auto& vf = value_factory();
679    auto io = input(nir_intrinsic_base(intr));
680    auto comp = nir_intrinsic_component(intr);
681 
682    bool need_temp = comp > 0 || !intr->dest.is_ssa;
683    AluInstr *ir = nullptr;
684    for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
685       if (need_temp) {
686          auto tmp = vf.temp_register(comp + i);
687          ir = new AluInstr(op1_interp_load_p0,
688                            tmp,
689                            new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
690                            AluInstr::last_write);
691          emit_instruction(ir);
692          emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
693       } else {
694 
695          ir = new AluInstr(op1_interp_load_p0,
696                            vf.dest(intr->dest, i, pin_chan),
697                            new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
698                            AluInstr::write);
699          emit_instruction(ir);
700       }
701 
702    }
703    ir->set_alu_flag(alu_last_instr);
704    return true;
705 }
706 
allocate_interpolators_or_inputs()707 int FragmentShaderEG::allocate_interpolators_or_inputs()
708 {
709    for (unsigned i = 0; i < s_max_interpolators; ++i) {
710       if (interpolators_used(i)) {
711          sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
712          m_interpolator[i].enabled = true;
713       }
714    }
715 
716    int num_baryc = 0;
717    for (int i = 0; i < 6; ++i) {
718       if (m_interpolator[i].enabled) {
719          sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
720          unsigned sel = num_baryc / 2;
721          unsigned chan = 2 * (num_baryc % 2);
722 
723          m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
724          m_interpolator[i].i->pin_live_range(true, false);
725 
726          m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
727          m_interpolator[i].j->pin_live_range(true, false);
728 
729          m_interpolator[i].ij_index = num_baryc++;
730       }
731    }
732    return (num_baryc + 1) >> 1;
733 }
734 
process_stage_intrinsic_hw(nir_intrinsic_instr * intr)735 bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
736 {
737    auto& vf = value_factory();
738    switch (intr->intrinsic) {
739    case nir_intrinsic_load_barycentric_centroid:
740    case nir_intrinsic_load_barycentric_pixel:
741    case nir_intrinsic_load_barycentric_sample: {
742       unsigned ij = barycentric_ij_index(intr);
743       vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
744       vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
745       return true;
746    }
747    case nir_intrinsic_load_barycentric_at_offset:
748       return load_barycentric_at_offset(intr);
749    case nir_intrinsic_load_barycentric_at_sample:
750       return load_barycentric_at_sample(intr);
751    default:
752       return false;
753    }
754 }
755 
load_interpolated_input_hw(nir_intrinsic_instr * intr)756 bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
757 {
758    auto& vf = value_factory();
759    auto param = nir_src_as_const_value(intr->src[1]);
760    assert(param && "Indirect PS inputs not (yet) supported");
761 
762    int dest_num_comp = nir_dest_num_components(intr->dest);
763    int start_comp = nir_intrinsic_component(intr);
764    bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
765 
766    auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
767 
768    InterpolateParams params;
769 
770    params.i = vf.src(intr->src[0], 0);
771    params.j = vf.src(intr->src[0], 1);
772    params.base = input(nir_intrinsic_base(intr)).lds_pos();
773 
774    if (!load_interpolated(dst, params, dest_num_comp, start_comp))
775       return false;
776 
777    if (need_temp) {
778       AluInstr *ir = nullptr;
779       for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
780          auto real_dst = vf.dest(intr->dest, i, pin_chan);
781          ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
782          emit_instruction(ir);
783       }
784       assert(ir);
785       ir->set_alu_flag(alu_last_instr);
786    }
787 
788    return true;
789 }
790 
load_interpolated(RegisterVec4 & dest,const InterpolateParams & params,int num_dest_comp,int start_comp)791 bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
792                                          int num_dest_comp, int start_comp)
793 {
794    sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i <<  ")" << "\n";
795 
796    if (num_dest_comp == 1) {
797       switch (start_comp) {
798       case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
799       case 1: return load_interpolated_two_comp_for_one(dest, params,  op2_interp_xy, 1);
800       case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
801       case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
802       default:
803          assert(0);
804       }
805    }
806 
807    if (num_dest_comp == 2) {
808       switch (start_comp) {
809       case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
810       case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
811       case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
812                load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
813       default:
814          assert(0);
815       }
816    }
817 
818    if (num_dest_comp == 3 && start_comp == 0)
819       return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
820             load_interpolated_one_comp(dest, params, op2_interp_z);
821 
822    int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
823 
824    bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
825    success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
826    return success;
827 }
828 
829 
load_barycentric_at_sample(nir_intrinsic_instr * instr)830 bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr)
831 {
832    auto& vf = value_factory();
833    RegisterVec4 slope = vf.temp_vec4(pin_group);
834    auto  src = emit_load_to_register(vf.src(instr->src[0], 0));
835    auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
836                                    R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
837 
838    fetch->set_fetch_flag(FetchInstr::srf_mode);
839    emit_instruction(fetch);
840 
841    auto grad = vf.temp_vec4(pin_group);
842 
843    auto interpolator = m_interpolator[barycentric_ij_index(instr)];
844    assert(interpolator.enabled);
845 
846    RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
847 
848    auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
849    tex->set_tex_flag(TexInstr::grad_fine);
850    tex->set_tex_flag(TexInstr::x_unnormalized);
851    tex->set_tex_flag(TexInstr::y_unnormalized);
852    tex->set_tex_flag(TexInstr::z_unnormalized);
853    tex->set_tex_flag(TexInstr::w_unnormalized);
854    emit_instruction(tex);
855 
856    tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
857    tex->set_tex_flag(TexInstr::x_unnormalized);
858    tex->set_tex_flag(TexInstr::y_unnormalized);
859    tex->set_tex_flag(TexInstr::z_unnormalized);
860    tex->set_tex_flag(TexInstr::w_unnormalized);
861    tex->set_tex_flag(TexInstr::grad_fine);
862    emit_instruction(tex);
863 
864    auto tmp0 = vf.temp_register();
865    auto tmp1 = vf.temp_register();
866 
867    emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
868    emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
869 
870    emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
871    emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
872 
873    return true;
874 }
875 
load_barycentric_at_offset(nir_intrinsic_instr * instr)876 bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr)
877 {
878    auto& vf = value_factory();
879    auto interpolator = m_interpolator[barycentric_ij_index(instr)];
880 
881    auto help = vf.temp_vec4(pin_group);
882    RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
883 
884    auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
885    getgradh->set_tex_flag(TexInstr::x_unnormalized);
886    getgradh->set_tex_flag(TexInstr::y_unnormalized);
887    getgradh->set_tex_flag(TexInstr::z_unnormalized);
888    getgradh->set_tex_flag(TexInstr::w_unnormalized);
889    getgradh->set_tex_flag(TexInstr::grad_fine);
890    emit_instruction(getgradh);
891 
892    auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
893    getgradv->set_tex_flag(TexInstr::x_unnormalized);
894    getgradv->set_tex_flag(TexInstr::y_unnormalized);
895    getgradv->set_tex_flag(TexInstr::z_unnormalized);
896    getgradv->set_tex_flag(TexInstr::w_unnormalized);
897    getgradv->set_tex_flag(TexInstr::grad_fine);
898    emit_instruction(getgradv);
899 
900    auto ofs_x = vf.src(instr->src[0], 0);
901    auto ofs_y = vf.src(instr->src[0], 1);
902    auto tmp0 = vf.temp_register();
903    auto tmp1 = vf.temp_register();
904    emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
905    emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
906    emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
907    emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
908 
909    return true;
910 }
911 
load_interpolated_one_comp(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op)912 bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
913                                                 const InterpolateParams& params,
914                                                 EAluOp op)
915 {
916    auto group = new AluGroup();
917    bool success = true;
918 
919    AluInstr *ir = nullptr;
920    for (unsigned i = 0; i < 2 && success; ++i) {
921       int chan = i;
922       if (op == op2_interp_z)
923          chan += 2;
924 
925 
926       ir = new AluInstr(op, dest[chan],
927                         i & 1 ? params.j : params.i,
928                         new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
929                         i == 0  ? AluInstr::write : AluInstr::last);
930 
931       ir->set_bank_swizzle(alu_vec_210);
932       success = group->add_instruction(ir);
933    }
934    ir->set_alu_flag(alu_last_instr);
935    if (success)
936       emit_instruction(group);
937    return success;
938 }
939 
load_interpolated_two_comp(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op,int writemask)940 bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
941                                                 const InterpolateParams& params,
942                                                 EAluOp op, int writemask)
943 {
944    auto group = new AluGroup();
945    bool success = true;
946 
947    AluInstr *ir = nullptr;
948    assert(params.j);
949    assert(params.i);
950    for (unsigned i = 0; i < 4 ; ++i) {
951       ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
952                         new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
953                         (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
954       ir->set_bank_swizzle(alu_vec_210);
955       success = group->add_instruction(ir);
956    }
957    ir->set_alu_flag(alu_last_instr);
958    if (success)
959       emit_instruction(group);
960    return success;
961 }
962 
load_interpolated_two_comp_for_one(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op,int comp)963 bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
964                                                           const InterpolateParams& params, EAluOp op,
965                                                           int comp)
966 {
967    auto group = new AluGroup();
968    bool success = true;
969    AluInstr *ir = nullptr;
970 
971    for (int i = 0; i <  4 ; ++i) {
972       ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
973                         new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
974                         i == comp ? AluInstr::write : AluInstr::empty);
975       ir->set_bank_swizzle(alu_vec_210);
976       success = group->add_instruction(ir);
977    }
978    ir->set_alu_flag(alu_last_instr);
979    if (success)
980       emit_instruction(group);
981 
982    return success;
983 }
984 
985 
Interpolator()986 FragmentShaderEG::Interpolator::Interpolator():
987    enabled(false)
988 {
989 }
990 
991 }
992