• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_shader_fs.h"
28 
29 #include "sfn_debug.h"
30 #include "sfn_instr_alugroup.h"
31 #include "sfn_instr_export.h"
32 #include "sfn_instr_fetch.h"
33 #include "sfn_instr_tex.h"
34 
35 #include <sstream>
36 
37 namespace r600 {
38 
39 using std::string;
40 
FragmentShader(const r600_shader_key & key)41 FragmentShader::FragmentShader(const r600_shader_key& key):
42     Shader("FS", key.ps.first_atomic_counter),
43     m_dual_source_blend(key.ps.dual_source_blend),
44     m_max_color_exports(MAX2(key.ps.nr_cbufs, 1)),
45     m_pos_input(127, false),
46     m_fs_write_all(false),
47     m_apply_sample_mask(key.ps.apply_sample_id_mask),
48     m_rat_base(key.ps.nr_cbufs),
49     m_image_size_const_offset(key.ps.image_size_const_offset)
50 {
51 }
52 
53 void
do_get_shader_info(r600_shader * sh_info)54 FragmentShader::do_get_shader_info(r600_shader *sh_info)
55 {
56    sh_info->processor_type = PIPE_SHADER_FRAGMENT;
57 
58    sh_info->ps_color_export_mask = m_color_export_mask;
59    sh_info->ps_export_highest = m_export_highest;
60    sh_info->nr_ps_color_exports = m_num_color_exports;
61 
62    sh_info->fs_write_all = m_fs_write_all;
63 
64    sh_info->rat_base = m_rat_base;
65    sh_info->uses_kill = m_uses_discard;
66    sh_info->gs_prim_id_input = m_gs_prim_id_input;
67    sh_info->nsys_inputs = m_nsys_inputs;
68    sh_info->uses_helper_invocation = m_helper_invocation != nullptr;
69 }
70 
71 bool
load_input(nir_intrinsic_instr * intr)72 FragmentShader::load_input(nir_intrinsic_instr *intr)
73 {
74    auto& vf = value_factory();
75 
76    auto location = nir_intrinsic_io_semantics(intr).location;
77    if (location == VARYING_SLOT_POS) {
78       AluInstr *ir = nullptr;
79       for (unsigned i = 0; i < intr->def.num_components; ++i) {
80          ir = new AluInstr(op1_mov,
81                            vf.dest(intr->def, i, pin_none),
82                            m_pos_input[i],
83                            AluInstr::write);
84          emit_instruction(ir);
85       }
86       ir->set_alu_flag(alu_last_instr);
87       return true;
88    }
89 
90    if (location == VARYING_SLOT_FACE) {
91       auto ir = new AluInstr(op2_setgt_dx10,
92                              vf.dest(intr->def, 0, pin_none),
93                              m_face_input,
94                              vf.inline_const(ALU_SRC_0, 0),
95                              AluInstr::last_write);
96       emit_instruction(ir);
97       return true;
98    }
99 
100    return load_input_hw(intr);
101 }
102 
103 bool
store_output(nir_intrinsic_instr * intr)104 FragmentShader::store_output(nir_intrinsic_instr *intr)
105 {
106    auto location = nir_intrinsic_io_semantics(intr).location;
107 
108    if (location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
109       m_fs_write_all = true;
110    }
111 
112    return emit_export_pixel(*intr);
113 }
114 
115 unsigned
barycentric_ij_index(nir_intrinsic_instr * intr)116 barycentric_ij_index(nir_intrinsic_instr *intr)
117 {
118    unsigned index = 0;
119    switch (intr->intrinsic) {
120    case nir_intrinsic_load_barycentric_sample:
121       index = 0;
122       break;
123    case nir_intrinsic_load_barycentric_at_sample:
124    case nir_intrinsic_load_barycentric_at_offset:
125    case nir_intrinsic_load_barycentric_pixel:
126       index = 1;
127       break;
128    case nir_intrinsic_load_barycentric_centroid:
129       index = 2;
130       break;
131    default:
132       unreachable("Unknown interpolator intrinsic");
133    }
134 
135    switch (nir_intrinsic_interp_mode(intr)) {
136    case INTERP_MODE_NONE:
137    case INTERP_MODE_SMOOTH:
138       return index;
139    case INTERP_MODE_NOPERSPECTIVE:
140       return index + 3;
141    case INTERP_MODE_FLAT:
142    case INTERP_MODE_EXPLICIT:
143    default:
144       unreachable("unknown/unsupported mode for load_interpolated");
145    }
146    return 0;
147 }
148 
149 bool
process_stage_intrinsic(nir_intrinsic_instr * intr)150 FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
151 {
152    if (process_stage_intrinsic_hw(intr))
153       return true;
154 
155    switch (intr->intrinsic) {
156    case nir_intrinsic_load_input:
157       return load_input(intr);
158    case nir_intrinsic_load_interpolated_input:
159       return load_interpolated_input(intr);
160    case nir_intrinsic_discard_if:
161       m_uses_discard = true;
162       emit_instruction(new AluInstr(op2_killne_int,
163                                     nullptr,
164                                     value_factory().src(intr->src[0], 0),
165                                     value_factory().zero(),
166                                     {AluInstr::last}));
167 
168       return true;
169    case nir_intrinsic_discard:
170       m_uses_discard = true;
171       emit_instruction(new AluInstr(op2_kille_int,
172                                     nullptr,
173                                     value_factory().zero(),
174                                     value_factory().zero(),
175                                     {AluInstr::last}));
176       return true;
177    case nir_intrinsic_load_sample_mask_in:
178       if (m_apply_sample_mask) {
179          return emit_load_sample_mask_in(intr);
180       } else
181          return emit_simple_mov(intr->def, 0, m_sample_mask_reg);
182    case nir_intrinsic_load_sample_id:
183       return emit_simple_mov(intr->def, 0, m_sample_id_reg);
184    case nir_intrinsic_load_helper_invocation:
185       return emit_load_helper_invocation(intr);
186    case nir_intrinsic_load_sample_pos:
187       return emit_load_sample_pos(intr);
188    default:
189       return false;
190    }
191 }
192 
193 bool
load_interpolated_input(nir_intrinsic_instr * intr)194 FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
195 {
196    auto& vf = value_factory();
197    unsigned loc = nir_intrinsic_io_semantics(intr).location;
198    switch (loc) {
199    case VARYING_SLOT_POS:
200       for (unsigned i = 0; i < intr->def.num_components; ++i)
201          vf.inject_value(intr->def, i, m_pos_input[i]);
202       return true;
203    case VARYING_SLOT_FACE:
204       return false;
205    default:;
206    }
207 
208    return load_interpolated_input_hw(intr);
209 }
210 
211 int
do_allocate_reserved_registers()212 FragmentShader::do_allocate_reserved_registers()
213 {
214    int next_register = allocate_interpolators_or_inputs();
215 
216    if (m_sv_values.test(es_pos)) {
217       set_input_gpr(m_pos_driver_loc, next_register);
218       m_pos_input = value_factory().allocate_pinned_vec4(next_register++, false);
219    }
220 
221    int face_reg_index = -1;
222    if (m_sv_values.test(es_face)) {
223       set_input_gpr(m_face_driver_loc, next_register);
224       face_reg_index = next_register++;
225       m_face_input = value_factory().allocate_pinned_register(face_reg_index, 0);
226    }
227 
228    if (m_sv_values.test(es_sample_mask_in)) {
229       if (face_reg_index < 0)
230          face_reg_index = next_register++;
231       m_sample_mask_reg = value_factory().allocate_pinned_register(face_reg_index, 2);
232       sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg
233               << "\n";
234       m_nsys_inputs = 1;
235       ShaderInput input(ninputs());
236       input.set_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN);
237       input.set_gpr(face_reg_index);
238       add_input(input);
239    }
240 
241    if (m_sv_values.test(es_sample_id) || m_sv_values.test(es_sample_mask_in)) {
242       int sample_id_reg = next_register++;
243       m_sample_id_reg = value_factory().allocate_pinned_register(sample_id_reg, 3);
244       sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
245       m_nsys_inputs++;
246       ShaderInput input(ninputs());
247       input.set_system_value(SYSTEM_VALUE_SAMPLE_ID);
248       input.set_gpr(sample_id_reg);
249       add_input(input);
250    }
251 
252    if (m_sv_values.test(es_helper_invocation)) {
253       m_helper_invocation = value_factory().temp_register(0, false);
254    }
255 
256    return next_register;
257 }
258 
259 bool
do_scan_instruction(nir_instr * instr)260 FragmentShader::do_scan_instruction(nir_instr *instr)
261 {
262    if (instr->type != nir_instr_type_intrinsic)
263       return false;
264 
265    auto intr = nir_instr_as_intrinsic(instr);
266    switch (intr->intrinsic) {
267    case nir_intrinsic_load_barycentric_pixel:
268    case nir_intrinsic_load_barycentric_sample:
269    case nir_intrinsic_load_barycentric_at_sample:
270    case nir_intrinsic_load_barycentric_at_offset:
271    case nir_intrinsic_load_barycentric_centroid:
272       m_interpolators_used.set(barycentric_ij_index(intr));
273       break;
274    case nir_intrinsic_load_front_face:
275       m_sv_values.set(es_face);
276       break;
277    case nir_intrinsic_load_sample_mask_in:
278       m_sv_values.set(es_sample_mask_in);
279       break;
280    case nir_intrinsic_load_sample_pos:
281       m_sv_values.set(es_sample_pos);
282       FALLTHROUGH;
283    case nir_intrinsic_load_sample_id:
284       m_sv_values.set(es_sample_id);
285       break;
286    case nir_intrinsic_load_helper_invocation:
287       m_sv_values.set(es_helper_invocation);
288       break;
289    case nir_intrinsic_load_input:
290       return scan_input(intr, 0);
291    case nir_intrinsic_load_interpolated_input:
292       return scan_input(intr, 1);
293    default:
294       return false;
295    }
296    return true;
297 }
298 
299 bool
emit_load_sample_mask_in(nir_intrinsic_instr * instr)300 FragmentShader::emit_load_sample_mask_in(nir_intrinsic_instr *instr)
301 {
302    auto& vf = value_factory();
303    auto dest = vf.dest(instr->def, 0, pin_free);
304    auto tmp = vf.temp_register();
305    assert(m_sample_id_reg);
306    assert(m_sample_mask_reg);
307 
308    emit_instruction(
309       new AluInstr(op2_lshl_int, tmp, vf.one_i(), m_sample_id_reg, AluInstr::last_write));
310    emit_instruction(
311       new AluInstr(op2_and_int, dest, tmp, m_sample_mask_reg, AluInstr::last_write));
312    return true;
313 }
314 
315 bool
emit_load_helper_invocation(nir_intrinsic_instr * instr)316 FragmentShader::emit_load_helper_invocation(nir_intrinsic_instr *instr)
317 {
318    assert(m_helper_invocation);
319    auto& vf = value_factory();
320    emit_instruction(
321       new AluInstr(op1_mov, m_helper_invocation, vf.literal(-1), AluInstr::last_write));
322    RegisterVec4 destvec{m_helper_invocation, nullptr, nullptr, nullptr, pin_group};
323 
324    auto vtx = new LoadFromBuffer(destvec,
325                                  {4, 7, 7, 7},
326                                  m_helper_invocation,
327                                  0,
328                                  R600_BUFFER_INFO_CONST_BUFFER,
329                                  nullptr,
330                                  fmt_32_32_32_32_float);
331    vtx->set_fetch_flag(FetchInstr::vpm);
332    vtx->set_fetch_flag(FetchInstr::use_tc);
333    vtx->set_always_keep();
334    auto dst = value_factory().dest(instr->def, 0, pin_free);
335    auto ir = new AluInstr(op1_mov, dst, m_helper_invocation, AluInstr::last_write);
336    ir->add_required_instr(vtx);
337    emit_instruction(vtx);
338    emit_instruction(ir);
339 
340    return true;
341 }
342 
343 bool
scan_input(nir_intrinsic_instr * intr,int index_src_id)344 FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
345 {
346    auto index = nir_src_as_const_value(intr->src[index_src_id]);
347    assert(index);
348 
349    const unsigned location_offset = chip_class() < ISA_CC_EVERGREEN ? 32 : 0;
350    bool uses_interpol_at_centroid = false;
351 
352    auto location =
353       static_cast<gl_varying_slot>(nir_intrinsic_io_semantics(intr).location + index->u32);
354    unsigned driver_location = nir_intrinsic_base(intr) + index->u32;
355 
356    if (location == VARYING_SLOT_POS) {
357       m_sv_values.set(es_pos);
358       m_pos_driver_loc = driver_location + location_offset;
359       ShaderInput pos_input(m_pos_driver_loc, location);
360       pos_input.set_interpolator(TGSI_INTERPOLATE_LINEAR,
361                                  TGSI_INTERPOLATE_LOC_CENTER,
362                                  false);
363       add_input(pos_input);
364       return true;
365    }
366 
367    if (location == VARYING_SLOT_FACE) {
368       m_sv_values.set(es_face);
369       m_face_driver_loc = driver_location + location_offset;
370       ShaderInput face_input(m_face_driver_loc, location);
371       add_input(face_input);
372       return true;
373    }
374 
375    tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
376    tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
377 
378    const bool is_color =
379       (location >= VARYING_SLOT_COL0 && location <= VARYING_SLOT_COL1) ||
380       (location >= VARYING_SLOT_BFC0 && location <= VARYING_SLOT_BFC1);
381 
382    if (index_src_id > 0) {
383       glsl_interp_mode mode = INTERP_MODE_NONE;
384       auto parent = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
385       mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
386       switch (parent->intrinsic) {
387       case nir_intrinsic_load_barycentric_sample:
388          tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
389          break;
390       case nir_intrinsic_load_barycentric_at_sample:
391       case nir_intrinsic_load_barycentric_at_offset:
392       case nir_intrinsic_load_barycentric_pixel:
393          tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
394          break;
395       case nir_intrinsic_load_barycentric_centroid:
396          tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
397          uses_interpol_at_centroid = true;
398          break;
399       default:
400          std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name
401                    << " as parent of " << nir_intrinsic_infos[intr->intrinsic].name
402                    << " interpolator?\n";
403          assert(0);
404       }
405 
406       switch (mode) {
407       case INTERP_MODE_NONE:
408          if (is_color) {
409             tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
410             break;
411          }
412          FALLTHROUGH;
413       case INTERP_MODE_SMOOTH:
414          tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
415          break;
416       case INTERP_MODE_NOPERSPECTIVE:
417          tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
418          break;
419       case INTERP_MODE_FLAT:
420          break;
421       case INTERP_MODE_EXPLICIT:
422       default:
423          assert(0);
424       }
425    }
426 
427    if (location == VARYING_SLOT_PRIMITIVE_ID) {
428       m_gs_prim_id_input = true;
429    } else if (!(is_color || (location >= VARYING_SLOT_VAR0 && location < VARYING_SLOT_MAX) ||
430                 (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7) ||
431                 (location >= VARYING_SLOT_CLIP_DIST0 && location <= VARYING_SLOT_CLIP_DIST1) ||
432                 location == VARYING_SLOT_FOGC || location == VARYING_SLOT_LAYER ||
433                 location == VARYING_SLOT_PNTC || location == VARYING_SLOT_VIEWPORT)) {
434       return false;
435    }
436 
437    sfn_log << SfnLog::io << " have IO at " << driver_location << "\n";
438    auto iinput = find_input(driver_location);
439    if (iinput == input_not_found()) {
440       ShaderInput input(driver_location, location);
441       input.set_need_lds_pos();
442       input.set_interpolator(tgsi_interpolate, tgsi_loc, uses_interpol_at_centroid);
443       sfn_log << SfnLog::io << "add IO with LDS ID at " << input.location() << "\n";
444       add_input(input);
445       assert(find_input(input.location()) != input_not_found());
446    } else {
447       if (uses_interpol_at_centroid) {
448          iinput->second.set_uses_interpolate_at_centroid();
449       }
450    }
451    return true;
452 }
453 
454 bool
emit_export_pixel(nir_intrinsic_instr & intr)455 FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr)
456 {
457    RegisterVec4::Swizzle swizzle;
458    auto semantics = nir_intrinsic_io_semantics(&intr);
459    unsigned driver_location = nir_intrinsic_base(&intr);
460    unsigned write_mask = nir_intrinsic_write_mask(&intr);
461 
462    switch (semantics.location) {
463    case FRAG_RESULT_DEPTH:
464       swizzle = {0, 7, 7, 7};
465       break;
466    case FRAG_RESULT_STENCIL:
467       swizzle = {7, 0, 7, 7};
468       break;
469    case FRAG_RESULT_SAMPLE_MASK:
470       swizzle = {7, 7, 0, 7};
471       break;
472    default:
473       for (int i = 0; i < 4; ++i) {
474          swizzle[i] = (1 << i) & write_mask ? i : 7;
475       }
476    }
477 
478    auto value = value_factory().src_vec4(intr.src[0], pin_group, swizzle);
479 
480    if (semantics.location == FRAG_RESULT_COLOR ||
481        (semantics.location >= FRAG_RESULT_DATA0 &&
482         semantics.location <= FRAG_RESULT_DATA7)) {
483 
484       ShaderOutput output(driver_location, write_mask);
485       output.set_frag_result(static_cast<gl_frag_result>(semantics.location));
486       add_output(output);
487 
488       unsigned color_outputs =
489          m_fs_write_all && chip_class() >= ISA_CC_R700 ? m_max_color_exports : 1;
490 
491       for (unsigned k = 0; k < color_outputs; ++k) {
492 
493          unsigned location = semantics.location - FRAG_RESULT_DATA0;
494 
495          if (semantics.location == FRAG_RESULT_COLOR)
496             location = driver_location + k;
497 
498          if (semantics.dual_source_blend_index)
499             location = semantics.dual_source_blend_index;
500 
501          sfn_log << SfnLog::io << "Pixel output at loc:" << location
502                  << "("<< semantics.location << ") of "<< m_max_color_exports<<"\n";
503 
504          if (location >= m_max_color_exports) {
505             sfn_log << SfnLog::io << "Pixel output loc:" << location
506                     << " dl:" << driver_location << " skipped  because  we have only "
507                     << m_max_color_exports << " CBs\n";
508             return true;
509          }
510 
511          m_last_pixel_export = new ExportInstr(ExportInstr::pixel, location, value);
512 
513          if (m_export_highest < location)
514             m_export_highest = location;
515 
516          m_num_color_exports++;
517 
518          /* Hack: force dual source output handling if one color output has a
519           * dual_source_blend_index > 0 */
520          if (semantics.dual_source_blend_index > 0)
521             m_dual_source_blend = true;
522 
523          if (m_num_color_exports > 1)
524             m_fs_write_all = false;
525          unsigned mask = (0xfu << (location * 4));
526 
527          m_color_export_written_mask |= (1 << location);
528 
529          /* If the i-th target format is set, all previous target formats must
530           * be non-zero to avoid hangs. - from radeonsi, seems to apply to eg as well.
531           /*/
532          for (unsigned i = 0; i < location; ++i)
533             mask |= (0x1u << (i * 4));
534 
535          m_color_export_mask |= mask;
536 
537          emit_instruction(m_last_pixel_export);
538       }
539    } else if (semantics.location == FRAG_RESULT_DEPTH ||
540               semantics.location == FRAG_RESULT_STENCIL ||
541               semantics.location == FRAG_RESULT_SAMPLE_MASK) {
542       emit_instruction(new ExportInstr(ExportInstr::pixel, 61, value));
543 
544       ShaderOutput output(driver_location, write_mask);
545       output.set_frag_result(static_cast<gl_frag_result>(semantics.location));
546       add_output(output);
547 
548    } else {
549       return false;
550    }
551    return true;
552 }
553 
554 bool
emit_load_sample_pos(nir_intrinsic_instr * instr)555 FragmentShader::emit_load_sample_pos(nir_intrinsic_instr *instr)
556 {
557    auto dest = value_factory().dest_vec4(instr->def, pin_group);
558 
559    auto fetch = new LoadFromBuffer(dest,
560                                    {0, 1, 2, 3},
561                                    m_sample_id_reg,
562                                    0,
563                                    R600_BUFFER_INFO_CONST_BUFFER,
564                                    nullptr,
565                                    fmt_32_32_32_32_float);
566    fetch->set_fetch_flag(FetchInstr::srf_mode);
567    emit_instruction(fetch);
568    return true;
569 }
570 
571 void
do_finalize()572 FragmentShader::do_finalize()
573 {
574    /* On pre-evergreen not emtting something to all color exports that
575     * are enabled might lead to a hang.
576     * see: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9223
577     */
578    if (chip_class() < ISA_CC_EVERGREEN) {
579       unsigned i = 0;
580       unsigned mask = m_color_export_mask;
581 
582       while (i < m_max_color_exports && (mask & (1u << (4 * i)))) {
583          if (!(m_color_export_written_mask & (1u << i))) {
584             RegisterVec4 value(0, false, {7, 7, 7, 7});
585             m_last_pixel_export = new ExportInstr(ExportInstr::pixel, i, value);
586             emit_instruction(m_last_pixel_export);
587             m_num_color_exports++;
588             if (m_export_highest < i)
589                m_export_highest = i;
590          }
591          ++i;
592       }
593    }
594 
595    if (!m_last_pixel_export) {
596       RegisterVec4 value(0, false, {7, 7, 7, 7});
597       m_last_pixel_export = new ExportInstr(ExportInstr::pixel, 0, value);
598       emit_instruction(m_last_pixel_export);
599       m_num_color_exports++;
600       m_color_export_mask |= 0xf;
601    }
602    m_last_pixel_export->set_is_last_export(true);
603 }
604 
605 bool
read_prop(std::istream & is)606 FragmentShader::read_prop(std::istream& is)
607 {
608    string value;
609    is >> value;
610 
611    ASSERTED auto splitpos = value.find(':');
612    assert(splitpos != string::npos);
613 
614    std::istringstream ival(value);
615    string name;
616    string val;
617 
618    std::getline(ival, name, ':');
619 
620    if (name == "MAX_COLOR_EXPORTS")
621       ival >> m_max_color_exports;
622    else if (name == "COLOR_EXPORTS")
623       ival >> m_num_color_exports;
624    else if (name == "COLOR_EXPORT_MASK")
625       ival >> m_color_export_mask;
626    else if (name == "WRITE_ALL_COLORS")
627       ival >> m_fs_write_all;
628    else
629       return false;
630    return true;
631 }
632 
633 void
do_print_properties(std::ostream & os) const634 FragmentShader::do_print_properties(std::ostream& os) const
635 {
636    os << "PROP MAX_COLOR_EXPORTS:" << m_max_color_exports << "\n";
637    os << "PROP COLOR_EXPORTS:" << m_num_color_exports << "\n";
638    os << "PROP COLOR_EXPORT_MASK:" << m_color_export_mask << "\n";
639    os << "PROP WRITE_ALL_COLORS:" << m_fs_write_all << "\n";
640 }
641 
642 int
allocate_interpolators_or_inputs()643 FragmentShaderR600::allocate_interpolators_or_inputs()
644 {
645    int pos = 0;
646    auto& vf = value_factory();
647    for (auto& [index, inp] : inputs()) {
648       if (inp.need_lds_pos()) {
649 
650          RegisterVec4 input(vf.allocate_pinned_register(pos, 0),
651                             vf.allocate_pinned_register(pos, 1),
652                             vf.allocate_pinned_register(pos, 2),
653                             vf.allocate_pinned_register(pos, 3),
654                             pin_fully);
655          inp.set_gpr(pos++);
656 
657          sfn_log << SfnLog::io << "Reseve input register at pos " << index << " as "
658                  << input << " with register " << inp.gpr() << "\n";
659 
660          m_interpolated_inputs[index] = input;
661       }
662    }
663    return pos;
664 }
665 
666 bool
load_input_hw(nir_intrinsic_instr * intr)667 FragmentShaderR600::load_input_hw(nir_intrinsic_instr *intr)
668 {
669    auto& vf = value_factory();
670    AluInstr *ir = nullptr;
671    for (unsigned i = 0; i < intr->def.num_components; ++i) {
672       sfn_log << SfnLog::io << "Inject register "
673               << *m_interpolated_inputs[nir_intrinsic_base(intr)][i] << "\n";
674       unsigned index = nir_intrinsic_component(intr) + i;
675       assert(index < 4);
676       vf.inject_value(intr->def,
677                       i,
678                       m_interpolated_inputs[nir_intrinsic_base(intr)][index]);
679    }
680    if (ir)
681       ir->set_alu_flag(alu_last_instr);
682    return true;
683 }
684 
685 bool
process_stage_intrinsic_hw(nir_intrinsic_instr * intr)686 FragmentShaderR600::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
687 {
688    switch (intr->intrinsic) {
689    case nir_intrinsic_load_barycentric_centroid:
690    case nir_intrinsic_load_barycentric_pixel:
691    case nir_intrinsic_load_barycentric_sample:
692       return true;
693    default:
694       return false;
695    }
696 }
697 
698 bool
load_interpolated_input_hw(nir_intrinsic_instr * intr)699 FragmentShaderR600::load_interpolated_input_hw(nir_intrinsic_instr *intr)
700 {
701    return load_input_hw(intr);
702 }
703 
704 bool
load_input_hw(nir_intrinsic_instr * intr)705 FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
706 {
707    auto& vf = value_factory();
708    auto io = input(nir_intrinsic_base(intr));
709    auto comp = nir_intrinsic_component(intr);
710 
711    bool need_temp = comp > 0;
712    AluInstr *ir = nullptr;
713    for (unsigned i = 0; i < intr->def.num_components; ++i) {
714       if (need_temp) {
715          auto tmp = vf.temp_register(comp + i);
716          ir =
717             new AluInstr(op1_interp_load_p0,
718                          tmp,
719                          new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
720                          AluInstr::last_write);
721          emit_instruction(ir);
722          emit_instruction(new AluInstr(
723             op1_mov, vf.dest(intr->def, i, pin_chan), tmp, AluInstr::last_write));
724       } else {
725 
726          ir = new AluInstr(op1_interp_load_p0,
727                            vf.dest(intr->def, i, pin_chan),
728                            new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
729                            AluInstr::write);
730          emit_instruction(ir);
731       }
732    }
733    ir->set_alu_flag(alu_last_instr);
734    return true;
735 }
736 
737 int
allocate_interpolators_or_inputs()738 FragmentShaderEG::allocate_interpolators_or_inputs()
739 {
740    for (unsigned i = 0; i < s_max_interpolators; ++i) {
741       if (interpolators_used(i)) {
742          sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
743          m_interpolator[i].enabled = true;
744       }
745    }
746 
747    int num_baryc = 0;
748    for (int i = 0; i < 6; ++i) {
749       if (m_interpolator[i].enabled) {
750          sfn_log << SfnLog::io << "Interpolator " << i
751                  << " is enabled with ij=" << num_baryc << " \n";
752          unsigned sel = num_baryc / 2;
753          unsigned chan = 2 * (num_baryc % 2);
754 
755          m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
756          m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
757 
758          m_interpolator[i].ij_index = num_baryc++;
759       }
760    }
761    return (num_baryc + 1) >> 1;
762 }
763 
764 bool
process_stage_intrinsic_hw(nir_intrinsic_instr * intr)765 FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
766 {
767    auto& vf = value_factory();
768    switch (intr->intrinsic) {
769    case nir_intrinsic_load_barycentric_centroid:
770    case nir_intrinsic_load_barycentric_pixel:
771    case nir_intrinsic_load_barycentric_sample: {
772       unsigned ij = barycentric_ij_index(intr);
773       vf.inject_value(intr->def, 0, m_interpolator[ij].i);
774       vf.inject_value(intr->def, 1, m_interpolator[ij].j);
775       return true;
776    }
777    case nir_intrinsic_load_barycentric_at_offset:
778       return load_barycentric_at_offset(intr);
779    case nir_intrinsic_load_barycentric_at_sample:
780       return load_barycentric_at_sample(intr);
781    default:
782       return false;
783    }
784 }
785 
786 bool
load_interpolated_input_hw(nir_intrinsic_instr * intr)787 FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
788 {
789    auto& vf = value_factory();
790    ASSERTED auto param = nir_src_as_const_value(intr->src[1]);
791    assert(param && "Indirect PS inputs not (yet) supported");
792 
793    int dest_num_comp = intr->def.num_components;
794    int start_comp = nir_intrinsic_component(intr);
795    bool need_temp = start_comp > 0;
796 
797    auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->def, pin_chan);
798 
799    InterpolateParams params;
800 
801    params.i = vf.src(intr->src[0], 0);
802    params.j = vf.src(intr->src[0], 1);
803    params.base = input(nir_intrinsic_base(intr)).lds_pos();
804 
805    if (!load_interpolated(dst, params, dest_num_comp, start_comp))
806       return false;
807 
808    if (need_temp) {
809       AluInstr *ir = nullptr;
810       for (unsigned i = 0; i < intr->def.num_components; ++i) {
811          auto real_dst = vf.dest(intr->def, i, pin_chan);
812          ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
813          emit_instruction(ir);
814       }
815       assert(ir);
816       ir->set_alu_flag(alu_last_instr);
817    }
818 
819    return true;
820 }
821 
822 bool
load_interpolated(RegisterVec4 & dest,const InterpolateParams & params,int num_dest_comp,int start_comp)823 FragmentShaderEG::load_interpolated(RegisterVec4& dest,
824                                     const InterpolateParams& params,
825                                     int num_dest_comp,
826                                     int start_comp)
827 {
828    sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i
829            << ")"
830            << "\n";
831 
832    if (num_dest_comp == 1) {
833       switch (start_comp) {
834       case 0:
835          return load_interpolated_one_comp(dest, params, op2_interp_x);
836       case 1:
837          return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
838       case 2:
839          return load_interpolated_one_comp(dest, params, op2_interp_z);
840       case 3:
841          return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
842       default:
843          assert(0);
844       }
845    }
846 
847    if (num_dest_comp == 2) {
848       switch (start_comp) {
849       case 0:
850          return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
851       case 2:
852          return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
853       case 1:
854          return load_interpolated_one_comp(dest, params, op2_interp_z) &&
855                 load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
856       default:
857          assert(0);
858       }
859    }
860 
861    if (num_dest_comp == 3 && start_comp == 0)
862       return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
863              load_interpolated_one_comp(dest, params, op2_interp_z);
864 
865    int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
866 
867    bool success =
868       load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
869    success &=
870       load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
871    return success;
872 }
873 
874 bool
load_barycentric_at_sample(nir_intrinsic_instr * instr)875 FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr *instr)
876 {
877    auto& vf = value_factory();
878    RegisterVec4 slope = vf.temp_vec4(pin_group);
879    auto src = emit_load_to_register(vf.src(instr->src[0], 0));
880    auto fetch = new LoadFromBuffer(slope,
881                                    {0, 1, 2, 3},
882                                    src,
883                                    0,
884                                    R600_BUFFER_INFO_CONST_BUFFER,
885                                    nullptr,
886                                    fmt_32_32_32_32_float);
887 
888    fetch->set_fetch_flag(FetchInstr::srf_mode);
889    emit_instruction(fetch);
890 
891    auto grad = vf.temp_vec4(pin_group);
892 
893    auto interpolator = m_interpolator[barycentric_ij_index(instr)];
894    assert(interpolator.enabled);
895 
896    RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
897 
898    auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
899    tex->set_tex_flag(TexInstr::grad_fine);
900    tex->set_tex_flag(TexInstr::x_unnormalized);
901    tex->set_tex_flag(TexInstr::y_unnormalized);
902    tex->set_tex_flag(TexInstr::z_unnormalized);
903    tex->set_tex_flag(TexInstr::w_unnormalized);
904    emit_instruction(tex);
905 
906    tex = new TexInstr(TexInstr::get_gradient_v, grad, {7, 7, 0, 1}, interp, 0, 0);
907    tex->set_tex_flag(TexInstr::x_unnormalized);
908    tex->set_tex_flag(TexInstr::y_unnormalized);
909    tex->set_tex_flag(TexInstr::z_unnormalized);
910    tex->set_tex_flag(TexInstr::w_unnormalized);
911    tex->set_tex_flag(TexInstr::grad_fine);
912    emit_instruction(tex);
913 
914    auto tmp0 = vf.temp_register();
915    auto tmp1 = vf.temp_register();
916 
917    emit_instruction(
918       new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
919    emit_instruction(new AluInstr(
920       op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
921 
922    emit_instruction(new AluInstr(op3_muladd,
923                                  vf.dest(instr->def, 0, pin_none),
924                                  grad[3],
925                                  slope[3],
926                                  tmp1,
927                                  {alu_write}));
928    emit_instruction(new AluInstr(op3_muladd,
929                                  vf.dest(instr->def, 1, pin_none),
930                                  grad[2],
931                                  slope[3],
932                                  tmp0,
933                                  {alu_write, alu_last_instr}));
934 
935    return true;
936 }
937 
938 bool
load_barycentric_at_offset(nir_intrinsic_instr * instr)939 FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr *instr)
940 {
941    auto& vf = value_factory();
942    auto interpolator = m_interpolator[barycentric_ij_index(instr)];
943 
944    auto help = vf.temp_vec4(pin_group);
945    RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
946 
947    auto getgradh =
948       new TexInstr(TexInstr::get_gradient_h, help, {0, 1, 7, 7}, interp, 0, 0);
949    getgradh->set_tex_flag(TexInstr::x_unnormalized);
950    getgradh->set_tex_flag(TexInstr::y_unnormalized);
951    getgradh->set_tex_flag(TexInstr::z_unnormalized);
952    getgradh->set_tex_flag(TexInstr::w_unnormalized);
953    getgradh->set_tex_flag(TexInstr::grad_fine);
954    emit_instruction(getgradh);
955 
956    auto getgradv =
957       new TexInstr(TexInstr::get_gradient_v, help, {7, 7, 0, 1}, interp, 0, 0);
958    getgradv->set_tex_flag(TexInstr::x_unnormalized);
959    getgradv->set_tex_flag(TexInstr::y_unnormalized);
960    getgradv->set_tex_flag(TexInstr::z_unnormalized);
961    getgradv->set_tex_flag(TexInstr::w_unnormalized);
962    getgradv->set_tex_flag(TexInstr::grad_fine);
963    emit_instruction(getgradv);
964 
965    auto ofs_x = vf.src(instr->src[0], 0);
966    auto ofs_y = vf.src(instr->src[0], 1);
967    auto tmp0 = vf.temp_register();
968    auto tmp1 = vf.temp_register();
969    emit_instruction(
970       new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
971    emit_instruction(new AluInstr(
972       op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
973    emit_instruction(new AluInstr(
974       op3_muladd, vf.dest(instr->def, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
975    emit_instruction(new AluInstr(op3_muladd,
976                                  vf.dest(instr->def, 1, pin_none),
977                                  help[2],
978                                  ofs_y,
979                                  tmp0,
980                                  {alu_write, alu_last_instr}));
981 
982    return true;
983 }
984 
985 bool
load_interpolated_one_comp(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op)986 FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
987                                              const InterpolateParams& params,
988                                              EAluOp op)
989 {
990    auto group = new AluGroup();
991    bool success = true;
992 
993    AluInstr *ir = nullptr;
994    for (unsigned i = 0; i < 2 && success; ++i) {
995       int chan = i;
996       if (op == op2_interp_z)
997          chan += 2;
998 
999       ir = new AluInstr(op,
1000                         dest[chan],
1001                         i & 1 ? params.j : params.i,
1002                         new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
1003                         i == 0 ? AluInstr::write : AluInstr::last);
1004 
1005       ir->set_bank_swizzle(alu_vec_210);
1006       success = group->add_instruction(ir);
1007    }
1008    ir->set_alu_flag(alu_last_instr);
1009    if (success)
1010       emit_instruction(group);
1011    return success;
1012 }
1013 
1014 bool
load_interpolated_two_comp(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op,int writemask)1015 FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
1016                                              const InterpolateParams& params,
1017                                              EAluOp op,
1018                                              int writemask)
1019 {
1020    auto group = new AluGroup();
1021    bool success = true;
1022 
1023    AluInstr *ir = nullptr;
1024    assert(params.j);
1025    assert(params.i);
1026    for (unsigned i = 0; i < 4; ++i) {
1027       ir = new AluInstr(op,
1028                         dest[i],
1029                         i & 1 ? params.j : params.i,
1030                         new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
1031                         (writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
1032       ir->set_bank_swizzle(alu_vec_210);
1033       success = group->add_instruction(ir);
1034    }
1035    ir->set_alu_flag(alu_last_instr);
1036    if (success)
1037       emit_instruction(group);
1038    return success;
1039 }
1040 
1041 bool
load_interpolated_two_comp_for_one(RegisterVec4 & dest,const InterpolateParams & params,EAluOp op,int comp)1042 FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
1043                                                      const InterpolateParams& params,
1044                                                      EAluOp op,
1045                                                      int comp)
1046 {
1047    auto group = new AluGroup();
1048    bool success = true;
1049    AluInstr *ir = nullptr;
1050 
1051    for (int i = 0; i < 4; ++i) {
1052       ir = new AluInstr(op,
1053                         dest[i],
1054                         i & 1 ? params.j : params.i,
1055                         new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
1056                         i == comp ? AluInstr::write : AluInstr::empty);
1057       ir->set_bank_swizzle(alu_vec_210);
1058       success = group->add_instruction(ir);
1059    }
1060    ir->set_alu_flag(alu_last_instr);
1061    if (success)
1062       emit_instruction(group);
1063 
1064    return success;
1065 }
1066 
Interpolator()1067 FragmentShaderEG::Interpolator::Interpolator():
1068     enabled(false)
1069 {
1070 }
1071 
1072 } // namespace r600
1073