1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31
32 namespace r600 {
33
FragmentShaderFromNir(const nir_shader & nir,r600_shader & sh,r600_pipe_shader_selector & sel,const r600_shader_key & key,enum chip_class chip_class)34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35 r600_shader& sh,
36 r600_pipe_shader_selector &sel,
37 const r600_shader_key &key,
38 enum chip_class chip_class):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
40 m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41 m_max_counted_color_exports(0),
42 m_two_sided_color(key.ps.color_two_side),
43 m_last_pixel_export(nullptr),
44 m_nir(nir),
45 m_reserved_registers(0),
46 m_frag_pos_index(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
49 m_depth_exports(0),
50 m_enable_centroid_interpolators(false),
51 m_enable_sample_interpolators(false),
52 m_apply_sample_mask(key.ps.apply_sample_id_mask),
53 m_dual_source_blend(key.ps.dual_source_blend)
54 {
55 for (auto& i: m_interpolator) {
56 i.enabled = false;
57 i.ij_index= 0;
58 }
59
60 sh_info().rat_base = key.ps.nr_cbufs;
61 sh_info().atomic_base = key.ps.first_atomic_counter;
62 }
63
do_process_inputs(nir_variable * input)64 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
65 {
66 sfn_log << SfnLog::io << "Parse input variable "
67 << input->name << " location:" << input->data.location
68 << " driver-loc:" << input->data.driver_location
69 << " interpolation:" << input->data.interpolation
70 << "\n";
71
72 if (input->data.location == VARYING_SLOT_FACE) {
73 m_sv_values.set(es_face);
74 return true;
75 }
76
77 unsigned name, sid;
78 auto semantic = r600_get_varying_semantic(input->data.location);
79 name = semantic.first;
80 sid = semantic.second;
81
82 tgsi_semantic sname = static_cast<tgsi_semantic>(name);
83
84 switch (sname) {
85 case TGSI_SEMANTIC_POSITION: {
86 m_sv_values.set(es_pos);
87 return true;
88 }
89 case TGSI_SEMANTIC_COLOR: {
90 m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
91 m_need_back_color = m_two_sided_color;
92 return true;
93 }
94 case TGSI_SEMANTIC_PRIMID:
95 sh_info().gs_prim_id_input = true;
96 sh_info().ps_prim_id_input = m_shaderio.inputs().size();
97 /* fallthrough */
98 case TGSI_SEMANTIC_FOG:
99 case TGSI_SEMANTIC_GENERIC:
100 case TGSI_SEMANTIC_TEXCOORD:
101 case TGSI_SEMANTIC_LAYER:
102 case TGSI_SEMANTIC_PCOORD:
103 case TGSI_SEMANTIC_VIEWPORT_INDEX:
104 case TGSI_SEMANTIC_CLIPDIST: {
105 if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
106 m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
107 return true;
108 }
109 default:
110 return false;
111 }
112 }
113
scan_sysvalue_access(nir_instr * instr)114 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
115 {
116 switch (instr->type) {
117 case nir_instr_type_intrinsic: {
118 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
119 switch (ii->intrinsic) {
120 case nir_intrinsic_load_front_face:
121 m_sv_values.set(es_face);
122 break;
123 case nir_intrinsic_load_sample_mask_in:
124 m_sv_values.set(es_sample_mask_in);
125 break;
126 case nir_intrinsic_load_sample_pos:
127 m_sv_values.set(es_sample_pos);
128 /* fallthrough */
129 case nir_intrinsic_load_sample_id:
130 m_sv_values.set(es_sample_id);
131 break;
132 case nir_intrinsic_interp_deref_at_centroid:
133 /* This is not a sysvalue, should go elsewhere */
134 m_enable_centroid_interpolators = true;
135 break;
136 case nir_intrinsic_interp_deref_at_sample:
137 m_enable_sample_interpolators = true;
138 break;
139 case nir_intrinsic_load_helper_invocation:
140 m_sv_values.set(es_helper_invocation);
141 break;
142 default:
143 ;
144 }
145 }
146 default:
147 ;
148 }
149 return true;
150 }
151
do_allocate_reserved_registers()152 bool FragmentShaderFromNir::do_allocate_reserved_registers()
153 {
154 assert(!m_reserved_registers);
155
156 int face_reg_index = -1;
157 int sample_id_index = -1;
158 // enabled interpolators based on inputs
159 for (auto& i: m_shaderio.inputs()) {
160 int ij = i->ij_index();
161 if (ij >= 0) {
162 m_interpolator[ij].enabled = true;
163 }
164 }
165
166 /* Lazy, enable both possible interpolators,
167 * TODO: check which ones are really needed */
168 if (m_enable_centroid_interpolators) {
169 m_interpolator[2].enabled = true; /* perspective */
170 m_interpolator[5].enabled = true; /* linear */
171 }
172
173 if (m_enable_sample_interpolators)
174 m_interpolator[1].enabled = true; /* perspective */
175
176 // sort the varying inputs
177 m_shaderio.sort_varying_inputs();
178
179 // handle interpolators
180 int num_baryc = 0;
181 for (int i = 0; i < 6; ++i) {
182 if (m_interpolator[i].enabled) {
183 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
184
185 m_interpolator[i].ij_index = num_baryc;
186
187 unsigned sel = num_baryc / 2;
188 unsigned chan = 2 * (num_baryc % 2);
189
190 auto ip_i = new GPRValue(sel, chan + 1);
191 ip_i->set_as_input();
192 m_interpolator[i].i.reset(ip_i);
193 inject_register(sel, chan + 1, m_interpolator[i].i, false);
194
195 auto ip_j = new GPRValue(sel, chan);
196 ip_j->set_as_input();
197 m_interpolator[i].j.reset(ip_j);
198 inject_register(sel, chan, m_interpolator[i].j, false);
199
200 ++num_baryc;
201 }
202 }
203 m_reserved_registers += (num_baryc + 1) >> 1;
204
205 if (m_sv_values.test(es_pos)) {
206 m_frag_pos_index = m_reserved_registers++;
207 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
208 }
209
210 // handle system values
211 if (m_sv_values.test(es_face) || m_need_back_color) {
212 face_reg_index = m_reserved_registers++;
213 m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0);
214 m_front_face_reg->set_as_input();
215 sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
216 inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false);
217
218 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
219 load_front_face();
220 }
221
222 if (m_sv_values.test(es_sample_mask_in)) {
223 if (face_reg_index < 0)
224 face_reg_index = m_reserved_registers++;
225
226 m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2);
227 m_sample_mask_reg->set_as_input();
228 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
229 sh_info().nsys_inputs = 1;
230 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
231 }
232
233 if (m_sv_values.test(es_sample_id) ||
234 m_sv_values.test(es_sample_mask_in)) {
235 if (sample_id_index < 0)
236 sample_id_index = m_reserved_registers++;
237
238 m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
239 m_sample_id_reg->set_as_input();
240 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
241 sh_info().nsys_inputs++;
242 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
243 }
244
245 // The back color handling is not emmited in the code, so we have
246 // to add the inputs here and later we also need to inject the code to set
247 // the right color
248 if (m_need_back_color) {
249 size_t ninputs = m_shaderio.inputs().size();
250 for (size_t k = 0; k < ninputs; ++k) {
251 ShaderInput& i = m_shaderio.input(k);
252
253 if (i.name() != TGSI_SEMANTIC_COLOR)
254 continue;
255
256 ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
257
258 size_t next_pos = m_shaderio.size();
259 auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
260 m_shaderio.add_input(bcol);
261 col.set_back_color(next_pos);
262 }
263 m_shaderio.set_two_sided();
264 }
265
266 m_shaderio.update_lds_pos();
267
268 set_reserved_registers(m_reserved_registers);
269
270 return true;
271 }
272
emit_shader_start()273 void FragmentShaderFromNir::emit_shader_start()
274 {
275 if (m_sv_values.test(es_face))
276 load_front_face();
277
278 if (m_sv_values.test(es_pos)) {
279 for (int i = 0; i < 4; ++i) {
280 auto v = new GPRValue(m_frag_pos_index, i);
281 v->set_as_input();
282 auto reg = PValue(v);
283 if (i == 3)
284 emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
285 m_frag_pos[i] = reg;
286 }
287 }
288
289 if (m_sv_values.test(es_helper_invocation)) {
290 m_helper_invocation = get_temp_register();
291 auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
292 emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
293 GPRVector dst({m_helper_invocation, dummy, dummy, dummy});
294
295 auto vtx = new FetchInstruction(dst, m_helper_invocation,
296 R600_BUFFER_INFO_CONST_BUFFER, bim_none);
297 vtx->set_flag(vtx_vpm);
298 vtx->set_flag(vtx_use_tc);
299 vtx->set_dest_swizzle({4,7,7,7});
300 emit_instruction(vtx);
301 }
302 }
303
do_emit_store_deref(const nir_variable * out_var,nir_intrinsic_instr * instr)304 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
305 {
306 if (out_var->data.location == FRAG_RESULT_COLOR)
307 return emit_export_pixel(out_var, instr, m_dual_source_blend ? 1 : m_max_color_exports);
308
309 if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
310 out_var->data.location <= FRAG_RESULT_DATA7) ||
311 out_var->data.location == FRAG_RESULT_DEPTH ||
312 out_var->data.location == FRAG_RESULT_STENCIL ||
313 out_var->data.location == FRAG_RESULT_SAMPLE_MASK)
314 return emit_export_pixel(out_var, instr, 1);
315
316 sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
317 out_var->data.location << "(" << out_var->data.driver_location << ")\n";
318 return false;
319 }
320
do_process_outputs(nir_variable * output)321 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
322 {
323 sfn_log << SfnLog::io << "Parse output variable "
324 << output->name << " @" << output->data.location
325 << "@dl:" << output->data.driver_location
326 << " dual source idx: " << output->data.index
327 << "\n";
328
329 ++sh_info().noutput;
330 r600_shader_io& io = sh_info().output[output->data.driver_location];
331 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
332 &io.name, &io.sid);
333
334 /* Check whether this code has become obsolete by the IO vectorization */
335 unsigned num_components = 4;
336 unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
337 if (vector_elements)
338 num_components = vector_elements;
339 unsigned component = output->data.location_frac;
340
341 for (unsigned j = component; j < num_components + component; j++)
342 io.write_mask |= 1 << j;
343
344 int loc = output->data.location;
345 if (loc == FRAG_RESULT_COLOR &&
346 (m_nir.info.outputs_written & (1ull << loc)) &&
347 !m_dual_source_blend) {
348 sh_info().fs_write_all = true;
349 }
350
351 if (output->data.location == FRAG_RESULT_COLOR ||
352 (output->data.location >= FRAG_RESULT_DATA0 &&
353 output->data.location <= FRAG_RESULT_DATA7)) {
354 ++m_max_counted_color_exports;
355
356 if (m_max_counted_color_exports > 1)
357 sh_info().fs_write_all = false;
358 return true;
359 }
360 if (output->data.location == FRAG_RESULT_DEPTH ||
361 output->data.location == FRAG_RESULT_STENCIL ||
362 output->data.location == FRAG_RESULT_SAMPLE_MASK) {
363 io.write_mask = 15;
364 return true;
365 }
366
367 return false;
368 }
369
emit_load_sample_mask_in(nir_intrinsic_instr * instr)370 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
371 {
372 auto dest = from_nir(instr->dest, 0);
373 assert(m_sample_id_reg);
374 assert(m_sample_mask_reg);
375
376 emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
377 emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
378 return true;
379 }
380
emit_intrinsic_instruction_override(nir_intrinsic_instr * instr)381 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
382 {
383 switch (instr->intrinsic) {
384 case nir_intrinsic_load_sample_mask_in:
385 if (m_apply_sample_mask) {
386 return emit_load_sample_mask_in(instr);
387 } else
388 return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
389 case nir_intrinsic_load_sample_id:
390 return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
391 case nir_intrinsic_load_front_face:
392 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
393 case nir_intrinsic_interp_deref_at_sample:
394 return emit_interp_deref_at_sample(instr);
395 case nir_intrinsic_interp_deref_at_offset:
396 return emit_interp_deref_at_offset(instr);
397 case nir_intrinsic_interp_deref_at_centroid:
398 return emit_interp_deref_at_centroid(instr);
399 case nir_intrinsic_load_sample_pos:
400 return emit_load_sample_pos(instr);
401 case nir_intrinsic_load_helper_invocation:
402 return load_preloaded_value(instr->dest, 0, m_helper_invocation);
403 default:
404 return false;
405 }
406 }
407
load_front_face()408 void FragmentShaderFromNir::load_front_face()
409 {
410 assert(m_front_face_reg);
411 if (m_front_face_loaded)
412 return;
413
414 auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
415 Value::zero, {alu_write, alu_last_instr});
416 m_front_face_loaded = true;
417 emit_instruction(ir);
418 }
419
emit_load_sample_pos(nir_intrinsic_instr * instr)420 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
421 {
422 GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
423 auto fetch = new FetchInstruction(vc_fetch,
424 no_index_offset,
425 fmt_32_32_32_32_float,
426 vtx_nf_scaled,
427 vtx_es_none,
428 m_sample_id_reg,
429 dest,
430 0,
431 false,
432 0xf,
433 R600_BUFFER_INFO_CONST_BUFFER,
434 0,
435 bim_none,
436 false,
437 false,
438 0,
439 0,
440 0,
441 PValue(),
442 {0,1,2,3});
443 fetch->set_flag(vtx_srf_mode);
444 emit_instruction(fetch);
445 return true;
446 }
447
emit_interp_deref_at_sample(nir_intrinsic_instr * instr)448 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
449 {
450 GPRVector slope = get_temp_vec4();
451
452 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
453 from_nir_with_fetch_constant(instr->src[1], 0),
454 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
455 fetch->set_flag(vtx_srf_mode);
456 emit_instruction(fetch);
457
458 GPRVector grad = get_temp_vec4();
459 auto var = get_deref_location(instr->src[0]);
460 assert(var);
461
462 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
463
464 auto interpolator = m_interpolator[1];
465 assert(interpolator.enabled);
466 PValue dummy(new GPRValue(interpolator.i->sel(), 0));
467
468 GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
469
470 auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
471 tex->set_flag(TexInstruction::grad_fine);
472 tex->set_flag(TexInstruction::x_unnormalized);
473 tex->set_flag(TexInstruction::y_unnormalized);
474 tex->set_flag(TexInstruction::z_unnormalized);
475 tex->set_flag(TexInstruction::w_unnormalized);
476 tex->set_dest_swizzle({0,1,7,7});
477 emit_instruction(tex);
478
479 tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
480 tex->set_flag(TexInstruction::x_unnormalized);
481 tex->set_flag(TexInstruction::y_unnormalized);
482 tex->set_flag(TexInstruction::z_unnormalized);
483 tex->set_flag(TexInstruction::w_unnormalized);
484 tex->set_flag(TexInstruction::grad_fine);
485 tex->set_dest_swizzle({7,7,0,1});
486 emit_instruction(tex);
487
488 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
489 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
490
491 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
492 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
493
494 Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
495
496 auto dst = vec_from_nir(instr->dest, 4);
497 int num_components = instr->dest.is_ssa ?
498 instr->dest.ssa.num_components:
499 instr->dest.reg.reg->num_components;
500
501 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
502
503 return true;
504 }
505
emit_interp_deref_at_offset(nir_intrinsic_instr * instr)506 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
507 {
508 int temp = allocate_temp_register();
509
510 GPRVector help(temp, {0,1,2,3});
511
512 auto var = get_deref_location(instr->src[0]);
513 assert(var);
514
515 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
516 auto interpolator = m_interpolator[io.ij_index()];
517 PValue dummy(new GPRValue(interpolator.i->sel(), 0));
518
519 GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
520
521 auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
522 getgradh->set_dest_swizzle({0,1,7,7});
523 getgradh->set_flag(TexInstruction::x_unnormalized);
524 getgradh->set_flag(TexInstruction::y_unnormalized);
525 getgradh->set_flag(TexInstruction::z_unnormalized);
526 getgradh->set_flag(TexInstruction::w_unnormalized);
527 getgradh->set_flag(TexInstruction::grad_fine);
528 emit_instruction(getgradh);
529
530 auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
531 getgradv->set_dest_swizzle({7,7,0,1});
532 getgradv->set_flag(TexInstruction::x_unnormalized);
533 getgradv->set_flag(TexInstruction::y_unnormalized);
534 getgradv->set_flag(TexInstruction::z_unnormalized);
535 getgradv->set_flag(TexInstruction::w_unnormalized);
536 getgradv->set_flag(TexInstruction::grad_fine);
537 emit_instruction(getgradv);
538
539 PValue ofs_x = from_nir(instr->src[1], 0);
540 PValue ofs_y = from_nir(instr->src[1], 1);
541 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
542 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
543 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
544 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
545
546 Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
547
548 auto dst = vec_from_nir(instr->dest, 4);
549 load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
550 var->data.location_frac);
551
552 return true;
553 }
554
emit_interp_deref_at_centroid(nir_intrinsic_instr * instr)555 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
556 {
557 auto var = get_deref_location(instr->src[0]);
558 assert(var);
559
560 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
561 io.set_uses_interpolate_at_centroid();
562
563 int ij_index = io.ij_index() >= 3 ? 5 : 2;
564 assert (m_interpolator[ij_index].enabled);
565 auto ip = m_interpolator[ij_index];
566
567 int num_components = nir_dest_num_components(instr->dest);
568
569 auto dst = vec_from_nir(instr->dest, 4);
570 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
571 return true;
572 }
573
574
do_emit_load_deref(const nir_variable * in_var,nir_intrinsic_instr * instr)575 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
576 {
577 if (in_var->data.location == VARYING_SLOT_POS) {
578 assert(instr->dest.is_ssa);
579
580 for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
581 inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
582 }
583 return true;
584 }
585
586 if (in_var->data.location == VARYING_SLOT_FACE)
587 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
588
589 // todo: replace io with ShaderInputVarying
590 auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
591 unsigned num_components = 4;
592
593
594 if (instr->dest.is_ssa) {
595 num_components = instr->dest.ssa.num_components;
596 } else {
597 num_components = instr->dest.reg.reg->num_components;
598 }
599
600 auto dst = vec_from_nir(instr->dest, 4);
601
602 sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
603 << "].gpr=" << dst.sel()
604 << " interp=" << io.ij_index()
605 << "\n";
606
607 io.set_gpr(dst.sel());
608
609 auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
610
611 load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
612
613 /* These results are expected starting in slot x..*/
614 if (in_var->data.location_frac > 0) {
615 int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
616 instr->dest.reg.reg->num_components;
617 AluInstruction *ir = nullptr;
618 for (int i = 0; i < n; ++i) {
619 ir = new AluInstruction(op1_mov, dst[i],
620 dst[i + in_var->data.location_frac], {alu_write});
621 emit_instruction(ir);
622 }
623 if (ir)
624 ir->set_flag(alu_last_instr);
625 }
626
627
628 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
629
630 auto & color_input = static_cast<ShaderInputColor&> (io);
631 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
632
633 bgio.set_gpr(allocate_temp_register());
634
635 GPRVector bgcol(bgio.gpr(), {0,1,2,3});
636 load_interpolated(bgcol, bgio, ip, num_components, 0);
637
638 load_front_face();
639
640 AluInstruction *ir = nullptr;
641 for (unsigned i = 0; i < 4 ; ++i) {
642 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
643 emit_instruction(ir);
644 }
645 if (ir)
646 ir->set_flag(alu_last_instr);
647 }
648
649 return true;
650 }
651
load_interpolated(GPRVector & dest,ShaderInput & io,const Interpolator & ip,int num_components,int start_comp)652 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
653 ShaderInput& io, const Interpolator &ip,
654 int num_components, int start_comp)
655 {
656 // replace io with ShaderInputVarying
657 if (io.interpolate() > 0) {
658
659 sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
660
661 if (num_components == 1) {
662 switch (start_comp) {
663 case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
664 case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
665 case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
666 case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
667 default:
668 assert(0);
669 }
670 }
671
672 if (num_components == 2) {
673 switch (start_comp) {
674 case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
675 case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
676 case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
677 load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
678 default:
679 assert(0);
680 }
681 }
682
683 if (num_components == 3 && start_comp == 0)
684 return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
685 load_interpolated_one_comp(dest, io, ip, op2_interp_z);
686
687 int full_write_mask = ((1 << num_components) - 1) << start_comp;
688
689 bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
690 success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
691 return success;
692
693 } else {
694 AluInstruction *ir = nullptr;
695 for (unsigned i = 0; i < 4 ; ++i) {
696 ir = new AluInstruction(op1_interp_load_p0, dest[i],
697 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
698 EmitInstruction::write);
699 emit_instruction(ir);
700 }
701 ir->set_flag(alu_last_instr);
702 }
703 return true;
704 }
705
load_interpolated_one_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op)706 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
707 ShaderInput& io, const Interpolator& ip, EAluOp op)
708 {
709 for (unsigned i = 0; i < 2 ; ++i) {
710 int chan = i;
711 if (op == op2_interp_z)
712 chan += 2;
713
714
715 auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
716 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
717 i == 0 ? EmitInstruction::write : EmitInstruction::last);
718 dest.pin_to_channel(chan);
719
720 ir->set_bank_swizzle(alu_vec_210);
721 emit_instruction(ir);
722 }
723 return true;
724 }
725
load_interpolated_two_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,int writemask)726 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
727 const Interpolator& ip, EAluOp op, int writemask)
728 {
729 AluInstruction *ir = nullptr;
730 for (unsigned i = 0; i < 4 ; ++i) {
731 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
732 (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
733 dest.pin_to_channel(i);
734 ir->set_bank_swizzle(alu_vec_210);
735 emit_instruction(ir);
736 }
737 ir->set_flag(alu_last_instr);
738 return true;
739 }
740
load_interpolated_two_comp_for_one(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,UNUSED int start,int comp)741 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
742 ShaderInput& io, const Interpolator& ip,
743 EAluOp op, UNUSED int start, int comp)
744 {
745 AluInstruction *ir = nullptr;
746 for (int i = 0; i < 4 ; ++i) {
747 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
748 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
749 i == comp ? EmitInstruction::write : EmitInstruction::empty);
750 ir->set_bank_swizzle(alu_vec_210);
751 dest.pin_to_channel(i);
752 emit_instruction(ir);
753 }
754 ir->set_flag(alu_last_instr);
755 return true;
756 }
757
758
emit_export_pixel(const nir_variable * out_var,nir_intrinsic_instr * instr,int outputs)759 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
760 {
761 std::array<uint32_t,4> swizzle;
762 unsigned writemask = nir_intrinsic_write_mask(instr);
763 switch (out_var->data.location) {
764 case FRAG_RESULT_DEPTH:
765 writemask = 1;
766 swizzle = {0,7,7,7};
767 break;
768 case FRAG_RESULT_STENCIL:
769 writemask = 2;
770 swizzle = {7,0,7,7};
771 break;
772 case FRAG_RESULT_SAMPLE_MASK:
773 writemask = 4;
774 swizzle = {7,7,0,7};
775 break;
776 default:
777 for (int i = 0; i < 4; ++i) {
778 swizzle[i] = (i < instr->num_components) ? i : 7;
779 }
780 }
781
782 auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
783
784 set_output(out_var->data.driver_location, value.sel());
785
786 if (out_var->data.location == FRAG_RESULT_COLOR ||
787 (out_var->data.location >= FRAG_RESULT_DATA0 &&
788 out_var->data.location <= FRAG_RESULT_DATA7)) {
789 for (int k = 0 ; k < outputs; ++k) {
790
791 unsigned location = (m_dual_source_blend ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;
792
793 sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";
794
795 if (location >= m_max_color_exports) {
796 sfn_log << SfnLog::io << "Pixel output loc:" << location
797 << " dl:" << out_var->data.location
798 << " skipped because we have only " << m_max_color_exports << " CBs\n";
799 continue;
800 }
801
802 m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
803
804 if (sh_info().ps_export_highest < location)
805 sh_info().ps_export_highest = location;
806
807 sh_info().nr_ps_color_exports++;
808
809 unsigned mask = (0xfu << (location * 4));
810 sh_info().ps_color_export_mask |= mask;
811
812 emit_export_instruction(m_last_pixel_export);
813 };
814 } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
815 out_var->data.location == FRAG_RESULT_STENCIL ||
816 out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
817 m_depth_exports++;
818 emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
819 } else {
820 return false;
821 }
822 return true;
823 }
824
do_finalize()825 void FragmentShaderFromNir::do_finalize()
826 {
827 // update shader io info and set LDS etc.
828 sh_info().ninput = m_shaderio.inputs().size();
829
830 sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
831 for (size_t i = 0; i < sh_info().ninput; ++i) {
832 int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
833 m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
834 m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
835 }
836
837 sh_info().two_side = m_shaderio.two_sided();
838 sh_info().nlds = m_shaderio.nlds();
839
840 sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
841
842 if (sh_info().fs_write_all) {
843 sh_info().nr_ps_max_color_exports = m_max_color_exports;
844 }
845
846 if (!m_last_pixel_export) {
847 GPRVector v(0, {7,7,7,7});
848 m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
849 sh_info().nr_ps_color_exports++;
850 sh_info().ps_color_export_mask = 0xf;
851 emit_export_instruction(m_last_pixel_export);
852 }
853
854 m_last_pixel_export->set_last();
855
856 if (sh_info().fs_write_all)
857 sh_info().nr_ps_max_color_exports = 8;
858 }
859
860 }
861