1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30
31 #include "gallium/drivers/r600/r600_shader.h"
32
33 namespace r600 {
34
35 using std::vector;
36
EmitAluInstruction(ShaderFromNirProcessor & processor)37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38 EmitInstruction (processor)
39 {
40
41 }
42
do_emit(nir_instr * ir)43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45 const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46
47 r600::sfn_log << SfnLog::instr << "emit '"
48 << *ir
49 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50 << "' (" << __func__ << ")\n";
51
52 preload_src(instr);
53
54 switch (instr.op) {
55 case nir_op_f2b32: return emit_alu_f2b32(instr);
56 case nir_op_b2f32: return emit_alu_b2f(instr);
57 case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
58 case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
59 case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
60 case nir_op_b2b1:
61 case nir_op_b2b32:
62 case nir_op_mov:return emit_mov(instr);
63 case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
64 case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
65 case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
66 case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
67 case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
68 case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
69 case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
70 case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
71 case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
72 case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
73
74 case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
75 case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
76 case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
77 case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
78 case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
79 case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
80
81 case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
82 case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
83 case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
84
85 case nir_op_fsign: return emit_fsign(instr);
86 case nir_op_fdph: return emit_fdph(instr);
87
88 case nir_op_ibitfield_extract: return emit_bitfield_extract(instr, op3_bfe_int);
89 case nir_op_ubitfield_extract: return emit_bitfield_extract(instr, op3_bfe_uint);
90 case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
91 case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
92 case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
93
94 case nir_op_ieq32:
95 case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
96
97 case nir_op_ine32:
98 case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
99 case nir_op_uge32:
100 case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
101 case nir_op_ige32:
102 case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
103 case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
104 case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
105
106 case nir_op_ult32:
107 case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
108
109 case nir_op_ilt32:
110 case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
111 case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
112 case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
113 case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
114 case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
115 case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
116 case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
117 case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
118 case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
119 case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
120 case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
121 case nir_op_iabs: return emit_alu_iabs(instr);
122 case nir_op_ineg: return emit_alu_ineg(instr);
123 case nir_op_idiv: return emit_alu_div_int(instr, true, false);
124 case nir_op_udiv: return emit_alu_div_int(instr, false, false);
125 case nir_op_umod: return emit_alu_div_int(instr, false, true);
126 case nir_op_isign: return emit_alu_isign(instr);
127
128 case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
129
130 case nir_op_flt32:
131 case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
132
133 case nir_op_fge32:
134 case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
135 case nir_op_fneu32:
136 case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
137 case nir_op_feq32:
138 case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
139
140 case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
141 case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
142 case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
143 case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
144 case nir_op_fadd: return emit_alu_op2(instr, op2_add);
145 case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
146 case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
147 case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
148 case nir_op_fdot2: return emit_dot(instr, 2);
149 case nir_op_fdot3: return emit_dot(instr, 3);
150 case nir_op_fdot4: return emit_dot(instr, 4);
151
152 case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
153 case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
154 case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
155
156 case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
157 case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
158 case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
159
160 case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
161 case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
162 case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
163
164 case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
165 case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
166 case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
167
168
169 case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
170 case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
171 case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
172 case nir_op_vec2: return emit_create_vec(instr, 2);
173 case nir_op_vec3: return emit_create_vec(instr, 3);
174 case nir_op_vec4: return emit_create_vec(instr, 4);
175
176 case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
177 case nir_op_ufind_msb: return emit_find_msb(instr, false);
178 case nir_op_ifind_msb: return emit_find_msb(instr, true);
179 case nir_op_b2i32: return emit_b2i32(instr);
180 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
181 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
182 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
183 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
184 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
185 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
186
187
188 /* These are in the ALU instruction list, but they should be texture instructions */
189 case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
190 case nir_op_fddx_coarse:
191 case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
192
193 case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
194 case nir_op_fddy_coarse:
195 case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
196
197 case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
198 case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
199 default:
200 return false;
201 }
202 }
203
preload_src(const nir_alu_instr & instr)204 void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
205 {
206 const nir_op_info *op_info = &nir_op_infos[instr.op];
207 assert(op_info->num_inputs <= 4);
208
209 unsigned nsrc_comp = num_src_comp(instr);
210 sfn_log << SfnLog::reg << "Preload:\n";
211 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
212 for (unsigned c = 0; c < nsrc_comp; ++c) {
213 m_src[i][c] = from_nir(instr.src[i], c);
214 sfn_log << SfnLog::reg << " " << *m_src[i][c];
215
216 }
217 sfn_log << SfnLog::reg << "\n";
218 }
219 if (instr.op == nir_op_fdph) {
220 m_src[1][3] = from_nir(instr.src[1], 3);
221 sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
222 }
223
224 split_constants(instr, nsrc_comp);
225 }
226
num_src_comp(const nir_alu_instr & instr)227 unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
228 {
229 switch (instr.op) {
230 case nir_op_fdot2:
231 case nir_op_bany_inequal2:
232 case nir_op_ball_iequal2:
233 case nir_op_bany_fnequal2:
234 case nir_op_ball_fequal2:
235 return 2;
236
237 case nir_op_fdot3:
238 case nir_op_bany_inequal3:
239 case nir_op_ball_iequal3:
240 case nir_op_bany_fnequal3:
241 case nir_op_ball_fequal3:
242 return 3;
243
244 case nir_op_fdot4:
245 case nir_op_fdph:
246 case nir_op_bany_inequal4:
247 case nir_op_ball_iequal4:
248 case nir_op_bany_fnequal4:
249 case nir_op_ball_fequal4:
250 return 4;
251
252 case nir_op_vec2:
253 case nir_op_vec3:
254 case nir_op_vec4:
255 return 1;
256
257 default:
258 return nir_dest_num_components(instr.dest.dest);
259
260 }
261 }
262
263
264
split_constants(const nir_alu_instr & instr,unsigned nsrc_comp)265 void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
266 {
267 const nir_op_info *op_info = &nir_op_infos[instr.op];
268 if (op_info->num_inputs < 2)
269 return;
270
271 int nconst = 0;
272 std::array<const UniformValue *,4> c;
273 std::array<int,4> idx;
274 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
275 PValue& src = m_src[i][0];
276 assert(src);
277 sfn_log << SfnLog::reg << "Split test " << *src;
278
279 if (src->type() == Value::kconst) {
280 c[nconst] = static_cast<const UniformValue *>(src.get());
281 idx[nconst++] = i;
282 sfn_log << SfnLog::reg << " is constant " << i;
283 }
284 sfn_log << SfnLog::reg << "\n";
285 }
286
287 if (nconst < 2)
288 return;
289
290 unsigned sel = c[0]->sel();
291 unsigned kcache = c[0]->kcache_bank();
292 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
293
294 for (int i = 1; i < nconst; ++i) {
295 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
296 if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
297 AluInstruction *ir = nullptr;
298 auto v = get_temp_vec4();
299 for (unsigned k = 0; k < nsrc_comp; ++k) {
300 ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
301 emit_instruction(ir);
302 m_src[idx[i]][k] = v[k];
303 }
304 make_last(ir);
305 }
306 }
307 }
308
emit_alu_inot(const nir_alu_instr & instr)309 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
310 {
311 if (instr.src[0].negate || instr.src[0].abs) {
312 std::cerr << "source modifiers not supported with int ops\n";
313 return false;
314 }
315
316 AluInstruction *ir = nullptr;
317 for (int i = 0; i < 4 ; ++i) {
318 if (instr.dest.write_mask & (1 << i)){
319 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
320 m_src[0][i], write);
321 emit_instruction(ir);
322 }
323 }
324 make_last(ir);
325 return true;
326 }
327
emit_alu_op1(const nir_alu_instr & instr,EAluOp opcode,const AluOpFlags & flags)328 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
329 const AluOpFlags& flags)
330 {
331 AluInstruction *ir = nullptr;
332 for (int i = 0; i < 4 ; ++i) {
333 if (instr.dest.write_mask & (1 << i)){
334 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
335 m_src[0][i], write);
336
337 if (flags.test(alu_src0_abs) || instr.src[0].abs)
338 ir->set_flag(alu_src0_abs);
339
340 if (instr.src[0].negate ^ flags.test(alu_src0_neg))
341 ir->set_flag(alu_src0_neg);
342
343 if (flags.test(alu_dst_clamp) || instr.dest.saturate)
344 ir->set_flag(alu_dst_clamp);
345
346 emit_instruction(ir);
347 }
348 }
349 make_last(ir);
350
351 return true;
352 }
353
emit_mov(const nir_alu_instr & instr)354 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
355 {
356 /* If the op is a plain move beween SSA values we can just forward
357 * the register reference to the original register */
358 if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
359 !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
360 bool result = true;
361 for (int i = 0; i < 4 ; ++i) {
362 if (instr.dest.write_mask & (1 << i)){
363 result &= inject_register(instr.dest.dest.ssa.index, i,
364 m_src[0][i], true);
365 }
366 }
367 return result;
368 } else {
369 return emit_alu_op1(instr, op1_mov);
370 }
371 }
372
emit_alu_trig_op1(const nir_alu_instr & instr,EAluOp opcode)373 bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
374 {
375 // normalize by dividing by 2*PI, shift by 0.5, take fraction, and
376 // then shift back
377
378 const float inv_2_pi = 0.15915494f;
379
380 PValue v[4]; // this might need some additional temp register creation
381 for (unsigned i = 0; i < 4 ; ++i)
382 v[i] = from_nir(instr.dest, i);
383
384 PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
385 AluInstruction *ir = nullptr;
386 for (unsigned i = 0; i < 4 ; ++i) {
387 if (!(instr.dest.write_mask & (1 << i)))
388 continue;
389 ir = new AluInstruction(op3_muladd_ieee, v[i],
390 {m_src[0][i], inv_pihalf, Value::zero_dot_5},
391 {alu_write});
392 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
393 emit_instruction(ir);
394 }
395 make_last(ir);
396
397 for (unsigned i = 0; i < 4 ; ++i) {
398 if (!(instr.dest.write_mask & (1 << i)))
399 continue;
400 ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
401 emit_instruction(ir);
402 }
403 make_last(ir);
404
405 for (unsigned i = 0; i < 4 ; ++i) {
406 if (!(instr.dest.write_mask & (1 << i)))
407 continue;
408 ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
409 ir->set_flag(alu_src1_neg);
410 emit_instruction(ir);
411 }
412 make_last(ir);
413
414 for (unsigned i = 0; i < 4 ; ++i) {
415 if (!(instr.dest.write_mask & (1 << i)))
416 continue;
417
418 ir = new AluInstruction(opcode, v[i], v[i], last_write);
419 emit_instruction(ir);
420 }
421 return true;
422 }
423
emit_alu_trans_op1(const nir_alu_instr & instr,EAluOp opcode,bool absolute)424 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
425 bool absolute)
426 {
427 AluInstruction *ir = nullptr;
428 std::set<int> src_idx;
429
430 if (get_chip_class() == CAYMAN) {
431 int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
432 for (int i = 0; i < last_slot; ++i) {
433 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
434 m_src[0][0], instr.dest.write_mask & (1 << i) ? write : empty);
435 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
436 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
437 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
438
439 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
440
441 emit_instruction(ir);
442 }
443 } else {
444 for (int i = 0; i < 4 ; ++i) {
445 if (instr.dest.write_mask & (1 << i)){
446 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
447 m_src[0][i], last_write);
448 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
449 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
450 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
451 emit_instruction(ir);
452 }
453 }
454 }
455 return true;
456 }
457
emit_alu_f2i32_or_u32(const nir_alu_instr & instr,EAluOp op)458 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
459 {
460 AluInstruction *ir = nullptr;
461 std::array<PValue, 4> v;
462
463 for (int i = 0; i < 4; ++i) {
464 if (!(instr.dest.write_mask & (1 << i)))
465 continue;
466 v[i] = from_nir(instr.dest, i);
467 ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
468 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
469 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
470 emit_instruction(ir);
471 }
472 make_last(ir);
473
474 for (int i = 0; i < 4; ++i) {
475 if (!(instr.dest.write_mask & (1 << i)))
476 continue;
477 ir = new AluInstruction(op, v[i], v[i], {alu_write});
478 emit_instruction(ir);
479 if (op == op1_flt_to_uint)
480 make_last(ir);
481 }
482 make_last(ir);
483
484 return true;
485 }
486
emit_alu_f2b32(const nir_alu_instr & instr)487 bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
488 {
489 AluInstruction *ir = nullptr;
490 for (int i = 0; i < 4 ; ++i) {
491 if (instr.dest.write_mask & (1 << i)){
492 ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
493 m_src[0][i], literal(0.0f), write);
494 emit_instruction(ir);
495 }
496 }
497 make_last(ir);
498 return true;
499 }
500
emit_find_msb(const nir_alu_instr & instr,bool sgn)501 bool EmitAluInstruction::emit_find_msb(const nir_alu_instr& instr, bool sgn)
502 {
503 int sel_tmp = allocate_temp_register();
504 int sel_tmp2 = allocate_temp_register();
505 GPRVector tmp(sel_tmp, {0,1,2,3});
506 GPRVector tmp2(sel_tmp2, {0,1,2,3});
507 AluInstruction *ir = nullptr;
508 EAluOp opcode = sgn ? op1_ffbh_int : op1_ffbh_uint;
509 for (int i = 0; i < 4; ++i) {
510 if (!(instr.dest.write_mask & (1 << i)))
511 continue;
512
513 ir = new AluInstruction(opcode, tmp.reg_i(i), m_src[0][i], write);
514 emit_instruction(ir);
515 }
516 make_last(ir);
517
518 for (int i = 0; i < 4 ; ++i) {
519 if (!(instr.dest.write_mask & (1 << i)))
520 continue;
521
522 ir = new AluInstruction(op2_sub_int, tmp2.reg_i(i),
523 PValue(new LiteralValue(31u, 0)), tmp.reg_i(i), write);
524 emit_instruction(ir);
525 }
526 make_last(ir);
527
528 for (int i = 0; i < 4 ; ++i) {
529 if (!(instr.dest.write_mask & (1 << i)))
530 continue;
531
532 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), tmp.reg_i(i),
533 tmp2.reg_i(i), tmp.reg_i(i), write);
534 emit_instruction(ir);
535 }
536 make_last(ir);
537
538 return true;
539 }
540
emit_b2i32(const nir_alu_instr & instr)541 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
542 {
543 AluInstruction *ir = nullptr;
544 for (int i = 0; i < 4 ; ++i) {
545 if (!(instr.dest.write_mask & (1 << i)))
546 continue;
547
548 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
549 m_src[0][i], Value::one_i, write);
550 emit_instruction(ir);
551 }
552 make_last(ir);
553
554 return true;
555 }
556
emit_pack_64_2x32_split(const nir_alu_instr & instr)557 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
558 {
559 AluInstruction *ir = nullptr;
560 for (unsigned i = 0; i < 2; ++i) {
561 if (!(instr.dest.write_mask & (1 << i)))
562 continue;
563 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
564 m_src[0][i], write);
565 emit_instruction(ir);
566 }
567 ir->set_flag(alu_last_instr);
568 return true;
569 }
570
emit_unpack_64_2x32_split(const nir_alu_instr & instr,unsigned comp)571 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
572 {
573 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
574 m_src[0][comp], last_write));
575 return true;
576 }
577
emit_create_vec(const nir_alu_instr & instr,unsigned nc)578 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
579 {
580 AluInstruction *ir = nullptr;
581 std::set<int> src_slot;
582 for(unsigned i = 0; i < nc; ++i) {
583 if (instr.dest.write_mask & (1 << i)){
584 auto src = m_src[i][0];
585 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
586 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
587
588 // FIXME: This is a rather crude approach to fix the problem that
589 // r600 can't read from four different slots of the same component
590 // here we check only for the register index
591 if (src->type() == Value::gpr)
592 src_slot.insert(src->sel());
593 if (src_slot.size() >= 3) {
594 src_slot.clear();
595 ir->set_flag(alu_last_instr);
596 }
597 emit_instruction(ir);
598 }
599 }
600 if (ir)
601 ir->set_flag(alu_last_instr);
602 return true;
603 }
604
emit_dot(const nir_alu_instr & instr,int n)605 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
606 {
607 const nir_alu_src& src0 = instr.src[0];
608 const nir_alu_src& src1 = instr.src[1];
609
610 AluInstruction *ir = nullptr;
611 for (int i = 0; i < n ; ++i) {
612 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
613 m_src[0][i], m_src[1][i],
614 instr.dest.write_mask & (1 << i) ? write : empty);
615
616 if (src0.negate) ir->set_flag(alu_src0_neg);
617 if (src0.abs) ir->set_flag(alu_src0_abs);
618 if (src1.negate) ir->set_flag(alu_src1_neg);
619 if (src1.abs) ir->set_flag(alu_src1_abs);
620
621 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
622 emit_instruction(ir);
623 }
624 for (int i = n; i < 4 ; ++i) {
625 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
626 Value::zero, Value::zero,
627 instr.dest.write_mask & (1 << i) ? write : empty);
628 emit_instruction(ir);
629 }
630
631 if (ir)
632 ir->set_flag(alu_last_instr);
633 return true;
634 }
635
emit_fdph(const nir_alu_instr & instr)636 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
637 {
638 const nir_alu_src& src0 = instr.src[0];
639 const nir_alu_src& src1 = instr.src[1];
640
641 AluInstruction *ir = nullptr;
642 for (int i = 0; i < 3 ; ++i) {
643 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
644 m_src[0][i], m_src[1][i],
645 instr.dest.write_mask & (1 << i) ? write : empty);
646 if (src0.negate) ir->set_flag(alu_src0_neg);
647 if (src0.abs) ir->set_flag(alu_src0_abs);
648 if (src1.negate) ir->set_flag(alu_src1_neg);
649 if (src1.abs) ir->set_flag(alu_src1_abs);
650 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
651 emit_instruction(ir);
652 }
653
654 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
655 m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
656 if (src1.negate) ir->set_flag(alu_src1_neg);
657 if (src1.abs) ir->set_flag(alu_src1_abs);
658 emit_instruction(ir);
659
660 ir->set_flag(alu_last_instr);
661 return true;
662
663 }
664
emit_alu_i2orf2_b1(const nir_alu_instr & instr,EAluOp op)665 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
666 {
667 AluInstruction *ir = nullptr;
668 for (int i = 0; i < 4 ; ++i) {
669 if (instr.dest.write_mask & (1 << i)) {
670 ir = new AluInstruction(op, from_nir(instr.dest, i),
671 m_src[0][i], Value::zero,
672 write);
673 emit_instruction(ir);
674 }
675 }
676 if (ir)
677 ir->set_flag(alu_last_instr);
678 return true;
679 }
680
emit_alu_b2f(const nir_alu_instr & instr)681 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
682 {
683 AluInstruction *ir = nullptr;
684 for (int i = 0; i < 4 ; ++i) {
685 if (instr.dest.write_mask & (1 << i)){
686 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
687 m_src[0][i], Value::one_f, write);
688 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
689 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
690 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
691 emit_instruction(ir);
692 }
693 }
694 if (ir)
695 ir->set_flag(alu_last_instr);
696 return true;
697 }
698
emit_any_all_icomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)699 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
700 {
701
702 AluInstruction *ir = nullptr;
703 PValue v[4]; // this might need some additional temp register creation
704 for (unsigned i = 0; i < 4 ; ++i)
705 v[i] = from_nir(instr.dest, i);
706
707 EAluOp combine = all ? op2_and_int : op2_or_int;
708
709 /* For integers we can not use the modifiers, so this needs some emulation */
710 /* Should actually be lowered with NIR */
711 if (instr.src[0].negate == instr.src[1].negate &&
712 instr.src[0].abs == instr.src[1].abs) {
713
714 for (unsigned i = 0; i < nc ; ++i) {
715 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
716 emit_instruction(ir);
717 }
718 if (ir)
719 ir->set_flag(alu_last_instr);
720 } else {
721 std::cerr << "Negate in iequal/inequal not (yet) supported\n";
722 return false;
723 }
724
725 for (unsigned i = 0; i < nc/2 ; ++i) {
726 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
727 emit_instruction(ir);
728 }
729 if (ir)
730 ir->set_flag(alu_last_instr);
731
732 if (nc > 2) {
733 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
734 emit_instruction(ir);
735 }
736
737 return true;
738 }
739
emit_any_all_fcomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)740 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
741 {
742 AluInstruction *ir = nullptr;
743 PValue v[4]; // this might need some additional temp register creation
744 for (unsigned i = 0; i < 4 ; ++i)
745 v[i] = from_nir(instr.dest, i);
746
747 for (unsigned i = 0; i < nc ; ++i) {
748 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
749
750 if (instr.src[0].abs)
751 ir->set_flag(alu_src0_abs);
752 if (instr.src[0].negate)
753 ir->set_flag(alu_src0_neg);
754
755 if (instr.src[1].abs)
756 ir->set_flag(alu_src1_abs);
757 if (instr.src[1].negate)
758 ir->set_flag(alu_src1_neg);
759
760 emit_instruction(ir);
761 }
762 if (ir)
763 ir->set_flag(alu_last_instr);
764
765 for (unsigned i = 0; i < nc ; ++i) {
766 ir = new AluInstruction(op1_max4, v[i], v[i], write);
767 if (all) ir->set_flag(alu_src0_neg);
768 emit_instruction(ir);
769 }
770
771 for (unsigned i = nc; i < 4 ; ++i) {
772 ir = new AluInstruction(op1_max4, v[i],
773 all ? Value::one_f : Value::zero, write);
774 if (all)
775 ir->set_flag(alu_src0_neg);
776
777 emit_instruction(ir);
778 }
779
780 ir->set_flag(alu_last_instr);
781
782 if (all)
783 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
784 else
785 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
786
787 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
788 if (all)
789 ir->set_flag(alu_src1_neg);
790 emit_instruction(ir);
791
792 return true;
793 }
794
emit_any_all_fcomp2(const nir_alu_instr & instr,EAluOp op,bool all)795 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
796 {
797 AluInstruction *ir = nullptr;
798 PValue v[4]; // this might need some additional temp register creation
799 for (unsigned i = 0; i < 4 ; ++i)
800 v[i] = from_nir(instr.dest, i);
801
802 for (unsigned i = 0; i < 2 ; ++i) {
803 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
804 if (instr.src[0].abs)
805 ir->set_flag(alu_src0_abs);
806 if (instr.src[0].negate)
807 ir->set_flag(alu_src0_neg);
808
809 if (instr.src[1].abs)
810 ir->set_flag(alu_src1_abs);
811 if (instr.src[1].negate)
812 ir->set_flag(alu_src1_neg);
813
814 emit_instruction(ir);
815 }
816 if (ir)
817 ir->set_flag(alu_last_instr);
818
819 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
820 ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
821 emit_instruction(ir);
822
823 return true;
824 }
825
emit_alu_trans_op2(const nir_alu_instr & instr,EAluOp opcode)826 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
827 {
828 const nir_alu_src& src0 = instr.src[0];
829 const nir_alu_src& src1 = instr.src[1];
830
831 AluInstruction *ir = nullptr;
832
833 if (get_chip_class() == CAYMAN) {
834 int lasti = util_last_bit(instr.dest.write_mask);
835 for (int k = 0; k < lasti ; ++k) {
836 if (instr.dest.write_mask & (1 << k)) {
837
838 for (int i = 0; i < 4; i++) {
839 ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[0][k], (i == k) ? write : empty);
840 if (src0.negate) ir->set_flag(alu_src0_neg);
841 if (src0.abs) ir->set_flag(alu_src0_abs);
842 if (src1.negate) ir->set_flag(alu_src1_neg);
843 if (src1.abs) ir->set_flag(alu_src1_abs);
844 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
845 if (i == 3) ir->set_flag(alu_last_instr);
846 emit_instruction(ir);
847 }
848 }
849 }
850 } else {
851 for (int i = 0; i < 4 ; ++i) {
852 if (instr.dest.write_mask & (1 << i)){
853 ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
854 if (src0.negate) ir->set_flag(alu_src0_neg);
855 if (src0.abs) ir->set_flag(alu_src0_abs);
856 if (src1.negate) ir->set_flag(alu_src1_neg);
857 if (src1.abs) ir->set_flag(alu_src1_abs);
858 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
859 emit_instruction(ir);
860 }
861 }
862 }
863 return true;
864 }
865
emit_alu_op2_int(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts opts)866 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
867 {
868
869 const nir_alu_src& src0 = instr.src[0];
870 const nir_alu_src& src1 = instr.src[1];
871
872 if (src0.negate || src1.negate ||
873 src0.abs || src1.abs) {
874 std::cerr << "R600: don't support modifiers with integer operations";
875 return false;
876 }
877 return emit_alu_op2(instr, opcode, opts);
878 }
879
emit_alu_op2(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts ops)880 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
881 {
882 const nir_alu_src *src0 = &instr.src[0];
883 const nir_alu_src *src1 = &instr.src[1];
884
885 int idx0 = 0;
886 int idx1 = 1;
887 if (ops & op2_opt_reverse) {
888 std::swap(src0, src1);
889 std::swap(idx0, idx1);
890 }
891
892 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
893
894 AluInstruction *ir = nullptr;
895 for (int i = 0; i < 4 ; ++i) {
896 if (instr.dest.write_mask & (1 << i)){
897 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
898 m_src[idx0][i], m_src[idx1][i], write);
899
900 if (src0->negate) ir->set_flag(alu_src0_neg);
901 if (src0->abs) ir->set_flag(alu_src0_abs);
902 if (src1_negate) ir->set_flag(alu_src1_neg);
903 if (src1->abs) ir->set_flag(alu_src1_abs);
904 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
905 emit_instruction(ir);
906 }
907 }
908 if (ir)
909 ir->set_flag(alu_last_instr);
910 return true;
911 }
912
emit_alu_op2_split_src_mods(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts ops)913 bool EmitAluInstruction::emit_alu_op2_split_src_mods(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
914 {
915 const nir_alu_src *src0 = &instr.src[0];
916 const nir_alu_src *src1 = &instr.src[1];
917
918 if (ops & op2_opt_reverse)
919 std::swap(src0, src1);
920
921 GPRVector::Values v0;
922 for (int i = 0; i < 4 ; ++i)
923 v0[i] = m_src[0][i];
924
925 GPRVector::Values v1;
926 for (int i = 0; i < 4 ; ++i)
927 v1[i] = m_src[1][i];
928
929 if (src0->abs || src0->negate) {
930 int src0_tmp = allocate_temp_register();
931 GPRVector::Values v0_temp;
932 AluInstruction *ir = nullptr;
933 for (int i = 0; i < 4 ; ++i) {
934 if (instr.dest.write_mask & (1 << i)) {
935 v0_temp[i] = PValue(new GPRValue(src0_tmp, i));
936 ir = new AluInstruction(op1_mov, v0_temp[i], v0[i], write);
937 if (src0->abs) ir->set_flag(alu_src0_abs);
938 if (src0->negate) ir->set_flag(alu_src0_neg);
939 emit_instruction(ir);
940 v0[i] = v0_temp[i];
941 }
942 }
943 if (ir)
944 ir->set_flag(alu_last_instr);
945 }
946
947 if (src1->abs || src1->negate) {
948 int src1_tmp = allocate_temp_register();
949 GPRVector::Values v1_temp;
950 AluInstruction *ir = nullptr;
951 for (int i = 0; i < 4 ; ++i) {
952 if (instr.dest.write_mask & (1 << i)) {
953 v1_temp[i] = PValue(new GPRValue(src1_tmp, i));
954 ir = new AluInstruction(op1_mov, v1_temp[i], v1[i], {alu_write});
955 if (src1->abs) ir->set_flag(alu_src0_abs);
956 if (src1->negate) ir->set_flag(alu_src0_neg);
957 emit_instruction(ir);
958 v1[i] = v1_temp[i];
959 }
960 }
961 if (ir)
962 ir->set_flag(alu_last_instr);
963 }
964
965 AluInstruction *ir = nullptr;
966 for (int i = 0; i < 4 ; ++i) {
967 if (instr.dest.write_mask & (1 << i)){
968 ir = new AluInstruction(opcode, from_nir(instr.dest, i), {v0[i], v1[i]}, {alu_write});
969 emit_instruction(ir);
970 }
971 }
972 if (ir)
973 ir->set_flag(alu_last_instr);
974 return true;
975 }
976
977
emit_alu_isign(const nir_alu_instr & instr)978 bool EmitAluInstruction::emit_alu_isign(const nir_alu_instr& instr)
979 {
980 int sel_tmp = allocate_temp_register();
981 GPRVector tmp(sel_tmp, {0,1,2,3});
982
983 AluInstruction *ir = nullptr;
984 PValue help[4];
985
986 for (int i = 0; i < 4 ; ++i) {
987 if (instr.dest.write_mask & (1 << i)){
988 help[i] = from_nir(instr.dest, i);
989 auto s = m_src[0][i];
990 ir = new AluInstruction(op3_cndgt_int, help[i], s, Value::one_i, s, write);
991 emit_instruction(ir);
992 }
993 }
994 if (ir)
995 ir->set_flag(alu_last_instr);
996
997 for (int i = 0; i < 4 ; ++i) {
998 if (instr.dest.write_mask & (1 << i)){
999 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, help[i], write);
1000 emit_instruction(ir);
1001 }
1002 }
1003 if (ir)
1004 ir->set_flag(alu_last_instr);
1005
1006 for (int i = 0; i < 4 ; ++i) {
1007 if (instr.dest.write_mask & (1 << i)){
1008
1009 ir = new AluInstruction(op3_cndgt_int, help[i], tmp.reg_i(i),
1010 PValue(new LiteralValue(-1,0)), help[i], write);
1011 emit_instruction(ir);
1012 }
1013 }
1014 if (ir)
1015 ir->set_flag(alu_last_instr);
1016 return true;
1017 }
1018
emit_fsign(const nir_alu_instr & instr)1019 bool EmitAluInstruction::emit_fsign(const nir_alu_instr& instr)
1020 {
1021 PValue help[4];
1022 PValue src[4];
1023 AluInstruction *ir = nullptr;
1024
1025 for (int i = 0; i < 4 ; ++i) {
1026 help[i] = from_nir(instr.dest, i);
1027 src[i] = m_src[0][i];
1028 }
1029
1030 if (instr.src[0].abs) {
1031
1032 for (int i = 0; i < 4 ; ++i) {
1033 if (instr.dest.write_mask & (1 << i)){
1034 ir = new AluInstruction(op2_setgt, help[i], src[i], Value::zero, write);
1035 ir->set_flag(alu_src0_abs);
1036 emit_instruction(ir);
1037 }
1038 }
1039 if (ir)
1040 ir->set_flag(alu_last_instr);
1041
1042 if (instr.src[0].negate) {
1043 for (int i = 0; i < 4 ; ++i) {
1044 if (instr.dest.write_mask & (1 << i)){
1045 ir = new AluInstruction(op1_mov, help[i], help[i], write);
1046 ir->set_flag(alu_src0_neg);
1047 emit_instruction(ir);
1048 }
1049 }
1050 if (ir)
1051 ir->set_flag(alu_last_instr);
1052 }
1053
1054 return true;
1055 }
1056
1057 for (int i = 0; i < 4 ; ++i) {
1058 if (instr.dest.write_mask & (1 << i)){
1059 ir = new AluInstruction(op3_cndgt, help[i], src[i], Value::one_f, src[i], write);
1060 if (instr.src[0].negate) {
1061 ir->set_flag(alu_src0_neg);
1062 ir->set_flag(alu_src2_neg);
1063 }
1064 emit_instruction(ir);
1065 }
1066 }
1067
1068 if (ir)
1069 ir->set_flag(alu_last_instr);
1070
1071 for (int i = 0; i < 4 ; ++i) {
1072 if (instr.dest.write_mask & (1 << i)){
1073 ir = new AluInstruction(op3_cndgt, help[i], help[i], Value::one_f, help[i], write);
1074 ir->set_flag(alu_src0_neg);
1075 ir->set_flag(alu_src1_neg);
1076 emit_instruction(ir);
1077 }
1078 }
1079 if (ir)
1080 ir->set_flag(alu_last_instr);
1081 return true;
1082 }
1083
emit_alu_op3(const nir_alu_instr & instr,EAluOp opcode,std::array<uint8_t,3> reorder)1084 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
1085 std::array<uint8_t, 3> reorder)
1086 {
1087 const nir_alu_src *src[3];
1088 src[0] = &instr.src[reorder[0]];
1089 src[1] = &instr.src[reorder[1]];
1090 src[2] = &instr.src[reorder[2]];
1091
1092 AluInstruction *ir = nullptr;
1093 for (int i = 0; i < 4 ; ++i) {
1094 if (instr.dest.write_mask & (1 << i)){
1095 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
1096 m_src[reorder[0]][i],
1097 m_src[reorder[1]][i],
1098 m_src[reorder[2]][i],
1099 write);
1100
1101 if (src[0]->negate) ir->set_flag(alu_src0_neg);
1102 if (src[1]->negate) ir->set_flag(alu_src1_neg);
1103 if (src[2]->negate) ir->set_flag(alu_src2_neg);
1104
1105 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
1106 ir->set_flag(alu_write);
1107 emit_instruction(ir);
1108 }
1109 }
1110 make_last(ir);
1111 return true;
1112 }
1113
emit_alu_ineg(const nir_alu_instr & instr)1114 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
1115 {
1116 AluInstruction *ir = nullptr;
1117 for (int i = 0; i < 4 ; ++i) {
1118 if (instr.dest.write_mask & (1 << i)){
1119 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
1120 m_src[0][i], write);
1121 emit_instruction(ir);
1122 }
1123 }
1124 if (ir)
1125 ir->set_flag(alu_last_instr);
1126
1127 return true;
1128 }
1129
/* Printable characters for eight swizzle selector values.
 * NOTE(review): presumably the four channels, the constants 0 and 1, and
 * two placeholders for unknown/unused selectors - confirm against users. */
static const char swz[] = "xyzw" "01" "?_";
1131
1132
1133
emit_alu_iabs(const nir_alu_instr & instr)1134 bool EmitAluInstruction::emit_alu_iabs(const nir_alu_instr& instr)
1135 {
1136 int sel_tmp = allocate_temp_register();
1137 GPRVector tmp(sel_tmp, {0,1,2,3});
1138
1139 std::array<PValue,4> src;
1140 AluInstruction *ir = nullptr;
1141 for (int i = 0; i < 4 ; ++i) {
1142 if (instr.dest.write_mask & (1 << i)){
1143 ir = new AluInstruction(op2_sub_int, tmp.reg_i(i), Value::zero, m_src[0][i], write);
1144 emit_instruction(ir);
1145 }
1146 }
1147 make_last(ir);
1148
1149 for (int i = 0; i < 4 ; ++i) {
1150 if (instr.dest.write_mask & (1 << i)){
1151 ir = new AluInstruction(op3_cndge_int, from_nir(instr.dest, i), m_src[0][i],
1152 m_src[0][i], tmp.reg_i(i), write);
1153 emit_instruction(ir);
1154 }
1155 }
1156 make_last(ir);
1157 return true;
1158 }
1159
emit_alu_div_int(const nir_alu_instr & instr,bool use_signed,bool mod)1160 bool EmitAluInstruction::emit_alu_div_int(const nir_alu_instr& instr, bool use_signed, bool mod)
1161 {
1162
1163 int sel_tmp = allocate_temp_register();
1164 int sel_tmp0 = allocate_temp_register();
1165 int sel_tmp1 = allocate_temp_register();
1166
1167 PValue asrc1(new GPRValue(sel_tmp, 0));
1168 PValue asrc2(new GPRValue(sel_tmp, 1));
1169 PValue rsign(new GPRValue(sel_tmp, 2));
1170 PValue err(new GPRValue(sel_tmp, 3));
1171
1172 GPRVector tmp0(sel_tmp0, {0,1,2,3});
1173 GPRVector tmp1(sel_tmp1, {0,1,2,3});
1174
1175 std::array<PValue, 4> src0;
1176 std::array<PValue, 4> src1;
1177
1178 for (int i = 0; i < 4 ; ++i) {
1179 if (instr.dest.write_mask & (1 << i)) {
1180 src0[i] = m_src[0][i];
1181 src1[i] = m_src[1][i];
1182 }
1183 }
1184
1185
1186 for (int i = 3; i >= 0 ; --i) {
1187 if (!(instr.dest.write_mask & (1 << i)))
1188 continue;
1189 if (use_signed) {
1190 emit_instruction(op2_sub_int, asrc1, {Value::zero, src0[i]}, {alu_write});
1191 emit_instruction(op2_sub_int, asrc2, {Value::zero, src1[i]}, {alu_write});
1192 emit_instruction(op2_xor_int, rsign, {src0[i], src1[i]}, {alu_write, alu_last_instr});
1193
1194
1195 emit_instruction(op3_cndge_int, asrc1, {src0[i], src0[i], asrc1}, {alu_write});
1196 emit_instruction(op3_cndge_int, asrc2, {src1[i], src1[i], asrc2}, {alu_write, alu_last_instr});
1197 } else {
1198 asrc1 = src0[i];
1199 asrc2 = src1[i];
1200 }
1201
1202 emit_instruction(op1_recip_uint, tmp0.x(), {asrc2}, {alu_write, alu_last_instr});
1203
1204 emit_instruction(op2_mullo_uint, tmp0.z(), {tmp0.x(), asrc2}, {alu_write, alu_last_instr});
1205
1206 emit_instruction(op2_sub_int, tmp0.w(), {Value::zero, tmp0.z()}, {alu_write});
1207 emit_instruction(op2_mulhi_uint, tmp0.y(), {tmp0.x(), asrc2 }, {alu_write, alu_last_instr});
1208
1209 emit_instruction(op3_cnde_int, tmp0.z(), {tmp0.y(), tmp0.w(), tmp0.z()}, {alu_write, alu_last_instr});
1210
1211 emit_instruction(op2_mulhi_uint, err, {tmp0.z(), tmp0.x()}, {alu_write, alu_last_instr});
1212
1213 emit_instruction(op2_sub_int, tmp1.x(), {tmp0.x(), err}, {alu_write});
1214 emit_instruction(op2_add_int, tmp1.y(), {tmp0.x(), err}, {alu_write, alu_last_instr});
1215
1216 emit_instruction(op3_cnde_int, tmp0.x(), {tmp0.y(), tmp1.y(), tmp1.x()}, {alu_write, alu_last_instr});
1217
1218 emit_instruction(op2_mulhi_uint, tmp0.z(), {tmp0.x(), asrc1 }, {alu_write, alu_last_instr});
1219 emit_instruction(op2_mullo_uint, tmp0.y(), {tmp0.z(), asrc2 }, {alu_write, alu_last_instr});
1220
1221 emit_instruction(op2_sub_int, tmp0.w(), {asrc1, tmp0.y()}, {alu_write, alu_last_instr});
1222
1223
1224 emit_instruction(op2_setge_uint, tmp1.x(), {tmp0.w(), asrc2}, {alu_write});
1225 emit_instruction(op2_setge_uint, tmp1.y(), {asrc1, tmp0.y()}, {alu_write});
1226
1227 if (mod) {
1228 emit_instruction(op2_sub_int, tmp1.z(), {tmp0.w(), asrc2}, {alu_write});
1229 emit_instruction(op2_add_int, tmp1.w(), {tmp0.w(), asrc2}, {alu_write, alu_last_instr});
1230 } else {
1231 emit_instruction(op2_add_int, tmp1.z(), {tmp0.z(), Value::one_i}, {alu_write});
1232 emit_instruction(op2_sub_int, tmp1.w(), {tmp0.z(), Value::one_i}, {alu_write, alu_last_instr});
1233 }
1234
1235 emit_instruction(op2_and_int, tmp1.x(), {tmp1.x(), tmp1.y()}, {alu_write, alu_last_instr});
1236
1237 if (mod)
1238 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.w(), tmp1.z()}, {alu_write, alu_last_instr});
1239 else
1240 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.x(), tmp0.z(), tmp1.z()}, {alu_write, alu_last_instr});
1241
1242 if (use_signed) {
1243 emit_instruction(op3_cnde_int, tmp0.z(), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1244 emit_instruction(op2_sub_int, tmp0.y(), {Value::zero, tmp0.z()}, {alu_write, alu_last_instr});
1245
1246 if (mod)
1247 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {src0[i], tmp0.z(), tmp0.y()},
1248 {alu_write, alu_last_instr});
1249 else
1250 emit_instruction(op3_cndge_int, from_nir(instr.dest, i), {rsign, tmp0.z(), tmp0.y()},
1251 {alu_write, alu_last_instr});
1252 } else {
1253 emit_instruction(op3_cnde_int, from_nir(instr.dest, i), {tmp1.y(), tmp1.w(), tmp0.z()}, {alu_write, alu_last_instr});
1254 }
1255 }
1256 return true;
1257 }
1258
split_alu_modifiers(const nir_alu_src & src,const GPRVector::Values & v,GPRVector::Values & out,int ncomp)1259 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
1260 const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
1261 {
1262
1263 AluInstruction *alu = nullptr;
1264 for (int i = 0; i < ncomp; ++i) {
1265 alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write});
1266 if (src.abs)
1267 alu->set_flag(alu_src0_abs);
1268 if (src.negate)
1269 alu->set_flag(alu_src0_neg);
1270 emit_instruction(alu);
1271 }
1272 make_last(alu);
1273 }
1274
emit_tex_fdd(const nir_alu_instr & instr,TexInstruction::Opcode op,bool fine)1275 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
1276 bool fine)
1277 {
1278
1279 GPRVector::Values v;
1280 std::array<int, 4> writemask = {0,1,2,3};
1281
1282 int ncomp = nir_src_num_components(instr.src[0].src);
1283
1284 GPRVector::Swizzle src_swz;
1285 for (auto i = 0; i < 4; ++i) {
1286 src_swz[i] = instr.src[0].swizzle[i];
1287 }
1288
1289 auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
1290
1291 if (instr.src[0].abs || instr.src[0].negate) {
1292 GPRVector tmp = get_temp_vec4();
1293 split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
1294 src = tmp;
1295 }
1296
1297 for (int i = 0; i < 4; ++i) {
1298 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
1299 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
1300 }
1301
1302 /* This is querying the dreivatives of the output fb, so we would either need
1303 * access to the neighboring pixels or to the framebuffer. Neither is currently
1304 * implemented */
1305 GPRVector dst(v);
1306
1307 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
1308 tex->set_dest_swizzle(writemask);
1309
1310 if (fine) {
1311 std::cerr << "Sewt fine flag\n";
1312 tex->set_flag(TexInstruction::grad_fine);
1313 }
1314
1315 emit_instruction(tex);
1316
1317 return true;
1318 }
1319
emit_bitfield_extract(const nir_alu_instr & instr,EAluOp opcode)1320 bool EmitAluInstruction::emit_bitfield_extract(const nir_alu_instr& instr, EAluOp opcode)
1321 {
1322 int itmp = allocate_temp_register();
1323 std::array<PValue, 4> tmp;
1324 std::array<PValue, 4> dst;
1325 std::array<PValue, 4> src0;
1326 std::array<PValue, 4> shift;
1327
1328 PValue l32(new LiteralValue(32));
1329 unsigned write_mask = instr.dest.write_mask;
1330
1331 AluInstruction *ir = nullptr;
1332 for (int i = 0; i < 4; i++) {
1333 if (!(write_mask & (1<<i)))
1334 continue;
1335 dst[i] = from_nir(instr.dest, i);
1336 src0[i] = m_src[0][i];
1337 shift[i] = m_src[2][i];
1338
1339 ir = new AluInstruction(opcode, dst[i],
1340 {src0[i], m_src[1][i], shift[i]},
1341 {alu_write});
1342 emit_instruction(ir);
1343 }
1344 make_last(ir);
1345
1346 for (int i = 0; i < 4; i++) {
1347 if (!(write_mask & (1<<i)))
1348 continue;
1349 tmp[i] = PValue(new GPRValue(itmp, i));
1350 ir = new AluInstruction(op2_setge_int, tmp[i], {shift[i], l32},
1351 {alu_write});
1352 emit_instruction(ir);
1353 }
1354 make_last(ir);
1355
1356 for (int i = 0; i < 4; i++) {
1357 if (!(write_mask & (1<<i)))
1358 continue;
1359 ir = new AluInstruction(op3_cnde_int, dst[i], {tmp[i], dst[i], src0[i]},
1360 {alu_write});
1361 emit_instruction(ir);
1362 }
1363 make_last(ir);
1364
1365 return true;
1366 }
1367
emit_bitfield_insert(const nir_alu_instr & instr)1368 bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
1369 {
1370 auto t0 = get_temp_vec4();
1371 auto t1 = get_temp_vec4();
1372 auto t2 = get_temp_vec4();
1373 auto t3 = get_temp_vec4();
1374
1375 PValue l32(new LiteralValue(32));
1376 unsigned write_mask = instr.dest.write_mask;
1377 if (!write_mask) return true;
1378
1379 AluInstruction *ir = nullptr;
1380 for (int i = 0; i < 4; i++) {
1381 if (!(write_mask & (1<<i)))
1382 continue;
1383
1384 ir = new AluInstruction(op2_setge_int, t0[i], {m_src[3][i], l32}, {alu_write});
1385 emit_instruction(ir);
1386 }
1387 make_last(ir);
1388
1389 for (int i = 0; i < 4; i++) {
1390 if (!(write_mask & (1<<i)))
1391 continue;
1392 ir = new AluInstruction(op2_bfm_int, t1[i], {m_src[3][i], m_src[2][i]}, {alu_write});
1393 emit_instruction(ir);
1394 }
1395 make_last(ir);
1396
1397 for (int i = 0; i < 4; i++) {
1398 if (!(write_mask & (1<<i)))
1399 continue;
1400 ir = new AluInstruction(op2_lshl_int, t2[i], {m_src[1][i], m_src[2][i]}, {alu_write});
1401 emit_instruction(ir);
1402 }
1403 make_last(ir);
1404
1405
1406 for (int i = 0; i < 4; i++) {
1407 if (!(write_mask & (1<<i)))
1408 continue;
1409 ir = new AluInstruction(op3_bfi_int, t3[i],
1410 {t1[i], t2[i], m_src[0][i]}, {alu_write});
1411 emit_instruction(ir);
1412 }
1413 make_last(ir);
1414
1415 for (int i = 0; i < 4; i++) {
1416 if (!(write_mask & (1<<i)))
1417 continue;
1418 ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
1419 {t0[i], t3[i], m_src[1][i]}, {alu_write});
1420 emit_instruction(ir);
1421 }
1422 make_last(ir);
1423
1424 return true;
1425 }
1426
emit_unpack_32_2x16_split_y(const nir_alu_instr & instr)1427 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1428 {
1429 auto tmp = get_temp_register();
1430 emit_instruction(op2_lshr_int, tmp,
1431 {m_src[0][0], PValue(new LiteralValue(16))},
1432 {alu_write, alu_last_instr});
1433
1434 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1435 {tmp}, {alu_write, alu_last_instr});
1436
1437 return true;
1438 }
1439
emit_unpack_32_2x16_split_x(const nir_alu_instr & instr)1440 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1441 {
1442 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1443 {m_src[0][0]},{alu_write, alu_last_instr});
1444 return true;
1445 }
1446
emit_pack_32_2x16_split(const nir_alu_instr & instr)1447 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1448 {
1449 PValue x = get_temp_register();
1450 PValue y = get_temp_register();
1451
1452 emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
1453 emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
1454
1455 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1456
1457 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1458
1459 return true;
1460 }
1461
1462 }
1463