1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30
31 #include "gallium/drivers/r600/r600_shader.h"
32
33 namespace r600 {
34
35 using std::vector;
36
EmitAluInstruction(ShaderFromNirProcessor & processor)37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38 EmitInstruction (processor)
39 {
40
41 }
42
do_emit(nir_instr * ir)43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45 const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46
47 r600::sfn_log << SfnLog::instr << "emit '"
48 << *ir
49 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50 << "' (" << __func__ << ")\n";
51
52 preload_src(instr);
53
54 if (get_chip_class() == CAYMAN) {
55 switch (instr.op) {
56 case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos);
57 case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee);
58 case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped);
59 case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee);
60 case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1);
61 case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin);
62 case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee);
63 default:
64 ;
65 }
66 }
67
68 switch (instr.op) {
69 /* These are in the ALU instruction list, but they should be texture instructions */
70 case nir_op_b2b1: return emit_mov(instr);
71 case nir_op_b2b32: return emit_mov(instr);
72 case nir_op_b2f32: return emit_alu_b2f(instr);
73 case nir_op_b2i32: return emit_b2i32(instr);
74 case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
75 case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
76 case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
77 case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
78 case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
79 case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
80 case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
81 case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
82 case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
83 case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
84 case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
85 case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
86 case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
87 case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
88 case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
89 case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
90 case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
91 case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
92 case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
93 case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
94 case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
95 case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
96 case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
97 case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
98 case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
99 case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
100 case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
101 case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
102
103 case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
104 case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
105 case nir_op_cube_r600: return emit_cube(instr);
106 case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
107 case nir_op_f2b32: return emit_alu_f2b32(instr);
108 case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
109 case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
110 case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
111 case nir_op_fadd: return emit_alu_op2(instr, op2_add);
112 case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
113 case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
114 case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
115 case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2});
116 case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2});
117
118 /* These are in the ALU instruction list, but they should be texture instructions */
119 case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
120 case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
121 case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
122 case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
123 case nir_op_fddy_coarse:
124 case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
125 case nir_op_fdot2: return emit_dot(instr, 2);
126 case nir_op_fdot3: return emit_dot(instr, 3);
127 case nir_op_fdot4: return emit_dot(instr, 4);
128 case nir_op_fdph: return emit_fdph(instr);
129 case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10);
130 case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
131 case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
132 case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
133 case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
134 case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
135 case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10);
136 case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
137 case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
138 case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
139 case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
140 case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
141 case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
142 case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
143 case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
144 case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
145 case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10);
146 case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
147 case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
148 case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
149 case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
150 case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
151 case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
152 case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
153 case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
154 case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
155 case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
156 case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
157 case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
158 case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
159 case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
160 case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int);
161 case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2});
162 case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2});
163 case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int);
164 case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
165 case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int);
166 case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int);
167 case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
168 case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
169 case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
170 case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
171 case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
172 case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
173 case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
174 case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int);
175 case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
176 case nir_op_ineg: return emit_alu_ineg(instr);
177 case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
178 case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
179 case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
180 case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
181 case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
182 case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
183 case nir_op_mov:return emit_mov(instr);
184 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
185 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
186 case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse);
187 case nir_op_sge: return emit_alu_op2(instr, op2_setge);
188 case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
189 case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint);
190 case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint);
191 case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint);
192 case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
193 case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
194 case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
195 case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
196 case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
197 case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
198 case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
199 case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
200 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
201 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
202 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
203 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
204 case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
205 case nir_op_vec2: return emit_create_vec(instr, 2);
206 case nir_op_vec3: return emit_create_vec(instr, 3);
207 case nir_op_vec4: return emit_create_vec(instr, 4);
208 default:
209 return false;
210 }
211 }
212
preload_src(const nir_alu_instr & instr)213 void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
214 {
215 const nir_op_info *op_info = &nir_op_infos[instr.op];
216 assert(op_info->num_inputs <= 4);
217
218 unsigned nsrc_comp = num_src_comp(instr);
219 sfn_log << SfnLog::reg << "Preload:\n";
220 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
221 for (unsigned c = 0; c < nsrc_comp; ++c) {
222 m_src[i][c] = from_nir(instr.src[i], c);
223 sfn_log << SfnLog::reg << " " << *m_src[i][c];
224
225 }
226 sfn_log << SfnLog::reg << "\n";
227 }
228 if (instr.op == nir_op_fdph) {
229 m_src[1][3] = from_nir(instr.src[1], 3);
230 sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
231 }
232
233 split_constants(instr, nsrc_comp);
234 }
235
num_src_comp(const nir_alu_instr & instr)236 unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
237 {
238 switch (instr.op) {
239 case nir_op_fdot2:
240 case nir_op_bany_inequal2:
241 case nir_op_ball_iequal2:
242 case nir_op_bany_fnequal2:
243 case nir_op_ball_fequal2:
244 case nir_op_b32any_inequal2:
245 case nir_op_b32all_iequal2:
246 case nir_op_b32any_fnequal2:
247 case nir_op_b32all_fequal2:
248 case nir_op_unpack_64_2x32_split_y:
249 return 2;
250
251 case nir_op_fdot3:
252 case nir_op_bany_inequal3:
253 case nir_op_ball_iequal3:
254 case nir_op_bany_fnequal3:
255 case nir_op_ball_fequal3:
256 case nir_op_b32any_inequal3:
257 case nir_op_b32all_iequal3:
258 case nir_op_b32any_fnequal3:
259 case nir_op_b32all_fequal3:
260 case nir_op_cube_r600:
261 return 3;
262
263 case nir_op_fdot4:
264 case nir_op_fdph:
265 case nir_op_bany_inequal4:
266 case nir_op_ball_iequal4:
267 case nir_op_bany_fnequal4:
268 case nir_op_ball_fequal4:
269 case nir_op_b32any_inequal4:
270 case nir_op_b32all_iequal4:
271 case nir_op_b32any_fnequal4:
272 case nir_op_b32all_fequal4:
273 return 4;
274
275 case nir_op_vec2:
276 case nir_op_vec3:
277 case nir_op_vec4:
278 return 1;
279
280 default:
281 return nir_dest_num_components(instr.dest.dest);
282
283 }
284 }
285
emit_cube(const nir_alu_instr & instr)286 bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr)
287 {
288 AluInstruction *ir = nullptr;
289 const uint16_t src0_chan[4] = {2, 2, 0, 1};
290 const uint16_t src1_chan[4] = {1, 0, 2, 2};
291
292 for (int i = 0; i < 4; ++i) {
293 ir = new AluInstruction(op2_cube, from_nir(instr.dest, i),
294 from_nir(instr.src[0], src0_chan[i]),
295 from_nir(instr.src[0], src1_chan[i]), {alu_write});
296 emit_instruction(ir);
297 }
298 ir->set_flag(alu_last_instr);
299 return true;
300 }
301
split_constants(const nir_alu_instr & instr,unsigned nsrc_comp)302 void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
303 {
304 const nir_op_info *op_info = &nir_op_infos[instr.op];
305 if (op_info->num_inputs < 2)
306 return;
307
308 int nconst = 0;
309 std::array<const UniformValue *,4> c;
310 std::array<int,4> idx;
311 for (unsigned i = 0; i < op_info->num_inputs; ++i) {
312 PValue& src = m_src[i][0];
313 assert(src);
314 sfn_log << SfnLog::reg << "Split test " << *src;
315
316 if (src->type() == Value::kconst) {
317 c[nconst] = static_cast<const UniformValue *>(src.get());
318 idx[nconst++] = i;
319 sfn_log << SfnLog::reg << " is constant " << i;
320 }
321 sfn_log << SfnLog::reg << "\n";
322 }
323
324 if (nconst < 2)
325 return;
326
327 unsigned sel = c[0]->sel();
328 unsigned kcache = c[0]->kcache_bank();
329 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
330
331 for (int i = 1; i < nconst; ++i) {
332 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
333 if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
334 AluInstruction *ir = nullptr;
335 auto v = get_temp_vec4();
336 for (unsigned k = 0; k < nsrc_comp; ++k) {
337 ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
338 emit_instruction(ir);
339 m_src[idx[i]][k] = v[k];
340 }
341 make_last(ir);
342 }
343 }
344 }
345
emit_alu_inot(const nir_alu_instr & instr)346 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
347 {
348 if (instr.src[0].negate || instr.src[0].abs) {
349 std::cerr << "source modifiers not supported with int ops\n";
350 return false;
351 }
352
353 AluInstruction *ir = nullptr;
354 for (int i = 0; i < 4 ; ++i) {
355 if (instr.dest.write_mask & (1 << i)){
356 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
357 m_src[0][i], write);
358 emit_instruction(ir);
359 }
360 }
361 make_last(ir);
362 return true;
363 }
364
emit_alu_op1(const nir_alu_instr & instr,EAluOp opcode,const AluOpFlags & flags)365 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
366 const AluOpFlags& flags)
367 {
368 AluInstruction *ir = nullptr;
369 for (int i = 0; i < 4 ; ++i) {
370 if (instr.dest.write_mask & (1 << i)){
371 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
372 m_src[0][i], write);
373
374 if (flags.test(alu_src0_abs) || instr.src[0].abs)
375 ir->set_flag(alu_src0_abs);
376
377 if (instr.src[0].negate ^ flags.test(alu_src0_neg))
378 ir->set_flag(alu_src0_neg);
379
380 if (flags.test(alu_dst_clamp) || instr.dest.saturate)
381 ir->set_flag(alu_dst_clamp);
382
383 emit_instruction(ir);
384 }
385 }
386 make_last(ir);
387
388 return true;
389 }
390
emit_mov(const nir_alu_instr & instr)391 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
392 {
393 /* If the op is a plain move beween SSA values we can just forward
394 * the register reference to the original register */
395 if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
396 !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
397 bool result = true;
398 for (int i = 0; i < 4 ; ++i) {
399 if (instr.dest.write_mask & (1 << i)){
400 result &= inject_register(instr.dest.dest.ssa.index, i,
401 m_src[0][i], true);
402 }
403 }
404 return result;
405 } else {
406 return emit_alu_op1(instr, op1_mov);
407 }
408 }
409
emit_alu_trans_op1(const nir_alu_instr & instr,EAluOp opcode,bool absolute)410 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
411 bool absolute)
412 {
413 AluInstruction *ir = nullptr;
414 std::set<int> src_idx;
415
416 if (get_chip_class() == CAYMAN) {
417 int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
418 for (int i = 0; i < last_slot; ++i) {
419 bool write_comp = instr.dest.write_mask & (1 << i);
420 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
421 m_src[0][write_comp ? i : 0], write_comp ? write : empty);
422 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
423 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
424 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
425
426 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
427
428 emit_instruction(ir);
429 }
430 } else {
431 for (int i = 0; i < 4 ; ++i) {
432 if (instr.dest.write_mask & (1 << i)){
433 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
434 m_src[0][i], last_write);
435 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
436 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
437 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
438 emit_instruction(ir);
439 }
440 }
441 }
442 return true;
443 }
444
emit_alu_cm_trig(const nir_alu_instr & instr,EAluOp opcode)445 bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode)
446 {
447 AluInstruction *ir = nullptr;
448 std::set<int> src_idx;
449
450 unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
451
452 for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) {
453 for (unsigned i = 0; i < last_slot; ++i) {
454 bool write_comp = instr.dest.write_mask & (1 << j) && (i == j);
455 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
456 m_src[0][j], write_comp ? write : empty);
457 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
458 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
459 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
460
461 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
462
463 emit_instruction(ir);
464 }
465 }
466 return true;
467 }
468
469
emit_alu_f2i32_or_u32(const nir_alu_instr & instr,EAluOp op)470 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
471 {
472 AluInstruction *ir = nullptr;
473
474 if (get_chip_class() < CAYMAN) {
475 std::array<PValue, 4> v;
476
477 for (int i = 0; i < 4; ++i) {
478 if (!(instr.dest.write_mask & (1 << i)))
479 continue;
480 v[i] = from_nir(instr.dest, i);
481 ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
482 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
483 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
484 emit_instruction(ir);
485 }
486 make_last(ir);
487
488 for (int i = 0; i < 4; ++i) {
489 if (!(instr.dest.write_mask & (1 << i)))
490 continue;
491 ir = new AluInstruction(op, v[i], v[i], {alu_write});
492 emit_instruction(ir);
493 if (op == op1_flt_to_uint)
494 make_last(ir);
495 }
496 make_last(ir);
497 } else {
498 for (int i = 0; i < 4; ++i) {
499 if (!(instr.dest.write_mask & (1 << i)))
500 continue;
501 ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write});
502 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
503 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
504 emit_instruction(ir);
505 if (op == op1_flt_to_uint)
506 make_last(ir);
507 }
508 make_last(ir);
509 }
510
511 return true;
512 }
513
emit_alu_f2b32(const nir_alu_instr & instr)514 bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
515 {
516 AluInstruction *ir = nullptr;
517 for (int i = 0; i < 4 ; ++i) {
518 if (instr.dest.write_mask & (1 << i)){
519 ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
520 m_src[0][i], literal(0.0f), write);
521 emit_instruction(ir);
522 }
523 }
524 make_last(ir);
525 return true;
526 }
527
emit_b2i32(const nir_alu_instr & instr)528 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
529 {
530 AluInstruction *ir = nullptr;
531 for (int i = 0; i < 4 ; ++i) {
532 if (!(instr.dest.write_mask & (1 << i)))
533 continue;
534
535 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
536 m_src[0][i], Value::one_i, write);
537 emit_instruction(ir);
538 }
539 make_last(ir);
540
541 return true;
542 }
543
emit_pack_64_2x32_split(const nir_alu_instr & instr)544 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
545 {
546 AluInstruction *ir = nullptr;
547 for (unsigned i = 0; i < 2; ++i) {
548 if (!(instr.dest.write_mask & (1 << i)))
549 continue;
550 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
551 m_src[0][i], write);
552 emit_instruction(ir);
553 }
554 ir->set_flag(alu_last_instr);
555 return true;
556 }
557
emit_unpack_64_2x32_split(const nir_alu_instr & instr,unsigned comp)558 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
559 {
560 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
561 m_src[0][comp], last_write));
562 return true;
563 }
564
emit_create_vec(const nir_alu_instr & instr,unsigned nc)565 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
566 {
567 AluInstruction *ir = nullptr;
568 std::set<int> src_slot;
569 for(unsigned i = 0; i < nc; ++i) {
570 if (instr.dest.write_mask & (1 << i)){
571 auto src = m_src[i][0];
572 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
573 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
574
575 // FIXME: This is a rather crude approach to fix the problem that
576 // r600 can't read from four different slots of the same component
577 // here we check only for the register index
578 if (src->type() == Value::gpr)
579 src_slot.insert(src->sel());
580 if (src_slot.size() >= 3) {
581 src_slot.clear();
582 ir->set_flag(alu_last_instr);
583 }
584 emit_instruction(ir);
585 }
586 }
587 if (ir)
588 ir->set_flag(alu_last_instr);
589 return true;
590 }
591
emit_dot(const nir_alu_instr & instr,int n)592 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
593 {
594 const nir_alu_src& src0 = instr.src[0];
595 const nir_alu_src& src1 = instr.src[1];
596
597 AluInstruction *ir = nullptr;
598 for (int i = 0; i < n ; ++i) {
599 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
600 m_src[0][i], m_src[1][i],
601 instr.dest.write_mask & (1 << i) ? write : empty);
602
603 if (src0.negate) ir->set_flag(alu_src0_neg);
604 if (src0.abs) ir->set_flag(alu_src0_abs);
605 if (src1.negate) ir->set_flag(alu_src1_neg);
606 if (src1.abs) ir->set_flag(alu_src1_abs);
607
608 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
609 emit_instruction(ir);
610 }
611 for (int i = n; i < 4 ; ++i) {
612 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
613 Value::zero, Value::zero,
614 instr.dest.write_mask & (1 << i) ? write : empty);
615 emit_instruction(ir);
616 }
617
618 if (ir)
619 ir->set_flag(alu_last_instr);
620 return true;
621 }
622
emit_fdph(const nir_alu_instr & instr)623 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
624 {
625 const nir_alu_src& src0 = instr.src[0];
626 const nir_alu_src& src1 = instr.src[1];
627
628 AluInstruction *ir = nullptr;
629 for (int i = 0; i < 3 ; ++i) {
630 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
631 m_src[0][i], m_src[1][i],
632 instr.dest.write_mask & (1 << i) ? write : empty);
633 if (src0.negate) ir->set_flag(alu_src0_neg);
634 if (src0.abs) ir->set_flag(alu_src0_abs);
635 if (src1.negate) ir->set_flag(alu_src1_neg);
636 if (src1.abs) ir->set_flag(alu_src1_abs);
637 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
638 emit_instruction(ir);
639 }
640
641 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
642 m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
643 if (src1.negate) ir->set_flag(alu_src1_neg);
644 if (src1.abs) ir->set_flag(alu_src1_abs);
645 emit_instruction(ir);
646
647 ir->set_flag(alu_last_instr);
648 return true;
649
650 }
651
emit_alu_i2orf2_b1(const nir_alu_instr & instr,EAluOp op)652 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
653 {
654 AluInstruction *ir = nullptr;
655 for (int i = 0; i < 4 ; ++i) {
656 if (instr.dest.write_mask & (1 << i)) {
657 ir = new AluInstruction(op, from_nir(instr.dest, i),
658 m_src[0][i], Value::zero,
659 write);
660 emit_instruction(ir);
661 }
662 }
663 if (ir)
664 ir->set_flag(alu_last_instr);
665 return true;
666 }
667
emit_alu_b2f(const nir_alu_instr & instr)668 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
669 {
670 AluInstruction *ir = nullptr;
671 for (int i = 0; i < 4 ; ++i) {
672 if (instr.dest.write_mask & (1 << i)){
673 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
674 m_src[0][i], Value::one_f, write);
675 if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
676 if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
677 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
678 emit_instruction(ir);
679 }
680 }
681 if (ir)
682 ir->set_flag(alu_last_instr);
683 return true;
684 }
685
emit_any_all_icomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)686 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
687 {
688
689 AluInstruction *ir = nullptr;
690 PValue v[4]; // this might need some additional temp register creation
691 for (unsigned i = 0; i < 4 ; ++i)
692 v[i] = from_nir(instr.dest, i);
693
694 EAluOp combine = all ? op2_and_int : op2_or_int;
695
696 /* For integers we can not use the modifiers, so this needs some emulation */
697 /* Should actually be lowered with NIR */
698 if (instr.src[0].negate == instr.src[1].negate &&
699 instr.src[0].abs == instr.src[1].abs) {
700
701 for (unsigned i = 0; i < nc ; ++i) {
702 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
703 emit_instruction(ir);
704 }
705 if (ir)
706 ir->set_flag(alu_last_instr);
707 } else {
708 std::cerr << "Negate in iequal/inequal not (yet) supported\n";
709 return false;
710 }
711
712 for (unsigned i = 0; i < nc/2 ; ++i) {
713 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
714 emit_instruction(ir);
715 }
716 if (ir)
717 ir->set_flag(alu_last_instr);
718
719 if (nc > 2) {
720 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
721 emit_instruction(ir);
722 }
723
724 return true;
725 }
726
emit_any_all_fcomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)727 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
728 {
729 AluInstruction *ir = nullptr;
730 PValue v[4]; // this might need some additional temp register creation
731 for (unsigned i = 0; i < 4 ; ++i)
732 v[i] = from_nir(instr.dest, i);
733
734 for (unsigned i = 0; i < nc ; ++i) {
735 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
736
737 if (instr.src[0].abs)
738 ir->set_flag(alu_src0_abs);
739 if (instr.src[0].negate)
740 ir->set_flag(alu_src0_neg);
741
742 if (instr.src[1].abs)
743 ir->set_flag(alu_src1_abs);
744 if (instr.src[1].negate)
745 ir->set_flag(alu_src1_neg);
746
747 emit_instruction(ir);
748 }
749 if (ir)
750 ir->set_flag(alu_last_instr);
751
752 for (unsigned i = 0; i < nc ; ++i) {
753 ir = new AluInstruction(op1_max4, v[i], v[i], write);
754 if (all) ir->set_flag(alu_src0_neg);
755 emit_instruction(ir);
756 }
757
758 for (unsigned i = nc; i < 4 ; ++i) {
759 ir = new AluInstruction(op1_max4, v[i],
760 all ? Value::one_f : Value::zero, write);
761 if (all)
762 ir->set_flag(alu_src0_neg);
763
764 emit_instruction(ir);
765 }
766
767 ir->set_flag(alu_last_instr);
768
769 if (all)
770 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
771 else
772 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
773
774 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
775 if (all)
776 ir->set_flag(alu_src1_neg);
777 emit_instruction(ir);
778
779 return true;
780 }
781
emit_any_all_fcomp2(const nir_alu_instr & instr,EAluOp op,bool all)782 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
783 {
784 AluInstruction *ir = nullptr;
785 PValue v[4]; // this might need some additional temp register creation
786 for (unsigned i = 0; i < 4 ; ++i)
787 v[i] = from_nir(instr.dest, i);
788
789 for (unsigned i = 0; i < 2 ; ++i) {
790 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
791 if (instr.src[0].abs)
792 ir->set_flag(alu_src0_abs);
793 if (instr.src[0].negate)
794 ir->set_flag(alu_src0_neg);
795
796 if (instr.src[1].abs)
797 ir->set_flag(alu_src1_abs);
798 if (instr.src[1].negate)
799 ir->set_flag(alu_src1_neg);
800
801 emit_instruction(ir);
802 }
803 if (ir)
804 ir->set_flag(alu_last_instr);
805
806 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
807 ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
808 emit_instruction(ir);
809
810 return true;
811 }
812
emit_alu_trans_op2(const nir_alu_instr & instr,EAluOp opcode)813 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
814 {
815 const nir_alu_src& src0 = instr.src[0];
816 const nir_alu_src& src1 = instr.src[1];
817
818 AluInstruction *ir = nullptr;
819
820 if (get_chip_class() == CAYMAN) {
821 for (int k = 0; k < 4; ++k) {
822 if (instr.dest.write_mask & (1 << k)) {
823
824 for (int i = 0; i < 4; i++) {
825 ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? write : empty);
826 if (src0.negate) ir->set_flag(alu_src0_neg);
827 if (src0.abs) ir->set_flag(alu_src0_abs);
828 if (src1.negate) ir->set_flag(alu_src1_neg);
829 if (src1.abs) ir->set_flag(alu_src1_abs);
830 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
831 if (i == 3) ir->set_flag(alu_last_instr);
832 emit_instruction(ir);
833 }
834 }
835 }
836 } else {
837 for (int i = 0; i < 4 ; ++i) {
838 if (instr.dest.write_mask & (1 << i)){
839 ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
840 if (src0.negate) ir->set_flag(alu_src0_neg);
841 if (src0.abs) ir->set_flag(alu_src0_abs);
842 if (src1.negate) ir->set_flag(alu_src1_neg);
843 if (src1.abs) ir->set_flag(alu_src1_abs);
844 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
845 emit_instruction(ir);
846 }
847 }
848 }
849 return true;
850 }
851
emit_alu_op2_int(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts opts)852 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
853 {
854
855 const nir_alu_src& src0 = instr.src[0];
856 const nir_alu_src& src1 = instr.src[1];
857
858 if (src0.negate || src1.negate ||
859 src0.abs || src1.abs) {
860 std::cerr << "R600: don't support modifiers with integer operations";
861 return false;
862 }
863 return emit_alu_op2(instr, opcode, opts);
864 }
865
emit_alu_op2(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts ops)866 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
867 {
868 const nir_alu_src *src0 = &instr.src[0];
869 const nir_alu_src *src1 = &instr.src[1];
870
871 int idx0 = 0;
872 int idx1 = 1;
873 if (ops & op2_opt_reverse) {
874 std::swap(src0, src1);
875 std::swap(idx0, idx1);
876 }
877
878 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
879
880 AluInstruction *ir = nullptr;
881 for (int i = 0; i < 4 ; ++i) {
882 if (instr.dest.write_mask & (1 << i)){
883 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
884 m_src[idx0][i], m_src[idx1][i], write);
885
886 if (src0->negate) ir->set_flag(alu_src0_neg);
887 if (src0->abs) ir->set_flag(alu_src0_abs);
888 if (src1_negate) ir->set_flag(alu_src1_neg);
889 if (src1->abs) ir->set_flag(alu_src1_abs);
890 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
891 emit_instruction(ir);
892 }
893 }
894 if (ir)
895 ir->set_flag(alu_last_instr);
896 return true;
897 }
898
emit_alu_op3(const nir_alu_instr & instr,EAluOp opcode,std::array<uint8_t,3> reorder)899 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
900 std::array<uint8_t, 3> reorder)
901 {
902 const nir_alu_src *src[3];
903 src[0] = &instr.src[reorder[0]];
904 src[1] = &instr.src[reorder[1]];
905 src[2] = &instr.src[reorder[2]];
906
907 AluInstruction *ir = nullptr;
908 for (int i = 0; i < 4 ; ++i) {
909 if (instr.dest.write_mask & (1 << i)){
910 ir = new AluInstruction(opcode, from_nir(instr.dest, i),
911 m_src[reorder[0]][i],
912 m_src[reorder[1]][i],
913 m_src[reorder[2]][i],
914 write);
915
916 if (src[0]->negate) ir->set_flag(alu_src0_neg);
917 if (src[1]->negate) ir->set_flag(alu_src1_neg);
918 if (src[2]->negate) ir->set_flag(alu_src2_neg);
919
920 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
921 ir->set_flag(alu_write);
922 emit_instruction(ir);
923 }
924 }
925 make_last(ir);
926 return true;
927 }
928
emit_alu_ineg(const nir_alu_instr & instr)929 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
930 {
931 AluInstruction *ir = nullptr;
932 for (int i = 0; i < 4 ; ++i) {
933 if (instr.dest.write_mask & (1 << i)){
934 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
935 m_src[0][i], write);
936 emit_instruction(ir);
937 }
938 }
939 if (ir)
940 ir->set_flag(alu_last_instr);
941
942 return true;
943 }
944
/* Eight-character lookup string; presumably maps swizzle selector values
 * 0..7 to printable characters for debug output - TODO confirm. It is not
 * referenced in the nearby definitions, so check the rest of the file
 * before removing it. */
static const char swz[] = "xyzw01?_";
946
split_alu_modifiers(const nir_alu_src & src,const GPRVector::Values & v,GPRVector::Values & out,int ncomp)947 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
948 const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
949 {
950
951 AluInstruction *alu = nullptr;
952 for (int i = 0; i < ncomp; ++i) {
953 alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write});
954 if (src.abs)
955 alu->set_flag(alu_src0_abs);
956 if (src.negate)
957 alu->set_flag(alu_src0_neg);
958 emit_instruction(alu);
959 }
960 make_last(alu);
961 }
962
emit_tex_fdd(const nir_alu_instr & instr,TexInstruction::Opcode op,bool fine)963 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
964 bool fine)
965 {
966
967 GPRVector::Values v;
968 std::array<int, 4> writemask = {0,1,2,3};
969
970 int ncomp = nir_dest_num_components(instr.dest.dest);
971 GPRVector::Swizzle src_swz = {7,7,7,7};
972 for (auto i = 0; i < ncomp; ++i)
973 src_swz[i] = instr.src[0].swizzle[i];
974
975 auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
976
977 if (instr.src[0].abs || instr.src[0].negate) {
978 GPRVector tmp = get_temp_vec4();
979 split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
980 src = tmp;
981 }
982
983 for (int i = 0; i < 4; ++i) {
984 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
985 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
986 }
987
988 /* This is querying the dreivatives of the output fb, so we would either need
989 * access to the neighboring pixels or to the framebuffer. Neither is currently
990 * implemented */
991 GPRVector dst(v);
992
993 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
994 tex->set_dest_swizzle(writemask);
995
996 if (fine)
997 tex->set_flag(TexInstruction::grad_fine);
998
999 emit_instruction(tex);
1000
1001 return true;
1002 }
1003
emit_unpack_32_2x16_split_y(const nir_alu_instr & instr)1004 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1005 {
1006 auto tmp = get_temp_register();
1007 emit_instruction(op2_lshr_int, tmp,
1008 {m_src[0][0], PValue(new LiteralValue(16))},
1009 {alu_write, alu_last_instr});
1010
1011 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1012 {tmp}, {alu_write, alu_last_instr});
1013
1014 return true;
1015 }
1016
emit_unpack_32_2x16_split_x(const nir_alu_instr & instr)1017 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1018 {
1019 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1020 {m_src[0][0]},{alu_write, alu_last_instr});
1021 return true;
1022 }
1023
emit_pack_32_2x16_split(const nir_alu_instr & instr)1024 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1025 {
1026 PValue x = get_temp_register();
1027 PValue y = get_temp_register();
1028
1029 emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
1030 emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
1031
1032 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1033
1034 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1035
1036 return true;
1037 }
1038
1039 }
1040