/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

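/* Encode the destination operand fields of an EU instruction.  SEND-like
 * opcodes use a compact destination encoding with extra restrictions, so
 * they are handled separately from the general ALU path below.
 */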
void
brw_set_dest(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == FIXED_GRF)
      assert(dest.nr < XE2_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == ARF &&
       dest.nr == BRW_ARF_NULL &&
       brw_type_size_bytes(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(devinfo->ver < 12);
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_eu_inst_set_send_dst_reg_file(devinfo, inst, phys_file(dest));
   } else {
      brw_eu_inst_set_dst_file_type(devinfo, inst, phys_file(dest), dest.type);
      brw_eu_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_eu_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == FIXED_GRF) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             * Although Dst.HorzStride is a don't care for Align16, HW needs
             * this to be programmed as "01".
             */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_eu_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                             dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                              dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}

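/* Encode the src0 operand fields of an EU instruction, including the
 * immediate payload when src0 is an IMM operand.
 */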
void
brw_set_src0(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(reg.file == ARF || reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);

      brw_eu_inst_set_send_src0_reg_file(devinfo, inst, phys_file(reg));
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

      if (reg.file == ARF && reg.nr == BRW_ARF_SCALAR) {
         assert(reg.subnr % 2 == 0);
         brw_eu_inst_set_send_src0_subreg_nr(devinfo, inst, reg.subnr / 2);
      } else {
         assert(reg.subnr == 0);
      }
   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_eu_inst_set_src0_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_eu_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == IMM) {
         if (reg.type == BRW_TYPE_DF)
            brw_eu_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_TYPE_UQ ||
                  reg.type == BRW_TYPE_Q)
            brw_eu_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);

         if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
            brw_eu_inst_set_src1_reg_file(devinfo, inst, ARF);
            brw_eu_inst_set_src1_reg_hw_type(devinfo, inst,
                                             brw_eu_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_eu_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_eu_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src0_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src0_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src0_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src0_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src0_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}


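/* Encode the src1 operand fields.  src1 is more restricted than src0: it
 * cannot be an explicit accumulator access, and in two-source instructions
 * only src1 may be an immediate, limited to 32 bits.
 */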
void
brw_set_src1(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == FIXED_GRF ||
             reg.file == ARF ||
             reg.file == ADDRESS);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_send_src1_reg_file(devinfo, inst, phys_file(reg));
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *     operands only."
       */
      assert(reg.file != ARF ||
             (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);

      brw_eu_inst_set_src1_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be an immediate in two-argument instructions. */
      assert(brw_eu_inst_src0_reg_file(devinfo, inst) != IMM);

      if (reg.file == IMM) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(brw_type_size_bytes(reg.type) < 8);
         brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert(reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == FIXED_GRF); */

         brw_eu_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_eu_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src1_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src1_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src1_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src1_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src1_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *inst,
                unsigned desc, unsigned ex_desc)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
          brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
   if (devinfo->ver < 12)
      brw_eu_inst_set_src1_file_type(devinfo, inst, IMM, BRW_TYPE_UD);
   brw_eu_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->ver >= 9)
      brw_eu_inst_set_send_ex_desc(devinfo, inst, ex_desc, false);
}

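/* Copy the tracked default instruction state (execution size, predication,
 * flag register, SWSB scheduling information, etc.) into a freshly encoded
 * instruction.
 */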
static void
brw_eu_inst_set_state(const struct brw_isa_info *isa,
                      brw_eu_inst *insn,
                      const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_eu_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_eu_inst_set_group(devinfo, insn, state->group);
   brw_eu_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_eu_inst_set_mask_control(devinfo, insn, state->mask_control);
   if (devinfo->ver >= 12)
      brw_eu_inst_set_swsb(devinfo, insn,
                           tgl_swsb_encode(devinfo, state->swsb,
                                           brw_eu_inst_opcode(isa, insn)));
   brw_eu_inst_set_saturate(devinfo, insn, state->saturate);
   brw_eu_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_eu_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   if (is_3src(isa, brw_eu_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_eu_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_eu_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver < 20)
      brw_eu_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}

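/* Reserve space for nr_insn instructions at the requested byte alignment,
 * growing the store as needed, and return a pointer to the first reserved
 * slot.  Note this may reallocate p->store, invalidating older pointers
 * into it.
 */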
static brw_eu_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
   assert(util_is_power_of_two_or_zero(alignment));
   const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_eu_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_eu_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0.  We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_eu_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_eu_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_eu_inst);

   return &p->store[start_insn];
}

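/* Align the next instruction offset without emitting any instructions. */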
void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}

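/* Copy an arbitrary blob of data into the instruction stream and return its
 * byte offset from the start of the store.
 */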
int
brw_append_data(struct brw_codegen *p, void *data,
                unsigned size, unsigned alignment)
{
   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_eu_inst));
   void *dst = brw_append_insns(p, nr_insn, alignment);
   memcpy(dst, data, size);

   /* If it's not a whole number of instructions, memset the end */
   if (size < nr_insn * sizeof(brw_eu_inst))
      memset(dst + size, 0, nr_insn * sizeof(brw_eu_inst) - size);

   return dst - (void *)p->store;
}

#define next_insn brw_next_insn
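/* Allocate the next instruction slot, zero it, and initialize it with the
 * given opcode and the current default instruction state.
 */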
brw_eu_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_eu_inst *insn = brw_append_insns(p, 1, sizeof(brw_eu_inst));

   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_eu_inst_set_state(p->isa, insn, p->current);

   return insn;
}

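/* Record a shader relocation, doubling the relocation array whenever it
 * runs out of space.
 */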
void
brw_add_reloc(struct brw_codegen *p, uint32_t id,
              enum brw_shader_reloc_type type,
              uint32_t offset, uint32_t delta)
{
   if (p->num_relocs + 1 > p->reloc_array_size) {
      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
      p->relocs = reralloc(p->mem_ctx, p->relocs,
                           struct brw_shader_reloc, p->reloc_array_size);
   }

   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
      .id = id,
      .type = type,
      .offset = offset,
      .delta = delta,
   };
}

static brw_eu_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_eu_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static brw_eu_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != IMM || brw_type_size_bytes(src0.type) <= 4);
   assert(src1.file != IMM || brw_type_size_bytes(src1.type) <= 4);

   brw_eu_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

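/* Map the general region stride enums onto the narrower encodings used by
 * align1 three-source instructions.
 */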
static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      assert(devinfo->ver >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      assert(devinfo->ver < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}


static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}

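/* Emit a three-source ALU instruction.  The align1 and align16 encodings
 * differ substantially, so each gets its own branch below.
 */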
static brw_eu_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   assert(dest.nr < XE2_MAX_GRF);

   if (devinfo->ver <= 9) {
      assert(src0.file != IMM && src2.file != IMM);
   } else if (devinfo->ver <= 11) {
      /* On Ice Lake, BFE and CSEL cannot have any immediate sources. */
      assert((opcode != BRW_OPCODE_BFE && opcode != BRW_OPCODE_CSEL) ||
             (src0.file != IMM && src2.file != IMM));

      /* On Ice Lake, DP4A and MAD can only have one immediate source. */
      assert((opcode != BRW_OPCODE_DP4A && opcode != BRW_OPCODE_MAD) ||
             !(src0.file == IMM && src2.file == IMM));
   } else {
      /* Having two immediate sources is allowed, but this should have been
       * converted to a regular ADD by brw_opt_algebraic.
       */
      assert(opcode != BRW_OPCODE_ADD3 ||
             !(src0.file == IMM && src2.file == IMM));
   }

   /* BFI2 cannot have any immediate sources on any platform. */
   assert(opcode != BRW_OPCODE_BFI2 ||
          (src0.file != IMM && src2.file != IMM));

   assert(src0.file == IMM || src0.nr < XE2_MAX_GRF);
   assert(src1.file != IMM && src1.nr < XE2_MAX_GRF);
   assert(src2.file == IMM || src2.nr < XE2_MAX_GRF);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      assert(dest.file == FIXED_GRF ||
             (dest.file == ARF &&
              (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));

      brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
      brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      if (brw_type_is_float(dest.type)) {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                           BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                           BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_eu_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_eu_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_eu_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_eu_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      if (src0.file == IMM) {
         brw_eu_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
      } else {
         brw_eu_inst_set_3src_a1_src0_vstride(
            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
         brw_eu_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                              to_3src_align1_hstride(src0.hstride));
         brw_eu_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
         brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
         brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
         brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      }
      brw_eu_inst_set_3src_a1_src1_vstride(
         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
      brw_eu_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src1.hstride));

      brw_eu_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
      if (src1.file == ARF) {
         brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
      }
      brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      if (src2.file == IMM) {
         brw_eu_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
      } else {
         brw_eu_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                              to_3src_align1_hstride(src2.hstride));
         /* no vstride on src2 */
         brw_eu_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
         brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
         brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
         brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      }

      assert(src0.file == FIXED_GRF ||
             src0.file == IMM);
      assert(src1.file == FIXED_GRF ||
             (src1.file == ARF &&
              src1.nr == BRW_ARF_ACCUMULATOR));
      assert(src2.file == FIXED_GRF ||
             src2.file == IMM);

      if (devinfo->ver >= 12) {
         if (src0.file == IMM) {
            brw_eu_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
         } else {
            brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
         }

         brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));

         if (src2.file == IMM) {
            brw_eu_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
         } else {
            brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
         }
      } else {
         brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
         brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
         brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
      }

   } else {
      assert(dest.file == FIXED_GRF);
      assert(dest.type == BRW_TYPE_F ||
             dest.type == BRW_TYPE_DF ||
             dest.type == BRW_TYPE_D ||
             dest.type == BRW_TYPE_UD ||
             dest.type == BRW_TYPE_HF);
      brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_eu_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_eu_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_eu_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
      brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_eu_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                             src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_eu_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
      brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_eu_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                             src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_eu_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
      brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_eu_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                             src2.vstride == BRW_VERTICAL_STRIDE_0);

      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      brw_eu_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
      brw_eu_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

      /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
       *
       *    "Three source instructions can use operands with mixed-mode
       *     precision. When SrcType field is set to :f or :hf it defines
       *     precision for source 0 only, and fields Src1Type and Src2Type
       *     define precision for other source operands:
       *
       *     0b = :f. Single precision Float (32-bit).
       *     1b = :hf. Half precision Float (16-bit)."
       */
      if (src1.type == BRW_TYPE_HF)
         brw_eu_inst_set_3src_a16_src1_type(devinfo, inst, 1);

      if (src2.type == BRW_TYPE_HF)
         brw_eu_inst_set_3src_a16_src2_type(devinfo, inst, 1);
   }

   return inst;
}

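/* Emit a DPAS (systolic dot product accumulate) instruction with the given
 * systolic depth and repeat count.
 */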
static brw_eu_inst *
brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
                   enum gfx12_systolic_depth sdepth, unsigned rcount,
                   struct brw_reg dest, struct brw_reg src0,
                   struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   assert(dest.file == FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_file(devinfo, inst, FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
   brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

   if (brw_type_is_float(dest.type)) {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                          BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                          BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
   }

   brw_eu_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
   brw_eu_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);

   brw_eu_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
   brw_eu_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
   brw_eu_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
   brw_eu_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);

   assert(src0.file == FIXED_GRF ||
          (src0.file == ARF &&
           src0.nr == BRW_ARF_NULL));

   brw_eu_inst_set_dpas_3src_src0_reg_file(devinfo, inst, phys_file(src0));
   brw_eu_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
   brw_eu_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));

   assert(src1.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src1_reg_file(devinfo, inst, phys_file(src1));
   brw_eu_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   assert(src2.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src2_reg_file(devinfo, inst, phys_file(src2));
   brw_eu_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   return inst;
}

/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                          \
brw_eu_inst *brw_##OP(struct brw_codegen *p,              \
                      struct brw_reg dest,                \
                      struct brw_reg src0)                \
{                                                         \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);       \
}

#define ALU2(OP)                                          \
brw_eu_inst *brw_##OP(struct brw_codegen *p,              \
                      struct brw_reg dest,                \
                      struct brw_reg src0,                \
                      struct brw_reg src1)                \
{                                                         \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}

#define ALU3(OP)                                                 \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                     \
                      struct brw_reg dest,                       \
                      struct brw_reg src0,                       \
                      struct brw_reg src1,                       \
                      struct brw_reg src2)                       \
{                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {                \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src0.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src1.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src2.swizzle = BRW_SWIZZLE_XXXX;                        \
   }                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);  \
}

#define ALU3F(OP)                                                \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                     \
                      struct brw_reg dest,                       \
                      struct brw_reg src0,                       \
                      struct brw_reg src1,                       \
                      struct brw_reg src2)                       \
{                                                                \
   assert(dest.type == BRW_TYPE_F ||                             \
          dest.type == BRW_TYPE_DF);                             \
   if (dest.type == BRW_TYPE_F) {                                \
      assert(src0.type == BRW_TYPE_F);                           \
      assert(src1.type == BRW_TYPE_F);                           \
      assert(src2.type == BRW_TYPE_F);                           \
   } else if (dest.type == BRW_TYPE_DF) {                        \
      assert(src0.type == BRW_TYPE_DF);                          \
      assert(src1.type == BRW_TYPE_DF);                          \
      assert(src2.type == BRW_TYPE_DF);                          \
   }                                                             \
                                                                 \
   if (p->current->access_mode == BRW_ALIGN_16) {                \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src0.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src1.swizzle = BRW_SWIZZLE_XXXX;                        \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                 \
         src2.swizzle = BRW_SWIZZLE_XXXX;                        \
   }                                                             \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);  \
}

ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)

brw_eu_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

brw_eu_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_TYPE_B:
   case BRW_TYPE_UB:
   case BRW_TYPE_W:
   case BRW_TYPE_UW:
   case BRW_TYPE_D:
   case BRW_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

brw_eu_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_TYPE_D ||
       src0.type == BRW_TYPE_UD ||
       src1.type == BRW_TYPE_D ||
       src1.type == BRW_TYPE_UD) {
      assert(dest.type != BRW_TYPE_F);
   }

   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   assert(src0.file != ARF ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != ARF ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}

brw_eu_inst *
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
}

brw_eu_inst *
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   src1.vstride = BRW_VERTICAL_STRIDE_8;
   src1.width = BRW_WIDTH_8;
   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
}

brw_eu_inst *
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
         unsigned rcount, struct brw_reg dest, struct brw_reg src0,
         struct brw_reg src1, struct brw_reg src2)
{
   return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
                             src1, src2);
}

void brw_NOP(struct brw_codegen *p)
{
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_NOP);
   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
}

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
   brw_eu_inst_set_cond_modifier(p->devinfo, insn, func);
}

/***********************************************************************
 * Comparisons, if/else/endif
 */

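/* Emit a JMPI: a scalar, optionally predicated jump, encoded here as an ADD
 * of the given index onto the instruction pointer register.
 */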
brw_eu_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
         unsigned predicate_control)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_reg ip = brw_ip_reg();
   brw_eu_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

   brw_eu_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_eu_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_eu_inst_set_pred_control(devinfo, inst, predicate_control);

   return inst;
}

static void
push_if_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static brw_eu_inst *
pop_if_stack(struct brw_codegen *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
}

static brw_eu_inst *
get_inner_do_insn(struct brw_codegen *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* The EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
brw_eu_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction: */
   brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_exec_size(devinfo, insn, execute_size);
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
   return insn;
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_eu_inst *if_inst, brw_eu_inst *else_inst,
              brw_eu_inst *endif_inst)
{
   const struct intel_device_info *devinfo = p->devinfo;

   assert(if_inst != NULL && brw_eu_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_eu_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   unsigned br = brw_jump_scale(devinfo);

   assert(brw_eu_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   brw_eu_inst_set_exec_size(devinfo, endif_inst, brw_eu_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_eu_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_eu_inst_set_exec_size(devinfo, else_inst, brw_eu_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF */
      /* The IF instruction's JIP should point just past the ELSE */
      brw_eu_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          */
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_eu_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
       * JIP and UIP both should point to ENDIF on those
       * platforms.
       */
      brw_eu_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}

void
brw_ELSE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = NULL;
   brw_eu_inst *else_inst = NULL;
   brw_eu_inst *if_inst = NULL;
   brw_eu_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_eu_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as the join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of the instruction
    * store memory (p->store), so call it first before referencing the
    * instruction store pointer from an index.
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_eu_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   brw_eu_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

brw_eu_inst *
brw_BREAK(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   brw_set_src0(p, insn, brw_imm_d(0x0));
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   return insn;
}

brw_eu_inst *
brw_CONT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_imm_d(0x0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

brw_eu_inst *
brw_HALT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   }

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For gfx6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
brw_eu_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
   push_loop_stack(p, &p->store[p->nr_insn]);
   return &p->store[p->nr_insn];
}

brw_eu_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   insn = next_insn(p, BRW_OPCODE_WHILE);
   do_insn = get_inner_do_insn(p);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, br * (do_insn - insn));

   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}

void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMP);

   brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMPN);

   brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

/***********************************************************************
 * Helpers for the various SEND message types:
 */

void gfx6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == FIXED_GRF);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_TYPE_F);
      assert(src1.type != BRW_TYPE_F);
      assert(src1.file == FIXED_GRF ||
             src1.file == IMM);
      /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
       *     INT DIV function does not support source modifiers.
       */
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   } else {
      assert(src0.type == BRW_TYPE_F ||
             (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
      assert(src1.type == BRW_TYPE_F ||
             (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
   }

   /* This workaround says that we cannot use scalar broadcast with HF types.
    * However, for is_scalar values, all 16 elements contain the same value,
    * so we can replace a <0,1,0> region with <16,16,1> without ill effect.
    */
   if (intel_needs_workaround(devinfo, 22016140776)) {
      if (src0.is_scalar && src0.type == BRW_TYPE_HF) {
         src0.vstride = BRW_VERTICAL_STRIDE_16;
         src0.width = BRW_WIDTH_16;
         src0.hstride = BRW_HORIZONTAL_STRIDE_1;
         src0.swizzle = BRW_SWIZZLE_XYZW;
      }

      if (src1.is_scalar && src1.type == BRW_TYPE_HF) {
         src1.vstride = BRW_VERTICAL_STRIDE_16;
         src1.width = BRW_WIDTH_16;
         src1.hstride = BRW_HORIZONTAL_STRIDE_1;
         src1.swizzle = BRW_SWIZZLE_XYZW;
      }
   }

   brw_eu_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

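/* Emit a SEND whose descriptor comes either from an immediate or from an
 * address register.
 */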
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   if (desc.file == IMM) {
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
      brw_set_desc(p, send, desc.ud);
   } else {
      assert(desc.file == ADDRESS);
      assert(desc.subnr == 0);
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
      if (devinfo->ver >= 12)
         brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, true);
      else
         brw_set_src1(p, send, desc);
   }

   brw_set_dest(p, send, dst);
   brw_eu_inst_set_sfid(devinfo, send, sfid);
   brw_eu_inst_set_eot(devinfo, send, eot);
}

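/* Emit a split SEND (SENDS prior to Gfx12) with two payload sources and an
 * independently selectable immediate or register descriptor and extended
 * descriptor.
 */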
1470 void
brw_send_indirect_split_message(struct brw_codegen * p,unsigned sfid,struct brw_reg dst,struct brw_reg payload0,struct brw_reg payload1,struct brw_reg desc,struct brw_reg ex_desc,unsigned ex_mlen,bool ex_bso,bool eot)1471 brw_send_indirect_split_message(struct brw_codegen *p,
1472 unsigned sfid,
1473 struct brw_reg dst,
1474 struct brw_reg payload0,
1475 struct brw_reg payload1,
1476 struct brw_reg desc,
1477 struct brw_reg ex_desc,
1478 unsigned ex_mlen,
1479 bool ex_bso,
1480 bool eot)
1481 {
1482 const struct intel_device_info *devinfo = p->devinfo;
1483 struct brw_eu_inst *send;
1484
1485 dst = retype(dst, BRW_TYPE_UW);
1486
1487 assert(desc.type == BRW_TYPE_UD);
1488
1489 send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
1490 brw_set_dest(p, send, dst);
1491 brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
1492 brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));
1493
1494 if (desc.file == IMM) {
1495 brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 0);
1496 brw_eu_inst_set_send_desc(devinfo, send, desc.ud);
1497 } else {
1498 assert(desc.file == ADDRESS);
1499 assert(desc.subnr == 0);
1500 brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 1);
1501 }
1502
1503 if (ex_desc.file == IMM) {
1504 brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
1505 brw_eu_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud, false);
1506 } else {
1507 assert(ex_desc.file == ADDRESS);
1508 assert((ex_desc.subnr & 0x3) == 0);
1509 brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
1510 brw_eu_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);
1511
1512 if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM)
1513 brw_eu_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
1514 }
1515
1516 if (ex_bso) {
1517 /* The send instruction ExBSO field does not exist with UGM on Gfx20+,
1518 * it is assumed.
1519 *
1520 * BSpec 56890
1521 */
1522 if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
1523 brw_eu_inst_set_send_ex_bso(devinfo, send, true);
1524 brw_eu_inst_set_send_src1_len(devinfo, send, ex_mlen / reg_unit(devinfo));
1525 }
1526 brw_eu_inst_set_sfid(devinfo, send, sfid);
1527 brw_eu_inst_set_eot(devinfo, send, eot);
1528 }
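
/* Illustrative usage (values hypothetical): a split send with a one-GRF
 * extended payload and both descriptors immediate,
 *
 *    brw_send_indirect_split_message(p, sfid, dst, payload0, payload1,
 *                                    brw_imm_ud(desc), brw_imm_ud(ex_desc),
 *                                    1, false, false);
 *
 * An ADDRESS-file ex_desc instead selects the register-sourced path and
 * must be 4-byte aligned within the address register.
 */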

static bool
while_jumps_before_offset(const struct intel_device_info *devinfo,
                          brw_eu_inst *insn, int while_offset, int start_offset)
{
   int scale = 16 / brw_jump_scale(devinfo);
   int jip = brw_eu_inst_jip(devinfo, insn);
   assert(jip < 0);
   return while_offset + jip * scale <= start_offset;
}
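
/* Worked example (offsets hypothetical): on hardware where brw_jump_scale()
 * returns 16, scale is 1 and JIP counts bytes, so a WHILE at byte offset 256
 * with JIP == -192 jumps back to byte 64; it "jumps before" any start_offset
 * of 64 or greater.
 */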

static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   int depth = 0;

   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_eu_inst *insn = store + offset;

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop. Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   return 0;
}

/* There is no DO instruction on gfx6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_eu_inst *insn = store + offset;

      if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
         if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
            return offset;
      }
   }
   assert(!"not reached");
   return start_offset;
}
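
/* Example (offsets hypothetical): scanning forward from a CONTINUE at byte
 * 128, a nested loop's WHILE at 192 that jumps back to 160 is skipped
 * (160 > 128), while the enclosing loop's WHILE at 256 jumping back to 64
 * satisfies while_jumps_before_offset() and is returned.
 */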

/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONTINUE, ENDIF, and HALT instructions to their correct
 * locations.
 */
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   int br = brw_jump_scale(devinfo);
   int scale = 16 / br;
   void *store = p->store;

   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
      brw_eu_inst *insn = store + offset;
      assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_BREAK: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
         brw_eu_inst_set_uip(devinfo, insn,
                             (brw_find_loop_end(p, offset) - offset) / scale);
         break;
      }

      case BRW_OPCODE_CONTINUE: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_eu_inst_set_uip(devinfo, insn,
                             (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
         break;
      }

      case BRW_OPCODE_ENDIF: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         brw_eu_inst_set_jip(devinfo, insn, jump);
         break;
      }

      case BRW_OPCODE_HALT: {
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         if (block_end_offset == 0) {
            brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
         } else {
            brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         }
         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
         break;
      }

      default:
         break;
      }
   }
}
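
/* Example (offsets hypothetical): for a BREAK at byte 64 inside
 * "do { if (c) break; ... } while (...)" with the ENDIF at 96 and the WHILE
 * at 160, the pass sets JIP = (96 - 64) / scale (the nearest block end) and
 * UIP = (160 - 64) / scale (the end of the loop).
 */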

static void
brw_set_memory_fence_message(struct brw_codegen *p,
                             struct brw_eu_inst *insn,
                             enum brw_message_target sfid,
                             bool commit_enable,
                             unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, (commit_enable ? 1 : 0), true));

   brw_eu_inst_set_sfid(devinfo, insn, sfid);

   switch (sfid) {
   case GFX6_SFID_DATAPORT_RENDER_CACHE:
      brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
      break;
   case GFX7_SFID_DATAPORT_DATA_CACHE:
      brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
      break;
   default:
      unreachable("Not reached");
   }

   if (commit_enable)
      brw_eu_inst_set_dp_msg_control(devinfo, insn, 1 << 5);

   assert(devinfo->ver >= 11 || bti == 0);
   brw_eu_inst_set_binding_table_index(devinfo, insn, bti);
}

static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_eu_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
   /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_eu_inst_set_sfid(p->devinfo, insn, sfid);

   /* On Gfx12.5 the URB is not listed as a port usable for fences with the
    * LSC (see BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we
    * completely ignore the descriptor value and rebuild a legacy URB fence
    * descriptor.
    */
   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true));
   } else {
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *    "For any fence greater than local scope, always set flush type to
       *     at least invalidate so that fence goes on properly."
       *
       *    "The bug is if flush_type is 'None', the scope is always downgraded
       *     to 'local'."
       *
       * Here we set the flush type to NONE_6 instead of NONE; it has the
       * same effect as NONE but avoids the downgrade to LOCAL scope.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false));
   }
}

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   dst = retype(vec1(dst), BRW_TYPE_UW);
   src = retype(vec1(src), BRW_TYPE_UD);

   /* Set dst as the destination for dependency tracking; the MEMORY_FENCE
    * message doesn't write anything back.
    */
   struct brw_eu_inst *insn = next_insn(p, send_op);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
   brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, src);

   /* All DG2 hardware requires LSC for fence messages, even A-step */
   if (devinfo->has_lsc)
      gfx12_set_memory_fence_message(p, insn, sfid, desc);
   else
      brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
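
/* Illustrative call (operands hypothetical): a commit-enabled data-cache
 * fence on pre-LSC hardware, using g1 both as the message header and as a
 * scratch destination for dependency tracking:
 *
 *    brw_memory_fence(p, brw_vec1_grf(1, 0), brw_vec1_grf(1, 0),
 *                     BRW_OPCODE_SEND, GFX7_SFID_DATAPORT_DATA_CACHE,
 *                     0, true, 0);
 */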

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gfx12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *     and Quad-Word data must not be used."
    *
    * We require the source and destination types to match, so stomp both to
    * an unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job,
       * but asserting would be mean.
       */
      const unsigned i = (src.vstride == 0 && src.hstride == 0) ? 0 : idx.ud;
      src = stride(suboffset(src, i), 0, 1, 0);

      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *     change the register address. The lower 5 bits of Address
       *     Immediate when added to lower 5 bits of address register gives
       *     the sub-register offset. The upper bits of Address Immediate
       *     when added to upper bits of address register gives the register
       *     address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));
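
      /* Worked example (values hypothetical): for a D-type source (4 bytes,
       * log2 == 2) with a horizontal stride encoding of 1 (a stride of one
       * element), the shift amount is 2 + 1 - 1 == 2, so addr = idx * 4,
       * i.e. the byte offset of component idx.
       */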

      /* We can only address up to limit bytes using the indirect
       * addressing immediate; account for the difference if the source
       * register is above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *     integer DWord multiply, indirect addressing must not be
          *     used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead
          * of one 64-bit MOV. Because no double value should ever cross
          * a register boundary, it's safe to use the immediate offset in
          * the indirect here to handle adding 4 bytes to the offset and
          * avoid the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                 retype(brw_vec1_indirect(addr.subnr, offset),
                        BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                 retype(brw_vec1_indirect(addr.subnr, offset + 4),
                        BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}


/**
 * Emit the SEND message for a barrier
 */
void
brw_barrier(struct brw_codegen *p, struct brw_reg src)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *inst;

   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   inst = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
   brw_set_src0(p, inst, src);
   brw_set_src1(p, inst, brw_null_reg());
   brw_set_desc(p, inst, brw_message_desc(devinfo,
                                          1 * reg_unit(devinfo), 0, false));

   brw_eu_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
   brw_eu_inst_set_gateway_subfuncid(devinfo, inst,
                                     BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);

   brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_pop_insn_state(p);
}


/**
 * Emit the wait instruction for a barrier
 */
void
brw_WAIT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *insn;

   struct brw_reg src = brw_notification_reg();

   insn = next_insn(p, BRW_OPCODE_WAIT);
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());

   brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask)
{
   assert(p->current->mask_control == BRW_MASK_DISABLE);

   /* From the Skylake PRM, Volume 7, page 760:
    *
    *    "Implementation Restriction on Register Access: When the control
    *     register is used as an explicit source and/or destination, hardware
    *     does not ensure execution pipeline coherency. Software must set the
    *     thread control field to ‘switch’ for an instruction that uses
    *     control register as an explicit operand."
    *
    * On Gfx12+ this is implemented in terms of SWSB annotations instead.
    */
   brw_set_default_swsb(p, tgl_swsb_regdist(1));

   brw_eu_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
                               brw_imm_ud(~mask));
   brw_eu_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
   if (p->devinfo->ver < 12)
      brw_eu_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);

   if (mode) {
      brw_eu_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
                                    brw_imm_ud(mode));
      brw_eu_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
      if (p->devinfo->ver < 12)
         brw_eu_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
   }

   if (p->devinfo->ver >= 12)
      brw_SYNC(p, TGL_SYNC_NOP);
}
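
/* Illustrative usage (assuming the usual cr0 rounding-mode encoding from
 * brw_eu_defines.h): switch the EU to round-toward-zero with
 *
 *    brw_float_controls_mode(p, BRW_RND_MODE_RTZ << BRW_CR0_RND_MODE_SHIFT,
 *                            BRW_CR0_RND_MODE_MASK);
 *
 * The AND above clears the masked bits and the OR installs the new mode.
 */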

void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_eu_inst *inst,
                     uint32_t value)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* Sanity check that the instruction is a MOV of an immediate */
   assert(brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
   assert(brw_eu_inst_src0_reg_file(devinfo, inst) == IMM);

   /* If it was compacted, we can't safely rewrite it. */
   assert(brw_eu_inst_cmpt_control(devinfo, inst) == 0);

   brw_eu_inst_set_imm_ud(devinfo, inst, value);
}

/* A default value for constants that will be patched at run-time.
 * We pick an arbitrary value that prevents instruction compaction.
 */
#define DEFAULT_PATCH_IMM 0x4a7cc037

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id,
                  uint32_t base)
{
   assert(brw_type_size_bytes(src_type) == 4);
   assert(brw_type_size_bytes(dst.type) == 4);

   brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
                 p->next_insn_offset, base);

   brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
}
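
/* Illustrative pairing (identifiers hypothetical): code generation emits the
 * placeholder and records the relocation,
 *
 *    brw_MOV_reloc_imm(p, dst, BRW_TYPE_UD, reloc_id, 0);
 *
 * and whoever uploads the program later finds the instruction through the
 * recorded relocation and patches the final value in with
 * brw_update_reloc_imm(isa, inst, real_value).
 */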