/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

void
brw_set_dest(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == FIXED_GRF)
      assert(dest.nr < XE3_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == ARF &&
       dest.nr == BRW_ARF_NULL &&
       brw_type_size_bytes(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(devinfo->ver < 12);
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_eu_inst_set_send_dst_reg_file(devinfo, inst, phys_file(dest));
   } else {
      brw_eu_inst_set_dst_file_type(devinfo, inst, phys_file(dest), dest.type);
      brw_eu_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_eu_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == FIXED_GRF) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW
             *    needs this to be programmed as "01".
             */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_eu_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                             dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                              dest.indirect_offset);
            /* Even though it's ignored in da16, this still needs to be
             * set to '01'.
             */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}

void
brw_set_src0(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE3_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      assert(reg.file == ARF || reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);

      brw_eu_inst_set_send_src0_reg_file(devinfo, inst, phys_file(reg));
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

      if (reg.file == ARF && reg.nr == BRW_ARF_SCALAR) {
         assert(reg.subnr % 2 == 0);
         brw_eu_inst_set_send_src0_subreg_nr(devinfo, inst, reg.subnr / 2);
      } else {
         assert(reg.subnr == 0);
      }
   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_eu_inst_set_src0_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_eu_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == IMM) {
         if (reg.type == BRW_TYPE_DF)
            brw_eu_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_TYPE_UQ ||
                  reg.type == BRW_TYPE_Q)
            brw_eu_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);

         if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
            brw_eu_inst_set_src1_reg_file(devinfo, inst,
                                          ARF);
            brw_eu_inst_set_src1_reg_hw_type(devinfo, inst,
                                             brw_eu_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_eu_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_eu_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src0_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src0_da16_swiz_x(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src0_da16_swiz_y(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src0_da16_swiz_z(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src0_da16_swiz_w(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}


void
brw_set_src1(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE3_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == FIXED_GRF ||
             reg.file == ARF ||
             reg.file == ADDRESS);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_send_src1_reg_file(devinfo, inst, phys_file(reg));
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *     operands only."
       */
      assert(reg.file != ARF ||
             (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);

      brw_eu_inst_set_src1_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be an immediate in two-argument instructions. */
      assert(brw_eu_inst_src0_reg_file(devinfo, inst) != IMM);

      if (reg.file == IMM) {
         /* Two-argument instructions can only use 32-bit immediates. */
         assert(brw_type_size_bytes(reg.type) < 8);
         brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert(reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert(reg.file == FIXED_GRF); */

         brw_eu_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_eu_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src1_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src1_da16_swiz_x(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src1_da16_swiz_y(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src1_da16_swiz_z(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src1_da16_swiz_w(devinfo, inst,
                                             BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *inst,
                unsigned desc, unsigned ex_desc, bool gather)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(!gather || devinfo->ver >= 30);
   assert(brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
          brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
   if (devinfo->ver < 12)
      brw_eu_inst_set_src1_file_type(devinfo, inst,
                                     IMM, BRW_TYPE_UD);
   brw_eu_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->ver >= 9)
      brw_eu_inst_set_send_ex_desc(devinfo, inst, ex_desc, gather);
}
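
/* For illustration only (not part of the upstream driver): a minimal usage
 * sketch. After allocating a SEND with brw_next_insn(), the immediate
 * descriptors can be attached directly, assuming `desc` and `ex_desc` were
 * packed elsewhere:
 *
 *    brw_eu_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
 *    brw_set_desc_ex(p, send, desc, ex_desc, false);
 */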

static void
brw_eu_inst_set_state(const struct brw_isa_info *isa,
                      brw_eu_inst *insn,
                      const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_eu_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_eu_inst_set_group(devinfo, insn, state->group);
   brw_eu_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_eu_inst_set_mask_control(devinfo, insn, state->mask_control);
   if (devinfo->ver >= 12)
      brw_eu_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb, brw_eu_inst_opcode(isa, insn)));
   brw_eu_inst_set_saturate(devinfo, insn, state->saturate);
   brw_eu_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_eu_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   if (is_3src(isa, brw_eu_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_eu_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_eu_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver < 20)
      brw_eu_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}

static brw_eu_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
   assert(util_is_power_of_two_or_zero(alignment));
   const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_eu_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_eu_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0. We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_eu_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_eu_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_eu_inst);

   return &p->store[start_insn];
}
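
/* For illustration only (not part of the upstream driver): a worked example
 * of the alignment math above, assuming sizeof(brw_eu_inst) == 16 bytes.
 * Requesting nr_insn = 2 with alignment = 64 when p->nr_insn == 5 gives
 * align_insn = 64 / 16 = 4 and start_insn = ALIGN(5, 4) = 8, so slots 5..7
 * become zero-filled padding and the new instructions occupy slots 8..9.
 */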

void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}

int
brw_append_data(struct brw_codegen *p, void *data,
                unsigned size, unsigned alignment)
{
   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_eu_inst));
   void *dst = brw_append_insns(p, nr_insn, alignment);
   memcpy(dst, data, size);

   /* If it's not a whole number of instructions, memset the end */
   if (size < nr_insn * sizeof(brw_eu_inst))
      memset(dst + size, 0, nr_insn * sizeof(brw_eu_inst) - size);

   return dst - (void *)p->store;
}

#define next_insn brw_next_insn
brw_eu_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_eu_inst *insn = brw_append_insns(p, 1, sizeof(brw_eu_inst));

   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_eu_inst_set_state(p->isa, insn, p->current);

   return insn;
}

void
brw_add_reloc(struct brw_codegen *p, uint32_t id,
              enum brw_shader_reloc_type type,
              uint32_t offset, uint32_t delta)
{
   if (p->num_relocs + 1 > p->reloc_array_size) {
      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
      p->relocs = reralloc(p->mem_ctx, p->relocs,
                           struct brw_shader_reloc, p->reloc_array_size);
   }

   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
      .id = id,
      .type = type,
      .offset = offset,
      .delta = delta,
   };
}

static brw_eu_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_eu_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static brw_eu_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != IMM ||
          brw_type_size_bytes(src0.type) <= 4);
   assert(src1.file != IMM ||
          brw_type_size_bytes(src1.type) <= 4);

   brw_eu_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      assert(devinfo->ver >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      assert(devinfo->ver < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}


static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}

static brw_eu_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   assert(dest.nr < XE3_MAX_GRF);

   if (devinfo->ver <= 9) {
      assert(src0.file != IMM && src2.file != IMM);
   } else if (devinfo->ver <= 11) {
      /* On Ice Lake, BFE and CSEL cannot have any immediate sources. */
      assert((opcode != BRW_OPCODE_BFE && opcode != BRW_OPCODE_CSEL) ||
             (src0.file != IMM && src2.file != IMM));

      /* On Ice Lake, DP4A and MAD can only have one immediate source. */
      assert((opcode != BRW_OPCODE_DP4A && opcode != BRW_OPCODE_MAD) ||
             !(src0.file == IMM && src2.file == IMM));
   } else {
      /* Having two immediate sources is allowed, but this should have been
       * converted to a regular ADD by brw_opt_algebraic.
       */
      assert(opcode != BRW_OPCODE_ADD3 ||
             !(src0.file == IMM && src2.file == IMM));
   }

   /* BFI2 cannot have any immediate sources on any platform. */
   assert(opcode != BRW_OPCODE_BFI2 ||
          (src0.file != IMM && src2.file != IMM));

   assert(src0.file == IMM || src0.nr < XE3_MAX_GRF);
   assert(src1.file != IMM && src1.nr < XE3_MAX_GRF);
   assert(src2.file == IMM || src2.nr < XE3_MAX_GRF);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      assert(dest.file == FIXED_GRF ||
             (dest.file == ARF &&
              (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));

      brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
      brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      if (brw_type_is_float(dest.type)) {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                           BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
                                           BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_eu_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_eu_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_eu_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_eu_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      if (src0.file == IMM) {
         brw_eu_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
      } else {
         brw_eu_inst_set_3src_a1_src0_vstride(
            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
         brw_eu_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                              to_3src_align1_hstride(src0.hstride));
         brw_eu_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
         brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
         brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
         brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      }
      brw_eu_inst_set_3src_a1_src1_vstride(
         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
      brw_eu_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                           to_3src_align1_hstride(src1.hstride));

      brw_eu_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
      if (src1.file == ARF) {
         brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
      }
      brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      if (src2.file == IMM) {
         brw_eu_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
      } else {
         brw_eu_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                              to_3src_align1_hstride(src2.hstride));
         /* no vstride on src2 */
         brw_eu_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
         brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
         brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
         brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      }

      assert(src0.file == FIXED_GRF ||
             src0.file == IMM);
      assert(src1.file == FIXED_GRF ||
             (src1.file == ARF &&
              src1.nr == BRW_ARF_ACCUMULATOR));
      assert(src2.file == FIXED_GRF ||
             src2.file == IMM);

      if (devinfo->ver >= 12) {
         if (src0.file == IMM) {
            brw_eu_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
         } else {
            brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
         }

         brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));

         if (src2.file == IMM) {
            brw_eu_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
         } else {
            brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
         }
      } else {
         brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
         brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
         brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
      }

   } else {
      assert(dest.file == FIXED_GRF);
      assert(dest.type == BRW_TYPE_F ||
             dest.type == BRW_TYPE_DF ||
             dest.type == BRW_TYPE_D ||
             dest.type == BRW_TYPE_UD ||
             dest.type == BRW_TYPE_HF);
      brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_eu_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
      brw_eu_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_eu_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
      brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_eu_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                             src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_eu_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
      brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_eu_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                             src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == FIXED_GRF);
      brw_eu_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_eu_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
      brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_eu_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                             src2.vstride == BRW_VERTICAL_STRIDE_0);

      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      brw_eu_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
      brw_eu_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

      /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
       *
       *    "Three source instructions can use operands with mixed-mode
       *     precision. When SrcType field is set to :f or :hf it defines
       *     precision for source 0 only, and fields Src1Type and Src2Type
       *     define precision for other source operands:
       *
       *     0b = :f. Single precision Float (32-bit).
       *     1b = :hf. Half precision Float (16-bit)."
       */
      if (src1.type == BRW_TYPE_HF)
         brw_eu_inst_set_3src_a16_src1_type(devinfo, inst, 1);

      if (src2.type == BRW_TYPE_HF)
         brw_eu_inst_set_3src_a16_src2_type(devinfo, inst, 1);
   }

   return inst;
}

static brw_eu_inst *
brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
                   enum gfx12_systolic_depth sdepth, unsigned rcount,
                   struct brw_reg dest, struct brw_reg src0,
                   struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   assert(dest.file == FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
                                          FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
   brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

   if (brw_type_is_float(dest.type)) {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                          BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                          BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
   }

   brw_eu_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
   brw_eu_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);

   brw_eu_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
   brw_eu_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
   brw_eu_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
   brw_eu_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);

   assert(src0.file == FIXED_GRF ||
          (src0.file == ARF &&
           src0.nr == BRW_ARF_NULL));

   brw_eu_inst_set_dpas_3src_src0_reg_file(devinfo, inst, phys_file(src0));
   brw_eu_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
   brw_eu_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));

   assert(src1.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src1_reg_file(devinfo, inst, phys_file(src1));
   brw_eu_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   assert(src2.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src2_reg_file(devinfo, inst, phys_file(src2));
   brw_eu_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   return inst;
}

/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                              \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                  \
                      struct brw_reg dest,                    \
                      struct brw_reg src0)                    \
{                                                             \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);           \
}

#define ALU2(OP)                                              \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                  \
                      struct brw_reg dest,                    \
                      struct brw_reg src0,                    \
                      struct brw_reg src1)                    \
{                                                             \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);     \
}

#define ALU3(OP)                                                    \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                        \
                      struct brw_reg dest,                          \
                      struct brw_reg src0,                          \
                      struct brw_reg src1,                          \
                      struct brw_reg src2)                          \
{                                                                   \
   if (p->current->access_mode == BRW_ALIGN_16) {                   \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                    \
         src0.swizzle = BRW_SWIZZLE_XXXX;                           \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                    \
         src1.swizzle = BRW_SWIZZLE_XXXX;                           \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                    \
         src2.swizzle = BRW_SWIZZLE_XXXX;                           \
   }                                                                \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);     \
}

#define ALU3F(OP)                                                   \
brw_eu_inst *brw_##OP(struct brw_codegen *p,                        \
                      struct brw_reg dest,                          \
                      struct brw_reg src0,                          \
                      struct brw_reg src1,                          \
                      struct brw_reg src2)                          \
{                                                                   \
   assert(dest.type == BRW_TYPE_F ||                                \
          dest.type == BRW_TYPE_DF);                                \
   if (dest.type == BRW_TYPE_F) {                                   \
      assert(src0.type == BRW_TYPE_F);                              \
      assert(src1.type == BRW_TYPE_F);                              \
      assert(src2.type == BRW_TYPE_F);                              \
   } else if (dest.type == BRW_TYPE_DF) {                           \
      assert(src0.type == BRW_TYPE_DF);                             \
      assert(src1.type == BRW_TYPE_DF);                             \
      assert(src2.type == BRW_TYPE_DF);                             \
   }                                                                \
                                                                    \
   if (p->current->access_mode == BRW_ALIGN_16) {                   \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                    \
         src0.swizzle = BRW_SWIZZLE_XXXX;                           \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                    \
         src1.swizzle = BRW_SWIZZLE_XXXX;                           \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                    \
         src2.swizzle = BRW_SWIZZLE_XXXX;                           \
   }                                                                \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);     \
}
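
/* For illustration only (not part of the upstream driver): each
 * instantiation below expands to a thin wrapper around the brw_alu*
 * helpers. For example, ALU2(AND) produces
 *
 *    brw_eu_inst *brw_AND(struct brw_codegen *p,
 *                         struct brw_reg dest,
 *                         struct brw_reg src0,
 *                         struct brw_reg src1)
 *    {
 *       return brw_alu2(p, BRW_OPCODE_AND, dest, src0, src1);
 *    }
 */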

ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)

brw_eu_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

brw_eu_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_TYPE_B:
   case BRW_TYPE_UB:
   case BRW_TYPE_W:
   case BRW_TYPE_UW:
   case BRW_TYPE_D:
   case BRW_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

brw_eu_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_TYPE_D ||
       src0.type == BRW_TYPE_UD ||
       src1.type == BRW_TYPE_D ||
       src1.type == BRW_TYPE_UD) {
      assert(dest.type != BRW_TYPE_F);
   }

   if (src0.type == BRW_TYPE_F ||
       (src0.file == IMM &&
        src0.type == BRW_TYPE_VF)) {
      assert(src1.type != BRW_TYPE_UD);
      assert(src1.type != BRW_TYPE_D);
   }

   if (src1.type == BRW_TYPE_F ||
       (src1.file == IMM &&
        src1.type == BRW_TYPE_VF)) {
      assert(src0.type != BRW_TYPE_UD);
      assert(src0.type != BRW_TYPE_D);
   }

   assert(src0.file != ARF ||
          src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != ARF ||
          src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}

brw_eu_inst *
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
}

brw_eu_inst *
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   src0.vstride = BRW_VERTICAL_STRIDE_0;
   src0.width = BRW_WIDTH_1;
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   src1.vstride = BRW_VERTICAL_STRIDE_8;
   src1.width = BRW_WIDTH_8;
   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
}

brw_eu_inst *
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
         unsigned rcount, struct brw_reg dest, struct brw_reg src0,
         struct brw_reg src1, struct brw_reg src2)
{
   return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
                             src1, src2);
}

void brw_NOP(struct brw_codegen *p)
{
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_NOP);
   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
}

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
   brw_eu_inst_set_cond_modifier(p->devinfo, insn, func);
}

/***********************************************************************
 * Comparisons, if/else/endif
 */

brw_eu_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
         unsigned predicate_control)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_reg ip = brw_ip_reg();
   brw_eu_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

   brw_eu_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_eu_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_eu_inst_set_pred_control(devinfo, inst, predicate_control);

   return inst;
}

static void
push_if_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}

static brw_eu_inst *
pop_if_stack(struct brw_codegen *p)
{
   p->if_stack_depth--;
   return &p->store[p->if_stack[p->if_stack_depth]];
}

static void
push_loop_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
                               p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
}

static brw_eu_inst *
get_inner_do_insn(struct brw_codegen *p)
{
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}

/* The EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
brw_eu_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_exec_size(devinfo, insn, execute_size);
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
   return insn;
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_eu_inst *if_inst, brw_eu_inst *else_inst,
              brw_eu_inst *endif_inst)
{
   const struct intel_device_info *devinfo = p->devinfo;

   assert(if_inst != NULL && brw_eu_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_eu_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   unsigned br = brw_jump_scale(devinfo);

   assert(brw_eu_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   brw_eu_inst_set_exec_size(devinfo, endif_inst, brw_eu_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_eu_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_eu_inst_set_exec_size(devinfo, else_inst, brw_eu_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF */
      /* The IF instruction's JIP should point just past the ELSE */
      brw_eu_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          */
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_eu_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
       * JIP and UIP both should point to ENDIF on those
       * platforms.
       */
      brw_eu_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}

void
brw_ELSE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = NULL;
   brw_eu_inst *else_inst = NULL;
   brw_eu_inst *if_inst = NULL;
   brw_eu_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_eu_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of the instruction
    * store memory (p->store), so call it first, before referencing the
    * instruction store pointer from an index.
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_eu_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   brw_eu_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
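
/* For illustration only (not part of the upstream driver): a minimal sketch
 * of how the structured control-flow helpers above nest, assuming the flag
 * register was already written (e.g. by brw_CMP()):
 *
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ...emit then-block instructions...
 *    brw_ELSE(p);
 *    ...emit else-block instructions...
 *    brw_ENDIF(p);
 *
 * brw_ENDIF() pops the IF (and optional ELSE) off the if-stack and patches
 * their JIP/UIP jump targets via patch_IF_ELSE().
 */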

brw_eu_inst *
brw_BREAK(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   brw_set_src0(p, insn, brw_imm_d(0x0));
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   return insn;
}

brw_eu_inst *
brw_CONT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_imm_d(0x0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

brw_eu_inst *
brw_HALT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12) {
      brw_set_src0(p, insn, brw_imm_d(0x0));
   }

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For gfx6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
brw_eu_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
   push_loop_stack(p, &p->store[p->nr_insn]);
   return &p->store[p->nr_insn];
}

brw_eu_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   insn = next_insn(p, BRW_OPCODE_WHILE);
   do_insn = get_inner_do_insn(p);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, br * (do_insn - insn));

   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
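
/* For illustration only (not part of the upstream driver): a minimal
 * loop-emission sketch using the helpers above. brw_DO() records the loop
 * head on the loop stack and emits nothing; brw_WHILE() emits the backward
 * jump:
 *
 *    brw_DO(p, BRW_EXECUTE_8);
 *    ...emit loop body, possibly with brw_BREAK(p) / brw_CONT(p)...
 *    brw_WHILE(p);
 *
 * BREAK/CONTINUE jump targets are fixed up after program generation (see
 * the UIP/JIP patching described at the end of this file).
 */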
1340
brw_CMP(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1341 void brw_CMP(struct brw_codegen *p,
1342 struct brw_reg dest,
1343 unsigned conditional,
1344 struct brw_reg src0,
1345 struct brw_reg src1)
1346 {
1347 const struct intel_device_info *devinfo = p->devinfo;
1348 brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1349
1350 brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
1351 brw_set_dest(p, insn, dest);
1352 brw_set_src0(p, insn, src0);
1353 brw_set_src1(p, insn, src1);
1354 }
1355
brw_CMPN(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1356 void brw_CMPN(struct brw_codegen *p,
1357 struct brw_reg dest,
1358 unsigned conditional,
1359 struct brw_reg src0,
1360 struct brw_reg src1)
1361 {
1362 const struct intel_device_info *devinfo = p->devinfo;
1363 brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
1364
1365 brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
1366 brw_set_dest(p, insn, dest);
1367 brw_set_src0(p, insn, src0);
1368 brw_set_src1(p, insn, src1);
1369 }
1370
1371 /***********************************************************************
1372 * Helpers for the various SEND message types:
1373 */
1374
gfx6_math(struct brw_codegen * p,struct brw_reg dest,unsigned function,struct brw_reg src0,struct brw_reg src1)1375 void gfx6_math(struct brw_codegen *p,
1376 struct brw_reg dest,
1377 unsigned function,
1378 struct brw_reg src0,
1379 struct brw_reg src1)
1380 {
1381 const struct intel_device_info *devinfo = p->devinfo;
1382 brw_eu_inst *insn = next_insn(p, BRW_OPCODE_MATH);
1383
1384 assert(dest.file == FIXED_GRF);
1385
1386 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1387
1388 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1389 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1390 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1391 assert(src0.type != BRW_TYPE_F);
1392 assert(src1.type != BRW_TYPE_F);
1393 assert(src1.file == FIXED_GRF ||
1394 src1.file == IMM);
1395 /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
1396 * INT DIV function does not support source modifiers.
1397 */
1398 assert(!src0.negate);
1399 assert(!src0.abs);
1400 assert(!src1.negate);
1401 assert(!src1.abs);
1402 } else {
1403 assert(src0.type == BRW_TYPE_F ||
1404 (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
1405 assert(src1.type == BRW_TYPE_F ||
1406 (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
1407 }
1408
1409 /* This workaround says that we cannot use scalar broadcast with HF types.
1410 * However, for is_scalar values, all 16 elements contain the same value, so
1411 * we can replace a <0,1,0> region with <16,16,1> without ill effect.
1412 */
1413 if (intel_needs_workaround(devinfo, 22016140776)) {
1414 if (src0.is_scalar && src0.type == BRW_TYPE_HF) {
1415 src0.vstride = BRW_VERTICAL_STRIDE_16;
1416 src0.width = BRW_WIDTH_16;
1417 src0.hstride = BRW_HORIZONTAL_STRIDE_1;
1418 src0.swizzle = BRW_SWIZZLE_XYZW;
1419 }
1420
1421 if (src1.is_scalar && src1.type == BRW_TYPE_HF) {
1422 src1.vstride = BRW_VERTICAL_STRIDE_16;
1423 src1.width = BRW_WIDTH_16;
1424 src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1425 src1.swizzle = BRW_SWIZZLE_XYZW;
1426 }
1427 }
1428
1429 brw_eu_inst_set_math_function(devinfo, insn, function);
1430
1431 brw_set_dest(p, insn, dest);
1432 brw_set_src0(p, insn, src0);
1433 brw_set_src1(p, insn, src1);
1434 }
1435
1436 void
brw_send_indirect_message(struct brw_codegen * p,unsigned sfid,struct brw_reg dst,struct brw_reg payload,struct brw_reg desc,bool eot,bool gather)1437 brw_send_indirect_message(struct brw_codegen *p,
1438 unsigned sfid,
1439 struct brw_reg dst,
1440 struct brw_reg payload,
1441 struct brw_reg desc,
1442 bool eot,
1443 bool gather)
1444 {
1445 const struct intel_device_info *devinfo = p->devinfo;
1446 struct brw_eu_inst *send;
1447
1448 dst = retype(dst, BRW_TYPE_UW);
1449
1450 assert(desc.type == BRW_TYPE_UD);
1451
1452 if (desc.file == IMM) {
1453 send = next_insn(p, BRW_OPCODE_SEND);
1454 brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1455 brw_set_desc(p, send, desc.ud, gather);
1456 } else {
1457 assert(desc.file == ADDRESS);
1458 assert(desc.subnr == 0);
1459 send = next_insn(p, BRW_OPCODE_SEND);
1460 brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1461 if (devinfo->ver >= 12)
1462 brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, true);
1463 else
1464 brw_set_src1(p, send, desc);
1465 }
1466
1467 brw_set_dest(p, send, dst);
1468 brw_eu_inst_set_sfid(devinfo, send, sfid);
1469 brw_eu_inst_set_eot(devinfo, send, eot);
1470 }

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                struct brw_reg ex_desc,
                                unsigned ex_mlen,
                                bool ex_bso,
                                bool eot,
                                bool gather)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
   brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));

   if (desc.file == IMM) {
      brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 0);
      brw_eu_inst_set_send_desc(devinfo, send, desc.ud);
   } else {
      assert(desc.file == ADDRESS);
      assert(desc.subnr == 0);
      brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 1);
   }

   if (ex_desc.file == IMM) {
      brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
      brw_eu_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud, gather);
   } else {
      assert(ex_desc.file == ADDRESS);
      assert((ex_desc.subnr & 0x3) == 0);
      brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
      brw_eu_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send,
                                                phys_subnr(devinfo, ex_desc) >> 2);

      if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM)
         brw_eu_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
   }

   if (ex_bso) {
      /* The send instruction's ExBSO field does not exist for UGM on
       * Gfx20+; there it is implicitly assumed.
       *
       * BSpec 56890
       */
      if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
         brw_eu_inst_set_send_ex_bso(devinfo, send, true);
      brw_eu_inst_set_send_src1_len(devinfo, send, ex_mlen / reg_unit(devinfo));
   }
   brw_eu_inst_set_sfid(devinfo, send, sfid);
   brw_eu_inst_set_eot(devinfo, send, eot);
}
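
/* Example (illustrative; the descriptor values and register choices are
 * hypothetical, and we assume brw_address_reg() yields a suitably aligned
 * ADDRESS-file register): a split send whose main descriptor is an
 * immediate while the extended descriptor lives in the address file:
 *
 *    struct brw_reg ex_desc = retype(brw_address_reg(0), BRW_TYPE_UD);
 *    brw_send_indirect_split_message(p, GFX12_SFID_UGM, dst,
 *                                    payload0, payload1,
 *                                    brw_imm_ud(desc), ex_desc,
 *                                    1, false, false, false);
 */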

static bool
while_jumps_before_offset(const struct intel_device_info *devinfo,
                          brw_eu_inst *insn, int while_offset, int start_offset)
{
   int scale = 16 / brw_jump_scale(devinfo);
   int jip = brw_eu_inst_jip(devinfo, insn);
   assert(jip < 0);
   return while_offset + jip * scale <= start_offset;
}
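
/* Worked example (illustrative): on platforms where brw_jump_scale() is 16,
 * jump offsets are measured in bytes, so scale == 1 and JIP is a plain byte
 * delta.  A WHILE at offset 64 with jip == -48 targets offset 16, so it
 * "jumps before" any start_offset >= 16.
 */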

static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   int depth = 0;

   for (offset = next_offset(p, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(p, store, offset)) {
      brw_eu_inst *insn = store + offset;

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop.  Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   return 0;
}
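
/* Example (illustrative): scanning forward from a BREAK inside a loop that
 * also contains a nested IF/ENDIF,
 *
 *    BREAK        <- start_offset
 *    IF           depth -> 1
 *    ENDIF        depth -> 0
 *    WHILE        jumps before start_offset
 *
 * the nested ENDIF is skipped (depth was still 1 when it was reached) and
 * the WHILE's offset is returned as the block end.
 */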

/* There is no DO instruction on Gfx6, so to find the end of the loop we
 * have to see whether the loop jumps back before our start instruction.
 */
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to
    * fix up.
    */
   for (offset = next_offset(p, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(p, store, offset)) {
      brw_eu_inst *insn = store + offset;

      if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
         if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
            return offset;
      }
   }
   assert(!"not reached");
   return start_offset;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   int br = brw_jump_scale(devinfo);
   int scale = 16 / br;
   void *store = p->store;

   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
      brw_eu_inst *insn = store + offset;
      assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_BREAK: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
         brw_eu_inst_set_uip(devinfo, insn,
                             (brw_find_loop_end(p, offset) - offset) / scale);
         break;
      }

      case BRW_OPCODE_CONTINUE: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_eu_inst_set_uip(devinfo, insn,
                             (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
         break;
      }

      case BRW_OPCODE_ENDIF: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         brw_eu_inst_set_jip(devinfo, insn, jump);
         break;
      }

      case BRW_OPCODE_HALT: {
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         if (block_end_offset == 0) {
            brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
         } else {
            brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         }
         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
         break;
      }

      default:
         break;
      }
   }
}
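
/* Worked example (illustrative, assuming a platform where scale == 1 so
 * offsets are in bytes):
 *
 *    0x00  ...loop head...
 *    0x20  BREAK    <- offset being patched
 *    0x30  ...
 *    0x40  WHILE    (negative JIP back to the loop head)
 *
 * brw_find_next_block_end(p, 0x20) and brw_find_loop_end(p, 0x20) both
 * return 0x40, so the BREAK gets jip = uip = 0x40 - 0x20 = 0x20.
 */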

static void
brw_set_memory_fence_message(struct brw_codegen *p,
                             struct brw_eu_inst *insn,
                             enum brw_message_target sfid,
                             bool commit_enable,
                             unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, (commit_enable ? 1 : 0), true), false);

   brw_eu_inst_set_sfid(devinfo, insn, sfid);

   switch (sfid) {
   case GFX6_SFID_DATAPORT_RENDER_CACHE:
      brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
      break;
   case GFX7_SFID_DATAPORT_DATA_CACHE:
      brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
      break;
   default:
      unreachable("Not reached");
   }

   if (commit_enable)
      brw_eu_inst_set_dp_msg_control(devinfo, insn, 1 << 5);

   assert(devinfo->ver >= 11 || bti == 0);
   brw_eu_inst_set_binding_table_index(devinfo, insn, bti);
}

static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_eu_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
   /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_eu_inst_set_sfid(p->devinfo, insn, sfid);

   /* On Gfx12.5 the URB is not listed as a port usable for fences with the
    * LSC (see BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we
    * completely ignore the descriptor value and rebuild a legacy URB fence
    * descriptor.
    */
   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true),
                   false);
   } else {
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *    "For any fence greater than local scope, always set flush type to
       *     at least invalidate so that fence goes on properly."
       *
       *    "The bug is if flush_type is 'None', the scope is always
       *     downgraded to 'local'."
       *
       * Here we set the flush type to NONE_6 instead of NONE, which has the
       * same effect as NONE but avoids the downgrade to scope LOCAL.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false),
                   false);
   }
}

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   dst = retype(vec1(dst), BRW_TYPE_UW);
   src = retype(vec1(src), BRW_TYPE_UD);

   /* Set dst as the destination for dependency tracking; the MEMORY_FENCE
    * message doesn't write anything back.
    */
   struct brw_eu_inst *insn = next_insn(p, send_op);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
   brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, src);

   /* All DG2 hardware requires LSC for fence messages, even A-step */
   if (devinfo->has_lsc)
      gfx12_set_memory_fence_message(p, insn, sfid, desc);
   else
      brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
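
/* Usage sketch (illustrative; the register and descriptor choices are
 * hypothetical): emit a fence on the LSC UGM port, tracking completion
 * through g10:
 *
 *    brw_memory_fence(p, brw_vec8_grf(10, 0), brw_vec8_grf(0, 0),
 *                     BRW_OPCODE_SEND, GFX12_SFID_UGM, desc, false, 0);
 *
 * On has_lsc platforms commit_enable and bti are ignored; the fence
 * behavior comes entirely from the LSC descriptor instead.
 */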

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gfx12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *     and Quad-Word data must not be used."
    *
    * We require the source and destination types to match, so stomp both to
    * an unsigned integer type of the same size.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial case: the source is already uniform or the index is a
       * constant.  We will typically not get here if the optimizer is doing
       * its job, but asserting would be mean.
       */
      const unsigned i = (src.vstride == 0 && src.hstride == 0) ? 0 : idx.ud;
      src = stride(suboffset(src, i), 0, 1, 0);

      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                 subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                 subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *     change the register address. The lower 5 bits of Address
       *     Immediate when added to lower 5 bits of address register gives
       *     the sub-register offset. The upper bits of Address Immediate
       *     when added to upper bits of address register gives the register
       *     address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));
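
      /* Example of the shift math above (illustrative): for a 4-byte
       * component with an hstride encoding of 1 (a packed, stride-one
       * region), the shift is log2(4) + 1 - 1 = 2, so addr = idx * 4
       * bytes.  An hstride encoding of 2 (two-element stride) shifts by
       * one more bit, giving idx * 8 bytes.
       */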

      /* We can only address up to limit bytes using the indirect addressing
       * immediate, so account for the difference if the source register is
       * above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *    "When source or destination datatype is 64b or operation is
          *     integer DWord multiply, indirect addressing must not be
          *     used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead
          * of one 64-bit MOV.  Because no double value should ever cross
          * a register boundary, it's safe to use the immediate offset in
          * the indirect here to handle adding 4 bytes to the offset and
          * avoid the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                 retype(brw_vec1_indirect(addr.subnr, offset),
                        BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                 retype(brw_vec1_indirect(addr.subnr, offset + 4),
                        BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}


/**
 * Emit the SEND message for a barrier
 */
void
brw_barrier(struct brw_codegen *p, struct brw_reg src)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *inst;

   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   inst = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
   brw_set_src0(p, inst, src);
   brw_set_src1(p, inst, brw_null_reg());
   brw_set_desc(p, inst, brw_message_desc(devinfo,
                                          1 * reg_unit(devinfo), 0, false), false);

   brw_eu_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
   brw_eu_inst_set_gateway_subfuncid(devinfo, inst,
                                     BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);

   brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_pop_insn_state(p);
}
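
/* A barrier message is typically paired with a wait on the notification
 * register, so the thread sleeps until every thread in the group has
 * signaled (illustrative; the payload register is hypothetical):
 *
 *    brw_barrier(p, brw_vec8_grf(0, 0));
 *    brw_WAIT(p);
 */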


/**
 * Emit the wait instruction for a barrier
 */
void
brw_WAIT(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *insn;

   struct brw_reg src = brw_notification_reg();

   insn = next_insn(p, BRW_OPCODE_WAIT);
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());

   brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask)
{
   assert(p->current->mask_control == BRW_MASK_DISABLE);

   /* From the Skylake PRM, Volume 7, page 760:
    *
    *    "Implementation Restriction on Register Access: When the control
    *     register is used as an explicit source and/or destination, hardware
    *     does not ensure execution pipeline coherency. Software must set the
    *     thread control field to ‘switch’ for an instruction that uses
    *     control register as an explicit operand."
    *
    * On Gfx12+ this is implemented in terms of SWSB annotations instead.
    */
   brw_set_default_swsb(p, tgl_swsb_regdist(1));

   brw_eu_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
                               brw_imm_ud(~mask));
   brw_eu_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
   if (p->devinfo->ver < 12)
      brw_eu_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);

   if (mode) {
      brw_eu_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
                                    brw_imm_ud(mode));
      brw_eu_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
      if (p->devinfo->ver < 12)
         brw_eu_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
   }

   if (p->devinfo->ver >= 12)
      brw_SYNC(p, TGL_SYNC_NOP);
}
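
/* Usage sketch (illustrative; assumes the BRW_RND_MODE_* and
 * BRW_CR0_RND_MODE_* defines are available): switch the EU's float rounding
 * mode to round-toward-zero while leaving the other cr0 control bits alone:
 *
 *    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 *    brw_float_controls_mode(p, BRW_RND_MODE_RTZ << BRW_CR0_RND_MODE_SHIFT,
 *                            BRW_CR0_RND_MODE_MASK);
 *
 * Passing mode == 0 only clears the masked bits, restoring the default.
 */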

void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_eu_inst *inst,
                     uint32_t value)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* Sanity check that the instruction is a MOV of an immediate */
   assert(brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
   assert(brw_eu_inst_src0_reg_file(devinfo, inst) == IMM);

   /* If it was compacted, we can't safely rewrite */
   assert(brw_eu_inst_cmpt_control(devinfo, inst) == 0);

   brw_eu_inst_set_imm_ud(devinfo, inst, value);
}

/* A default value for constants that will be patched at run-time.
 * We pick an arbitrary value that prevents instruction compaction.
 */
#define DEFAULT_PATCH_IMM 0x4a7cc037

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id,
                  uint32_t base)
{
   assert(brw_type_size_bytes(src_type) == 4);
   assert(brw_type_size_bytes(dst.type) == 4);

   brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
                 p->next_insn_offset, base);

   brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
}
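
/* Typical flow (illustrative; reloc_id, inst, and actual_value are
 * hypothetical): emit a patchable MOV at compile time, then rewrite it once
 * the real value is known, e.g. at pipeline-upload time:
 *
 *    brw_MOV_reloc_imm(p, dst, BRW_TYPE_UD, reloc_id, 0);
 *    ...
 *    brw_update_reloc_imm(isa, inst, actual_value);
 *
 * The runtime uses the recorded BRW_SHADER_RELOC_TYPE_MOV_IMM relocation to
 * locate the instruction to patch.
 */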