/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

38 void
brw_set_dest(struct brw_codegen * p,brw_eu_inst * inst,struct brw_reg dest)39 brw_set_dest(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg dest)
40 {
41    const struct intel_device_info *devinfo = p->devinfo;
42 
43    if (dest.file == FIXED_GRF)
44       assert(dest.nr < XE3_MAX_GRF);
45 
46    /* The hardware has a restriction where a destination of size Byte with
47     * a stride of 1 is only allowed for a packed byte MOV. For any other
48     * instruction, the stride must be at least 2, even when the destination
49     * is the NULL register.
50     */
51    if (dest.file == ARF &&
52        dest.nr == BRW_ARF_NULL &&
53        brw_type_size_bytes(dest.type) == 1 &&
54        dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
55       dest.hstride = BRW_HORIZONTAL_STRIDE_2;
56    }
57 
58    if (devinfo->ver >= 12 &&
59        (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
60         brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
61       assert(dest.file == FIXED_GRF ||
62              dest.file == ADDRESS ||
63              dest.file == ARF);
64       assert(dest.address_mode == BRW_ADDRESS_DIRECT);
65       assert(dest.subnr == 0);
66       assert(brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
67              (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
68               dest.vstride == dest.width + 1));
69       assert(!dest.negate && !dest.abs);
70       brw_eu_inst_set_dst_reg_file(devinfo, inst, phys_file(dest));
71       brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
72 
73    } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
74               brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
75       assert(devinfo->ver < 12);
76       assert(dest.file == FIXED_GRF ||
77              dest.file == ADDRESS ||
78              dest.file == ARF);
79       assert(dest.address_mode == BRW_ADDRESS_DIRECT);
80       assert(dest.subnr % 16 == 0);
81       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
82              dest.vstride == dest.width + 1);
83       assert(!dest.negate && !dest.abs);
84       brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
85       brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
86       brw_eu_inst_set_send_dst_reg_file(devinfo, inst, phys_file(dest));
87    } else {
88       brw_eu_inst_set_dst_file_type(devinfo, inst, phys_file(dest), dest.type);
89       brw_eu_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
90 
91       if (dest.address_mode == BRW_ADDRESS_DIRECT) {
92          brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
93 
94          if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
95             brw_eu_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
96             if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
97                dest.hstride = BRW_HORIZONTAL_STRIDE_1;
98             brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
99          } else {
100             brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
101             brw_eu_inst_set_da16_writemask(devinfo, inst, dest.writemask);
102             if (dest.file == FIXED_GRF) {
103                assert(dest.writemask != 0);
104             }
105             /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
106              *    Although Dst.HorzStride is a don't care for Align16, HW needs
107              *    this to be programmed as "01".
108              */
109             brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
110          }
111       } else {
112          brw_eu_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
113 
114          /* These are different sizes in align1 vs align16:
115           */
116          if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
117             brw_eu_inst_set_dst_ia1_addr_imm(devinfo, inst,
118                                           dest.indirect_offset);
119             if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
120                dest.hstride = BRW_HORIZONTAL_STRIDE_1;
121             brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
122          } else {
123             brw_eu_inst_set_dst_ia16_addr_imm(devinfo, inst,
124                                            dest.indirect_offset);
125             /* even ignored in da16, still need to set as '01' */
126             brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
127          }
128       }
129    }
130 }
131 
132 void
brw_set_src0(struct brw_codegen * p,brw_eu_inst * inst,struct brw_reg reg)133 brw_set_src0(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
134 {
135    const struct intel_device_info *devinfo = p->devinfo;
136 
137    if (reg.file == FIXED_GRF)
138       assert(reg.nr < XE3_MAX_GRF);
139 
140    if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND  ||
141        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
142        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
143        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
144       /* Any source modifiers or regions will be ignored, since this just
145        * identifies the GRF to start reading the message contents from.
146        * Check for some likely failures.
147        */
148       assert(!reg.negate);
149       assert(!reg.abs);
150       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
151    }
152 
153    if (devinfo->ver >= 12 &&
154        (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
155         brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
156       assert(reg.file == ARF || reg.file == FIXED_GRF);
157       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
158       assert(has_scalar_region(reg) ||
159              (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
160               reg.vstride == reg.width + 1));
161       assert(!reg.negate && !reg.abs);
162 
163       brw_eu_inst_set_send_src0_reg_file(devinfo, inst, phys_file(reg));
164       brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
165 
166       if (reg.file == ARF && reg.nr == BRW_ARF_SCALAR) {
167          assert(reg.subnr % 2 == 0);
168          brw_eu_inst_set_send_src0_subreg_nr(devinfo, inst, reg.subnr / 2);
169       } else {
170          assert(reg.subnr == 0);
171       }
172    } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
173               brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
174       assert(reg.file == FIXED_GRF);
175       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
176       assert(reg.subnr % 16 == 0);
177       assert(has_scalar_region(reg) ||
178              (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
179               reg.vstride == reg.width + 1));
180       assert(!reg.negate && !reg.abs);
181       brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
182       brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
183    } else {
184       brw_eu_inst_set_src0_file_type(devinfo, inst, phys_file(reg), reg.type);
185       brw_eu_inst_set_src0_abs(devinfo, inst, reg.abs);
186       brw_eu_inst_set_src0_negate(devinfo, inst, reg.negate);
187       brw_eu_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
188 
189       if (reg.file == IMM) {
190          if (reg.type == BRW_TYPE_DF)
191             brw_eu_inst_set_imm_df(devinfo, inst, reg.df);
192          else if (reg.type == BRW_TYPE_UQ ||
193                   reg.type == BRW_TYPE_Q)
194             brw_eu_inst_set_imm_uq(devinfo, inst, reg.u64);
195          else
196             brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);
197 
198          if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
199             brw_eu_inst_set_src1_reg_file(devinfo, inst,
200                                        ARF);
201             brw_eu_inst_set_src1_reg_hw_type(devinfo, inst,
202                                           brw_eu_inst_src0_reg_hw_type(devinfo, inst));
203          }
204       } else {
205          if (reg.address_mode == BRW_ADDRESS_DIRECT) {
206             brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
207             if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
208                brw_eu_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
209             } else {
210                brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
211             }
212          } else {
213             brw_eu_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
214 
215             if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
216                brw_eu_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
217             } else {
218                brw_eu_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
219             }
220          }
221 
222          if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
223             if (reg.width == BRW_WIDTH_1 &&
224                 brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
225                brw_eu_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
226                brw_eu_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
227                brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
228             } else {
229                brw_eu_inst_set_src0_hstride(devinfo, inst, reg.hstride);
230                brw_eu_inst_set_src0_width(devinfo, inst, reg.width);
231                brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
232             }
233          } else {
234             brw_eu_inst_set_src0_da16_swiz_x(devinfo, inst,
235                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
236             brw_eu_inst_set_src0_da16_swiz_y(devinfo, inst,
237                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
238             brw_eu_inst_set_src0_da16_swiz_z(devinfo, inst,
239                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
240             brw_eu_inst_set_src0_da16_swiz_w(devinfo, inst,
241                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
242 
243             if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
244                /* This is an oddity of the fact we're using the same
245                 * descriptions for registers in align_16 as align_1:
246                 */
247                brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
248             } else {
249                brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
250             }
251          }
252       }
253    }
254 }
255 
256 
257 void
brw_set_src1(struct brw_codegen * p,brw_eu_inst * inst,struct brw_reg reg)258 brw_set_src1(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
259 {
260    const struct intel_device_info *devinfo = p->devinfo;
261 
262    if (reg.file == FIXED_GRF)
263       assert(reg.nr < XE3_MAX_GRF);
264 
265    if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
266        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
267        (devinfo->ver >= 12 &&
268         (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
269          brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
270       assert(reg.file == FIXED_GRF ||
271              reg.file == ARF ||
272              reg.file == ADDRESS);
273       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
274       assert(reg.subnr == 0);
275       assert(has_scalar_region(reg) ||
276              (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
277               reg.vstride == reg.width + 1));
278       assert(!reg.negate && !reg.abs);
279       brw_eu_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
280       brw_eu_inst_set_send_src1_reg_file(devinfo, inst, phys_file(reg));
281    } else {
282       /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
283        *
284        *    "Accumulator registers may be accessed explicitly as src0
285        *    operands only."
286        */
287       assert(reg.file != ARF ||
288              (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);
289 
290       brw_eu_inst_set_src1_file_type(devinfo, inst, phys_file(reg), reg.type);
291       brw_eu_inst_set_src1_abs(devinfo, inst, reg.abs);
292       brw_eu_inst_set_src1_negate(devinfo, inst, reg.negate);
293 
294       /* Only src1 can be immediate in two-argument instructions.
295        */
296       assert(brw_eu_inst_src0_reg_file(devinfo, inst) != IMM);
297 
298       if (reg.file == IMM) {
299          /* two-argument instructions can only use 32-bit immediates */
300          assert(brw_type_size_bytes(reg.type) < 8);
301          brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);
302       } else {
303          /* This is a hardware restriction, which may or may not be lifted
304           * in the future:
305           */
306          assert (reg.address_mode == BRW_ADDRESS_DIRECT);
307          /* assert (reg.file == FIXED_GRF); */
308 
309          brw_eu_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
310          if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
311             brw_eu_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
312          } else {
313             brw_eu_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
314          }
315 
316          if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
317             if (reg.width == BRW_WIDTH_1 &&
318                 brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
319                brw_eu_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
320                brw_eu_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
321                brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
322             } else {
323                brw_eu_inst_set_src1_hstride(devinfo, inst, reg.hstride);
324                brw_eu_inst_set_src1_width(devinfo, inst, reg.width);
325                brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
326             }
327          } else {
328             brw_eu_inst_set_src1_da16_swiz_x(devinfo, inst,
329                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
330             brw_eu_inst_set_src1_da16_swiz_y(devinfo, inst,
331                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
332             brw_eu_inst_set_src1_da16_swiz_z(devinfo, inst,
333                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
334             brw_eu_inst_set_src1_da16_swiz_w(devinfo, inst,
335                BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
336 
337             if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
338                /* This is an oddity of the fact we're using the same
339                 * descriptions for registers in align_16 as align_1:
340                 */
341                brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
342             } else {
343                brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
344             }
345          }
346       }
347    }
348 }
349 
350 /**
351  * Specify the descriptor and extended descriptor immediate for a SEND(C)
352  * message instruction.
353  */
354 void
brw_set_desc_ex(struct brw_codegen * p,brw_eu_inst * inst,unsigned desc,unsigned ex_desc,bool gather)355 brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *inst,
356                 unsigned desc, unsigned ex_desc, bool gather)
357 {
358    const struct intel_device_info *devinfo = p->devinfo;
359    assert(!gather || devinfo->ver >= 30);
360    assert(brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
361           brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
362    if (devinfo->ver < 12)
363       brw_eu_inst_set_src1_file_type(devinfo, inst,
364                                   IMM, BRW_TYPE_UD);
365    brw_eu_inst_set_send_desc(devinfo, inst, desc);
366    if (devinfo->ver >= 9)
367       brw_eu_inst_set_send_ex_desc(devinfo, inst, ex_desc, gather);
368 }
369 
370 static void
brw_eu_inst_set_state(const struct brw_isa_info * isa,brw_eu_inst * insn,const struct brw_insn_state * state)371 brw_eu_inst_set_state(const struct brw_isa_info *isa,
372                    brw_eu_inst *insn,
373                    const struct brw_insn_state *state)
374 {
375    const struct intel_device_info *devinfo = isa->devinfo;
376 
377    brw_eu_inst_set_exec_size(devinfo, insn, state->exec_size);
378    brw_eu_inst_set_group(devinfo, insn, state->group);
379    brw_eu_inst_set_access_mode(devinfo, insn, state->access_mode);
380    brw_eu_inst_set_mask_control(devinfo, insn, state->mask_control);
381    if (devinfo->ver >= 12)
382       brw_eu_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb, brw_eu_inst_opcode(isa, insn)));
383    brw_eu_inst_set_saturate(devinfo, insn, state->saturate);
384    brw_eu_inst_set_pred_control(devinfo, insn, state->predicate);
385    brw_eu_inst_set_pred_inv(devinfo, insn, state->pred_inv);
386 
387    if (is_3src(isa, brw_eu_inst_opcode(isa, insn)) &&
388        state->access_mode == BRW_ALIGN_16) {
389       brw_eu_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
390       brw_eu_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
391    } else {
392       brw_eu_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
393       brw_eu_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
394    }
395 
396    if (devinfo->ver < 20)
397       brw_eu_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
398 }
399 
400 static brw_eu_inst *
brw_append_insns(struct brw_codegen * p,unsigned nr_insn,unsigned alignment)401 brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
402 {
403    assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
404    assert(util_is_power_of_two_or_zero(alignment));
405    const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
406    const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
407    const unsigned new_nr_insn = start_insn + nr_insn;
408 
409    if (p->store_size < new_nr_insn) {
410       p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_eu_inst));
411       p->store = reralloc(p->mem_ctx, p->store, brw_eu_inst, p->store_size);
412    }
413 
414    /* Memset any padding due to alignment to 0.  We don't want to be hashing
415     * or caching a bunch of random bits we got from a memory allocation.
416     */
417    if (p->nr_insn < start_insn) {
418       memset(&p->store[p->nr_insn], 0,
419              (start_insn - p->nr_insn) * sizeof(brw_eu_inst));
420    }
421 
422    assert(p->next_insn_offset == p->nr_insn * sizeof(brw_eu_inst));
423    p->nr_insn = new_nr_insn;
424    p->next_insn_offset = new_nr_insn * sizeof(brw_eu_inst);
425 
426    return &p->store[start_insn];
427 }
428 
/* Align the next instruction slot to \p alignment bytes without emitting
 * anything (zero-instruction append).
 */
void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}

435 int
brw_append_data(struct brw_codegen * p,void * data,unsigned size,unsigned alignment)436 brw_append_data(struct brw_codegen *p, void *data,
437                 unsigned size, unsigned alignment)
438 {
439    unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_eu_inst));
440    void *dst = brw_append_insns(p, nr_insn, alignment);
441    memcpy(dst, data, size);
442 
443    /* If it's not a whole number of instructions, memset the end */
444    if (size < nr_insn * sizeof(brw_eu_inst))
445       memset(dst + size, 0, nr_insn * sizeof(brw_eu_inst) - size);
446 
447    return dst - (void *)p->store;
448 }
449 
450 #define next_insn brw_next_insn
451 brw_eu_inst *
brw_next_insn(struct brw_codegen * p,unsigned opcode)452 brw_next_insn(struct brw_codegen *p, unsigned opcode)
453 {
454    brw_eu_inst *insn = brw_append_insns(p, 1, sizeof(brw_eu_inst));
455 
456    memset(insn, 0, sizeof(*insn));
457    brw_eu_inst_set_opcode(p->isa, insn, opcode);
458 
459    /* Apply the default instruction state */
460    brw_eu_inst_set_state(p->isa, insn, p->current);
461 
462    return insn;
463 }
464 
465 void
brw_add_reloc(struct brw_codegen * p,uint32_t id,enum brw_shader_reloc_type type,uint32_t offset,uint32_t delta)466 brw_add_reloc(struct brw_codegen *p, uint32_t id,
467               enum brw_shader_reloc_type type,
468               uint32_t offset, uint32_t delta)
469 {
470    if (p->num_relocs + 1 > p->reloc_array_size) {
471       p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
472       p->relocs = reralloc(p->mem_ctx, p->relocs,
473                            struct brw_shader_reloc, p->reloc_array_size);
474    }
475 
476    p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
477       .id = id,
478       .type = type,
479       .offset = offset,
480       .delta = delta,
481    };
482 }
483 
484 static brw_eu_inst *
brw_alu1(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src)485 brw_alu1(struct brw_codegen *p, unsigned opcode,
486          struct brw_reg dest, struct brw_reg src)
487 {
488    brw_eu_inst *insn = next_insn(p, opcode);
489    brw_set_dest(p, insn, dest);
490    brw_set_src0(p, insn, src);
491    return insn;
492 }
493 
494 static brw_eu_inst *
brw_alu2(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)495 brw_alu2(struct brw_codegen *p, unsigned opcode,
496          struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
497 {
498    /* 64-bit immediates are only supported on 1-src instructions */
499    assert(src0.file != IMM ||
500           brw_type_size_bytes(src0.type) <= 4);
501    assert(src1.file != IMM ||
502           brw_type_size_bytes(src1.type) <= 4);
503 
504    brw_eu_inst *insn = next_insn(p, opcode);
505    brw_set_dest(p, insn, dest);
506    brw_set_src0(p, insn, src0);
507    brw_set_src1(p, insn, src1);
508    return insn;
509 }
510 
511 static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info * devinfo,enum brw_vertical_stride vstride)512 to_3src_align1_vstride(const struct intel_device_info *devinfo,
513                        enum brw_vertical_stride vstride)
514 {
515    switch (vstride) {
516    case BRW_VERTICAL_STRIDE_0:
517       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
518    case BRW_VERTICAL_STRIDE_1:
519       assert(devinfo->ver >= 12);
520       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
521    case BRW_VERTICAL_STRIDE_2:
522       assert(devinfo->ver < 12);
523       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
524    case BRW_VERTICAL_STRIDE_4:
525       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
526    case BRW_VERTICAL_STRIDE_8:
527    case BRW_VERTICAL_STRIDE_16:
528       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
529    default:
530       unreachable("invalid vstride");
531    }
532 }
533 
534 
535 static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)536 to_3src_align1_hstride(enum brw_horizontal_stride hstride)
537 {
538    switch (hstride) {
539    case BRW_HORIZONTAL_STRIDE_0:
540       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
541    case BRW_HORIZONTAL_STRIDE_1:
542       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
543    case BRW_HORIZONTAL_STRIDE_2:
544       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
545    case BRW_HORIZONTAL_STRIDE_4:
546       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
547    default:
548       unreachable("invalid hstride");
549    }
550 }
551 
552 static brw_eu_inst *
brw_alu3(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)553 brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
554          struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
555 {
556    const struct intel_device_info *devinfo = p->devinfo;
557    brw_eu_inst *inst = next_insn(p, opcode);
558 
559    assert(dest.nr < XE3_MAX_GRF);
560 
561    if (devinfo->ver <= 9) {
562       assert(src0.file != IMM && src2.file != IMM);
563    } else if (devinfo->ver <= 11) {
564       /* On Ice Lake, BFE and CSEL cannot have any immediate sources. */
565       assert((opcode != BRW_OPCODE_BFE && opcode != BRW_OPCODE_CSEL) ||
566              (src0.file != IMM && src2.file != IMM));
567 
568       /* On Ice Lake, DP4A and MAD can only have one immediate source. */
569       assert((opcode != BRW_OPCODE_DP4A && opcode != BRW_OPCODE_MAD) ||
570              !(src0.file == IMM && src2.file == IMM));
571    } else {
572       /* Having two immediate sources is allowed, but this should have been
573        * converted to a regular ADD by brw_opt_algebraic.
574        */
575       assert(opcode != BRW_OPCODE_ADD3 ||
576              !(src0.file == IMM && src2.file == IMM));
577    }
578 
579    /* BFI2 cannot have any immediate sources on any platform. */
580    assert(opcode != BRW_OPCODE_BFI2 ||
581           (src0.file != IMM && src2.file != IMM));
582 
583    assert(src0.file == IMM || src0.nr < XE3_MAX_GRF);
584    assert(src1.file != IMM && src1.nr < XE3_MAX_GRF);
585    assert(src2.file == IMM || src2.nr < XE3_MAX_GRF);
586    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
587    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
588    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
589    assert(src2.address_mode == BRW_ADDRESS_DIRECT);
590 
591    if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
592       assert(dest.file == FIXED_GRF ||
593              (dest.file == ARF &&
594               (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));
595 
596       brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
597       brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
598       brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
599       brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);
600 
601       if (brw_type_is_float(dest.type)) {
602          brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
603                                         BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
604       } else {
605          brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
606                                         BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
607       }
608 
609       brw_eu_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
610       brw_eu_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
611       brw_eu_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
612       brw_eu_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
613 
614       if (src0.file == IMM) {
615          brw_eu_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
616       } else {
617          brw_eu_inst_set_3src_a1_src0_vstride(
618             devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
619          brw_eu_inst_set_3src_a1_src0_hstride(devinfo, inst,
620                                            to_3src_align1_hstride(src0.hstride));
621          brw_eu_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
622          brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
623          brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
624          brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
625       }
626       brw_eu_inst_set_3src_a1_src1_vstride(
627          devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
628       brw_eu_inst_set_3src_a1_src1_hstride(devinfo, inst,
629                                         to_3src_align1_hstride(src1.hstride));
630 
631       brw_eu_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
632       if (src1.file == ARF) {
633          brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
634       } else {
635          brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
636       }
637       brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
638       brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
639 
640       if (src2.file == IMM) {
641          brw_eu_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
642       } else {
643          brw_eu_inst_set_3src_a1_src2_hstride(devinfo, inst,
644                                            to_3src_align1_hstride(src2.hstride));
645          /* no vstride on src2 */
646          brw_eu_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
647          brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
648          brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
649          brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
650       }
651 
652       assert(src0.file == FIXED_GRF ||
653              src0.file == IMM);
654       assert(src1.file == FIXED_GRF ||
655              (src1.file == ARF &&
656               src1.nr == BRW_ARF_ACCUMULATOR));
657       assert(src2.file == FIXED_GRF ||
658              src2.file == IMM);
659 
660       if (devinfo->ver >= 12) {
661          if (src0.file == IMM) {
662             brw_eu_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
663          } else {
664             brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
665          }
666 
667          brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
668 
669          if (src2.file == IMM) {
670             brw_eu_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
671          } else {
672             brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
673          }
674       } else {
675          brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
676          brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
677          brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
678       }
679 
680    } else {
681       assert(dest.file == FIXED_GRF);
682       assert(dest.type == BRW_TYPE_F  ||
683              dest.type == BRW_TYPE_DF ||
684              dest.type == BRW_TYPE_D  ||
685              dest.type == BRW_TYPE_UD ||
686              dest.type == BRW_TYPE_HF);
687       brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
688       brw_eu_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
689       brw_eu_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
690 
691       assert(src0.file == FIXED_GRF);
692       brw_eu_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
693       brw_eu_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
694       brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
695       brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
696       brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
697       brw_eu_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
698                                           src0.vstride == BRW_VERTICAL_STRIDE_0);
699 
700       assert(src1.file == FIXED_GRF);
701       brw_eu_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
702       brw_eu_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
703       brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
704       brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
705       brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
706       brw_eu_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
707                                           src1.vstride == BRW_VERTICAL_STRIDE_0);
708 
709       assert(src2.file == FIXED_GRF);
710       brw_eu_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
711       brw_eu_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
712       brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
713       brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
714       brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
715       brw_eu_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
716                                           src2.vstride == BRW_VERTICAL_STRIDE_0);
717 
718       /* Set both the source and destination types based on dest.type,
719        * ignoring the source register types.  The MAD and LRP emitters ensure
720        * that all four types are float.  The BFE and BFI2 emitters, however,
721        * may send us mixed D and UD types and want us to ignore that and use
722        * the destination type.
723        */
724       brw_eu_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
725       brw_eu_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
726 
727       /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
728        *
729        *    "Three source instructions can use operands with mixed-mode
730        *     precision. When SrcType field is set to :f or :hf it defines
731        *     precision for source 0 only, and fields Src1Type and Src2Type
732        *     define precision for other source operands:
733        *
734        *     0b = :f. Single precision Float (32-bit).
735        *     1b = :hf. Half precision Float (16-bit)."
736        */
737       if (src1.type == BRW_TYPE_HF)
738          brw_eu_inst_set_3src_a16_src1_type(devinfo, inst, 1);
739 
740       if (src2.type == BRW_TYPE_HF)
741          brw_eu_inst_set_3src_a16_src2_type(devinfo, inst, 1);
742    }
743 
744    return inst;
745 }
746 
747 static brw_eu_inst *
brw_dpas_three_src(struct brw_codegen * p,enum opcode opcode,enum gfx12_systolic_depth sdepth,unsigned rcount,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)748 brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
749                    enum gfx12_systolic_depth sdepth, unsigned rcount, struct brw_reg dest,
750                    struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
751 {
752    const struct intel_device_info *devinfo = p->devinfo;
753    brw_eu_inst *inst = next_insn(p, opcode);
754 
755    assert(dest.file == FIXED_GRF);
756    brw_eu_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
757                                        FIXED_GRF);
758    brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
759    brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
760 
761    if (brw_type_is_float(dest.type)) {
762       brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
763                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
764    } else {
765       brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
766                                        BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
767    }
768 
769    brw_eu_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
770    brw_eu_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);
771 
772    brw_eu_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
773    brw_eu_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
774    brw_eu_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
775    brw_eu_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);
776 
777    assert(src0.file == FIXED_GRF ||
778           (src0.file == ARF &&
779            src0.nr == BRW_ARF_NULL));
780 
781    brw_eu_inst_set_dpas_3src_src0_reg_file(devinfo, inst, phys_file(src0));
782    brw_eu_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
783    brw_eu_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
784 
785    assert(src1.file == FIXED_GRF);
786 
787    brw_eu_inst_set_dpas_3src_src1_reg_file(devinfo, inst, phys_file(src1));
788    brw_eu_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
789    brw_eu_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
790    brw_eu_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
791 
792    assert(src2.file == FIXED_GRF);
793 
794    brw_eu_inst_set_dpas_3src_src2_reg_file(devinfo, inst, phys_file(src2));
795    brw_eu_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
796    brw_eu_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
797    brw_eu_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
798 
799    return inst;
800 }
801 
802 /***********************************************************************
803  * Convenience routines.
804  */
/* Define brw_<OP>(p, dest, src0) as a thin wrapper around brw_alu1(). */
#define ALU1(OP)					\
brw_eu_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}
812 
/* Define brw_<OP>(p, dest, src0, src1) as a thin wrapper around brw_alu2(). */
#define ALU2(OP)					\
brw_eu_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
821 
/* Define brw_<OP>(p, dest, src0, src1, src2) wrapping brw_alu3().  In
 * align-16 mode, a source with vstride 0 is rewritten to use an .xxxx
 * swizzle instead.
 */
#define ALU3(OP)					\
brw_eu_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{                                                       \
   if (p->current->access_mode == BRW_ALIGN_16) {       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
         src0.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
         src1.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
         src2.swizzle = BRW_SWIZZLE_XXXX;               \
   }                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
839 
/* Like ALU3, but additionally asserts that all four operands share a float
 * type: all BRW_TYPE_F, or all BRW_TYPE_DF, matching the destination.
 */
#define ALU3F(OP)                                               \
brw_eu_inst *brw_##OP(struct brw_codegen *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_TYPE_F ||                   \
          dest.type == BRW_TYPE_DF);                   \
   if (dest.type == BRW_TYPE_F) {                      \
      assert(src0.type == BRW_TYPE_F);                 \
      assert(src1.type == BRW_TYPE_F);                 \
      assert(src2.type == BRW_TYPE_F);                 \
   } else if (dest.type == BRW_TYPE_DF) {              \
      assert(src0.type == BRW_TYPE_DF);                \
      assert(src1.type == BRW_TYPE_DF);                \
      assert(src2.type == BRW_TYPE_DF);                \
   }                                                            \
                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {               \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
   }                                                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
869 
/* Instantiate the brw_<OP>() emitters for the standard one-, two- and
 * three-source ALU opcodes.  brw_ADD, brw_AVG and brw_MUL are defined by
 * hand below because they carry extra type-restriction asserts.
 */
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)
907 
908 brw_eu_inst *
909 brw_ADD(struct brw_codegen *p, struct brw_reg dest,
910         struct brw_reg src0, struct brw_reg src1)
911 {
912    /* 6.2.2: add */
913    if (src0.type == BRW_TYPE_F ||
914        (src0.file == IMM &&
915 	src0.type == BRW_TYPE_VF)) {
916       assert(src1.type != BRW_TYPE_UD);
917       assert(src1.type != BRW_TYPE_D);
918    }
919 
920    if (src1.type == BRW_TYPE_F ||
921        (src1.file == IMM &&
922 	src1.type == BRW_TYPE_VF)) {
923       assert(src0.type != BRW_TYPE_UD);
924       assert(src0.type != BRW_TYPE_D);
925    }
926 
927    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
928 }
929 
930 brw_eu_inst *
brw_AVG(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)931 brw_AVG(struct brw_codegen *p, struct brw_reg dest,
932         struct brw_reg src0, struct brw_reg src1)
933 {
934    assert(dest.type == src0.type);
935    assert(src0.type == src1.type);
936    switch (src0.type) {
937    case BRW_TYPE_B:
938    case BRW_TYPE_UB:
939    case BRW_TYPE_W:
940    case BRW_TYPE_UW:
941    case BRW_TYPE_D:
942    case BRW_TYPE_UD:
943       break;
944    default:
945       unreachable("Bad type for brw_AVG");
946    }
947 
948    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
949 }
950 
951 brw_eu_inst *
brw_MUL(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)952 brw_MUL(struct brw_codegen *p, struct brw_reg dest,
953         struct brw_reg src0, struct brw_reg src1)
954 {
955    /* 6.32.38: mul */
956    if (src0.type == BRW_TYPE_D ||
957        src0.type == BRW_TYPE_UD ||
958        src1.type == BRW_TYPE_D ||
959        src1.type == BRW_TYPE_UD) {
960       assert(dest.type != BRW_TYPE_F);
961    }
962 
963    if (src0.type == BRW_TYPE_F ||
964        (src0.file == IMM &&
965 	src0.type == BRW_TYPE_VF)) {
966       assert(src1.type != BRW_TYPE_UD);
967       assert(src1.type != BRW_TYPE_D);
968    }
969 
970    if (src1.type == BRW_TYPE_F ||
971        (src1.file == IMM &&
972 	src1.type == BRW_TYPE_VF)) {
973       assert(src0.type != BRW_TYPE_UD);
974       assert(src0.type != BRW_TYPE_D);
975    }
976 
977    assert(src0.file != ARF ||
978 	  src0.nr != BRW_ARF_ACCUMULATOR);
979    assert(src1.file != ARF ||
980 	  src1.nr != BRW_ARF_ACCUMULATOR);
981 
982    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
983 }
984 
985 brw_eu_inst *
brw_LINE(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)986 brw_LINE(struct brw_codegen *p, struct brw_reg dest,
987          struct brw_reg src0, struct brw_reg src1)
988 {
989    src0.vstride = BRW_VERTICAL_STRIDE_0;
990    src0.width = BRW_WIDTH_1;
991    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
992    return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
993 }
994 
995 brw_eu_inst *
brw_PLN(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)996 brw_PLN(struct brw_codegen *p, struct brw_reg dest,
997         struct brw_reg src0, struct brw_reg src1)
998 {
999    src0.vstride = BRW_VERTICAL_STRIDE_0;
1000    src0.width = BRW_WIDTH_1;
1001    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1002    src1.vstride = BRW_VERTICAL_STRIDE_8;
1003    src1.width = BRW_WIDTH_8;
1004    src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1005    return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
1006 }
1007 
1008 brw_eu_inst *
brw_DPAS(struct brw_codegen * p,enum gfx12_systolic_depth sdepth,unsigned rcount,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)1009 brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
1010          unsigned rcount, struct brw_reg dest, struct brw_reg src0,
1011          struct brw_reg src1, struct brw_reg src2)
1012 {
1013    return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
1014                              src1, src2);
1015 }
1016 
brw_NOP(struct brw_codegen * p)1017 void brw_NOP(struct brw_codegen *p)
1018 {
1019    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1020    memset(insn, 0, sizeof(*insn));
1021    brw_eu_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
1022 }
1023 
brw_SYNC(struct brw_codegen * p,enum tgl_sync_function func)1024 void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
1025 {
1026    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
1027    brw_eu_inst_set_cond_modifier(p->devinfo, insn, func);
1028 }
1029 
1030 /***********************************************************************
1031  * Comparisons, if/else/endif
1032  */
1033 
1034 brw_eu_inst *
brw_JMPI(struct brw_codegen * p,struct brw_reg index,unsigned predicate_control)1035 brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1036          unsigned predicate_control)
1037 {
1038    const struct intel_device_info *devinfo = p->devinfo;
1039    struct brw_reg ip = brw_ip_reg();
1040    brw_eu_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1041 
1042    brw_eu_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
1043    brw_eu_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1044    brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1045    brw_eu_inst_set_pred_control(devinfo, inst, predicate_control);
1046 
1047    return inst;
1048 }
1049 
/* Push the given IF/ELSE instruction onto the if-stack.  The entry is
 * stored as an index into p->store rather than a pointer, because
 * next_insn() may reallocate the store and invalidate pointers.
 *
 * Note the grow-ahead pattern: the unchecked write below is safe because
 * the array is always grown one entry ahead of the depth that will be
 * written next time.
 */
static void
push_if_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1062 
1063 static brw_eu_inst *
pop_if_stack(struct brw_codegen * p)1064 pop_if_stack(struct brw_codegen *p)
1065 {
1066    p->if_stack_depth--;
1067    return &p->store[p->if_stack[p->if_stack_depth]];
1068 }
1069 
/* Push the instruction starting a loop onto the loop-stack, stored as an
 * index into p->store (pointers would go stale if the store is
 * reallocated).  Unlike push_if_stack(), capacity is checked for
 * depth + 1 before the write.
 */
static void
push_loop_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
			       p->loop_stack_array_size);
   }

   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
}
1082 
1083 static brw_eu_inst *
get_inner_do_insn(struct brw_codegen * p)1084 get_inner_do_insn(struct brw_codegen *p)
1085 {
1086    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1087 }
1088 
1089 /* EU takes the value from the flag register and pushes it onto some
1090  * sort of a stack (presumably merging with any flag value already on
1091  * the stack).  Within an if block, the flags at the top of the stack
1092  * control execution on each channel of the unit, eg. on each of the
1093  * 16 pixel values in our wm programs.
1094  *
1095  * When the matching 'else' instruction is reached (presumably by
1096  * countdown of the instruction count patched in by our ELSE/ENDIF
1097  * functions), the relevant flags are inverted.
1098  *
1099  * When the matching 'endif' instruction is reached, the flags are
1100  * popped off.  If the stack is now empty, normal execution resumes.
1101  */
/* Emit an IF instruction and push it onto the if-stack.  Its JIP/UIP are
 * zeroed here and patched with the real jump targets by patch_IF_ELSE()
 * when the matching ENDIF is emitted.
 */
brw_eu_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
   /* Gfx12+ drops the src0 immediate from the encoding. */
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   /* Placeholder jump targets; filled in by patch_IF_ELSE(). */
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_exec_size(devinfo, insn, execute_size);
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
   return insn;
}
1126 
1127 /**
1128  * Patch IF and ELSE instructions with appropriate jump targets.
1129  */
/* Back-patch the JIP/UIP fields of an IF (and optional ELSE) once the
 * position of the ENDIF is known.  Offsets are expressed in multiples of
 * the per-generation jump unit returned by brw_jump_scale().
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_eu_inst *if_inst, brw_eu_inst *else_inst, brw_eu_inst *endif_inst)
{
   const struct intel_device_info *devinfo = p->devinfo;

   assert(if_inst != NULL && brw_eu_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_eu_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   /* Jump offset scale factor for this generation. */
   unsigned br = brw_jump_scale(devinfo);

   assert(brw_eu_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   /* The ENDIF inherits the IF's execution size. */
   brw_eu_inst_set_exec_size(devinfo, endif_inst, brw_eu_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_eu_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_eu_inst_set_exec_size(devinfo, else_inst, brw_eu_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF */
      /* The IF instruction's JIP should point just past the ELSE */
      brw_eu_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          */
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_eu_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
       * JIP and UIP both should point to ENDIF on those
       * platforms.
       */
      brw_eu_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}
1181 
/* Emit an ELSE instruction and push it onto the if-stack.  Its JIP/UIP
 * are zeroed here and patched by patch_IF_ELSE() when the matching ENDIF
 * is emitted.
 */
void
brw_ELSE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   /* Gfx12+ drops the src0 immediate from the encoding. */
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   /* Placeholder jump targets; filled in by patch_IF_ELSE(). */
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
}
1201 
/* Emit the ENDIF that closes the innermost IF (and optional ELSE), then
 * patch the jump targets of the whole construct via patch_IF_ELSE().
 */
void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = NULL;
   brw_eu_inst *else_inst = NULL;
   brw_eu_inst *if_inst = NULL;
   brw_eu_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_eu_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_eu_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   /* Fixed JIP on the ENDIF itself — presumably advances to the next
    * instruction; kept as-is (NOTE(review): confirm against the EU ISA
    * docs before changing).
    */
   brw_eu_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1250 
1251 brw_eu_inst *
brw_BREAK(struct brw_codegen * p)1252 brw_BREAK(struct brw_codegen *p)
1253 {
1254    const struct intel_device_info *devinfo = p->devinfo;
1255    brw_eu_inst *insn;
1256 
1257    insn = next_insn(p, BRW_OPCODE_BREAK);
1258    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1259    brw_set_src0(p, insn, brw_imm_d(0x0));
1260    brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1261    brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1262 
1263    return insn;
1264 }
1265 
1266 brw_eu_inst *
brw_CONT(struct brw_codegen * p)1267 brw_CONT(struct brw_codegen *p)
1268 {
1269    const struct intel_device_info *devinfo = p->devinfo;
1270    brw_eu_inst *insn;
1271 
1272    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1273    brw_set_dest(p, insn, brw_ip_reg());
1274    brw_set_src0(p, insn, brw_imm_d(0x0));
1275 
1276    brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1277    brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1278    return insn;
1279 }
1280 
1281 brw_eu_inst *
brw_HALT(struct brw_codegen * p)1282 brw_HALT(struct brw_codegen *p)
1283 {
1284    const struct intel_device_info *devinfo = p->devinfo;
1285    brw_eu_inst *insn;
1286 
1287    insn = next_insn(p, BRW_OPCODE_HALT);
1288    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1289    if (devinfo->ver < 12) {
1290       brw_set_src0(p, insn, brw_imm_d(0x0));
1291    }
1292 
1293    brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1294    brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1295    return insn;
1296 }
1297 
1298 /* DO/WHILE loop:
1299  *
1300  * The DO/WHILE is just an unterminated loop -- break or continue are
1301  * used for control within the loop.  We have a few ways they can be
1302  * done.
1303  *
1304  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1305  * jip and no DO instruction.
1306  *
1307  * For gfx6, there's no more mask stack, so no need for DO.  WHILE
1308  * just points back to the first instruction of the loop.
1309  */
1310 brw_eu_inst *
brw_DO(struct brw_codegen * p,unsigned execute_size)1311 brw_DO(struct brw_codegen *p, unsigned execute_size)
1312 {
1313    push_loop_stack(p, &p->store[p->nr_insn]);
1314    return &p->store[p->nr_insn];
1315 }
1316 
/* Emit the WHILE closing the innermost DO/WHILE loop, with JIP pointing
 * back at the loop head recorded by brw_DO().
 */
brw_eu_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   /* next_insn() may reallocate p->store, so it must be called before
    * get_inner_do_insn() derives a pointer from the store.
    */
   insn = next_insn(p, BRW_OPCODE_WHILE);
   do_insn = get_inner_do_insn(p);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   /* Backward jump to the first instruction of the loop body. */
   brw_eu_inst_set_jip(devinfo, insn, br * (do_insn - insn));

   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
1340 
brw_CMP(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1341 void brw_CMP(struct brw_codegen *p,
1342 	     struct brw_reg dest,
1343 	     unsigned conditional,
1344 	     struct brw_reg src0,
1345 	     struct brw_reg src1)
1346 {
1347    const struct intel_device_info *devinfo = p->devinfo;
1348    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1349 
1350    brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
1351    brw_set_dest(p, insn, dest);
1352    brw_set_src0(p, insn, src0);
1353    brw_set_src1(p, insn, src1);
1354 }
1355 
brw_CMPN(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1356 void brw_CMPN(struct brw_codegen *p,
1357               struct brw_reg dest,
1358               unsigned conditional,
1359               struct brw_reg src0,
1360               struct brw_reg src1)
1361 {
1362    const struct intel_device_info *devinfo = p->devinfo;
1363    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
1364 
1365    brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
1366    brw_set_dest(p, insn, dest);
1367    brw_set_src0(p, insn, src0);
1368    brw_set_src1(p, insn, src1);
1369 }
1370 
1371 /***********************************************************************
1372  * Helpers for the various SEND message types:
1373  */
1374 
/* Emit a MATH instruction for the given extended math function.
 *
 * INT DIV functions must have non-float sources with no source modifiers
 * (see the BSpec note below); all other functions take F sources (or HF
 * on Gfx9+).  The destination must be a GRF with hstride 1.
 */
void gfx6_math(struct brw_codegen *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == FIXED_GRF);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_TYPE_F);
      assert(src1.type != BRW_TYPE_F);
      assert(src1.file == FIXED_GRF ||
             src1.file == IMM);
      /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
       *     INT DIV function does not support source modifiers.
       */
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   } else {
      assert(src0.type == BRW_TYPE_F ||
             (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
      assert(src1.type == BRW_TYPE_F ||
             (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
   }

  /* This workaround says that we cannot use scalar broadcast with HF types.
   * However, for is_scalar values, all 16 elements contain the same value, so
   * we can replace a <0,1,0> region with <16,16,1> without ill effect.
   */
   if (intel_needs_workaround(devinfo, 22016140776)) {
      if (src0.is_scalar && src0.type == BRW_TYPE_HF) {
         src0.vstride = BRW_VERTICAL_STRIDE_16;
         src0.width = BRW_WIDTH_16;
         src0.hstride = BRW_HORIZONTAL_STRIDE_1;
         src0.swizzle = BRW_SWIZZLE_XYZW;
      }

      if (src1.is_scalar && src1.type == BRW_TYPE_HF) {
         src1.vstride = BRW_VERTICAL_STRIDE_16;
         src1.width = BRW_WIDTH_16;
         src1.hstride = BRW_HORIZONTAL_STRIDE_1;
         src1.swizzle = BRW_SWIZZLE_XYZW;
      }
   }

   brw_eu_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1435 
1436 void
brw_send_indirect_message(struct brw_codegen * p,unsigned sfid,struct brw_reg dst,struct brw_reg payload,struct brw_reg desc,bool eot,bool gather)1437 brw_send_indirect_message(struct brw_codegen *p,
1438                           unsigned sfid,
1439                           struct brw_reg dst,
1440                           struct brw_reg payload,
1441                           struct brw_reg desc,
1442                           bool eot,
1443                           bool gather)
1444 {
1445    const struct intel_device_info *devinfo = p->devinfo;
1446    struct brw_eu_inst *send;
1447 
1448    dst = retype(dst, BRW_TYPE_UW);
1449 
1450    assert(desc.type == BRW_TYPE_UD);
1451 
1452    if (desc.file == IMM) {
1453       send = next_insn(p, BRW_OPCODE_SEND);
1454       brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1455       brw_set_desc(p, send, desc.ud, gather);
1456    } else {
1457       assert(desc.file == ADDRESS);
1458       assert(desc.subnr == 0);
1459       send = next_insn(p, BRW_OPCODE_SEND);
1460       brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1461       if (devinfo->ver >= 12)
1462          brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, true);
1463       else
1464          brw_set_src1(p, send, desc);
1465    }
1466 
1467    brw_set_dest(p, send, dst);
1468    brw_eu_inst_set_sfid(devinfo, send, sfid);
1469    brw_eu_inst_set_eot(devinfo, send, eot);
1470 }
1471 
/* Emit a split send (SENDS pre-Gfx12, SEND on Gfx12+) with two payload
 * sources and separate descriptor / extended-descriptor operands.
 *
 * Both @desc and @ex_desc may be either an immediate or an address-register
 * (indirect) operand; the reg32 select bits in the instruction are set
 * accordingly.  @ex_mlen is the length of the second payload in registers,
 * @eot marks end-of-thread, and @gather is forwarded to the extended
 * descriptor encoder (presumably selecting the gather-send form — confirm
 * against brw_eu_inst_set_sends_ex_desc).
 */
void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                struct brw_reg ex_desc,
                                unsigned ex_mlen,
                                bool ex_bso,
                                bool eot,
                                bool gather)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
   brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));

   /* Main descriptor: immediate vs. address-register indirect. */
   if (desc.file == IMM) {
      brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 0);
      brw_eu_inst_set_send_desc(devinfo, send, desc.ud);
   } else {
      assert(desc.file == ADDRESS);
      assert(desc.subnr == 0);
      brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 1);
   }

   /* Extended descriptor: same two forms. */
   if (ex_desc.file == IMM) {
      brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
      brw_eu_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud, gather);
   } else {
      assert(ex_desc.file == ADDRESS);
      /* The subregister is encoded in dword units, so it must be
       * dword-aligned.
       */
      assert((ex_desc.subnr & 0x3) == 0);
      brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
      brw_eu_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);

      /* With an indirect ex_desc the src1 length isn't carried by the
       * descriptor, so on Gfx20+ UGM it goes into instruction bits 103:99.
       */
      if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM)
         brw_eu_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
   }

   if (ex_bso) {
      /* The send instruction ExBSO field does not exist with UGM on Gfx20+,
       * it is assumed.
       *
       * BSpec 56890
       */
      if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
         brw_eu_inst_set_send_ex_bso(devinfo, send, true);
      brw_eu_inst_set_send_src1_len(devinfo, send, ex_mlen / reg_unit(devinfo));
   }
   brw_eu_inst_set_sfid(devinfo, send, sfid);
   brw_eu_inst_set_eot(devinfo, send, eot);
}
1532 
1533 static bool
while_jumps_before_offset(const struct intel_device_info * devinfo,brw_eu_inst * insn,int while_offset,int start_offset)1534 while_jumps_before_offset(const struct intel_device_info *devinfo,
1535                           brw_eu_inst *insn, int while_offset, int start_offset)
1536 {
1537    int scale = 16 / brw_jump_scale(devinfo);
1538    int jip = brw_eu_inst_jip(devinfo, insn);
1539    assert(jip < 0);
1540    return while_offset + jip * scale <= start_offset;
1541 }
1542 
1543 
/* Scan forward from @start_offset for the instruction ending the current
 * control-flow block (ELSE, ENDIF, HALT, or the WHILE closing the enclosing
 * loop), skipping nested IF...ENDIF regions.  Returns the byte offset of
 * that instruction, or 0 if none is found before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   /* Nesting depth of IF/ENDIF pairs encountered during the scan. */
   int depth = 0;

   for (offset = next_offset(p, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(p, store, offset)) {
      brw_eu_inst *insn = store + offset;

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop.  Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   /* No block-terminating instruction before the end of the program. */
   return 0;
}
1586 
1587 /* There is no DO instruction on gfx6, so to find the end of the loop
1588  * we have to see if the loop is jumping back before our start
1589  * instruction.
1590  */
1591 static int
brw_find_loop_end(struct brw_codegen * p,int start_offset)1592 brw_find_loop_end(struct brw_codegen *p, int start_offset)
1593 {
1594    const struct intel_device_info *devinfo = p->devinfo;
1595    int offset;
1596    void *store = p->store;
1597 
1598    /* Always start after the instruction (such as a WHILE) we're trying to fix
1599     * up.
1600     */
1601    for (offset = next_offset(p, store, start_offset);
1602         offset < p->next_insn_offset;
1603         offset = next_offset(p, store, offset)) {
1604       brw_eu_inst *insn = store + offset;
1605 
1606       if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
1607 	 if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
1608 	    return offset;
1609       }
1610    }
1611    assert(!"not reached");
1612    return start_offset;
1613 }
1614 
1615 /* After program generation, go back and update the UIP and JIP of
1616  * BREAK, CONT, and HALT instructions to their correct locations.
1617  */
1618 void
brw_set_uip_jip(struct brw_codegen * p,int start_offset)1619 brw_set_uip_jip(struct brw_codegen *p, int start_offset)
1620 {
1621    const struct intel_device_info *devinfo = p->devinfo;
1622    int offset;
1623    int br = brw_jump_scale(devinfo);
1624    int scale = 16 / br;
1625    void *store = p->store;
1626 
1627    for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
1628       brw_eu_inst *insn = store + offset;
1629       assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);
1630 
1631       switch (brw_eu_inst_opcode(p->isa, insn)) {
1632       case BRW_OPCODE_BREAK: {
1633          int block_end_offset = brw_find_next_block_end(p, offset);
1634          assert(block_end_offset != 0);
1635          brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
1636 	 /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
1637          brw_eu_inst_set_uip(devinfo, insn,
1638 	    (brw_find_loop_end(p, offset) - offset) / scale);
1639 	 break;
1640       }
1641 
1642       case BRW_OPCODE_CONTINUE: {
1643          int block_end_offset = brw_find_next_block_end(p, offset);
1644          assert(block_end_offset != 0);
1645          brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
1646          brw_eu_inst_set_uip(devinfo, insn,
1647             (brw_find_loop_end(p, offset) - offset) / scale);
1648 
1649          assert(brw_eu_inst_uip(devinfo, insn) != 0);
1650          assert(brw_eu_inst_jip(devinfo, insn) != 0);
1651 	 break;
1652       }
1653 
1654       case BRW_OPCODE_ENDIF: {
1655          int block_end_offset = brw_find_next_block_end(p, offset);
1656          int32_t jump = (block_end_offset == 0) ?
1657                         1 * br : (block_end_offset - offset) / scale;
1658          brw_eu_inst_set_jip(devinfo, insn, jump);
1659 	 break;
1660       }
1661 
1662       case BRW_OPCODE_HALT: {
1663 	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
1664 	  *
1665 	  *    "In case of the halt instruction not inside any conditional
1666 	  *     code block, the value of <JIP> and <UIP> should be the
1667 	  *     same. In case of the halt instruction inside conditional code
1668 	  *     block, the <UIP> should be the end of the program, and the
1669 	  *     <JIP> should be end of the most inner conditional code block."
1670 	  *
1671 	  * The uip will have already been set by whoever set up the
1672 	  * instruction.
1673 	  */
1674          int block_end_offset = brw_find_next_block_end(p, offset);
1675 	 if (block_end_offset == 0) {
1676             brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
1677 	 } else {
1678             brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
1679 	 }
1680          assert(brw_eu_inst_uip(devinfo, insn) != 0);
1681          assert(brw_eu_inst_jip(devinfo, insn) != 0);
1682 	 break;
1683       }
1684 
1685       default:
1686          break;
1687       }
1688    }
1689 }
1690 
1691 static void
brw_set_memory_fence_message(struct brw_codegen * p,struct brw_eu_inst * insn,enum brw_message_target sfid,bool commit_enable,unsigned bti)1692 brw_set_memory_fence_message(struct brw_codegen *p,
1693                              struct brw_eu_inst *insn,
1694                              enum brw_message_target sfid,
1695                              bool commit_enable,
1696                              unsigned bti)
1697 {
1698    const struct intel_device_info *devinfo = p->devinfo;
1699 
1700    brw_set_desc(p, insn, brw_message_desc(
1701                    devinfo, 1, (commit_enable ? 1 : 0), true), false);
1702 
1703    brw_eu_inst_set_sfid(devinfo, insn, sfid);
1704 
1705    switch (sfid) {
1706    case GFX6_SFID_DATAPORT_RENDER_CACHE:
1707       brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
1708       break;
1709    case GFX7_SFID_DATAPORT_DATA_CACHE:
1710       brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
1711       break;
1712    default:
1713       unreachable("Not reached");
1714    }
1715 
1716    if (commit_enable)
1717       brw_eu_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
1718 
1719    assert(devinfo->ver >= 11 || bti == 0);
1720    brw_eu_inst_set_binding_table_index(devinfo, insn, bti);
1721 }
1722 
/* Configure @insn as a memory-fence message on an LSC-capable platform,
 * deriving (and where necessary overriding) scope and flush type from the
 * caller-provided LSC descriptor @desc.
 */
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_eu_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
   /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_eu_inst_set_sfid(p->devinfo, insn, sfid);

   /* On Gfx12.5 URB is not listed as port usable for fences with the LSC (see
    * BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we completely ignore
    * the descriptor value and rebuild a legacy URB fence descriptor.
    */
   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true),
                   false);
   } else {
      /* Start from the scope/flush encoded in the caller's descriptor. */
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      /* Typed (TGM) fences are forced to tile scope with an evict flush. */
      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *   "For any fence greater than local scope, always set flush type to
       *    at least invalidate so that fence goes on properly."
       *
       *   "The bug is if flush_type is 'None', the scope is always downgraded
       *    to 'local'."
       *
       * Here set scope to NONE_6 instead of NONE, which has the same effect
       * as NONE but avoids the downgrade to scope LOCAL.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false),
                   false);
   }
}
1775 
1776 void
brw_memory_fence(struct brw_codegen * p,struct brw_reg dst,struct brw_reg src,enum opcode send_op,enum brw_message_target sfid,uint32_t desc,bool commit_enable,unsigned bti)1777 brw_memory_fence(struct brw_codegen *p,
1778                  struct brw_reg dst,
1779                  struct brw_reg src,
1780                  enum opcode send_op,
1781                  enum brw_message_target sfid,
1782                  uint32_t desc,
1783                  bool commit_enable,
1784                  unsigned bti)
1785 {
1786    const struct intel_device_info *devinfo = p->devinfo;
1787 
1788    dst = retype(vec1(dst), BRW_TYPE_UW);
1789    src = retype(vec1(src), BRW_TYPE_UD);
1790 
1791    /* Set dst as destination for dependency tracking, the MEMORY_FENCE
1792     * message doesn't write anything back.
1793     */
1794    struct brw_eu_inst *insn = next_insn(p, send_op);
1795    brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
1796    brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1797    brw_set_dest(p, insn, dst);
1798    brw_set_src0(p, insn, src);
1799 
1800    /* All DG2 hardware requires LSC for fence messages, even A-step */
1801    if (devinfo->has_lsc)
1802       gfx12_set_memory_fence_message(p, insn, sfid, desc);
1803    else
1804       brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
1805 }
1806 
/* Copy a single dynamically-indexed component of @src into every channel of
 * @dst.  When the source is already uniform or the index is an immediate
 * the copy is a plain MOV; otherwise register-indirect addressing through
 * a0 is used to fetch the selected component.
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   /* Emit as a scalar (SIMD1) operation regardless of execution mask. */
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gen12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *    and Quad-Word data must not be used."
    *
    * We require the source and destination types to match so stomp to an
    * unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = (src.vstride == 0 && src.hstride == 0) ? 0 : idx.ud;
      src = stride(suboffset(src, i), 0, 1, 0);

      /* Without native 64-bit integers the wide MOV is split into two
       * dword halves.
       */
      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset. The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      /* Address computation must not be predicated or masked. */
      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));

      /* We can only address up to limit bytes using the indirect
       * addressing immediate, account for the difference if the source
       * register is above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *   "When source or destination datatype is 64b or operation is
          *    integer DWord multiply, indirect addressing must not be
          *    used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead
          * of one 64-bit MOV.  Because no double value should ever cross
          * a register boundary, it's safe to use the immediate offset in
          * the indirect here to handle adding 4 bytes to the offset and
          * avoid the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    retype(brw_vec1_indirect(addr.subnr, offset),
                           BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    retype(brw_vec1_indirect(addr.subnr, offset + 4),
                           BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}
1932 
1933 
1934 /**
1935  * Emit the SEND message for a barrier
1936  */
1937 void
brw_barrier(struct brw_codegen * p,struct brw_reg src)1938 brw_barrier(struct brw_codegen *p, struct brw_reg src)
1939 {
1940    const struct intel_device_info *devinfo = p->devinfo;
1941    struct brw_eu_inst *inst;
1942 
1943    brw_push_insn_state(p);
1944    brw_set_default_access_mode(p, BRW_ALIGN_1);
1945    inst = next_insn(p, BRW_OPCODE_SEND);
1946    brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
1947    brw_set_src0(p, inst, src);
1948    brw_set_src1(p, inst, brw_null_reg());
1949    brw_set_desc(p, inst, brw_message_desc(devinfo,
1950                                           1 * reg_unit(devinfo), 0, false), false);
1951 
1952    brw_eu_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
1953    brw_eu_inst_set_gateway_subfuncid(devinfo, inst,
1954                                   BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
1955 
1956    brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1957    brw_pop_insn_state(p);
1958 }
1959 
1960 
1961 /**
1962  * Emit the wait instruction for a barrier
1963  */
1964 void
brw_WAIT(struct brw_codegen * p)1965 brw_WAIT(struct brw_codegen *p)
1966 {
1967    const struct intel_device_info *devinfo = p->devinfo;
1968    struct brw_eu_inst *insn;
1969 
1970    struct brw_reg src = brw_notification_reg();
1971 
1972    insn = next_insn(p, BRW_OPCODE_WAIT);
1973    brw_set_dest(p, insn, src);
1974    brw_set_src0(p, insn, src);
1975    brw_set_src1(p, insn, brw_null_reg());
1976 
1977    brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1978    brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
1979 }
1980 
1981 void
brw_float_controls_mode(struct brw_codegen * p,unsigned mode,unsigned mask)1982 brw_float_controls_mode(struct brw_codegen *p,
1983                         unsigned mode, unsigned mask)
1984 {
1985    assert(p->current->mask_control == BRW_MASK_DISABLE);
1986 
1987    /* From the Skylake PRM, Volume 7, page 760:
1988     *  "Implementation Restriction on Register Access: When the control
1989     *   register is used as an explicit source and/or destination, hardware
1990     *   does not ensure execution pipeline coherency. Software must set the
1991     *   thread control field to ‘switch’ for an instruction that uses
1992     *   control register as an explicit operand."
1993     *
1994     * On Gfx12+ this is implemented in terms of SWSB annotations instead.
1995     */
1996    brw_set_default_swsb(p, tgl_swsb_regdist(1));
1997 
1998    brw_eu_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
1999                             brw_imm_ud(~mask));
2000    brw_eu_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
2001    if (p->devinfo->ver < 12)
2002       brw_eu_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
2003 
2004    if (mode) {
2005       brw_eu_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
2006                                  brw_imm_ud(mode));
2007       brw_eu_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
2008       if (p->devinfo->ver < 12)
2009          brw_eu_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
2010    }
2011 
2012    if (p->devinfo->ver >= 12)
2013       brw_SYNC(p, TGL_SYNC_NOP);
2014 }
2015 
2016 void
brw_update_reloc_imm(const struct brw_isa_info * isa,brw_eu_inst * inst,uint32_t value)2017 brw_update_reloc_imm(const struct brw_isa_info *isa,
2018                      brw_eu_inst *inst,
2019                      uint32_t value)
2020 {
2021    const struct intel_device_info *devinfo = isa->devinfo;
2022 
2023    /* Sanity check that the instruction is a MOV of an immediate */
2024    assert(brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
2025    assert(brw_eu_inst_src0_reg_file(devinfo, inst) == IMM);
2026 
2027    /* If it was compacted, we can't safely rewrite */
2028    assert(brw_eu_inst_cmpt_control(devinfo, inst) == 0);
2029 
2030    brw_eu_inst_set_imm_ud(devinfo, inst, value);
2031 }
2032 
2033 /* A default value for constants that will be patched at run-time.
2034  * We pick an arbitrary value that prevents instruction compaction.
2035  */
2036 #define DEFAULT_PATCH_IMM 0x4a7cc037
2037 
2038 void
brw_MOV_reloc_imm(struct brw_codegen * p,struct brw_reg dst,enum brw_reg_type src_type,uint32_t id,uint32_t base)2039 brw_MOV_reloc_imm(struct brw_codegen *p,
2040                   struct brw_reg dst,
2041                   enum brw_reg_type src_type,
2042                   uint32_t id,
2043                   uint32_t base)
2044 {
2045    assert(brw_type_size_bytes(src_type) == 4);
2046    assert(brw_type_size_bytes(dst.type) == 4);
2047 
2048    brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
2049                  p->next_insn_offset, base);
2050 
2051    brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
2052 }
2053