• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 
33 #include "brw_eu_defines.h"
34 #include "brw_eu.h"
35 
36 #include "util/ralloc.h"
37 
/**
 * Encode \p dest as the destination operand of \p inst.
 *
 * Three hardware encoding layouts are handled: the Gfx12+ SEND/SENDC form
 * (register file and number only), the pre-Gfx12 split-send SENDS/SENDSC
 * form, and the regular ALU form (direct or register-indirect addressing,
 * Align1 or Align16 access mode).  \p dest is taken by value so the local
 * stride fix-ups below never affect the caller's copy.
 */
void
brw_set_dest(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == FIXED_GRF)
      assert(dest.nr < XE2_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == ARF &&
       dest.nr == BRW_ARF_NULL &&
       brw_type_size_bytes(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND(C): only the register file and number are encoded;
       * no subregister, modifiers, or region beyond the checks below.
       */
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_reg_file(devinfo, inst, phys_file(dest));
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split-send: subregister is encoded in 16-byte units. */
      assert(devinfo->ver < 12);
      assert(dest.file == FIXED_GRF ||
             dest.file == ADDRESS ||
             dest.file == ARF);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
      brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_eu_inst_set_send_dst_reg_file(devinfo, inst, phys_file(dest));
   } else {
      /* Regular ALU destination encoding. */
      brw_eu_inst_set_dst_file_type(devinfo, inst, phys_file(dest), dest.type);
      brw_eu_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_eu_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            /* Stride 0 is not encodable for a destination; promote to 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_eu_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == FIXED_GRF) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         /* Register-indirect addressing. */
         brw_eu_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_eu_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_eu_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_eu_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}
131 
/**
 * Encode \p reg as source operand 0 of \p inst.
 *
 * SEND-family opcodes use compact encodings (the source only names the GRF
 * the message payload starts at); everything else uses the full ALU source
 * encoding, including immediates, source modifiers, indirect addressing,
 * and Align1 regions / Align16 swizzles.
 */
void
brw_set_src0(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND  ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND(C) src0: register file and number only. */
      assert(reg.file == ARF || reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);

      brw_eu_inst_set_send_src0_reg_file(devinfo, inst, phys_file(reg));
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

      /* The scalar register is the one case where a subregister is allowed;
       * it is encoded in 2-byte units.
       */
      if (reg.file == ARF && reg.nr == BRW_ARF_SCALAR) {
         assert(reg.subnr % 2 == 0);
         brw_eu_inst_set_send_src0_subreg_nr(devinfo, inst, reg.subnr / 2);
      } else {
         assert(reg.subnr == 0);
      }
   } else if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split-send src0: subregister encoded in 16-byte units. */
      assert(reg.file == FIXED_GRF);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      /* Regular ALU source encoding. */
      brw_eu_inst_set_src0_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_eu_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == IMM) {
         /* Pick the immediate encoding width based on the type. */
         if (reg.type == BRW_TYPE_DF)
            brw_eu_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_TYPE_UQ ||
                  reg.type == BRW_TYPE_Q)
            brw_eu_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);

         /* Pre-Gfx12, a 32-bit (or smaller) immediate in src0 also requires
          * src1's file/type fields to mirror it.
          */
         if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
            brw_eu_inst_set_src1_reg_file(devinfo, inst,
                                       ARF);
            brw_eu_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_eu_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_eu_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_eu_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_eu_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_eu_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_eu_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A width-1 region at exec size 1 is emitted as the canonical
             * scalar region <0;1,0>.
             */
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src0_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src0_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src0_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src0_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src0_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
255 
256 
/**
 * Encode \p reg as source operand 1 of \p inst.
 *
 * Split-send opcodes (and Gfx12+ SEND/SENDC) use the compact send-src1
 * encoding; all other opcodes use the full ALU source encoding.  Unlike
 * src0, src1 only supports 32-bit immediates and direct addressing.
 */
void
brw_set_src1(struct brw_codegen *p, brw_eu_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == FIXED_GRF)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      /* Send-style src1: register file and number only. */
      assert(reg.file == FIXED_GRF ||
             reg.file == ARF ||
             reg.file == ADDRESS);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_eu_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_eu_inst_set_send_src1_reg_file(devinfo, inst, phys_file(reg));
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *    operands only."
       */
      assert(reg.file != ARF ||
             (reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);

      brw_eu_inst_set_src1_file_type(devinfo, inst, phys_file(reg), reg.type);
      brw_eu_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_eu_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_eu_inst_src0_reg_file(devinfo, inst) != IMM);

      if (reg.file == IMM) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(brw_type_size_bytes(reg.type) < 8);
         brw_eu_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == FIXED_GRF); */

         brw_eu_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_eu_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_eu_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A width-1 region at exec size 1 is emitted as the canonical
             * scalar region <0;1,0>.
             */
            if (reg.width == BRW_WIDTH_1 &&
                brw_eu_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_eu_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_eu_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_eu_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_eu_inst_set_src1_width(devinfo, inst, reg.width);
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_eu_inst_set_src1_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_eu_inst_set_src1_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_eu_inst_set_src1_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_eu_inst_set_src1_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_eu_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_eu_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
349 
350 /**
351  * Specify the descriptor and extended descriptor immediate for a SEND(C)
352  * message instruction.
353  */
354 void
brw_set_desc_ex(struct brw_codegen * p,brw_eu_inst * inst,unsigned desc,unsigned ex_desc)355 brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *inst,
356                 unsigned desc, unsigned ex_desc)
357 {
358    const struct intel_device_info *devinfo = p->devinfo;
359    assert(brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
360           brw_eu_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
361    if (devinfo->ver < 12)
362       brw_eu_inst_set_src1_file_type(devinfo, inst,
363                                   IMM, BRW_TYPE_UD);
364    brw_eu_inst_set_send_desc(devinfo, inst, desc);
365    if (devinfo->ver >= 9)
366       brw_eu_inst_set_send_ex_desc(devinfo, inst, ex_desc, false);
367 }
368 
/**
 * Copy the execution-state defaults in \p state (exec size, channel group,
 * access mode, masking, predication, flag register, etc.) into the control
 * fields of \p insn.
 */
static void
brw_eu_inst_set_state(const struct brw_isa_info *isa,
                   brw_eu_inst *insn,
                   const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_eu_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_eu_inst_set_group(devinfo, insn, state->group);
   brw_eu_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_eu_inst_set_mask_control(devinfo, insn, state->mask_control);
   /* Software scoreboard (SWSB) dependency info exists only on Gfx12+. */
   if (devinfo->ver >= 12)
      brw_eu_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb, brw_eu_inst_opcode(isa, insn)));
   brw_eu_inst_set_saturate(devinfo, insn, state->saturate);
   brw_eu_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_eu_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   /* state->flag_subreg is a flat index; split it into flag register number
    * and subregister.  Align16 3-src instructions use dedicated fields.
    */
   if (is_3src(isa, brw_eu_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_eu_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_eu_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_eu_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver < 20)
      brw_eu_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
398 
399 static brw_eu_inst *
brw_append_insns(struct brw_codegen * p,unsigned nr_insn,unsigned alignment)400 brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
401 {
402    assert(util_is_power_of_two_or_zero(sizeof(brw_eu_inst)));
403    assert(util_is_power_of_two_or_zero(alignment));
404    const unsigned align_insn = MAX2(alignment / sizeof(brw_eu_inst), 1);
405    const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
406    const unsigned new_nr_insn = start_insn + nr_insn;
407 
408    if (p->store_size < new_nr_insn) {
409       p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_eu_inst));
410       p->store = reralloc(p->mem_ctx, p->store, brw_eu_inst, p->store_size);
411    }
412 
413    /* Memset any padding due to alignment to 0.  We don't want to be hashing
414     * or caching a bunch of random bits we got from a memory allocation.
415     */
416    if (p->nr_insn < start_insn) {
417       memset(&p->store[p->nr_insn], 0,
418              (start_insn - p->nr_insn) * sizeof(brw_eu_inst));
419    }
420 
421    assert(p->next_insn_offset == p->nr_insn * sizeof(brw_eu_inst));
422    p->nr_insn = new_nr_insn;
423    p->next_insn_offset = new_nr_insn * sizeof(brw_eu_inst);
424 
425    return &p->store[start_insn];
426 }
427 
/**
 * Pad the instruction store so the next emitted instruction starts at the
 * requested byte \p alignment (appends zero instructions; only the
 * alignment padding of brw_append_insns takes effect).
 */
void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}
433 
434 int
brw_append_data(struct brw_codegen * p,void * data,unsigned size,unsigned alignment)435 brw_append_data(struct brw_codegen *p, void *data,
436                 unsigned size, unsigned alignment)
437 {
438    unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_eu_inst));
439    void *dst = brw_append_insns(p, nr_insn, alignment);
440    memcpy(dst, data, size);
441 
442    /* If it's not a whole number of instructions, memset the end */
443    if (size < nr_insn * sizeof(brw_eu_inst))
444       memset(dst + size, 0, nr_insn * sizeof(brw_eu_inst) - size);
445 
446    return dst - (void *)p->store;
447 }
448 
#define next_insn brw_next_insn
/**
 * Allocate the next instruction slot, zero it, set its \p opcode, and
 * apply the codegen's current default state (p->current).  Returns the
 * instruction for further operand/field encoding.
 */
brw_eu_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_eu_inst *insn = brw_append_insns(p, 1, sizeof(brw_eu_inst));

   memset(insn, 0, sizeof(*insn));
   brw_eu_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_eu_inst_set_state(p->isa, insn, p->current);

   return insn;
}
463 
464 void
brw_add_reloc(struct brw_codegen * p,uint32_t id,enum brw_shader_reloc_type type,uint32_t offset,uint32_t delta)465 brw_add_reloc(struct brw_codegen *p, uint32_t id,
466               enum brw_shader_reloc_type type,
467               uint32_t offset, uint32_t delta)
468 {
469    if (p->num_relocs + 1 > p->reloc_array_size) {
470       p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
471       p->relocs = reralloc(p->mem_ctx, p->relocs,
472                            struct brw_shader_reloc, p->reloc_array_size);
473    }
474 
475    p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
476       .id = id,
477       .type = type,
478       .offset = offset,
479       .delta = delta,
480    };
481 }
482 
483 static brw_eu_inst *
brw_alu1(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src)484 brw_alu1(struct brw_codegen *p, unsigned opcode,
485          struct brw_reg dest, struct brw_reg src)
486 {
487    brw_eu_inst *insn = next_insn(p, opcode);
488    brw_set_dest(p, insn, dest);
489    brw_set_src0(p, insn, src);
490    return insn;
491 }
492 
493 static brw_eu_inst *
brw_alu2(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)494 brw_alu2(struct brw_codegen *p, unsigned opcode,
495          struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
496 {
497    /* 64-bit immediates are only supported on 1-src instructions */
498    assert(src0.file != IMM ||
499           brw_type_size_bytes(src0.type) <= 4);
500    assert(src1.file != IMM ||
501           brw_type_size_bytes(src1.type) <= 4);
502 
503    brw_eu_inst *insn = next_insn(p, opcode);
504    brw_set_dest(p, insn, dest);
505    brw_set_src0(p, insn, src0);
506    brw_set_src1(p, insn, src1);
507    return insn;
508 }
509 
/**
 * Translate a regular-region vertical stride into the dedicated Align1
 * 3-src encoding.  Stride 1 is only encodable on Gfx12+, stride 2 only
 * before Gfx12; strides 8 and 16 both map to the STRIDE_8 encoding.
 */
static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
                       enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_1:
      assert(devinfo->ver >= 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
   case BRW_VERTICAL_STRIDE_2:
      assert(devinfo->ver < 12);
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}
532 
533 
534 static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)535 to_3src_align1_hstride(enum brw_horizontal_stride hstride)
536 {
537    switch (hstride) {
538    case BRW_HORIZONTAL_STRIDE_0:
539       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
540    case BRW_HORIZONTAL_STRIDE_1:
541       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
542    case BRW_HORIZONTAL_STRIDE_2:
543       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
544    case BRW_HORIZONTAL_STRIDE_4:
545       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
546    default:
547       unreachable("invalid hstride");
548    }
549 }
550 
551 static brw_eu_inst *
brw_alu3(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)552 brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
553          struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
554 {
555    const struct intel_device_info *devinfo = p->devinfo;
556    brw_eu_inst *inst = next_insn(p, opcode);
557 
558    assert(dest.nr < XE2_MAX_GRF);
559 
560    if (devinfo->ver <= 9) {
561       assert(src0.file != IMM && src2.file != IMM);
562    } else if (devinfo->ver <= 11) {
563       /* On Ice Lake, BFE and CSEL cannot have any immediate sources. */
564       assert((opcode != BRW_OPCODE_BFE && opcode != BRW_OPCODE_CSEL) ||
565              (src0.file != IMM && src2.file != IMM));
566 
567       /* On Ice Lake, DP4A and MAD can only have one immediate source. */
568       assert((opcode != BRW_OPCODE_DP4A && opcode != BRW_OPCODE_MAD) ||
569              !(src0.file == IMM && src2.file == IMM));
570    } else {
571       /* Having two immediate sources is allowed, but this should have been
572        * converted to a regular ADD by brw_opt_algebraic.
573        */
574       assert(opcode != BRW_OPCODE_ADD3 ||
575              !(src0.file == IMM && src2.file == IMM));
576    }
577 
578    /* BFI2 cannot have any immediate sources on any platform. */
579    assert(opcode != BRW_OPCODE_BFI2 ||
580           (src0.file != IMM && src2.file != IMM));
581 
582    assert(src0.file == IMM || src0.nr < XE2_MAX_GRF);
583    assert(src1.file != IMM && src1.nr < XE2_MAX_GRF);
584    assert(src2.file == IMM || src2.nr < XE2_MAX_GRF);
585    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
586    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
587    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
588    assert(src2.address_mode == BRW_ADDRESS_DIRECT);
589 
590    if (brw_eu_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
591       assert(dest.file == FIXED_GRF ||
592              (dest.file == ARF &&
593               (dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));
594 
595       brw_eu_inst_set_3src_a1_dst_reg_file(devinfo, inst, phys_file(dest));
596       brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
597       brw_eu_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
598       brw_eu_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);
599 
600       if (brw_type_is_float(dest.type)) {
601          brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
602                                         BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
603       } else {
604          brw_eu_inst_set_3src_a1_exec_type(devinfo, inst,
605                                         BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
606       }
607 
608       brw_eu_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
609       brw_eu_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
610       brw_eu_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
611       brw_eu_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
612 
613       if (src0.file == IMM) {
614          brw_eu_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
615       } else {
616          brw_eu_inst_set_3src_a1_src0_vstride(
617             devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
618          brw_eu_inst_set_3src_a1_src0_hstride(devinfo, inst,
619                                            to_3src_align1_hstride(src0.hstride));
620          brw_eu_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
621          brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
622          brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
623          brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
624       }
625       brw_eu_inst_set_3src_a1_src1_vstride(
626          devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
627       brw_eu_inst_set_3src_a1_src1_hstride(devinfo, inst,
628                                         to_3src_align1_hstride(src1.hstride));
629 
630       brw_eu_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
631       if (src1.file == ARF) {
632          brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
633       } else {
634          brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
635       }
636       brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
637       brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
638 
639       if (src2.file == IMM) {
640          brw_eu_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
641       } else {
642          brw_eu_inst_set_3src_a1_src2_hstride(devinfo, inst,
643                                            to_3src_align1_hstride(src2.hstride));
644          /* no vstride on src2 */
645          brw_eu_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
646          brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
647          brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
648          brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
649       }
650 
651       assert(src0.file == FIXED_GRF ||
652              src0.file == IMM);
653       assert(src1.file == FIXED_GRF ||
654              (src1.file == ARF &&
655               src1.nr == BRW_ARF_ACCUMULATOR));
656       assert(src2.file == FIXED_GRF ||
657              src2.file == IMM);
658 
659       if (devinfo->ver >= 12) {
660          if (src0.file == IMM) {
661             brw_eu_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
662          } else {
663             brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
664          }
665 
666          brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
667 
668          if (src2.file == IMM) {
669             brw_eu_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
670          } else {
671             brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
672          }
673       } else {
674          brw_eu_inst_set_3src_a1_src0_reg_file(devinfo, inst, phys_file(src0));
675          brw_eu_inst_set_3src_a1_src1_reg_file(devinfo, inst, phys_file(src1));
676          brw_eu_inst_set_3src_a1_src2_reg_file(devinfo, inst, phys_file(src2));
677       }
678 
679    } else {
680       assert(dest.file == FIXED_GRF);
681       assert(dest.type == BRW_TYPE_F  ||
682              dest.type == BRW_TYPE_DF ||
683              dest.type == BRW_TYPE_D  ||
684              dest.type == BRW_TYPE_UD ||
685              dest.type == BRW_TYPE_HF);
686       brw_eu_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
687       brw_eu_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
688       brw_eu_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
689 
690       assert(src0.file == FIXED_GRF);
691       brw_eu_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
692       brw_eu_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, src0.subnr);
693       brw_eu_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
694       brw_eu_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
695       brw_eu_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
696       brw_eu_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
697                                           src0.vstride == BRW_VERTICAL_STRIDE_0);
698 
699       assert(src1.file == FIXED_GRF);
700       brw_eu_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
701       brw_eu_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, src1.subnr);
702       brw_eu_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
703       brw_eu_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
704       brw_eu_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
705       brw_eu_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
706                                           src1.vstride == BRW_VERTICAL_STRIDE_0);
707 
708       assert(src2.file == FIXED_GRF);
709       brw_eu_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
710       brw_eu_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, src2.subnr);
711       brw_eu_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
712       brw_eu_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
713       brw_eu_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
714       brw_eu_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
715                                           src2.vstride == BRW_VERTICAL_STRIDE_0);
716 
717       /* Set both the source and destination types based on dest.type,
718        * ignoring the source register types.  The MAD and LRP emitters ensure
719        * that all four types are float.  The BFE and BFI2 emitters, however,
720        * may send us mixed D and UD types and want us to ignore that and use
721        * the destination type.
722        */
723       brw_eu_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
724       brw_eu_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
725 
726       /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
727        *
728        *    "Three source instructions can use operands with mixed-mode
729        *     precision. When SrcType field is set to :f or :hf it defines
730        *     precision for source 0 only, and fields Src1Type and Src2Type
731        *     define precision for other source operands:
732        *
733        *     0b = :f. Single precision Float (32-bit).
734        *     1b = :hf. Half precision Float (16-bit)."
735        */
736       if (src1.type == BRW_TYPE_HF)
737          brw_eu_inst_set_3src_a16_src1_type(devinfo, inst, 1);
738 
739       if (src2.type == BRW_TYPE_HF)
740          brw_eu_inst_set_3src_a16_src2_type(devinfo, inst, 1);
741    }
742 
743    return inst;
744 }
745 
static brw_eu_inst *
brw_dpas_three_src(struct brw_codegen *p, enum opcode opcode,
                   enum gfx12_systolic_depth sdepth, unsigned rcount, struct brw_reg dest,
                   struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   /* Emit a DPAS-family three-source instruction: encode destination,
    * systolic depth, repeat count, per-operand types and register
    * locations into the instruction's DPAS 3-src fields.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *inst = next_insn(p, opcode);

   /* The destination must live in the GRF. */
   assert(dest.file == FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
                                       FIXED_GRF);
   brw_eu_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
   brw_eu_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

   /* Execution type (float vs. integer pipeline) follows the destination
    * type.
    */
   if (brw_type_is_float(dest.type)) {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
      brw_eu_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
   }

   brw_eu_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
   /* rcount is encoded biased by one (field value 0 means rcount 1). */
   brw_eu_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);

   brw_eu_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
   brw_eu_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
   brw_eu_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
   brw_eu_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);

   /* src0 may be the null register (accumulate from zero) or a GRF. */
   assert(src0.file == FIXED_GRF ||
          (src0.file == ARF &&
           src0.nr == BRW_ARF_NULL));

   brw_eu_inst_set_dpas_3src_src0_reg_file(devinfo, inst, phys_file(src0));
   brw_eu_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
   brw_eu_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));

   assert(src1.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src1_reg_file(devinfo, inst, phys_file(src1));
   brw_eu_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
   brw_eu_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   assert(src2.file == FIXED_GRF);

   brw_eu_inst_set_dpas_3src_src2_reg_file(devinfo, inst, phys_file(src2));
   brw_eu_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
   brw_eu_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   return inst;
}
800 
801 /***********************************************************************
802  * Convenience routines.
803  */
/* Define brw_<OP>() as a thin wrapper around brw_alu1() for a
 * single-source ALU opcode.
 */
#define ALU1(OP)					\
brw_eu_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Define brw_<OP>() as a thin wrapper around brw_alu2() for a
 * two-source ALU opcode.
 */
#define ALU2(OP)					\
brw_eu_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Define brw_<OP>() for a three-source ALU opcode.  In align16 mode a
 * <0;...> vertical stride is expressed by replicating the X channel, so
 * scalar sources get an XXXX swizzle before emission.
 */
#define ALU3(OP)					\
brw_eu_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{                                                       \
   if (p->current->access_mode == BRW_ALIGN_16) {       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
         src0.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
         src1.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
         src2.swizzle = BRW_SWIZZLE_XXXX;               \
   }                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Like ALU3, but additionally asserts that all four operands are
 * float (F) or all double (DF), for opcodes that only operate on
 * floating-point data.
 */
#define ALU3F(OP)                                               \
brw_eu_inst *brw_##OP(struct brw_codegen *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_TYPE_F ||                   \
          dest.type == BRW_TYPE_DF);                   \
   if (dest.type == BRW_TYPE_F) {                      \
      assert(src0.type == BRW_TYPE_F);                 \
      assert(src1.type == BRW_TYPE_F);                 \
      assert(src2.type == BRW_TYPE_F);                 \
   } else if (dest.type == BRW_TYPE_DF) {              \
      assert(src0.type == BRW_TYPE_DF);                \
      assert(src1.type == BRW_TYPE_DF);                \
      assert(src2.type == BRW_TYPE_DF);                \
   }                                                            \
                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {               \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
   }                                                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
868 
/* Instantiate the boilerplate brw_<OP>() emitters for the simple one-,
 * two- and three-source ALU opcodes.
 */
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)
906 
907 brw_eu_inst *
908 brw_ADD(struct brw_codegen *p, struct brw_reg dest,
909         struct brw_reg src0, struct brw_reg src1)
910 {
911    /* 6.2.2: add */
912    if (src0.type == BRW_TYPE_F ||
913        (src0.file == IMM &&
914 	src0.type == BRW_TYPE_VF)) {
915       assert(src1.type != BRW_TYPE_UD);
916       assert(src1.type != BRW_TYPE_D);
917    }
918 
919    if (src1.type == BRW_TYPE_F ||
920        (src1.file == IMM &&
921 	src1.type == BRW_TYPE_VF)) {
922       assert(src0.type != BRW_TYPE_UD);
923       assert(src0.type != BRW_TYPE_D);
924    }
925 
926    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
927 }
928 
brw_eu_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
        struct brw_reg src0, struct brw_reg src1)
{
   /* Emit an AVG instruction.  All three operands must share one of the
    * byte/word/dword integer types; other types are rejected.
    */
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_TYPE_B:
   case BRW_TYPE_UB:
   case BRW_TYPE_W:
   case BRW_TYPE_UW:
   case BRW_TYPE_D:
   case BRW_TYPE_UD:
      break;
   default:
      unreachable("Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}
949 
950 brw_eu_inst *
brw_MUL(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)951 brw_MUL(struct brw_codegen *p, struct brw_reg dest,
952         struct brw_reg src0, struct brw_reg src1)
953 {
954    /* 6.32.38: mul */
955    if (src0.type == BRW_TYPE_D ||
956        src0.type == BRW_TYPE_UD ||
957        src1.type == BRW_TYPE_D ||
958        src1.type == BRW_TYPE_UD) {
959       assert(dest.type != BRW_TYPE_F);
960    }
961 
962    if (src0.type == BRW_TYPE_F ||
963        (src0.file == IMM &&
964 	src0.type == BRW_TYPE_VF)) {
965       assert(src1.type != BRW_TYPE_UD);
966       assert(src1.type != BRW_TYPE_D);
967    }
968 
969    if (src1.type == BRW_TYPE_F ||
970        (src1.file == IMM &&
971 	src1.type == BRW_TYPE_VF)) {
972       assert(src0.type != BRW_TYPE_UD);
973       assert(src0.type != BRW_TYPE_D);
974    }
975 
976    assert(src0.file != ARF ||
977 	  src0.nr != BRW_ARF_ACCUMULATOR);
978    assert(src1.file != ARF ||
979 	  src1.nr != BRW_ARF_ACCUMULATOR);
980 
981    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
982 }
983 
984 brw_eu_inst *
brw_LINE(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)985 brw_LINE(struct brw_codegen *p, struct brw_reg dest,
986          struct brw_reg src0, struct brw_reg src1)
987 {
988    src0.vstride = BRW_VERTICAL_STRIDE_0;
989    src0.width = BRW_WIDTH_1;
990    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
991    return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
992 }
993 
994 brw_eu_inst *
brw_PLN(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)995 brw_PLN(struct brw_codegen *p, struct brw_reg dest,
996         struct brw_reg src0, struct brw_reg src1)
997 {
998    src0.vstride = BRW_VERTICAL_STRIDE_0;
999    src0.width = BRW_WIDTH_1;
1000    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1001    src1.vstride = BRW_VERTICAL_STRIDE_8;
1002    src1.width = BRW_WIDTH_8;
1003    src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1004    return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
1005 }
1006 
brw_eu_inst *
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
         unsigned rcount, struct brw_reg dest, struct brw_reg src0,
         struct brw_reg src1, struct brw_reg src2)
{
   /* Convenience wrapper: emit a DPAS instruction with the given
    * systolic depth and repeat count.
    */
   return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
                             src1, src2);
}
1015 
brw_NOP(struct brw_codegen * p)1016 void brw_NOP(struct brw_codegen *p)
1017 {
1018    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1019    memset(insn, 0, sizeof(*insn));
1020    brw_eu_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
1021 }
1022 
void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
   /* Emit a SYNC instruction; the sync function is carried in the
    * condition-modifier field of the encoding.
    */
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
   brw_eu_inst_set_cond_modifier(p->devinfo, insn, func);
}
1028 
1029 /***********************************************************************
1030  * Comparisons, if/else/endif
1031  */
1032 
1033 brw_eu_inst *
brw_JMPI(struct brw_codegen * p,struct brw_reg index,unsigned predicate_control)1034 brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1035          unsigned predicate_control)
1036 {
1037    const struct intel_device_info *devinfo = p->devinfo;
1038    struct brw_reg ip = brw_ip_reg();
1039    brw_eu_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1040 
1041    brw_eu_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
1042    brw_eu_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1043    brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1044    brw_eu_inst_set_pred_control(devinfo, inst, predicate_control);
1045 
1046    return inst;
1047 }
1048 
static void
push_if_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   /* Record the instruction by its index into p->store rather than by
    * pointer, since the store may be reallocated before the matching
    * ENDIF is emitted.
    */
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   /* Grow after the push so there is always room for the next entry. */
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1061 
1062 static brw_eu_inst *
pop_if_stack(struct brw_codegen * p)1063 pop_if_stack(struct brw_codegen *p)
1064 {
1065    p->if_stack_depth--;
1066    return &p->store[p->if_stack[p->if_stack_depth]];
1067 }
1068 
static void
push_loop_stack(struct brw_codegen *p, brw_eu_inst *inst)
{
   /* Grow first so the slot written below is guaranteed to exist. */
   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
      p->loop_stack_array_size *= 2;
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
			       p->loop_stack_array_size);
   }

   /* Store the instruction as an index, not a pointer, since p->store
    * may be reallocated while the loop is still open.
    */
   p->loop_stack[p->loop_stack_depth] = inst - p->store;
   p->loop_stack_depth++;
}
1081 
static brw_eu_inst *
get_inner_do_insn(struct brw_codegen *p)
{
   /* Return a pointer to the first instruction of the innermost open
    * loop (the location recorded by brw_DO()).
    */
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}
1087 
1088 /* EU takes the value from the flag register and pushes it onto some
1089  * sort of a stack (presumably merging with any flag value already on
1090  * the stack).  Within an if block, the flags at the top of the stack
1091  * control execution on each channel of the unit, eg. on each of the
1092  * 16 pixel values in our wm programs.
1093  *
1094  * When the matching 'else' instruction is reached (presumably by
1095  * countdown of the instruction count patched in by our ELSE/ENDIF
1096  * functions), the relevant flags are inverted.
1097  *
1098  * When the matching 'endif' instruction is reached, the flags are
1099  * popped off.  If the stack is now empty, normal execution resumes.
1100  */
brw_eu_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   /* Emit an IF with zeroed JIP/UIP jump targets; they are patched later
    * by patch_IF_ELSE() when the matching ENDIF is emitted.  The
    * instruction is pushed on the if-stack for that later fixup.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
   /* The immediate src0 only exists before Gfx12. */
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_exec_size(devinfo, insn, execute_size);
   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
   return insn;
}
1125 
1126 /**
1127  * Patch IF and ELSE instructions with appropriate jump targets.
1128  */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_eu_inst *if_inst, brw_eu_inst *else_inst, brw_eu_inst *endif_inst)
{
   /* Back-patch the JIP/UIP jump targets of an IF (and its optional
    * ELSE) once the location of the matching ENDIF is known.  Offsets
    * are instruction distances scaled by brw_jump_scale().
    */
   const struct intel_device_info *devinfo = p->devinfo;

   assert(if_inst != NULL && brw_eu_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_eu_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);

   unsigned br = brw_jump_scale(devinfo);

   assert(brw_eu_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
   /* ENDIF inherits the IF's execution size. */
   brw_eu_inst_set_exec_size(devinfo, endif_inst, brw_eu_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
      brw_eu_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
   } else {
      brw_eu_inst_set_exec_size(devinfo, else_inst, brw_eu_inst_exec_size(devinfo, if_inst));

      /* Patch ELSE -> ENDIF */
      /* The IF instruction's JIP should point just past the ELSE */
      brw_eu_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
      /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
      brw_eu_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));

      if (devinfo->ver < 11) {
         /* Set the ELSE instruction to use branch_ctrl with a join
          * jump target pointing at the NOP inserted right before
          * the ENDIF instruction in order to make sure it is
          * executed in all cases, since attempting to do the same
          * as on other generations could cause the EU to jump at
          * the instruction immediately after the ENDIF due to
          * Wa_220160235, which could cause the program to continue
          * running with all channels disabled.
          */
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
         brw_eu_inst_set_branch_control(devinfo, else_inst, true);
      } else {
         brw_eu_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
      }

      /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
       * JIP and UIP both should point to ENDIF on those
       * platforms.
       */
      brw_eu_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
   }
}
1180 
void
brw_ELSE(struct brw_codegen *p)
{
   /* Emit an ELSE with zeroed JIP/UIP; like IF, its jump targets are
    * filled in by patch_IF_ELSE() at ENDIF time, so it is pushed on the
    * if-stack on top of the matching IF.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   /* The immediate src0 only exists before Gfx12. */
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   brw_eu_inst_set_jip(devinfo, insn, 0);
   brw_eu_inst_set_uip(devinfo, insn, 0);

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   push_if_stack(p, insn);
}
1200 
void
brw_ENDIF(struct brw_codegen *p)
{
   /* Emit an ENDIF, pop the matching IF (and optional ELSE) off the
    * if-stack, and back-patch their jump targets via patch_IF_ELSE().
    */
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = NULL;
   brw_eu_inst *else_inst = NULL;
   brw_eu_inst *if_inst = NULL;
   brw_eu_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_eu_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   tmp = pop_if_stack(p);
   if (brw_eu_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   /* The ENDIF's own JIP points at the next instruction. */
   brw_eu_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1249 
1250 brw_eu_inst *
brw_BREAK(struct brw_codegen * p)1251 brw_BREAK(struct brw_codegen *p)
1252 {
1253    const struct intel_device_info *devinfo = p->devinfo;
1254    brw_eu_inst *insn;
1255 
1256    insn = next_insn(p, BRW_OPCODE_BREAK);
1257    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1258    brw_set_src0(p, insn, brw_imm_d(0x0));
1259    brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1260    brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1261 
1262    return insn;
1263 }
1264 
1265 brw_eu_inst *
brw_CONT(struct brw_codegen * p)1266 brw_CONT(struct brw_codegen *p)
1267 {
1268    const struct intel_device_info *devinfo = p->devinfo;
1269    brw_eu_inst *insn;
1270 
1271    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1272    brw_set_dest(p, insn, brw_ip_reg());
1273    brw_set_src0(p, insn, brw_imm_d(0x0));
1274 
1275    brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1276    brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1277    return insn;
1278 }
1279 
1280 brw_eu_inst *
brw_HALT(struct brw_codegen * p)1281 brw_HALT(struct brw_codegen *p)
1282 {
1283    const struct intel_device_info *devinfo = p->devinfo;
1284    brw_eu_inst *insn;
1285 
1286    insn = next_insn(p, BRW_OPCODE_HALT);
1287    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
1288    if (devinfo->ver < 12) {
1289       brw_set_src0(p, insn, brw_imm_d(0x0));
1290    }
1291 
1292    brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1293    brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1294    return insn;
1295 }
1296 
1297 /* DO/WHILE loop:
1298  *
1299  * The DO/WHILE is just an unterminated loop -- break or continue are
1300  * used for control within the loop.  We have a few ways they can be
1301  * done.
1302  *
1303  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1304  * jip and no DO instruction.
1305  *
1306  * For gfx6, there's no more mask stack, so no need for DO.  WHILE
1307  * just points back to the first instruction of the loop.
1308  */
brw_eu_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
   /* No DO instruction is emitted (see the comment above): just remember
    * where the loop body starts so the matching WHILE can compute its
    * backward jump.  Note execute_size is currently unused here.
    */
   push_loop_stack(p, &p->store[p->nr_insn]);
   return &p->store[p->nr_insn];
}
1315 
brw_eu_inst *
brw_WHILE(struct brw_codegen *p)
{
   /* Close the innermost loop: emit a WHILE whose JIP jumps back to the
    * first instruction recorded by brw_DO(), then pop the loop stack.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   insn = next_insn(p, BRW_OPCODE_WHILE);
   do_insn = get_inner_do_insn(p);

   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
   /* The immediate src0 only exists before Gfx12. */
   if (devinfo->ver < 12)
      brw_set_src0(p, insn, brw_imm_d(0));
   /* do_insn precedes insn, so this is a negative (backward) offset. */
   brw_eu_inst_set_jip(devinfo, insn, br * (do_insn - insn));

   brw_eu_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));

   brw_eu_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
1339 
brw_CMP(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1340 void brw_CMP(struct brw_codegen *p,
1341 	     struct brw_reg dest,
1342 	     unsigned conditional,
1343 	     struct brw_reg src0,
1344 	     struct brw_reg src1)
1345 {
1346    const struct intel_device_info *devinfo = p->devinfo;
1347    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1348 
1349    brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
1350    brw_set_dest(p, insn, dest);
1351    brw_set_src0(p, insn, src0);
1352    brw_set_src1(p, insn, src1);
1353 }
1354 
brw_CMPN(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1355 void brw_CMPN(struct brw_codegen *p,
1356               struct brw_reg dest,
1357               unsigned conditional,
1358               struct brw_reg src0,
1359               struct brw_reg src1)
1360 {
1361    const struct intel_device_info *devinfo = p->devinfo;
1362    brw_eu_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
1363 
1364    brw_eu_inst_set_cond_modifier(devinfo, insn, conditional);
1365    brw_set_dest(p, insn, dest);
1366    brw_set_src0(p, insn, src0);
1367    brw_set_src1(p, insn, src1);
1368 }
1369 
1370 /***********************************************************************
1371  * Helpers for the various SEND message types:
1372  */
1373 
void gfx6_math(struct brw_codegen *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   /* Emit an extended-math (MATH) instruction for the given function.
    * Validates the per-function operand restrictions and applies the
    * Wa_22016140776 workaround before encoding.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   brw_eu_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == FIXED_GRF);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      /* Integer division: sources must be integer, src1 a GRF or
       * immediate.
       */
      assert(src0.type != BRW_TYPE_F);
      assert(src1.type != BRW_TYPE_F);
      assert(src1.file == FIXED_GRF ||
             src1.file == IMM);
      /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
       *     INT DIV function does not support source modifiers.
       */
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   } else {
      /* All other math functions take float sources (HF allowed on
       * Gfx9+).
       */
      assert(src0.type == BRW_TYPE_F ||
             (src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
      assert(src1.type == BRW_TYPE_F ||
             (src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
   }

  /* This workaround says that we cannot use scalar broadcast with HF types.
   * However, for is_scalar values, all 16 elements contain the same value, so
   * we can replace a <0,1,0> region with <16,16,1> without ill effect.
   */
   if (intel_needs_workaround(devinfo, 22016140776)) {
      if (src0.is_scalar && src0.type == BRW_TYPE_HF) {
         src0.vstride = BRW_VERTICAL_STRIDE_16;
         src0.width = BRW_WIDTH_16;
         src0.hstride = BRW_HORIZONTAL_STRIDE_1;
         src0.swizzle = BRW_SWIZZLE_XYZW;
      }

      if (src1.is_scalar && src1.type == BRW_TYPE_HF) {
         src1.vstride = BRW_VERTICAL_STRIDE_16;
         src1.width = BRW_WIDTH_16;
         src1.hstride = BRW_HORIZONTAL_STRIDE_1;
         src1.swizzle = BRW_SWIZZLE_XYZW;
      }
   }

   brw_eu_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
1434 
1435 void
brw_send_indirect_message(struct brw_codegen * p,unsigned sfid,struct brw_reg dst,struct brw_reg payload,struct brw_reg desc,bool eot)1436 brw_send_indirect_message(struct brw_codegen *p,
1437                           unsigned sfid,
1438                           struct brw_reg dst,
1439                           struct brw_reg payload,
1440                           struct brw_reg desc,
1441                           bool eot)
1442 {
1443    const struct intel_device_info *devinfo = p->devinfo;
1444    struct brw_eu_inst *send;
1445 
1446    dst = retype(dst, BRW_TYPE_UW);
1447 
1448    assert(desc.type == BRW_TYPE_UD);
1449 
1450    if (desc.file == IMM) {
1451       send = next_insn(p, BRW_OPCODE_SEND);
1452       brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1453       brw_set_desc(p, send, desc.ud);
1454    } else {
1455       assert(desc.file == ADDRESS);
1456       assert(desc.subnr == 0);
1457       send = next_insn(p, BRW_OPCODE_SEND);
1458       brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
1459       if (devinfo->ver >= 12)
1460          brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, true);
1461       else
1462          brw_set_src1(p, send, desc);
1463    }
1464 
1465    brw_set_dest(p, send, dst);
1466    brw_eu_inst_set_sfid(devinfo, send, sfid);
1467    brw_eu_inst_set_eot(devinfo, send, eot);
1468 }
1469 
/* Emit a split SEND (SENDS on pre-Gfx12) with two payload sources.  Both
 * the message descriptor and the extended descriptor may independently be
 * either an immediate or an address-register value; the reg32 "sel"
 * bits record which form each one takes.
 */
void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                struct brw_reg ex_desc,
                                unsigned ex_mlen,
                                bool ex_bso,
                                bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_eu_inst *send;

   dst = retype(dst, BRW_TYPE_UW);

   assert(desc.type == BRW_TYPE_UD);

   /* Gfx12+ folds the split form into the regular SEND opcode. */
   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
   brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));

   if (desc.file == IMM) {
      /* Immediate descriptor: sel bit cleared, value encoded directly. */
      brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 0);
      brw_eu_inst_set_send_desc(devinfo, send, desc.ud);
   } else {
      /* Descriptor read from the address register (subregister 0 only). */
      assert(desc.file == ADDRESS);
      assert(desc.subnr == 0);
      brw_eu_inst_set_send_sel_reg32_desc(devinfo, send, 1);
   }

   if (ex_desc.file == IMM) {
      /* Immediate extended descriptor. */
      brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
      brw_eu_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud, false);
   } else {
      /* Extended descriptor from the address register; the subregister
       * must be DWord-aligned since only the DWord index is encoded.
       */
      assert(ex_desc.file == ADDRESS);
      assert((ex_desc.subnr & 0x3) == 0);
      brw_eu_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
      brw_eu_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);

      /* NOTE(review): on Gfx20+ UGM the src1 length appears to live in
       * instruction bits 103:99 when the ex_desc is indirect — encoded
       * here via raw set_bits.
       */
      if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM)
         brw_eu_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
   }

   if (ex_bso) {
      /* The send instruction ExBSO field does not exist with UGM on Gfx20+,
       * it is assumed.
       *
       * BSpec 56890
       */
      if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
         brw_eu_inst_set_send_ex_bso(devinfo, send, true);
      brw_eu_inst_set_send_src1_len(devinfo, send, ex_mlen / reg_unit(devinfo));
   }
   brw_eu_inst_set_sfid(devinfo, send, sfid);
   brw_eu_inst_set_eot(devinfo, send, eot);
}
1529 
1530 static bool
while_jumps_before_offset(const struct intel_device_info * devinfo,brw_eu_inst * insn,int while_offset,int start_offset)1531 while_jumps_before_offset(const struct intel_device_info *devinfo,
1532                           brw_eu_inst *insn, int while_offset, int start_offset)
1533 {
1534    int scale = 16 / brw_jump_scale(devinfo);
1535    int jip = brw_eu_inst_jip(devinfo, insn);
1536    assert(jip < 0);
1537    return while_offset + jip * scale <= start_offset;
1538 }
1539 
1540 
/* Scan forward from start_offset for the instruction that ends the current
 * control-flow block (ENDIF, ELSE, HALT, or the WHILE closing the enclosing
 * loop), skipping over nested IF/ENDIF pairs.  Returns the byte offset of
 * that instruction, or 0 if none is found before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   /* Nesting depth of IF blocks opened after start_offset. */
   int depth = 0;

   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_eu_inst *insn = store + offset;

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         /* ENDIF at our own depth terminates the block. */
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop.  Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   /* No block terminator found. */
   return 0;
}
1583 
1584 /* There is no DO instruction on gfx6, so to find the end of the loop
1585  * we have to see if the loop is jumping back before our start
1586  * instruction.
1587  */
1588 static int
brw_find_loop_end(struct brw_codegen * p,int start_offset)1589 brw_find_loop_end(struct brw_codegen *p, int start_offset)
1590 {
1591    const struct intel_device_info *devinfo = p->devinfo;
1592    int offset;
1593    void *store = p->store;
1594 
1595    /* Always start after the instruction (such as a WHILE) we're trying to fix
1596     * up.
1597     */
1598    for (offset = next_offset(devinfo, store, start_offset);
1599         offset < p->next_insn_offset;
1600         offset = next_offset(devinfo, store, offset)) {
1601       brw_eu_inst *insn = store + offset;
1602 
1603       if (brw_eu_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
1604 	 if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
1605 	    return offset;
1606       }
1607    }
1608    assert(!"not reached");
1609    return start_offset;
1610 }
1611 
1612 /* After program generation, go back and update the UIP and JIP of
1613  * BREAK, CONT, and HALT instructions to their correct locations.
1614  */
/* Walk the generated instructions from start_offset and patch the JIP (and
 * where applicable UIP) fields of BREAK, CONTINUE, ENDIF and HALT to their
 * final byte distances, expressed in the hardware's jump units.
 */
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   int br = brw_jump_scale(devinfo);
   /* Bytes per encoded jump unit. */
   int scale = 16 / br;
   void *store = p->store;

   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
      brw_eu_inst *insn = store + offset;
      /* Compacted instructions can't carry these fields; they must not
       * appear here.
       */
      assert(brw_eu_inst_cmpt_control(devinfo, insn) == 0);

      switch (brw_eu_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_BREAK: {
         /* JIP -> end of the innermost block; UIP -> end of the loop. */
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
	 /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
         brw_eu_inst_set_uip(devinfo, insn,
	    (brw_find_loop_end(p, offset) - offset) / scale);
	 break;
      }

      case BRW_OPCODE_CONTINUE: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_eu_inst_set_uip(devinfo, insn,
            (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
	 break;
      }

      case BRW_OPCODE_ENDIF: {
         /* With no following block terminator, fall through to the next
          * instruction (one jump unit).
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         brw_eu_inst_set_jip(devinfo, insn, jump);
	 break;
      }

      case BRW_OPCODE_HALT: {
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same. In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
         int block_end_offset = brw_find_next_block_end(p, offset);
	 if (block_end_offset == 0) {
            brw_eu_inst_set_jip(devinfo, insn, brw_eu_inst_uip(devinfo, insn));
	 } else {
            brw_eu_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
	 }
         assert(brw_eu_inst_uip(devinfo, insn) != 0);
         assert(brw_eu_inst_jip(devinfo, insn) != 0);
	 break;
      }

      default:
         break;
      }
   }
}
1687 
1688 static void
brw_set_memory_fence_message(struct brw_codegen * p,struct brw_eu_inst * insn,enum brw_message_target sfid,bool commit_enable,unsigned bti)1689 brw_set_memory_fence_message(struct brw_codegen *p,
1690                              struct brw_eu_inst *insn,
1691                              enum brw_message_target sfid,
1692                              bool commit_enable,
1693                              unsigned bti)
1694 {
1695    const struct intel_device_info *devinfo = p->devinfo;
1696 
1697    brw_set_desc(p, insn, brw_message_desc(
1698                    devinfo, 1, (commit_enable ? 1 : 0), true));
1699 
1700    brw_eu_inst_set_sfid(devinfo, insn, sfid);
1701 
1702    switch (sfid) {
1703    case GFX6_SFID_DATAPORT_RENDER_CACHE:
1704       brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
1705       break;
1706    case GFX7_SFID_DATAPORT_DATA_CACHE:
1707       brw_eu_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
1708       break;
1709    default:
1710       unreachable("Not reached");
1711    }
1712 
1713    if (commit_enable)
1714       brw_eu_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
1715 
1716    assert(devinfo->ver >= 11 || bti == 0);
1717    brw_eu_inst_set_binding_table_index(devinfo, insn, bti);
1718 }
1719 
/* Fill in an LSC-era (Gfx12.5+) memory fence message on insn.  The scope
 * and flush type are decoded from desc, except for the URB and TGM cases
 * which are overridden below.
 */
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_eu_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
    /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_eu_inst_set_sfid(p->devinfo, insn, sfid);

   /* On Gfx12.5 URB is not listed as port usable for fences with the LSC (see
    * BSpec 53578 for Gfx12.5, BSpec 57330 for Gfx20), so we completely ignore
    * the descriptor value and rebuild a legacy URB fence descriptor.
    */
   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true));
   } else {
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      /* Typed (TGM) fences always use tile scope with an evict flush. */
      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *   "For any fence greater than local scope, always set flush type to
       *    at least invalidate so that fence goes on properly."
       *
       *   "The bug is if flush_type is 'None', the scope is always downgraded
       *    to 'local'."
       *
       * Here set scope to NONE_6 instead of NONE, which has the same effect
       * as NONE but avoids the downgrade to scope LOCAL.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false));
   }
}
1770 
1771 void
brw_memory_fence(struct brw_codegen * p,struct brw_reg dst,struct brw_reg src,enum opcode send_op,enum brw_message_target sfid,uint32_t desc,bool commit_enable,unsigned bti)1772 brw_memory_fence(struct brw_codegen *p,
1773                  struct brw_reg dst,
1774                  struct brw_reg src,
1775                  enum opcode send_op,
1776                  enum brw_message_target sfid,
1777                  uint32_t desc,
1778                  bool commit_enable,
1779                  unsigned bti)
1780 {
1781    const struct intel_device_info *devinfo = p->devinfo;
1782 
1783    dst = retype(vec1(dst), BRW_TYPE_UW);
1784    src = retype(vec1(src), BRW_TYPE_UD);
1785 
1786    /* Set dst as destination for dependency tracking, the MEMORY_FENCE
1787     * message doesn't write anything back.
1788     */
1789    struct brw_eu_inst *insn = next_insn(p, send_op);
1790    brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
1791    brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1792    brw_set_dest(p, insn, dst);
1793    brw_set_src0(p, insn, src);
1794 
1795    /* All DG2 hardware requires LSC for fence messages, even A-step */
1796    if (devinfo->has_lsc)
1797       gfx12_set_memory_fence_message(p, insn, sfid, desc);
1798    else
1799       brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
1800 }
1801 
/* Copy the single component of src selected by idx into every channel of
 * dst (a "broadcast").  idx may be an immediate or a register; in the
 * register case the component is fetched via VxH indirect addressing.
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);

   assert(src.file == FIXED_GRF &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gen12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *    and Quad-Word data must not be used."
    *
    * We require the source and destination types to match so stomp to an
    * unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type =
      brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));

   if ((src.vstride == 0 && src.hstride == 0) ||
       idx.file == IMM) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = (src.vstride == 0 && src.hstride == 0) ? 0 : idx.ud;
      src = stride(suboffset(src, i), 0, 1, 0);

      /* Without 64-bit integer support, move a >32-bit value as two DWord
       * halves with a register-distance-free SWSB between them.
       */
      if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    subscript(src, BRW_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    subscript(src, BRW_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset. The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      const struct brw_reg addr =
         retype(brw_address_reg(0), BRW_TYPE_UD);
      unsigned offset = src.nr * REG_SIZE + src.subnr;
      /* Limit in bytes of the signed indirect addressing immediate. */
      const unsigned limit = 512;

      /* The address computation below must run unconditionally, so disable
       * masking and predication while emitting it.
       */
      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);

      /* Take into account the component size and horizontal stride. */
      assert(src.vstride == src.hstride + src.width);
      brw_SHL(p, addr, vec1(idx),
              brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
                         src.hstride - 1));

      /* We can only address up to limit bytes using the indirect
       * addressing immediate, account for the difference if the source
       * register is above this limit.
       */
      if (offset >= limit) {
         brw_set_default_swsb(p, tgl_swsb_regdist(1));
         brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
         offset = offset % limit;
      }

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      /* Use indirect addressing to fetch the specified component. */
      if (brw_type_size_bytes(src.type) > 4 &&
          (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
         /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
          *
          *   "When source or destination datatype is 64b or operation is
          *    integer DWord multiply, indirect addressing must not be
          *    used."
          *
          * We may also not support Q/UQ types.
          *
          * To work around both of these, we do two integer MOVs instead
          * of one 64-bit MOV.  Because no double value should ever cross
          * a register boundary, it's safe to use the immediate offset in
          * the indirect here to handle adding 4 bytes to the offset and
          * avoid the extra ADD to the register file.
          */
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
                    retype(brw_vec1_indirect(addr.subnr, offset),
                           BRW_TYPE_D));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
                    retype(brw_vec1_indirect(addr.subnr, offset + 4),
                           BRW_TYPE_D));
      } else {
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
      }
   }

   brw_pop_insn_state(p);
}
1927 
1928 
1929 /**
1930  * Emit the SEND message for a barrier
1931  */
1932 void
brw_barrier(struct brw_codegen * p,struct brw_reg src)1933 brw_barrier(struct brw_codegen *p, struct brw_reg src)
1934 {
1935    const struct intel_device_info *devinfo = p->devinfo;
1936    struct brw_eu_inst *inst;
1937 
1938    brw_push_insn_state(p);
1939    brw_set_default_access_mode(p, BRW_ALIGN_1);
1940    inst = next_insn(p, BRW_OPCODE_SEND);
1941    brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
1942    brw_set_src0(p, inst, src);
1943    brw_set_src1(p, inst, brw_null_reg());
1944    brw_set_desc(p, inst, brw_message_desc(devinfo,
1945                                           1 * reg_unit(devinfo), 0, false));
1946 
1947    brw_eu_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
1948    brw_eu_inst_set_gateway_subfuncid(devinfo, inst,
1949                                   BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
1950 
1951    brw_eu_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1952    brw_pop_insn_state(p);
1953 }
1954 
1955 
1956 /**
1957  * Emit the wait instruction for a barrier
1958  */
1959 void
brw_WAIT(struct brw_codegen * p)1960 brw_WAIT(struct brw_codegen *p)
1961 {
1962    const struct intel_device_info *devinfo = p->devinfo;
1963    struct brw_eu_inst *insn;
1964 
1965    struct brw_reg src = brw_notification_reg();
1966 
1967    insn = next_insn(p, BRW_OPCODE_WAIT);
1968    brw_set_dest(p, insn, src);
1969    brw_set_src0(p, insn, src);
1970    brw_set_src1(p, insn, brw_null_reg());
1971 
1972    brw_eu_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1973    brw_eu_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
1974 }
1975 
1976 void
brw_float_controls_mode(struct brw_codegen * p,unsigned mode,unsigned mask)1977 brw_float_controls_mode(struct brw_codegen *p,
1978                         unsigned mode, unsigned mask)
1979 {
1980    assert(p->current->mask_control == BRW_MASK_DISABLE);
1981 
1982    /* From the Skylake PRM, Volume 7, page 760:
1983     *  "Implementation Restriction on Register Access: When the control
1984     *   register is used as an explicit source and/or destination, hardware
1985     *   does not ensure execution pipeline coherency. Software must set the
1986     *   thread control field to ‘switch’ for an instruction that uses
1987     *   control register as an explicit operand."
1988     *
1989     * On Gfx12+ this is implemented in terms of SWSB annotations instead.
1990     */
1991    brw_set_default_swsb(p, tgl_swsb_regdist(1));
1992 
1993    brw_eu_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
1994                             brw_imm_ud(~mask));
1995    brw_eu_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
1996    if (p->devinfo->ver < 12)
1997       brw_eu_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
1998 
1999    if (mode) {
2000       brw_eu_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
2001                                  brw_imm_ud(mode));
2002       brw_eu_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
2003       if (p->devinfo->ver < 12)
2004          brw_eu_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
2005    }
2006 
2007    if (p->devinfo->ver >= 12)
2008       brw_SYNC(p, TGL_SYNC_NOP);
2009 }
2010 
2011 void
brw_update_reloc_imm(const struct brw_isa_info * isa,brw_eu_inst * inst,uint32_t value)2012 brw_update_reloc_imm(const struct brw_isa_info *isa,
2013                      brw_eu_inst *inst,
2014                      uint32_t value)
2015 {
2016    const struct intel_device_info *devinfo = isa->devinfo;
2017 
2018    /* Sanity check that the instruction is a MOV of an immediate */
2019    assert(brw_eu_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
2020    assert(brw_eu_inst_src0_reg_file(devinfo, inst) == IMM);
2021 
2022    /* If it was compacted, we can't safely rewrite */
2023    assert(brw_eu_inst_cmpt_control(devinfo, inst) == 0);
2024 
2025    brw_eu_inst_set_imm_ud(devinfo, inst, value);
2026 }
2027 
2028 /* A default value for constants that will be patched at run-time.
2029  * We pick an arbitrary value that prevents instruction compaction.
2030  */
2031 #define DEFAULT_PATCH_IMM 0x4a7cc037
2032 
2033 void
brw_MOV_reloc_imm(struct brw_codegen * p,struct brw_reg dst,enum brw_reg_type src_type,uint32_t id,uint32_t base)2034 brw_MOV_reloc_imm(struct brw_codegen *p,
2035                   struct brw_reg dst,
2036                   enum brw_reg_type src_type,
2037                   uint32_t id,
2038                   uint32_t base)
2039 {
2040    assert(brw_type_size_bytes(src_type) == 4);
2041    assert(brw_type_size_bytes(dst.type) == 4);
2042 
2043    brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
2044                  p->next_insn_offset, base);
2045 
2046    brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
2047 }
2048