• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <keithw@vmware.com>
30   */
31 
32 
33 #include "brw_eu_defines.h"
34 #include "brw_eu.h"
35 
36 #include "util/ralloc.h"
37 
/**
 * Encode \p dest as the destination operand of \p inst.
 *
 * SEND-like opcodes use a compact destination encoding (register file and
 * number only, with the restrictions asserted below); every other opcode
 * gets the full file/type/region encoding, in either direct or
 * register-indirect addressing and either Align1 or Align16 access mode.
 */
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == BRW_GENERAL_REGISTER_FILE)
      assert(dest.nr < XE2_MAX_GRF);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == BRW_ARF_NULL &&
       type_sz(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND/SENDC: only the register file and number are encoded;
       * modifiers, subregister and non-trivial regions are not expressible.
       */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split sends: the subregister is encoded in 16-byte units. */
      assert(devinfo->ver < 12);
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      /* Ordinary instructions: full file/type/region destination encoding. */
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
            /* Stride 0 is not encodable for a destination; promote to 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == BRW_GENERAL_REGISTER_FILE) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            /* Stride 0 is not encodable for a destination; promote to 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }
}
129 
/**
 * Encode \p reg as source operand 0 of \p inst.
 *
 * SEND-like opcodes only encode the GRF the message payload starts at, so
 * modifiers and regions are ignored for them; all other opcodes get the
 * full file/type/modifier/region encoding, including immediates.
 */
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND  ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND/SENDC: compact encoding of file and register number. */
      assert(reg.file != BRW_IMMEDIATE_VALUE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));

   } else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split sends: subregister encoded in 16-byte units. */
      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* Pick the immediate field width from the register type. */
         if (reg.type == BRW_REGISTER_TYPE_DF)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_REGISTER_TYPE_UQ ||
                  reg.type == BRW_REGISTER_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         /* The immediate occupies the src1 slot pre-Gfx12, so mirror the
          * type information there for sub-64-bit immediates.
          */
         if (devinfo->ver < 12 && type_sz(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst,
                                       BRW_ARCHITECTURE_REGISTER_FILE);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* Collapse a <W;1,H> SIMD1 access to the scalar <0;1,0> region. */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
247 
248 
/**
 * Encode \p reg as source operand 1 of \p inst.
 *
 * Split sends (and Gfx12+ SEND/SENDC) use a compact file/number encoding;
 * ordinary instructions get the full file/type/modifier/region encoding.
 * The accumulator may not appear here (src0 only), and only 32-bit
 * immediates are allowed since src1 shares its slot with the immediate.
 */
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < XE2_MAX_GRF);

   if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
         brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
      /* Send-like encodings: only register file and number are stored. */
      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *    operands only."
       */
      assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
             reg.nr != BRW_ARF_ACCUMULATOR);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(type_sz(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* Collapse a <W;1,H> SIMD1 access to the scalar <0;1,0> region. */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}
340 
/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
                unsigned desc, unsigned ex_desc)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
          brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
   /* Pre-Gfx12 the immediate descriptor occupies the src1 slot, which must
    * be marked as a 32-bit immediate.
    */
   if (devinfo->ver < 12)
      brw_inst_set_src1_file_type(devinfo, inst,
                                  BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
   brw_inst_set_send_desc(devinfo, inst, desc);
   /* The extended descriptor field only exists on Gfx9+. */
   if (devinfo->ver >= 9)
      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
359 
/**
 * Configure \p inst as a Gfx7 data-port scratch block read/write message.
 *
 * \param write                 true for a scratch write, false for a read
 * \param dword                 selects the scratch message type bit
 * \param invalidate_after_read invalidate the cache line after reading
 * \param num_regs              registers transferred; must be 1, 2, 4 or 8
 *                              (encoded as log2 in the block-size field)
 * \param addr_offset           scratch-space offset for the access
 * \param mlen / rlen           message / response lengths for the descriptor
 * \param header_present        whether the message carries a header
 */
static void
gfx7_set_dp_scratch_message(struct brw_codegen *p,
                            brw_inst *inst,
                            bool write,
                            bool dword,
                            bool invalidate_after_read,
                            unsigned num_regs,
                            unsigned addr_offset,
                            unsigned mlen,
                            unsigned rlen,
                            bool header_present)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
          num_regs == 8);
   /* The block size field encodes log2 of the register count. */
   const unsigned block_size = util_logbase2(num_regs);

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, mlen, rlen, header_present));

   brw_inst_set_sfid(devinfo, inst, GFX7_SFID_DATAPORT_DATA_CACHE);
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
   brw_inst_set_scratch_read_write(devinfo, inst, write);
   brw_inst_set_scratch_type(devinfo, inst, dword);
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
   brw_inst_set_scratch_block_size(devinfo, inst, block_size);
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
388 
/**
 * Apply the default instruction state in \p state (exec size, channel
 * group, access mode, masking, predication, flag register, etc.) to a
 * freshly emitted instruction \p insn.
 */
static void
brw_inst_set_state(const struct brw_isa_info *isa,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   /* Software scoreboard (SWSB) dependency info only exists on Gfx12+. */
   if (devinfo->ver >= 12)
      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb));
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   /* Align16 3-src instructions store the flag register in dedicated
    * fields; everything else uses the common flag reg/subreg fields.
    * flag_subreg is a flat index: subreg = idx % 2, reg = idx / 2.
    */
   if (is_3src(isa, brw_inst_opcode(isa, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   /* The AccWrEn control was removed on Gfx20. */
   if (devinfo->ver < 20)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
418 
/**
 * Reserve room for \p nr_insn instructions in the program store, aligned
 * to \p alignment bytes, growing the store if needed.
 *
 * Returns a pointer to the first reserved slot.  Any padding introduced
 * by alignment is zero-filled so the program bytes stay deterministic.
 */
static brw_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
   assert(util_is_power_of_two_or_zero(alignment));
   /* Convert the byte alignment into whole instructions (at least one). */
   const unsigned align_insn = MAX2(alignment / sizeof(brw_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   /* Grow geometrically to amortize reallocation cost. */
   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0.  We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);

   return &p->store[start_insn];
}
447 
/**
 * Pad the instruction stream (with zeroed instructions) so the next
 * emitted instruction starts at the given byte alignment.
 */
void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
   brw_append_insns(p, 0, alignment);
}
453 
454 int
brw_append_data(struct brw_codegen * p,void * data,unsigned size,unsigned alignment)455 brw_append_data(struct brw_codegen *p, void *data,
456                 unsigned size, unsigned alignment)
457 {
458    unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
459    void *dst = brw_append_insns(p, nr_insn, alignment);
460    memcpy(dst, data, size);
461 
462    /* If it's not a whole number of instructions, memset the end */
463    if (size < nr_insn * sizeof(brw_inst))
464       memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);
465 
466    return dst - (void *)p->store;
467 }
468 
#define next_insn brw_next_insn
/**
 * Allocate one zeroed instruction slot at the end of the stream, set its
 * opcode, and apply the codegen context's current default state.
 */
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));

   /* Start from all-zero encoding before programming any fields. */
   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(p->isa, insn, opcode);

   /* Apply the default instruction state */
   brw_inst_set_state(p->isa, insn, p->current);

   return insn;
}
483 
484 void
brw_add_reloc(struct brw_codegen * p,uint32_t id,enum brw_shader_reloc_type type,uint32_t offset,uint32_t delta)485 brw_add_reloc(struct brw_codegen *p, uint32_t id,
486               enum brw_shader_reloc_type type,
487               uint32_t offset, uint32_t delta)
488 {
489    if (p->num_relocs + 1 > p->reloc_array_size) {
490       p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
491       p->relocs = reralloc(p->mem_ctx, p->relocs,
492                            struct brw_shader_reloc, p->reloc_array_size);
493    }
494 
495    p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
496       .id = id,
497       .type = type,
498       .offset = offset,
499       .delta = delta,
500    };
501 }
502 
503 static brw_inst *
brw_alu1(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src)504 brw_alu1(struct brw_codegen *p, unsigned opcode,
505          struct brw_reg dest, struct brw_reg src)
506 {
507    brw_inst *insn = next_insn(p, opcode);
508    brw_set_dest(p, insn, dest);
509    brw_set_src0(p, insn, src);
510    return insn;
511 }
512 
513 static brw_inst *
brw_alu2(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)514 brw_alu2(struct brw_codegen *p, unsigned opcode,
515          struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
516 {
517    /* 64-bit immediates are only supported on 1-src instructions */
518    assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4);
519    assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4);
520 
521    brw_inst *insn = next_insn(p, opcode);
522    brw_set_dest(p, insn, dest);
523    brw_set_src0(p, insn, src0);
524    brw_set_src1(p, insn, src1);
525    return insn;
526 }
527 
528 static int
get_3src_subreg_nr(struct brw_reg reg)529 get_3src_subreg_nr(struct brw_reg reg)
530 {
531    /* Normally, SubRegNum is in bytes (0..31).  However, 3-src instructions
532     * use 32-bit units (components 0..7).  Since they only support F/D/UD
533     * types, this doesn't lose any flexibility, but uses fewer bits.
534     */
535    return reg.subnr / 4;
536 }
537 
538 static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info * devinfo,enum brw_vertical_stride vstride)539 to_3src_align1_vstride(const struct intel_device_info *devinfo,
540                        enum brw_vertical_stride vstride)
541 {
542    switch (vstride) {
543    case BRW_VERTICAL_STRIDE_0:
544       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
545    case BRW_VERTICAL_STRIDE_1:
546       assert(devinfo->ver >= 12);
547       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
548    case BRW_VERTICAL_STRIDE_2:
549       assert(devinfo->ver < 12);
550       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
551    case BRW_VERTICAL_STRIDE_4:
552       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
553    case BRW_VERTICAL_STRIDE_8:
554    case BRW_VERTICAL_STRIDE_16:
555       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
556    default:
557       unreachable("invalid vstride");
558    }
559 }
560 
561 
562 static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)563 to_3src_align1_hstride(enum brw_horizontal_stride hstride)
564 {
565    switch (hstride) {
566    case BRW_HORIZONTAL_STRIDE_0:
567       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
568    case BRW_HORIZONTAL_STRIDE_1:
569       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
570    case BRW_HORIZONTAL_STRIDE_2:
571       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
572    case BRW_HORIZONTAL_STRIDE_4:
573       return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
574    default:
575       unreachable("invalid hstride");
576    }
577 }
578 
579 static brw_inst *
brw_alu3(struct brw_codegen * p,unsigned opcode,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)580 brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
581          struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
582 {
583    const struct intel_device_info *devinfo = p->devinfo;
584    brw_inst *inst = next_insn(p, opcode);
585 
586    assert(dest.nr < XE2_MAX_GRF);
587 
588    if (devinfo->ver >= 10)
589       assert(!(src0.file == BRW_IMMEDIATE_VALUE &&
590                src2.file == BRW_IMMEDIATE_VALUE));
591 
592    assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < XE2_MAX_GRF);
593    assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < XE2_MAX_GRF);
594    assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < XE2_MAX_GRF);
595    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
596    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
597    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
598    assert(src2.address_mode == BRW_ADDRESS_DIRECT);
599 
600    if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
601       assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
602              (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
603               dest.nr == BRW_ARF_ACCUMULATOR));
604 
605       if (devinfo->ver >= 12) {
606          brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
607          brw_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
608       } else {
609          if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
610             brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
611                                               BRW_ALIGN1_3SRC_ACCUMULATOR);
612             brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
613          } else {
614             brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
615                                               BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
616             brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
617          }
618       }
619       brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
620 
621       brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);
622 
623       if (brw_reg_type_is_floating_point(dest.type)) {
624          brw_inst_set_3src_a1_exec_type(devinfo, inst,
625                                         BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
626       } else {
627          brw_inst_set_3src_a1_exec_type(devinfo, inst,
628                                         BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
629       }
630 
631       brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
632       brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
633       brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
634       brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
635 
636       if (src0.file == BRW_IMMEDIATE_VALUE) {
637          brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
638       } else {
639          brw_inst_set_3src_a1_src0_vstride(
640             devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
641          brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
642                                            to_3src_align1_hstride(src0.hstride));
643          brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
644          if (src0.type == BRW_REGISTER_TYPE_NF) {
645             brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
646          } else {
647             brw_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
648          }
649          brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
650          brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
651       }
652       brw_inst_set_3src_a1_src1_vstride(
653          devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
654       brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
655                                         to_3src_align1_hstride(src1.hstride));
656 
657       brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
658       if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
659          brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
660       } else {
661          brw_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
662       }
663       brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
664       brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
665 
666       if (src2.file == BRW_IMMEDIATE_VALUE) {
667          brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
668       } else {
669          brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
670                                            to_3src_align1_hstride(src2.hstride));
671          /* no vstride on src2 */
672          brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
673          brw_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
674          brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
675          brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
676       }
677 
678       assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
679              src0.file == BRW_IMMEDIATE_VALUE ||
680              (src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
681               src0.type == BRW_REGISTER_TYPE_NF));
682       assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
683              (src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
684               src1.nr == BRW_ARF_ACCUMULATOR));
685       assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
686              src2.file == BRW_IMMEDIATE_VALUE);
687 
688       if (devinfo->ver >= 12) {
689          if (src0.file == BRW_IMMEDIATE_VALUE) {
690             brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
691          } else {
692             brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
693          }
694 
695          brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
696 
697          if (src2.file == BRW_IMMEDIATE_VALUE) {
698             brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
699          } else {
700             brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
701          }
702       } else {
703          brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
704                                             src0.file == BRW_GENERAL_REGISTER_FILE ?
705                                             BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
706                                             BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
707          brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
708                                             src1.file == BRW_GENERAL_REGISTER_FILE ?
709                                             BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
710                                             BRW_ALIGN1_3SRC_ACCUMULATOR);
711          brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
712                                             src2.file == BRW_GENERAL_REGISTER_FILE ?
713                                             BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
714                                             BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
715       }
716 
717    } else {
718       assert(dest.file == BRW_GENERAL_REGISTER_FILE);
719       assert(dest.type == BRW_REGISTER_TYPE_F  ||
720              dest.type == BRW_REGISTER_TYPE_DF ||
721              dest.type == BRW_REGISTER_TYPE_D  ||
722              dest.type == BRW_REGISTER_TYPE_UD ||
723              dest.type == BRW_REGISTER_TYPE_HF);
724       brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
725       brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
726       brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
727 
728       assert(src0.file == BRW_GENERAL_REGISTER_FILE);
729       brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
730       brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
731       brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
732       brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
733       brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
734       brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
735                                           src0.vstride == BRW_VERTICAL_STRIDE_0);
736 
737       assert(src1.file == BRW_GENERAL_REGISTER_FILE);
738       brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
739       brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
740       brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
741       brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
742       brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
743       brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
744                                           src1.vstride == BRW_VERTICAL_STRIDE_0);
745 
746       assert(src2.file == BRW_GENERAL_REGISTER_FILE);
747       brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
748       brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
749       brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
750       brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
751       brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
752       brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
753                                           src2.vstride == BRW_VERTICAL_STRIDE_0);
754 
755       /* Set both the source and destination types based on dest.type,
756        * ignoring the source register types.  The MAD and LRP emitters ensure
757        * that all four types are float.  The BFE and BFI2 emitters, however,
758        * may send us mixed D and UD types and want us to ignore that and use
759        * the destination type.
760        */
761       brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
762       brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
763 
764       /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
765        *
766        *    "Three source instructions can use operands with mixed-mode
767        *     precision. When SrcType field is set to :f or :hf it defines
768        *     precision for source 0 only, and fields Src1Type and Src2Type
769        *     define precision for other source operands:
770        *
771        *     0b = :f. Single precision Float (32-bit).
772        *     1b = :hf. Half precision Float (16-bit)."
773        */
774       if (src1.type == BRW_REGISTER_TYPE_HF)
775          brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
776 
777       if (src2.type == BRW_REGISTER_TYPE_HF)
778          brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
779    }
780 
781    return inst;
782 }
783 
/**
 * Shared encoder for the DPAS family of systolic dot-product-accumulate
 * three-source instructions (used by brw_DPAS() below).
 *
 * NOTE(review): the second parameter is declared as
 * enum gfx12_systolic_depth but the caller passes a BRW_OPCODE_* value
 * through it, while sdepth/rcount carry the systolic depth and repeat
 * count.  The enum tag looks swapped with sdepth's -- harmless in C
 * (enums convert to int), but verify against the prototypes in brw_eu.h.
 */
static brw_inst *
brw_dpas_three_src(struct brw_codegen *p, enum gfx12_systolic_depth opcode,
                   unsigned sdepth, unsigned rcount, struct brw_reg dest,
                   struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   /* The destination must be a GRF. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   brw_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
                                       BRW_GENERAL_REGISTER_FILE);
   brw_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, dest.nr);
   brw_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, dest.subnr);

   /* Execution pipe is chosen from the destination type. */
   if (brw_reg_type_is_floating_point(dest.type)) {
      brw_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
   } else {
      brw_inst_set_dpas_3src_exec_type(devinfo, inst,
                                       BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
   }

   brw_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
   /* The rcount field encodes (repeat count - 1). */
   brw_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);

   brw_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
   brw_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
   brw_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
   brw_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);

   /* src0 may be the null register (accumulate from zero) or a GRF. */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
          (src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
           src0.nr == BRW_ARF_NULL));

   brw_inst_set_dpas_3src_src0_reg_file(devinfo, inst, src0.file);
   brw_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, src0.nr);
   brw_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, src0.subnr);

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   brw_inst_set_dpas_3src_src1_reg_file(devinfo, inst, src1.file);
   brw_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, src1.nr);
   brw_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, src1.subnr);
   brw_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);

   brw_inst_set_dpas_3src_src2_reg_file(devinfo, inst, src2.file);
   brw_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, src2.nr);
   brw_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, src2.subnr);
   brw_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);

   return inst;
}
838 
839 /***********************************************************************
840  * Convenience routines.
841  */
/* Expands to a public one-source emitter brw_<OP>(p, dest, src0). */
#define ALU1(OP)					\
brw_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}
849 
/* Expands to a public two-source emitter brw_<OP>(p, dest, src0, src1). */
#define ALU2(OP)					\
brw_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}
858 
/* Expands to a public three-source emitter brw_<OP>().  In align16 mode a
 * <0;1,0>-region (scalar) source must carry an explicit replicate swizzle,
 * so force .xxxx on any scalar operand before emitting.
 */
#define ALU3(OP)					\
brw_inst *brw_##OP(struct brw_codegen *p,		\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)   			\
{                                                       \
   if (p->current->access_mode == BRW_ALIGN_16) {       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
         src0.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
         src1.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
         src2.swizzle = BRW_SWIZZLE_XXXX;               \
   }                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}
876 
/* Like ALU3, but for float-only three-source ops: asserts that dest and all
 * sources agree on F or DF before emitting.
 */
#define ALU3F(OP)                                               \
brw_inst *brw_##OP(struct brw_codegen *p,         \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_REGISTER_TYPE_F ||                   \
          dest.type == BRW_REGISTER_TYPE_DF);                   \
   if (dest.type == BRW_REGISTER_TYPE_F) {                      \
      assert(src0.type == BRW_REGISTER_TYPE_F);                 \
      assert(src1.type == BRW_REGISTER_TYPE_F);                 \
      assert(src2.type == BRW_REGISTER_TYPE_F);                 \
   } else if (dest.type == BRW_REGISTER_TYPE_DF) {              \
      assert(src0.type == BRW_REGISTER_TYPE_DF);                \
      assert(src1.type == BRW_REGISTER_TYPE_DF);                \
      assert(src2.type == BRW_REGISTER_TYPE_DF);                \
   }                                                            \
                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {               \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
   }                                                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
906 
/* Instantiate the public ALU emitters; each line expands to a complete
 * brw_<OP>() function via the ALU1/ALU2/ALU3/ALU3F macros above.
 */
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)
944 
945 brw_inst *
946 brw_ADD(struct brw_codegen *p, struct brw_reg dest,
947         struct brw_reg src0, struct brw_reg src1)
948 {
949    /* 6.2.2: add */
950    if (src0.type == BRW_REGISTER_TYPE_F ||
951        (src0.file == BRW_IMMEDIATE_VALUE &&
952 	src0.type == BRW_REGISTER_TYPE_VF)) {
953       assert(src1.type != BRW_REGISTER_TYPE_UD);
954       assert(src1.type != BRW_REGISTER_TYPE_D);
955    }
956 
957    if (src1.type == BRW_REGISTER_TYPE_F ||
958        (src1.file == BRW_IMMEDIATE_VALUE &&
959 	src1.type == BRW_REGISTER_TYPE_VF)) {
960       assert(src0.type != BRW_REGISTER_TYPE_UD);
961       assert(src0.type != BRW_REGISTER_TYPE_D);
962    }
963 
964    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
965 }
966 
967 brw_inst *
brw_AVG(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)968 brw_AVG(struct brw_codegen *p, struct brw_reg dest,
969         struct brw_reg src0, struct brw_reg src1)
970 {
971    assert(dest.type == src0.type);
972    assert(src0.type == src1.type);
973    switch (src0.type) {
974    case BRW_REGISTER_TYPE_B:
975    case BRW_REGISTER_TYPE_UB:
976    case BRW_REGISTER_TYPE_W:
977    case BRW_REGISTER_TYPE_UW:
978    case BRW_REGISTER_TYPE_D:
979    case BRW_REGISTER_TYPE_UD:
980       break;
981    default:
982       unreachable("Bad type for brw_AVG");
983    }
984 
985    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
986 }
987 
988 brw_inst *
brw_MUL(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)989 brw_MUL(struct brw_codegen *p, struct brw_reg dest,
990         struct brw_reg src0, struct brw_reg src1)
991 {
992    /* 6.32.38: mul */
993    if (src0.type == BRW_REGISTER_TYPE_D ||
994        src0.type == BRW_REGISTER_TYPE_UD ||
995        src1.type == BRW_REGISTER_TYPE_D ||
996        src1.type == BRW_REGISTER_TYPE_UD) {
997       assert(dest.type != BRW_REGISTER_TYPE_F);
998    }
999 
1000    if (src0.type == BRW_REGISTER_TYPE_F ||
1001        (src0.file == BRW_IMMEDIATE_VALUE &&
1002 	src0.type == BRW_REGISTER_TYPE_VF)) {
1003       assert(src1.type != BRW_REGISTER_TYPE_UD);
1004       assert(src1.type != BRW_REGISTER_TYPE_D);
1005    }
1006 
1007    if (src1.type == BRW_REGISTER_TYPE_F ||
1008        (src1.file == BRW_IMMEDIATE_VALUE &&
1009 	src1.type == BRW_REGISTER_TYPE_VF)) {
1010       assert(src0.type != BRW_REGISTER_TYPE_UD);
1011       assert(src0.type != BRW_REGISTER_TYPE_D);
1012    }
1013 
1014    assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1015 	  src0.nr != BRW_ARF_ACCUMULATOR);
1016    assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1017 	  src1.nr != BRW_ARF_ACCUMULATOR);
1018 
1019    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1020 }
1021 
1022 brw_inst *
brw_LINE(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1023 brw_LINE(struct brw_codegen *p, struct brw_reg dest,
1024          struct brw_reg src0, struct brw_reg src1)
1025 {
1026    src0.vstride = BRW_VERTICAL_STRIDE_0;
1027    src0.width = BRW_WIDTH_1;
1028    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1029    return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
1030 }
1031 
1032 brw_inst *
brw_PLN(struct brw_codegen * p,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1)1033 brw_PLN(struct brw_codegen *p, struct brw_reg dest,
1034         struct brw_reg src0, struct brw_reg src1)
1035 {
1036    src0.vstride = BRW_VERTICAL_STRIDE_0;
1037    src0.width = BRW_WIDTH_1;
1038    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1039    src1.vstride = BRW_VERTICAL_STRIDE_8;
1040    src1.width = BRW_WIDTH_8;
1041    src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1042    return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
1043 }
1044 
1045 brw_inst *
brw_DPAS(struct brw_codegen * p,enum gfx12_systolic_depth sdepth,unsigned rcount,struct brw_reg dest,struct brw_reg src0,struct brw_reg src1,struct brw_reg src2)1046 brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
1047          unsigned rcount, struct brw_reg dest, struct brw_reg src0,
1048          struct brw_reg src1, struct brw_reg src2)
1049 {
1050    return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
1051                              src1, src2);
1052 }
1053 
brw_NOP(struct brw_codegen * p)1054 void brw_NOP(struct brw_codegen *p)
1055 {
1056    brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1057    memset(insn, 0, sizeof(*insn));
1058    brw_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
1059 }
1060 
brw_SYNC(struct brw_codegen * p,enum tgl_sync_function func)1061 void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
1062 {
1063    brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
1064    brw_inst_set_cond_modifier(p->devinfo, insn, func);
1065 }
1066 
1067 /***********************************************************************
1068  * Comparisons, if/else/endif
1069  */
1070 
1071 brw_inst *
brw_JMPI(struct brw_codegen * p,struct brw_reg index,unsigned predicate_control)1072 brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1073          unsigned predicate_control)
1074 {
1075    const struct intel_device_info *devinfo = p->devinfo;
1076    struct brw_reg ip = brw_ip_reg();
1077    brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1078 
1079    brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
1080    brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1081    brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1082    brw_inst_set_pred_control(devinfo, inst, predicate_control);
1083 
1084    return inst;
1085 }
1086 
1087 static void
push_if_stack(struct brw_codegen * p,brw_inst * inst)1088 push_if_stack(struct brw_codegen *p, brw_inst *inst)
1089 {
1090    p->if_stack[p->if_stack_depth] = inst - p->store;
1091 
1092    p->if_stack_depth++;
1093    if (p->if_stack_array_size <= p->if_stack_depth) {
1094       p->if_stack_array_size *= 2;
1095       p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1096 			     p->if_stack_array_size);
1097    }
1098 }
1099 
1100 static brw_inst *
pop_if_stack(struct brw_codegen * p)1101 pop_if_stack(struct brw_codegen *p)
1102 {
1103    p->if_stack_depth--;
1104    return &p->store[p->if_stack[p->if_stack_depth]];
1105 }
1106 
1107 static void
push_loop_stack(struct brw_codegen * p,brw_inst * inst)1108 push_loop_stack(struct brw_codegen *p, brw_inst *inst)
1109 {
1110    if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
1111       p->loop_stack_array_size *= 2;
1112       p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1113 			       p->loop_stack_array_size);
1114       p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1115 				     p->loop_stack_array_size);
1116    }
1117 
1118    p->loop_stack[p->loop_stack_depth] = inst - p->store;
1119    p->loop_stack_depth++;
1120    p->if_depth_in_loop[p->loop_stack_depth] = 0;
1121 }
1122 
1123 static brw_inst *
get_inner_do_insn(struct brw_codegen * p)1124 get_inner_do_insn(struct brw_codegen *p)
1125 {
1126    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1127 }
1128 
1129 /* EU takes the value from the flag register and pushes it onto some
1130  * sort of a stack (presumably merging with any flag value already on
1131  * the stack).  Within an if block, the flags at the top of the stack
1132  * control execution on each channel of the unit, eg. on each of the
1133  * 16 pixel values in our wm programs.
1134  *
1135  * When the matching 'else' instruction is reached (presumably by
1136  * countdown of the instruction count patched in by our ELSE/ENDIF
1137  * functions), the relevant flags are inverted.
1138  *
1139  * When the matching 'endif' instruction is reached, the flags are
1140  * popped off.  If the stack is now empty, normal execution resumes.
1141  */
1142 brw_inst *
brw_IF(struct brw_codegen * p,unsigned execute_size)1143 brw_IF(struct brw_codegen *p, unsigned execute_size)
1144 {
1145    const struct intel_device_info *devinfo = p->devinfo;
1146    brw_inst *insn;
1147 
1148    insn = next_insn(p, BRW_OPCODE_IF);
1149 
1150    /* Override the defaults for this instruction:
1151     */
1152    brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1153    if (devinfo->ver < 12)
1154       brw_set_src0(p, insn, brw_imm_d(0));
1155    brw_inst_set_jip(devinfo, insn, 0);
1156    brw_inst_set_uip(devinfo, insn, 0);
1157 
1158    brw_inst_set_exec_size(devinfo, insn, execute_size);
1159    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1160    brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
1161    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1162 
1163    push_if_stack(p, insn);
1164    p->if_depth_in_loop[p->loop_stack_depth]++;
1165    return insn;
1166 }
1167 
1168 /**
1169  * Patch IF and ELSE instructions with appropriate jump targets.
1170  */
1171 static void
patch_IF_ELSE(struct brw_codegen * p,brw_inst * if_inst,brw_inst * else_inst,brw_inst * endif_inst)1172 patch_IF_ELSE(struct brw_codegen *p,
1173               brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
1174 {
1175    const struct intel_device_info *devinfo = p->devinfo;
1176 
1177    assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
1178    assert(endif_inst != NULL);
1179    assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);
1180 
1181    unsigned br = brw_jump_scale(devinfo);
1182 
1183    assert(brw_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
1184    brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));
1185 
1186    if (else_inst == NULL) {
1187       /* Patch IF -> ENDIF */
1188       brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1189       brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
1190    } else {
1191       brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));
1192 
1193       /* Patch ELSE -> ENDIF */
1194       /* The IF instruction's JIP should point just past the ELSE */
1195       brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
1196       /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1197       brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1198 
1199       if (devinfo->ver < 11) {
1200          /* Set the ELSE instruction to use branch_ctrl with a join
1201           * jump target pointing at the NOP inserted right before
1202           * the ENDIF instruction in order to make sure it is
1203           * executed in all cases, since attempting to do the same
1204           * as on other generations could cause the EU to jump at
1205           * the instruction immediately after the ENDIF due to
1206           * Wa_220160235, which could cause the program to continue
1207           * running with all channels disabled.
1208           */
1209          brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
1210          brw_inst_set_branch_control(devinfo, else_inst, true);
1211       } else {
1212          brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
1213       }
1214 
1215       /* Since we don't set branch_ctrl on Gfx11+, the ELSE's
1216        * JIP and UIP both should point to ENDIF on those
1217        * platforms.
1218        */
1219       brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
1220    }
1221 }
1222 
1223 void
brw_ELSE(struct brw_codegen * p)1224 brw_ELSE(struct brw_codegen *p)
1225 {
1226    const struct intel_device_info *devinfo = p->devinfo;
1227    brw_inst *insn;
1228 
1229    insn = next_insn(p, BRW_OPCODE_ELSE);
1230 
1231    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1232    if (devinfo->ver < 12)
1233       brw_set_src0(p, insn, brw_imm_d(0));
1234    brw_inst_set_jip(devinfo, insn, 0);
1235    brw_inst_set_uip(devinfo, insn, 0);
1236 
1237    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1238    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1239 
1240    push_if_stack(p, insn);
1241 }
1242 
/* Close the innermost IF/ELSE block: emit the ENDIF, pop the matching IF
 * (and optional ELSE) off the if-stack and patch their jump targets.
 */
void
brw_ENDIF(struct brw_codegen *p)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;

   assert(p->if_stack_depth > 0);

   if (devinfo->ver < 11 &&
       brw_inst_opcode(p->isa, &p->store[p->if_stack[
                             p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
      /* Insert a NOP to be specified as join instruction within the
       * ELSE block, which is valid for an ELSE instruction with
       * branch_ctrl on.  The ELSE instruction will be set to jump
       * here instead of to the ENDIF instruction, since attempting to
       * do the latter would prevent the ENDIF from being executed in
       * some cases due to Wa_220160235, which could cause the program
       * to continue running with all channels disabled.
       */
      brw_NOP(p);
   }

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   brw_set_src0(p, insn, brw_imm_d(0));

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);

   /* NOTE(review): a JIP of 2 appears to target the instruction following
    * the ENDIF; confirm against brw_jump_scale() for the platforms built.
    */
   brw_inst_set_jip(devinfo, insn, 2);
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
1292 
1293 brw_inst *
brw_BREAK(struct brw_codegen * p)1294 brw_BREAK(struct brw_codegen *p)
1295 {
1296    const struct intel_device_info *devinfo = p->devinfo;
1297    brw_inst *insn;
1298 
1299    insn = next_insn(p, BRW_OPCODE_BREAK);
1300    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1301    brw_set_src0(p, insn, brw_imm_d(0x0));
1302    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1303    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1304 
1305    return insn;
1306 }
1307 
1308 brw_inst *
brw_CONT(struct brw_codegen * p)1309 brw_CONT(struct brw_codegen *p)
1310 {
1311    const struct intel_device_info *devinfo = p->devinfo;
1312    brw_inst *insn;
1313 
1314    insn = next_insn(p, BRW_OPCODE_CONTINUE);
1315    brw_set_dest(p, insn, brw_ip_reg());
1316    brw_set_src0(p, insn, brw_imm_d(0x0));
1317 
1318    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1319    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1320    return insn;
1321 }
1322 
1323 brw_inst *
brw_HALT(struct brw_codegen * p)1324 brw_HALT(struct brw_codegen *p)
1325 {
1326    const struct intel_device_info *devinfo = p->devinfo;
1327    brw_inst *insn;
1328 
1329    insn = next_insn(p, BRW_OPCODE_HALT);
1330    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1331    if (devinfo->ver < 12) {
1332       brw_set_src0(p, insn, brw_imm_d(0x0));
1333    }
1334 
1335    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1336    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1337    return insn;
1338 }
1339 
1340 /* DO/WHILE loop:
1341  *
1342  * The DO/WHILE is just an unterminated loop -- break or continue are
1343  * used for control within the loop.  We have a few ways they can be
1344  * done.
1345  *
1346  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1347  * jip and no DO instruction.
1348  *
1349  * For gfx6, there's no more mask stack, so no need for DO.  WHILE
1350  * just points back to the first instruction of the loop.
1351  */
1352 brw_inst *
brw_DO(struct brw_codegen * p,unsigned execute_size)1353 brw_DO(struct brw_codegen *p, unsigned execute_size)
1354 {
1355    push_loop_stack(p, &p->store[p->nr_insn]);
1356    return &p->store[p->nr_insn];
1357 }
1358 
1359 brw_inst *
brw_WHILE(struct brw_codegen * p)1360 brw_WHILE(struct brw_codegen *p)
1361 {
1362    const struct intel_device_info *devinfo = p->devinfo;
1363    brw_inst *insn, *do_insn;
1364    unsigned br = brw_jump_scale(devinfo);
1365 
1366    insn = next_insn(p, BRW_OPCODE_WHILE);
1367    do_insn = get_inner_do_insn(p);
1368 
1369    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1370    if (devinfo->ver < 12)
1371       brw_set_src0(p, insn, brw_imm_d(0));
1372    brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1373 
1374    brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1375 
1376    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1377 
1378    p->loop_stack_depth--;
1379 
1380    return insn;
1381 }
1382 
brw_CMP(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1383 void brw_CMP(struct brw_codegen *p,
1384 	     struct brw_reg dest,
1385 	     unsigned conditional,
1386 	     struct brw_reg src0,
1387 	     struct brw_reg src1)
1388 {
1389    const struct intel_device_info *devinfo = p->devinfo;
1390    brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1391 
1392    brw_inst_set_cond_modifier(devinfo, insn, conditional);
1393    brw_set_dest(p, insn, dest);
1394    brw_set_src0(p, insn, src0);
1395    brw_set_src1(p, insn, src1);
1396 }
1397 
brw_CMPN(struct brw_codegen * p,struct brw_reg dest,unsigned conditional,struct brw_reg src0,struct brw_reg src1)1398 void brw_CMPN(struct brw_codegen *p,
1399               struct brw_reg dest,
1400               unsigned conditional,
1401               struct brw_reg src0,
1402               struct brw_reg src1)
1403 {
1404    const struct intel_device_info *devinfo = p->devinfo;
1405    brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
1406 
1407    brw_inst_set_cond_modifier(devinfo, insn, conditional);
1408    brw_set_dest(p, insn, dest);
1409    brw_set_src0(p, insn, src0);
1410    brw_set_src1(p, insn, src1);
1411 }
1412 
1413 /***********************************************************************
1414  * Helpers for the various SEND message types:
1415  */
1416 
gfx6_math(struct brw_codegen * p,struct brw_reg dest,unsigned function,struct brw_reg src0,struct brw_reg src1)1417 void gfx6_math(struct brw_codegen *p,
1418 	       struct brw_reg dest,
1419 	       unsigned function,
1420 	       struct brw_reg src0,
1421 	       struct brw_reg src1)
1422 {
1423    const struct intel_device_info *devinfo = p->devinfo;
1424    brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
1425 
1426    assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1427 
1428    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1429 
1430    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1431        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1432        function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1433       assert(src0.type != BRW_REGISTER_TYPE_F);
1434       assert(src1.type != BRW_REGISTER_TYPE_F);
1435       assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
1436              src1.file == BRW_IMMEDIATE_VALUE);
1437       /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
1438        *     INT DIV function does not support source modifiers.
1439        */
1440       assert(!src0.negate);
1441       assert(!src0.abs);
1442       assert(!src1.negate);
1443       assert(!src1.abs);
1444    } else {
1445       assert(src0.type == BRW_REGISTER_TYPE_F ||
1446              (src0.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
1447       assert(src1.type == BRW_REGISTER_TYPE_F ||
1448              (src1.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
1449    }
1450 
1451    brw_inst_set_math_function(devinfo, insn, function);
1452 
1453    brw_set_dest(p, insn, dest);
1454    brw_set_src0(p, insn, src0);
1455    brw_set_src1(p, insn, src1);
1456 }
1457 
1458 /**
1459  * Return the right surface index to access the thread scratch space using
1460  * stateless dataport messages.
1461  */
1462 unsigned
brw_scratch_surface_idx(const struct brw_codegen * p)1463 brw_scratch_surface_idx(const struct brw_codegen *p)
1464 {
1465    /* The scratch space is thread-local so IA coherency is unnecessary. */
1466    return GFX8_BTI_STATELESS_NON_COHERENT;
1467 }
1468 
1469 void
gfx7_block_read_scratch(struct brw_codegen * p,struct brw_reg dest,int num_regs,unsigned offset)1470 gfx7_block_read_scratch(struct brw_codegen *p,
1471                         struct brw_reg dest,
1472                         int num_regs,
1473                         unsigned offset)
1474 {
1475    brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
1476    assert(brw_inst_pred_control(p->devinfo, insn) == BRW_PREDICATE_NONE);
1477 
1478    brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));
1479 
1480    /* The HW requires that the header is present; this is to get the g0.5
1481     * scratch offset.
1482     */
1483    brw_set_src0(p, insn, brw_vec8_grf(0, 0));
1484 
1485    /* According to the docs, offset is "A 12-bit HWord offset into the memory
1486     * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
1487     * is 32 bytes, which happens to be the size of a register.
1488     */
1489    offset /= REG_SIZE;
1490    assert(offset < (1 << 12));
1491 
1492    gfx7_set_dp_scratch_message(p, insn,
1493                                false, /* scratch read */
1494                                false, /* OWords */
1495                                false, /* invalidate after read */
1496                                num_regs,
1497                                offset,
1498                                1,        /* mlen: just g0 */
1499                                num_regs, /* rlen */
1500                                true);    /* header present */
1501 }
1502 
1503 brw_inst *
gfx9_fb_READ(struct brw_codegen * p,struct brw_reg dst,struct brw_reg payload,unsigned binding_table_index,unsigned msg_length,unsigned response_length,bool per_sample)1504 gfx9_fb_READ(struct brw_codegen *p,
1505              struct brw_reg dst,
1506              struct brw_reg payload,
1507              unsigned binding_table_index,
1508              unsigned msg_length,
1509              unsigned response_length,
1510              bool per_sample)
1511 {
1512    const struct intel_device_info *devinfo = p->devinfo;
1513    assert(devinfo->ver >= 9);
1514    brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
1515 
1516    brw_inst_set_sfid(devinfo, insn, GFX6_SFID_DATAPORT_RENDER_CACHE);
1517    brw_set_dest(p, insn, dst);
1518    brw_set_src0(p, insn, payload);
1519    brw_set_desc(
1520       p, insn,
1521       brw_message_desc(devinfo, msg_length, response_length, true) |
1522       brw_fb_read_desc(devinfo, binding_table_index, 0 /* msg_control */,
1523                        1 << brw_get_default_exec_size(p), per_sample));
1524    brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);
1525 
1526    return insn;
1527 }
1528 
1529 /* Adjust the message header's sampler state pointer to
1530  * select the correct group of 16 samplers.
1531  */
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index)
{
   /* The "Sampler Index" field can only store values between 0 and 15.
    * However, we can add an offset to the "Sampler State Pointer"
    * field, effectively selecting a different set of 16 samplers.
    *
    * The "Sampler State Pointer" needs to be aligned to a 32-byte
    * offset, and each sampler state is only 16-bytes, so we can't
    * exclusively use the offset - we have to use both.
    */

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      const int sampler_state_size = 16; /* 16 bytes */
      uint32_t sampler = sampler_index.ud;

      if (sampler >= 16) {
         /* DWord 3 of the header holds the sampler state pointer.  Add the
          * byte offset of the selected group of 16 samplers to the pointer
          * inherited from g0.3; 16 * (sampler / 16) rounds the index down
          * to a multiple of 16.
          */
         brw_ADD(p,
                 get_element_ud(header, 3),
                 get_element_ud(brw_vec8_grf(0, 0), 3),
                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
      }
   } else {
      /* Non-const sampler array indexing case */
      struct brw_reg temp = get_element_ud(header, 3);

      brw_push_insn_state(p);
      /* temp = (index & 0xf0) << 4, i.e. (index / 16) * 256: the byte
       * offset of the group of 16 sampler states (16 samplers of 16 bytes
       * each).
       */
      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
      /* RegDist 1: the SHL consumes the temp just written by the AND. */
      brw_set_default_swsb(p, tgl_swsb_regdist(1));
      brw_SHL(p, temp, temp, brw_imm_ud(4));
      brw_ADD(p,
              get_element_ud(header, 3),
              get_element_ud(brw_vec8_grf(0, 0), 3),
              temp);
      brw_pop_insn_state(p);
   }
}
1570 
/**
 * Emit a SEND to \p sfid with payload \p payload.  The message descriptor
 * \p desc may be an immediate or a scalar UD register; in either case the
 * \p desc_imm bits are OR'ed into it.  \p eot marks the message as
 * end-of-thread.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *send;

   dst = retype(dst, BRW_REGISTER_TYPE_UW);

   assert(desc.type == BRW_REGISTER_TYPE_UD);

   if (desc.file == BRW_IMMEDIATE_VALUE) {
      /* Immediate descriptor: fold desc_imm straight into the encoding. */
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
      brw_set_desc(p, send, desc.ud | desc_imm);
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      /* Build the descriptor in a0.0 with a scalar, unpredicated, NoMask
       * OR so it is computed regardless of the default execution state.
       */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      /* Carry only the source side of the caller's SWSB annotation onto
       * the OR; the destination dependency goes on the SEND below.
       */
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect descriptor to an address register using OR so the
       * caller can specify additional descriptor bits with the desc_imm
       * immediate.
       */
      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));

      if (devinfo->ver >= 12)
         /* Gfx12+ encodes "descriptor comes from a0.0" as a selector bit. */
         brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
      else
         /* Earlier gens take the indirect descriptor as src1 (a0.0). */
         brw_set_src1(p, send, addr);
   }

   brw_set_dest(p, send, dst);
   brw_inst_set_sfid(devinfo, send, sfid);
   brw_inst_set_eot(devinfo, send, eot);
}
1625 
/**
 * Emit a split SEND(S) with two payload sources.  The descriptor and the
 * extended descriptor may each be an immediate or a register; register
 * forms are staged through the address register (a0.0 for desc, a0.2 for
 * ex_desc) with the corresponding immediate bits OR'ed in.
 *
 * \param ex_desc_scratch  derive the extended descriptor from the scratch
 *                         surface offset in g0.5 (Gfx12.5+ only)
 * \param ex_bso           bindless surface offset mode; src1 length then
 *                         comes from bits 10:6 of \p ex_desc_imm
 */
void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *send;

   dst = retype(dst, BRW_REGISTER_TYPE_UW);

   assert(desc.type == BRW_REGISTER_TYPE_UD);

   if (desc.file == BRW_IMMEDIATE_VALUE) {
      desc.ud |= desc_imm;
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      /* Scalar, unpredicated, NoMask state for the descriptor setup. */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect descriptor to an address register using OR so the
       * caller can specify additional descriptor bits with the desc_imm
       * immediate.
       */
      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));

      brw_pop_insn_state(p);
      desc = addr;

      /* The SEND below inherits the destination dependency on a0.0. */
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
       !ex_desc_scratch &&
       (devinfo->ver >= 12 ||
        ((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
      /* ATS-M PRMs, Volume 2d: Command Reference: Structures,
       * EU_INSTRUCTION_SEND instruction
       *
       *    "ExBSO: Exists If: ([ExDesc.IsReg]==true)"
       */
      assert(!ex_bso);
      ex_desc.ud |= ex_desc_imm;
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);

      /* Same scalar NoMask state for the extended-descriptor setup. */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect extended descriptor to an address register using OR
       * so the caller can specify additional descriptor bits with the
       * desc_imm immediate.
       *
       * Even though the instruction dispatcher always pulls the SFID and EOT
       * fields from the instruction itself, actual external unit which
       * processes the message gets the SFID and EOT from the extended
       * descriptor which comes from the address register.  If we don't OR
       * those two bits in, the external unit may get confused and hang.
       */
      unsigned imm_part = ex_bso ? 0 : (ex_desc_imm | sfid | eot << 5);

      if (ex_desc_scratch) {
         /* Or the scratch surface offset together with the immediate part of
          * the extended descriptor.
          */
         assert(devinfo->verx10 >= 125);
         brw_AND(p, addr,
                 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(INTEL_MASK(31, 10)));
         brw_OR(p, addr, addr, brw_imm_ud(imm_part));
      } else if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
         /* ex_desc bits 15:12 don't exist in the instruction encoding prior
          * to Gfx12, so we may have fallen back to an indirect extended
          * descriptor.
          */
         brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
      } else {
         brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
      }

      brw_pop_insn_state(p);
      ex_desc = addr;

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   /* Gfx12+ uses plain SEND for split sends; earlier gens use SENDS. */
   send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));

   if (desc.file == BRW_IMMEDIATE_VALUE) {
      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
      brw_inst_set_send_desc(devinfo, send, desc.ud);
   } else {
      /* The register form must be a0.0 (set up above or by the caller). */
      assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(desc.nr == BRW_ARF_ADDRESS);
      assert(desc.subnr == 0);
      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
   }

   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
      brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
   } else {
      /* Register extended descriptor: must be a DWord-aligned a0 subreg. */
      assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(ex_desc.nr == BRW_ARF_ADDRESS);
      assert((ex_desc.subnr & 0x3) == 0);
      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);
   }

   if (ex_bso) {
      /* The send instruction ExBSO field does not exist with UGM on Gfx20+,
       * it is assumed.
       *
       * BSpec 56890
       */
      if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
         brw_inst_set_send_ex_bso(devinfo, send, true);
      /* With ExBSO the src1 length lives in the instruction itself, taken
       * from bits 10:6 of the immediate extended descriptor.
       */
      brw_inst_set_send_src1_len(devinfo, send, GET_BITS(ex_desc_imm, 10, 6));
   }
   brw_inst_set_sfid(devinfo, send, sfid);
   brw_inst_set_eot(devinfo, send, eot);
}
1772 
/* Emit a surface dataport message via brw_send_indirect_message.  A
 * register-held \p surface index is first masked to its low 8 bits and
 * staged in a0.0 so it can serve as the indirect descriptor.
 */
static void
brw_send_indirect_surface_message(struct brw_codegen *p,
                                  unsigned sfid,
                                  struct brw_reg dst,
                                  struct brw_reg payload,
                                  struct brw_reg surface,
                                  unsigned desc_imm)
{
   if (surface.file != BRW_IMMEDIATE_VALUE) {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      /* Scalar, unpredicated, NoMask state for the descriptor setup. */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_flag_reg(p, 0, 0);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Mask out invalid bits from the surface index to avoid hangs e.g. when
       * some surface array is accessed out of bounds.
       */
      brw_AND(p, addr,
              suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
                        BRW_GET_SWZ(surface.swizzle, 0)),
              brw_imm_ud(0xff));

      brw_pop_insn_state(p);

      surface = addr;
      /* The SEND emitted below carries the destination dependency on a0. */
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
}
1809 
1810 static bool
while_jumps_before_offset(const struct intel_device_info * devinfo,brw_inst * insn,int while_offset,int start_offset)1811 while_jumps_before_offset(const struct intel_device_info *devinfo,
1812                           brw_inst *insn, int while_offset, int start_offset)
1813 {
1814    int scale = 16 / brw_jump_scale(devinfo);
1815    int jip = brw_inst_jip(devinfo, insn);
1816    assert(jip < 0);
1817    return while_offset + jip * scale <= start_offset;
1818 }
1819 
1820 
/* Find the offset of the instruction that ends the innermost control-flow
 * block containing start_offset: the matching ENDIF, an ELSE or HALT at the
 * same nesting depth, or the WHILE of the enclosing loop.  Returns 0 if no
 * block end is found before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
   int offset;
   void *store = p->store;
   const struct intel_device_info *devinfo = p->devinfo;

   /* Tracks how many nested IF/ENDIF pairs we are inside. */
   int depth = 0;

   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      switch (brw_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_IF:
         depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if (depth == 0)
            return offset;
         depth--;
         break;
      case BRW_OPCODE_WHILE:
         /* If the while doesn't jump before our instruction, it's the end
          * of a sibling do...while loop.  Ignore it.
          */
         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
            continue;
         FALLTHROUGH;
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_HALT:
         if (depth == 0)
            return offset;
         break;
      default:
         break;
      }
   }

   /* No enclosing block end found. */
   return 0;
}
1863 
1864 /* There is no DO instruction on gfx6, so to find the end of the loop
1865  * we have to see if the loop is jumping back before our start
1866  * instruction.
1867  */
1868 static int
brw_find_loop_end(struct brw_codegen * p,int start_offset)1869 brw_find_loop_end(struct brw_codegen *p, int start_offset)
1870 {
1871    const struct intel_device_info *devinfo = p->devinfo;
1872    int offset;
1873    void *store = p->store;
1874 
1875    /* Always start after the instruction (such as a WHILE) we're trying to fix
1876     * up.
1877     */
1878    for (offset = next_offset(devinfo, store, start_offset);
1879         offset < p->next_insn_offset;
1880         offset = next_offset(devinfo, store, offset)) {
1881       brw_inst *insn = store + offset;
1882 
1883       if (brw_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
1884 	 if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
1885 	    return offset;
1886       }
1887    }
1888    assert(!"not reached");
1889    return start_offset;
1890 }
1891 
1892 /* After program generation, go back and update the UIP and JIP of
1893  * BREAK, CONT, and HALT instructions to their correct locations.
1894  */
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   int offset;
   /* br: jump-field units per instruction; scale converts bytes to units. */
   int br = brw_jump_scale(devinfo);
   int scale = 16 / br;
   void *store = p->store;

   /* Walk 16 bytes at a time: every instruction must be uncompacted here,
    * which the cmpt_control assert below verifies.
    */
   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
      brw_inst *insn = store + offset;
      assert(brw_inst_cmpt_control(devinfo, insn) == 0);

      switch (brw_inst_opcode(p->isa, insn)) {
      case BRW_OPCODE_BREAK: {
         /* JIP: end of the current block; UIP: end of the loop. */
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         /* Gfx7 UIP points to WHILE; Gfx6 points just after it */
         brw_inst_set_uip(devinfo, insn,
            (brw_find_loop_end(p, offset) - offset) / scale);
         break;
      }

      case BRW_OPCODE_CONTINUE: {
         int block_end_offset = brw_find_next_block_end(p, offset);
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_inst_set_uip(devinfo, insn,
            (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
         break;
      }

      case BRW_OPCODE_ENDIF: {
         /* If no block end exists, jump one instruction forward (1 * br
          * jump units == 16 bytes == the next instruction).
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         brw_inst_set_jip(devinfo, insn, jump);
         break;
      }

      case BRW_OPCODE_HALT: {
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         int block_end_offset = brw_find_next_block_end(p, offset);
         if (block_end_offset == 0) {
            brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
         } else {
            brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         }
         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
         break;
      }

      default:
         break;
      }
   }
}
1967 
/* Number of registers needed for one surface message payload channel set:
 * 1 for SIMD4x2 (exec_size == 0), one register per channel up to SIMD8,
 * two per channel above that.
 */
static unsigned
brw_surface_payload_size(unsigned num_channels,
                         unsigned exec_size /**< 0 for SIMD4x2 */)
{
   /* SIMD4x2 always fits in a single register. */
   if (exec_size == 0)
      return 1;

   const unsigned regs_per_channel = (exec_size <= 8) ? 1 : 2;
   return regs_per_channel * num_channels;
}
1979 
1980 void
brw_untyped_atomic(struct brw_codegen * p,struct brw_reg dst,struct brw_reg payload,struct brw_reg surface,unsigned atomic_op,unsigned msg_length,bool response_expected,bool header_present)1981 brw_untyped_atomic(struct brw_codegen *p,
1982                    struct brw_reg dst,
1983                    struct brw_reg payload,
1984                    struct brw_reg surface,
1985                    unsigned atomic_op,
1986                    unsigned msg_length,
1987                    bool response_expected,
1988                    bool header_present)
1989 {
1990    const struct intel_device_info *devinfo = p->devinfo;
1991    const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
1992    const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
1993    const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
1994    const unsigned response_length =
1995       brw_surface_payload_size(response_expected, exec_size);
1996    const unsigned desc =
1997       brw_message_desc(devinfo, msg_length, response_length, header_present) |
1998       brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op,
1999                                  response_expected);
2000    /* Mask out unused components -- This is especially important in Align16
2001     * mode on generations that don't have native support for SIMD4x2 atomics,
2002     * because unused but enabled components will cause the dataport to perform
2003     * additional atomic operations on the addresses that happen to be in the
2004     * uninitialized Y, Z and W coordinates of the payload.
2005     */
2006    const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
2007 
2008    brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
2009                                      payload, surface, desc);
2010 }
2011 
2012 void
brw_untyped_surface_read(struct brw_codegen * p,struct brw_reg dst,struct brw_reg payload,struct brw_reg surface,unsigned msg_length,unsigned num_channels)2013 brw_untyped_surface_read(struct brw_codegen *p,
2014                          struct brw_reg dst,
2015                          struct brw_reg payload,
2016                          struct brw_reg surface,
2017                          unsigned msg_length,
2018                          unsigned num_channels)
2019 {
2020    const struct intel_device_info *devinfo = p->devinfo;
2021    const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
2022    const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
2023    const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
2024    const unsigned response_length =
2025       brw_surface_payload_size(num_channels, exec_size);
2026    const unsigned desc =
2027       brw_message_desc(devinfo, msg_length, response_length, false) |
2028       brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false);
2029 
2030    brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
2031 }
2032 
2033 void
brw_untyped_surface_write(struct brw_codegen * p,struct brw_reg payload,struct brw_reg surface,unsigned msg_length,unsigned num_channels,bool header_present)2034 brw_untyped_surface_write(struct brw_codegen *p,
2035                           struct brw_reg payload,
2036                           struct brw_reg surface,
2037                           unsigned msg_length,
2038                           unsigned num_channels,
2039                           bool header_present)
2040 {
2041    const struct intel_device_info *devinfo = p->devinfo;
2042    const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
2043    const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
2044    const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
2045    const unsigned desc =
2046       brw_message_desc(devinfo, msg_length, 0, header_present) |
2047       brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true);
2048 
2049    brw_send_indirect_surface_message(p, sfid, brw_null_reg(),
2050                                      payload, surface, desc);
2051 }
2052 
/* Configure \p insn as a legacy (pre-LSC) memory fence message to the
 * render-cache or data-cache dataport.  When \p commit_enable is set the
 * message returns a commit write-back (rlen 1) so the caller can wait on
 * completion.
 */
static void
brw_set_memory_fence_message(struct brw_codegen *p,
                             struct brw_inst *insn,
                             enum brw_message_target sfid,
                             bool commit_enable,
                             unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   /* mlen 1 (header only); rlen 1 only when a write-back is requested. */
   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, (commit_enable ? 1 : 0), true));

   brw_inst_set_sfid(devinfo, insn, sfid);

   switch (sfid) {
   case GFX6_SFID_DATAPORT_RENDER_CACHE:
      brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
      break;
   case GFX7_SFID_DATAPORT_DATA_CACHE:
      brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
      break;
   default:
      unreachable("Not reached");
   }

   if (commit_enable)
      /* Bit 5 of the message control field is "Commit Enable". */
      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);

   /* A non-zero fence BTI is only supported on Gfx11+. */
   assert(devinfo->ver >= 11 || bti == 0);
   brw_inst_set_binding_table_index(devinfo, insn, bti);
}
2084 
/* Configure \p insn as a Gfx12+ fence: a URB fence message for the URB
 * shared function (pre-Gfx20), or an LSC fence whose scope and flush type
 * are decoded from the \p desc bits otherwise.
 */
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_inst *insn,
                               enum brw_message_target sfid,
                               uint32_t desc)
{
   const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
    /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1 * reg_unit(p->devinfo);

   brw_inst_set_sfid(p->devinfo, insn, sfid);

   if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
      brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
                            brw_message_desc(p->devinfo, mlen, rlen, true));
   } else {
      enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
      enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);

      /* Texture cache (TGM) fences always use tile scope with an evict
       * flush, overriding whatever was in desc.
       */
      if (sfid == GFX12_SFID_TGM) {
         scope = LSC_FENCE_TILE;
         flush_type = LSC_FLUSH_TYPE_EVICT;
      }

      /* Wa_14012437816:
       *
       *   "For any fence greater than local scope, always set flush type to
       *    at least invalidate so that fence goes on properly."
       *
       *   "The bug is if flush_type is 'None', the scope is always downgraded
       *    to 'local'."
       *
       * Here set scope to NONE_6 instead of NONE, which has the same effect
       * as NONE but avoids the downgrade to scope LOCAL.
       */
      if (intel_needs_workaround(p->devinfo, 14012437816) &&
          scope > LSC_FENCE_LOCAL &&
          flush_type == LSC_FLUSH_TYPE_NONE) {
         flush_type = LSC_FLUSH_TYPE_NONE_6;
      }

      brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
                                               flush_type, false) |
                            brw_message_desc(p->devinfo, mlen, rlen, false));
   }
}
2131 
/**
 * Emit a scalar, NoMask memory fence message.
 *
 * \param dst            written only for dependency tracking; the fence
 *                       message doesn't write data back to it
 * \param send_op        opcode used for the fence (a SEND variant)
 * \param desc           LSC fence descriptor bits (LSC platforms only)
 * \param commit_enable  request a commit write-back (pre-LSC only)
 * \param bti            binding table index (pre-LSC only)
 */
void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti)
{
   const struct intel_device_info *devinfo = p->devinfo;

   dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
   src = retype(vec1(src), BRW_REGISTER_TYPE_UD);

   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
    * message doesn't write anything back.
    */
   struct brw_inst *insn = next_insn(p, send_op);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, src);

   /* All DG2 hardware requires LSC for fence messages, even A-step */
   if (devinfo->has_lsc)
      gfx12_set_memory_fence_message(p, insn, sfid, desc);
   else
      brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
2162 
/**
 * Emit code that copies the scalar value found in component \p idx of
 * \p src into \p dst (a single-channel "broadcast").
 *
 * \p idx may be an immediate or a register.  \p src must be a direct GRF
 * region without source modifiers, and its type must match \p dst's type.
 * The current default access mode selects the strategy: Align1 uses a
 * SIMD1 MOV with register-indirect addressing, Align16 (SIMD4x2) uses a
 * flag-predicated SEL.
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
   brw_inst *inst;

   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);

   /* Only direct GRF sources without modifiers are supported. */
   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
          src.address_mode == BRW_ADDRESS_DIRECT);
   assert(!src.abs && !src.negate);

   /* Gen12.5 adds the following region restriction:
    *
    *    "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
    *    and Quad-Word data must not be used."
    *
    * We require the source and destination types to match so stomp to an
    * unsigned integer type.
    */
   assert(src.type == dst.type);
   src.type = dst.type = brw_reg_type_from_bit_size(type_sz(src.type) * 8,
                                                    BRW_REGISTER_TYPE_UD);

   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
       idx.file == BRW_IMMEDIATE_VALUE) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
      /* Rewrite src as a scalar region rooted at the selected component
       * (Align16 components are 4 channels wide, hence the 4 * i).
       */
      src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
                     stride(suboffset(src, 4 * i), 0, 4, 1);

      if (type_sz(src.type) > 4 && !devinfo->has_64bit_int) {
         /* No native 64-bit integer MOV: copy the low and high dwords
          * separately.  The second MOV carries no SWSB annotation since it
          * has no dependency on the first.
          */
         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
                    subscript(src, BRW_REGISTER_TYPE_D, 0));
         brw_set_default_swsb(p, tgl_swsb_null());
         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
                    subscript(src, BRW_REGISTER_TYPE_D, 1));
      } else {
         brw_MOV(p, dst, src);
      }
   } else {
      /* From the Haswell PRM section "Register Region Restrictions":
       *
       *    "The lower bits of the AddressImmediate must not overflow to
       *    change the register address.  The lower 5 bits of Address
       *    Immediate when added to lower 5 bits of address register gives
       *    the sub-register offset. The upper bits of Address Immediate
       *    when added to upper bits of address register gives the register
       *    address. Any overflow from sub-register offset is dropped."
       *
       * Fortunately, for broadcast, we never have a sub-register offset so
       * this isn't an issue.
       */
      assert(src.subnr == 0);

      if (align1) {
         const struct brw_reg addr =
            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
         /* Byte offset of the source register (subnr is zero per the
          * assertion above).
          */
         unsigned offset = src.nr * REG_SIZE + src.subnr;
         /* Limit in bytes of the signed indirect addressing immediate. */
         const unsigned limit = 512;

         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
         brw_set_default_flag_reg(p, 0, 0);

         /* Take into account the component size and horizontal stride.
          * NOTE(review): this relies on the region being a standard
          * contiguous-per-row layout (vstride == hstride + width) — the
          * assertion below enforces it.
          */
         assert(src.vstride == src.hstride + src.width);
         brw_SHL(p, addr, vec1(idx),
                 brw_imm_ud(util_logbase2(type_sz(src.type)) +
                            src.hstride - 1));

         /* We can only address up to limit bytes using the indirect
          * addressing immediate, account for the difference if the source
          * register is above this limit.
          */
         if (offset >= limit) {
            /* SWSB: the ADD reads the address register written by the SHL
             * one instruction back.
             */
            brw_set_default_swsb(p, tgl_swsb_regdist(1));
            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
            offset = offset % limit;
         }

         brw_pop_insn_state(p);

         /* SWSB: the indirect MOV(s) below depend on the address-register
          * write emitted just above.
          */
         brw_set_default_swsb(p, tgl_swsb_regdist(1));

         /* Use indirect addressing to fetch the specified component. */
         if (type_sz(src.type) > 4 &&
             (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo) ||
              !devinfo->has_64bit_int)) {
            /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
             *
             *    "When source or destination datatype is 64b or operation is
             *    integer DWord multiply, indirect addressing must not be
             *    used."
             *
             * To work around this issue, we do two integer MOVs
             * instead of one 64-bit MOV.  Because no double value should ever
             * cross a register boundary, it's safe to use the immediate
             * offset in the indirect here to handle adding 4 bytes to the
             * offset and avoid the extra ADD to the register file.
             */
            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
                       retype(brw_vec1_indirect(addr.subnr, offset),
                              BRW_REGISTER_TYPE_D));
            brw_set_default_swsb(p, tgl_swsb_null());
            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
                       retype(brw_vec1_indirect(addr.subnr, offset + 4),
                              BRW_REGISTER_TYPE_D));
         } else {
            brw_MOV(p, dst,
                    retype(brw_vec1_indirect(addr.subnr, offset), src.type));
         }
      } else {
         /* In SIMD4x2 mode the index can be either zero or one, replicate it
          * to all bits of a flag register,
          */
         inst = brw_MOV(p,
                        brw_null_reg(),
                        stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
         /* NOTE(review): flag register 1 appears to be used here so the
          * caller's flag register 0 / live predicate is left untouched —
          * confirm against callers.
          */
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);

         /* and use predicated SEL to pick the right channel. */
         inst = brw_SEL(p, dst,
                        stride(suboffset(src, 4), 4, 4, 1),
                        stride(src, 4, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
      }
   }

   brw_pop_insn_state(p);
}
2308 
2309 
2310 /**
2311  * Emit the SEND message for a barrier
2312  */
2313 void
brw_barrier(struct brw_codegen * p,struct brw_reg src)2314 brw_barrier(struct brw_codegen *p, struct brw_reg src)
2315 {
2316    const struct intel_device_info *devinfo = p->devinfo;
2317    struct brw_inst *inst;
2318 
2319    brw_push_insn_state(p);
2320    brw_set_default_access_mode(p, BRW_ALIGN_1);
2321    inst = next_insn(p, BRW_OPCODE_SEND);
2322    brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
2323    brw_set_src0(p, inst, src);
2324    brw_set_src1(p, inst, brw_null_reg());
2325    brw_set_desc(p, inst, brw_message_desc(devinfo,
2326                                           1 * reg_unit(devinfo), 0, false));
2327 
2328    brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
2329    brw_inst_set_gateway_subfuncid(devinfo, inst,
2330                                   BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
2331 
2332    brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
2333    brw_pop_insn_state(p);
2334 }
2335 
2336 
2337 /**
2338  * Emit the wait instruction for a barrier
2339  */
2340 void
brw_WAIT(struct brw_codegen * p)2341 brw_WAIT(struct brw_codegen *p)
2342 {
2343    const struct intel_device_info *devinfo = p->devinfo;
2344    struct brw_inst *insn;
2345 
2346    struct brw_reg src = brw_notification_reg();
2347 
2348    insn = next_insn(p, BRW_OPCODE_WAIT);
2349    brw_set_dest(p, insn, src);
2350    brw_set_src0(p, insn, src);
2351    brw_set_src1(p, insn, brw_null_reg());
2352 
2353    brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
2354    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
2355 }
2356 
2357 void
brw_float_controls_mode(struct brw_codegen * p,unsigned mode,unsigned mask)2358 brw_float_controls_mode(struct brw_codegen *p,
2359                         unsigned mode, unsigned mask)
2360 {
2361    assert(p->current->mask_control == BRW_MASK_DISABLE);
2362 
2363    /* From the Skylake PRM, Volume 7, page 760:
2364     *  "Implementation Restriction on Register Access: When the control
2365     *   register is used as an explicit source and/or destination, hardware
2366     *   does not ensure execution pipeline coherency. Software must set the
2367     *   thread control field to ‘switch’ for an instruction that uses
2368     *   control register as an explicit operand."
2369     *
2370     * On Gfx12+ this is implemented in terms of SWSB annotations instead.
2371     */
2372    brw_set_default_swsb(p, tgl_swsb_regdist(1));
2373 
2374    brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
2375                             brw_imm_ud(~mask));
2376    brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
2377    if (p->devinfo->ver < 12)
2378       brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
2379 
2380    if (mode) {
2381       brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
2382                                  brw_imm_ud(mode));
2383       brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
2384       if (p->devinfo->ver < 12)
2385          brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
2386    }
2387 
2388    if (p->devinfo->ver >= 12)
2389       brw_SYNC(p, TGL_SYNC_NOP);
2390 }
2391 
2392 void
brw_update_reloc_imm(const struct brw_isa_info * isa,brw_inst * inst,uint32_t value)2393 brw_update_reloc_imm(const struct brw_isa_info *isa,
2394                      brw_inst *inst,
2395                      uint32_t value)
2396 {
2397    const struct intel_device_info *devinfo = isa->devinfo;
2398 
2399    /* Sanity check that the instruction is a MOV of an immediate */
2400    assert(brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
2401    assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
2402 
2403    /* If it was compacted, we can't safely rewrite */
2404    assert(brw_inst_cmpt_control(devinfo, inst) == 0);
2405 
2406    brw_inst_set_imm_ud(devinfo, inst, value);
2407 }
2408 
2409 /* A default value for constants that will be patched at run-time.
2410  * We pick an arbitrary value that prevents instruction compaction.
2411  */
2412 #define DEFAULT_PATCH_IMM 0x4a7cc037
2413 
2414 void
brw_MOV_reloc_imm(struct brw_codegen * p,struct brw_reg dst,enum brw_reg_type src_type,uint32_t id)2415 brw_MOV_reloc_imm(struct brw_codegen *p,
2416                   struct brw_reg dst,
2417                   enum brw_reg_type src_type,
2418                   uint32_t id)
2419 {
2420    assert(type_sz(src_type) == 4);
2421    assert(type_sz(dst.type) == 4);
2422 
2423    brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
2424                  p->next_insn_offset, 0);
2425 
2426    brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
2427 }
2428