/*
 * Copyright © 2010 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "brw_builder.h"

/*
 * This helper takes a source register and un/shuffles it into the destination
 * register.
 *
 * If the source type size is smaller than the destination type size, the
 * operation needed is a component shuffle. The opposite case is an unshuffle.
 * If the source and destination type sizes are equal, the shuffle reduces to
 * a simple MOV.
 *
 * For example, if the source is a 16-bit type and the destination is 32-bit,
 * a 3-component .xyz 16-bit vector on SIMD8 would be:
 *
 *    |x1|x2|x3|x4|x5|x6|x7|x8|y1|y2|y3|y4|y5|y6|y7|y8|
 *    |z1|z2|z3|z4|z5|z6|z7|z8|  |  |  |  |  |  |  |  |
 *
 * This helper will return the following 2 32-bit components with the 16-bit
 * values shuffled:
 *
 *    |x1 y1|x2 y2|x3 y3|x4 y4|x5 y5|x6 y6|x7 y7|x8 y8|
 *    |z1   |z2   |z3   |z4   |z5   |z6   |z7   |z8   |
 *
 * For unshuffle, the example would be the opposite: a 64-bit type source and
 * a 32-bit destination. A 2-component .xy 64-bit vector on SIMD8 would be:
 *
 *    | x1l   x1h | x2l   x2h | x3l   x3h | x4l   x4h |
 *    | x5l   x5h | x6l   x6h | x7l   x7h | x8l   x8h |
 *    | y1l   y1h | y2l   y2h | y3l   y3h | y4l   y4h |
 *    | y5l   y5h | y6l   y6h | y7l   y7h | y8l   y8h |
 *
 * The returned result would be the following 4 32-bit components unshuffled:
 *
 *    | x1l | x2l | x3l | x4l | x5l | x6l | x7l | x8l |
 *    | x1h | x2h | x3h | x4h | x5h | x6h | x7h | x8h |
 *    | y1l | y2l | y3l | y4l | y5l | y6l | y7l | y8l |
 *    | y1h | y2h | y3h | y4h | y5h | y6h | y7h | y8h |
 *
 * - Source and destination registers must not overlap.
 * - Component units are measured in terms of the smaller of the source and
 *   destination types, because we are un/shuffling the smaller components
 *   from/into the bigger ones.
 * - The first_component parameter allows skipping source components.
 */
static void
shuffle_src_to_dst(const brw_builder &bld,
                   const brw_reg &dst,
                   const brw_reg &src,
                   uint32_t first_component,
                   uint32_t components)
{
   if (brw_type_size_bytes(src.type) == brw_type_size_bytes(dst.type)) {
      assert(!regions_overlap(dst,
         brw_type_size_bytes(dst.type) * bld.dispatch_width() * components,
         offset(src, bld, first_component),
         brw_type_size_bytes(src.type) * bld.dispatch_width() * components));
      for (unsigned i = 0; i < components; i++) {
         bld.MOV(retype(offset(dst, bld, i), src.type),
                 offset(src, bld, i + first_component));
      }
   } else if (brw_type_size_bytes(src.type) < brw_type_size_bytes(dst.type)) {
      /* Source is shuffled into destination */
      unsigned size_ratio = brw_type_size_bytes(dst.type) / brw_type_size_bytes(src.type);
      assert(!regions_overlap(dst,
         brw_type_size_bytes(dst.type) * bld.dispatch_width() *
         DIV_ROUND_UP(components, size_ratio),
         offset(src, bld, first_component),
         brw_type_size_bytes(src.type) * bld.dispatch_width() * components));

      brw_reg_type shuffle_type =
         brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(src.type));
      for (unsigned i = 0; i < components; i++) {
         brw_reg shuffle_component_i =
            subscript(offset(dst, bld, i / size_ratio),
                      shuffle_type, i % size_ratio);
         bld.MOV(shuffle_component_i,
                 retype(offset(src, bld, i + first_component), shuffle_type));
      }
   } else {
      /* Source is unshuffled into destination */
      unsigned size_ratio = brw_type_size_bytes(src.type) / brw_type_size_bytes(dst.type);
      assert(!regions_overlap(dst,
         brw_type_size_bytes(dst.type) * bld.dispatch_width() * components,
         offset(src, bld, first_component / size_ratio),
         brw_type_size_bytes(src.type) * bld.dispatch_width() *
         DIV_ROUND_UP(components + (first_component % size_ratio),
                      size_ratio)));

      brw_reg_type shuffle_type =
         brw_type_with_size(BRW_TYPE_D, brw_type_size_bits(dst.type));
      for (unsigned i = 0; i < components; i++) {
         brw_reg shuffle_component_i =
            subscript(offset(src, bld, (first_component + i) / size_ratio),
                      shuffle_type, (first_component + i) % size_ratio);
         bld.MOV(retype(offset(dst, bld, i), shuffle_type),
                 shuffle_component_i);
      }
   }
}
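
/* Hypothetical call-site sketch (illustration only, not part of the original
 * file): packing three 16-bit components into 32-bit destination registers,
 * assuming `bld`, a 32-bit `dst` and a 16-bit `src` laid out as in the
 * diagram above:
 *
 *    shuffle_src_to_dst(bld, dst, src, 0, 3);
 *
 * With size_ratio == 2, components 0 and 1 (x, y) share the first 32-bit
 * register and component 2 (z) lands in the low half of the second one.
 */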

void
brw_builder::shuffle_from_32bit_read(const brw_reg &dst,
                                     const brw_reg &src,
                                     uint32_t first_component,
                                     uint32_t components) const
{
   assert(brw_type_size_bytes(src.type) == 4);

   /* This function takes components in units of the destination type while
    * shuffle_src_to_dst takes components in units of the smallest type
    */
   if (brw_type_size_bytes(dst.type) > 4) {
      assert(brw_type_size_bytes(dst.type) == 8);
      first_component *= 2;
      components *= 2;
   }

   shuffle_src_to_dst(*this, dst, src, first_component, components);
}
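
/* Hypothetical usage sketch (illustration only): after a 32-bit memory read
 * into an assumed register `tmp32`, a caller might unpack two 64-bit
 * destination components of an assumed register `dest` like:
 *
 *    bld.shuffle_from_32bit_read(retype(dest, BRW_TYPE_UQ), tmp32, 0, 2);
 *
 * Component counts here are in units of the 64-bit destination type; the
 * helper doubles them before calling shuffle_src_to_dst().
 */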

/**
 * Get the mask of SIMD channels enabled during dispatch and not yet disabled
 * by discard.  Due to the layout of the sample mask in the fragment shader
 * thread payload, \p bld is required to have a dispatch_width() not greater
 * than 16 for fragment shaders.
 */
brw_reg
brw_sample_mask_reg(const brw_builder &bld)
{
   const fs_visitor &s = *bld.shader;

   if (s.stage != MESA_SHADER_FRAGMENT) {
      return brw_imm_ud(0xffffffff);
   } else if (s.devinfo->ver >= 20 ||
              brw_wm_prog_data(s.prog_data)->uses_kill) {
      return brw_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16);
   } else {
      assert(bld.dispatch_width() <= 16);
      assert(s.devinfo->ver < 20);
      return retype(brw_vec1_grf((bld.group() >= 16 ? 2 : 1), 7),
                    BRW_TYPE_UW);
   }
}
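
/* Hypothetical usage sketch (illustration only): copying the live-channel
 * mask into a flag subregister so a later instruction can be predicated on
 * it, assuming `bld` belongs to a fragment shader and `subreg` is a free
 * flag subregister:
 *
 *    bld.group(1, 0).exec_all()
 *       .MOV(brw_flag_subreg(subreg), brw_sample_mask_reg(bld));
 *
 * This mirrors what brw_emit_predicate_on_sample_mask() below does when the
 * mask still lives in the thread payload.
 */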

/**
 * Predicate the specified instruction on the sample mask.
 */
void
brw_emit_predicate_on_sample_mask(const brw_builder &bld, fs_inst *inst)
{
   assert(bld.shader->stage == MESA_SHADER_FRAGMENT &&
          bld.group() == inst->group &&
          bld.dispatch_width() == inst->exec_size);

   const fs_visitor &s = *bld.shader;
   const brw_reg sample_mask = brw_sample_mask_reg(bld);
   const unsigned subreg = sample_mask_flag_subreg(s);

   if (s.devinfo->ver >= 20 || brw_wm_prog_data(s.prog_data)->uses_kill) {
      assert(sample_mask.file == ARF &&
             sample_mask.nr == brw_flag_subreg(subreg).nr &&
             sample_mask.subnr == brw_flag_subreg(
                subreg + inst->group / 16).subnr);
   } else {
      bld.group(1, 0).exec_all()
         .MOV(brw_flag_subreg(subreg + inst->group / 16), sample_mask);
   }

   if (inst->predicate) {
      assert(inst->predicate == BRW_PREDICATE_NORMAL);
      assert(!inst->predicate_inverse);
      assert(inst->flag_subreg == 0);
      assert(s.devinfo->ver < 20);
      /* Combine the sample mask with the existing predicate by using a
       * vertical predication mode.
       */
      inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
   } else {
      inst->flag_subreg = subreg;
      inst->predicate = BRW_PREDICATE_NORMAL;
      inst->predicate_inverse = false;
   }
}
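
/* Hypothetical usage sketch (illustration only): restricting a previously
 * emitted instruction to channels that survived discard, assuming `bld` and
 * an `fs_inst *write` whose group and exec size match the builder (as the
 * asserts above require):
 *
 *    brw_emit_predicate_on_sample_mask(bld, write);
 *
 * Afterwards `write` is predicated on the sample mask flag, or its existing
 * predicate is combined with it via BRW_PREDICATE_ALIGN1_ALLV.
 */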


brw_reg
brw_fetch_payload_reg(const brw_builder &bld, uint8_t regs[2],
                      brw_reg_type type, unsigned n)
{
   if (!regs[0])
      return brw_reg();

   if (bld.dispatch_width() > 16) {
      const brw_reg tmp = bld.vgrf(type, n);
      const brw_builder hbld = bld.exec_all().group(16, 0);
      const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
      brw_reg *const components = new brw_reg[m * n];

      for (unsigned c = 0; c < n; c++) {
         for (unsigned g = 0; g < m; g++)
            components[c * m + g] =
               offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
      }

      hbld.LOAD_PAYLOAD(tmp, components, m * n, 0);

      delete[] components;
      return tmp;

   } else {
      return brw_reg(retype(brw_vec8_grf(regs[0], 0), type));
   }
}

brw_reg
brw_fetch_barycentric_reg(const brw_builder &bld, uint8_t regs[2])
{
   if (!regs[0])
      return brw_reg();
   else if (bld.shader->devinfo->ver >= 20)
      return brw_fetch_payload_reg(bld, regs, BRW_TYPE_F, 2);

   const brw_reg tmp = bld.vgrf(BRW_TYPE_F, 2);
   const brw_builder hbld = bld.exec_all().group(8, 0);
   const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
   brw_reg *const components = new brw_reg[2 * m];

   for (unsigned c = 0; c < 2; c++) {
      for (unsigned g = 0; g < m; g++)
         components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
                                        hbld, c + 2 * (g % 2));
   }

   hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);

   delete[] components;
   return tmp;
}

void
brw_check_dynamic_msaa_flag(const brw_builder &bld,
                            const struct brw_wm_prog_data *wm_prog_data,
                            enum intel_msaa_flags flag)
{
   fs_inst *inst = bld.AND(bld.null_reg_ud(),
                           brw_dynamic_msaa_flags(wm_prog_data),
                           brw_imm_ud(flag));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
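
/* Hypothetical usage sketch (illustration only): the AND.nz above updates the
 * flag register, so a caller would typically follow it with a predicated
 * instruction. Assuming `bld`, `wm_prog_data`, registers `dst`, `a`, `b`, an
 * example flag, and the set_predicate() helper:
 *
 *    brw_check_dynamic_msaa_flag(bld, wm_prog_data,
 *                                INTEL_MSAA_FLAG_MULTISAMPLE_FBO);
 *    set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst, a, b));
 */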