• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_vec4_surface_builder.h"
25 
26 using namespace brw;
27 
28 namespace {
29    namespace array_utils {
30       /**
31        * Copy one every \p src_stride logical components of the argument into
32        * one every \p dst_stride logical components of the result.
33        */
34       static src_reg
emit_stride(const vec4_builder & bld,const src_reg & src,unsigned size,unsigned dst_stride,unsigned src_stride)35       emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
36                   unsigned dst_stride, unsigned src_stride)
37       {
38          if (src_stride == 1 && dst_stride == 1) {
39             return src;
40          } else {
41             const dst_reg dst = bld.vgrf(src.type,
42                                          DIV_ROUND_UP(size * dst_stride, 4));
43 
44             for (unsigned i = 0; i < size; ++i)
45                bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
46                                  1 << (i * dst_stride % 4)),
47                        swizzle(offset(src, 8, i * src_stride / 4),
48                                brw_swizzle_for_mask(1 << (i * src_stride % 4))));
49 
50             return src_reg(dst);
51          }
52       }
53 
54       /**
55        * Convert a VEC4 into an array of registers with the layout expected by
56        * the recipient shared unit.  If \p has_simd4x2 is true the argument is
57        * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
58        * a SIMD8 vector.
59        */
60       static src_reg
emit_insert(const vec4_builder & bld,const src_reg & src,unsigned n,bool has_simd4x2)61       emit_insert(const vec4_builder &bld, const src_reg &src,
62                   unsigned n, bool has_simd4x2)
63       {
64          if (src.file == BAD_FILE || n == 0) {
65             return src_reg();
66 
67          } else {
68             /* Pad unused components with zeroes. */
69             const unsigned mask = (1 << n) - 1;
70             const dst_reg tmp = bld.vgrf(src.type);
71 
72             bld.MOV(writemask(tmp, mask), src);
73             if (n < 4)
74                bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
75 
76             return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
77          }
78       }
79 
80       /**
81        * Convert an array of registers back into a VEC4 according to the
82        * layout expected from some shared unit.  If \p has_simd4x2 is true the
83        * argument is left unmodified in SIMD4x2 form, otherwise it will be
84        * rearranged from SIMD8 form.
85        */
86       static src_reg
emit_extract(const vec4_builder & bld,const src_reg src,unsigned n,bool has_simd4x2)87       emit_extract(const vec4_builder &bld, const src_reg src,
88                    unsigned n, bool has_simd4x2)
89       {
90          if (src.file == BAD_FILE || n == 0) {
91             return src_reg();
92 
93          } else {
94             return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
95          }
96       }
97    }
98 }
99 
100 namespace brw {
101    namespace surface_access {
102       namespace {
103          using namespace array_utils;
104 
105          /**
106           * Generate a send opcode for a surface message and return the
107           * result.
108           */
109          src_reg
emit_send(const vec4_builder & bld,enum opcode op,const src_reg & header,const src_reg & addr,unsigned addr_sz,const src_reg & src,unsigned src_sz,const src_reg & surface,unsigned arg,unsigned ret_sz,brw_predicate pred=BRW_PREDICATE_NONE)110          emit_send(const vec4_builder &bld, enum opcode op,
111                    const src_reg &header,
112                    const src_reg &addr, unsigned addr_sz,
113                    const src_reg &src, unsigned src_sz,
114                    const src_reg &surface,
115                    unsigned arg, unsigned ret_sz,
116                    brw_predicate pred = BRW_PREDICATE_NONE)
117          {
118             /* Calculate the total number of components of the payload. */
119             const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
120             const unsigned sz = header_sz + addr_sz + src_sz;
121 
122             /* Construct the payload. */
123             const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
124             unsigned n = 0;
125 
126             if (header_sz)
127                bld.exec_all().MOV(offset(payload, 8, n++),
128                                   retype(header, BRW_REGISTER_TYPE_UD));
129 
130             for (unsigned i = 0; i < addr_sz; i++)
131                bld.MOV(offset(payload, 8, n++),
132                        offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
133 
134             for (unsigned i = 0; i < src_sz; i++)
135                bld.MOV(offset(payload, 8, n++),
136                        offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
137 
138             /* Reduce the dynamically uniform surface index to a single
139              * scalar.
140              */
141             const src_reg usurface = bld.emit_uniformize(surface);
142 
143             /* Emit the message send instruction. */
144             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
145             vec4_instruction *inst =
146                bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
147             inst->mlen = sz;
148             inst->size_written = ret_sz * REG_SIZE;
149             inst->header_size = header_sz;
150             inst->predicate = pred;
151 
152             return src_reg(dst);
153          }
154       }
155 
156       /**
157        * Emit an untyped surface read opcode.  \p dims determines the number
158        * of components of the address and \p size the number of components of
159        * the returned value.
160        */
161       src_reg
emit_untyped_read(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,unsigned dims,unsigned size,brw_predicate pred)162       emit_untyped_read(const vec4_builder &bld,
163                         const src_reg &surface, const src_reg &addr,
164                         unsigned dims, unsigned size,
165                         brw_predicate pred)
166       {
167          return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
168                           emit_insert(bld, addr, dims, true), 1,
169                           src_reg(), 0,
170                           surface, size, 1, pred);
171       }
172 
173       /**
174        * Emit an untyped surface write opcode.  \p dims determines the number
175        * of components of the address and \p size the number of components of
176        * the argument.
177        */
178       void
emit_untyped_write(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src,unsigned dims,unsigned size,brw_predicate pred)179       emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
180                          const src_reg &addr, const src_reg &src,
181                          unsigned dims, unsigned size,
182                          brw_predicate pred)
183       {
184          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
185                                    bld.shader->devinfo->is_haswell);
186          emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
187                    emit_insert(bld, addr, dims, has_simd4x2),
188                    has_simd4x2 ? 1 : dims,
189                    emit_insert(bld, src, size, has_simd4x2),
190                    has_simd4x2 ? 1 : size,
191                    surface, size, 0, pred);
192       }
193 
194       /**
195        * Emit an untyped surface atomic opcode.  \p dims determines the number
196        * of components of the address and \p rsize the number of components of
197        * the returned value (either zero or one).
198        */
199       src_reg
emit_untyped_atomic(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src0,const src_reg & src1,unsigned dims,unsigned rsize,unsigned op,brw_predicate pred)200       emit_untyped_atomic(const vec4_builder &bld,
201                           const src_reg &surface, const src_reg &addr,
202                           const src_reg &src0, const src_reg &src1,
203                           unsigned dims, unsigned rsize, unsigned op,
204                           brw_predicate pred)
205       {
206          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
207                                    bld.shader->devinfo->is_haswell);
208 
209          /* Zip the components of both sources, they are represented as the X
210           * and Y components of the same vector.
211           */
212          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
213          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
214 
215          if (size >= 1) {
216             bld.MOV(writemask(srcs, WRITEMASK_X),
217                     swizzle(src0, BRW_SWIZZLE_XXXX));
218          }
219 
220          if (size >= 2) {
221             bld.MOV(writemask(srcs, WRITEMASK_Y),
222                     swizzle(src1, BRW_SWIZZLE_XXXX));
223          }
224 
225          return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
226                           emit_insert(bld, addr, dims, has_simd4x2),
227                           has_simd4x2 ? 1 : dims,
228                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
229                           has_simd4x2 && size ? 1 : size,
230                           surface, op, rsize, pred);
231       }
232 
233       namespace {
234          /**
235           * Initialize the header present in typed surface messages.
236           */
237          src_reg
emit_typed_message_header(const vec4_builder & bld)238          emit_typed_message_header(const vec4_builder &bld)
239          {
240             const vec4_builder ubld = bld.exec_all();
241             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
242 
243             ubld.MOV(dst, brw_imm_d(0));
244 
245             if (bld.shader->devinfo->gen == 7 &&
246                 !bld.shader->devinfo->is_haswell) {
247                /* The sample mask is used on IVB for the SIMD8 messages that
248                 * have no SIMD4x2 variant.  We only use the two X channels
249                 * in that case, mask everything else out.
250                 */
251                ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
252             }
253 
254             return src_reg(dst);
255          }
256       }
257 
258       /**
259        * Emit a typed surface read opcode.  \p dims determines the number of
260        * components of the address and \p size the number of components of the
261        * returned value.
262        */
263       src_reg
emit_typed_read(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,unsigned dims,unsigned size)264       emit_typed_read(const vec4_builder &bld, const src_reg &surface,
265                       const src_reg &addr, unsigned dims, unsigned size)
266       {
267          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
268                                    bld.shader->devinfo->is_haswell);
269          const src_reg tmp =
270             emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
271                       emit_typed_message_header(bld),
272                       emit_insert(bld, addr, dims, has_simd4x2),
273                       has_simd4x2 ? 1 : dims,
274                       src_reg(), 0,
275                       surface, size,
276                       has_simd4x2 ? 1 : size);
277 
278          return emit_extract(bld, tmp, size, has_simd4x2);
279       }
280 
281       /**
282        * Emit a typed surface write opcode.  \p dims determines the number of
283        * components of the address and \p size the number of components of the
284        * argument.
285        */
286       void
emit_typed_write(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src,unsigned dims,unsigned size)287       emit_typed_write(const vec4_builder &bld, const src_reg &surface,
288                        const src_reg &addr, const src_reg &src,
289                        unsigned dims, unsigned size)
290       {
291          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
292                                    bld.shader->devinfo->is_haswell);
293          emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
294                    emit_typed_message_header(bld),
295                    emit_insert(bld, addr, dims, has_simd4x2),
296                    has_simd4x2 ? 1 : dims,
297                    emit_insert(bld, src, size, has_simd4x2),
298                    has_simd4x2 ? 1 : size,
299                    surface, size, 0);
300       }
301 
302       /**
303        * Emit a typed surface atomic opcode.  \p dims determines the number of
304        * components of the address and \p rsize the number of components of
305        * the returned value (either zero or one).
306        */
307       src_reg
emit_typed_atomic(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src0,const src_reg & src1,unsigned dims,unsigned rsize,unsigned op,brw_predicate pred)308       emit_typed_atomic(const vec4_builder &bld,
309                         const src_reg &surface, const src_reg &addr,
310                         const src_reg &src0, const src_reg &src1,
311                         unsigned dims, unsigned rsize, unsigned op,
312                         brw_predicate pred)
313       {
314          const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
315                                    bld.shader->devinfo->is_haswell);
316 
317          /* Zip the components of both sources, they are represented as the X
318           * and Y components of the same vector.
319           */
320          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
321          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
322 
323          if (size >= 1)
324             bld.MOV(writemask(srcs, WRITEMASK_X), src0);
325          if (size >= 2)
326             bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
327 
328          return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
329                           emit_typed_message_header(bld),
330                           emit_insert(bld, addr, dims, has_simd4x2),
331                           has_simd4x2 ? 1 : dims,
332                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
333                           has_simd4x2 ? 1 : size,
334                           surface, op, rsize, pred);
335       }
336    }
337 }
338