• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_vec4_surface_builder.h"
25 
26 using namespace brw;
27 
28 namespace {
29    namespace array_utils {
30       /**
31        * Copy one every \p src_stride logical components of the argument into
32        * one every \p dst_stride logical components of the result.
33        */
34       static src_reg
emit_stride(const vec4_builder & bld,const src_reg & src,unsigned size,unsigned dst_stride,unsigned src_stride)35       emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
36                   unsigned dst_stride, unsigned src_stride)
37       {
38          if (src_stride == 1 && dst_stride == 1) {
39             return src;
40          } else {
41             const dst_reg dst = bld.vgrf(src.type,
42                                          DIV_ROUND_UP(size * dst_stride, 4));
43 
44             for (unsigned i = 0; i < size; ++i)
45                bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
46                                  1 << (i * dst_stride % 4)),
47                        swizzle(offset(src, 8, i * src_stride / 4),
48                                brw_swizzle_for_mask(1 << (i * src_stride % 4))));
49 
50             return src_reg(dst);
51          }
52       }
53 
54       /**
55        * Convert a VEC4 into an array of registers with the layout expected by
56        * the recipient shared unit.  If \p has_simd4x2 is true the argument is
57        * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
58        * a SIMD8 vector.
59        */
60       static src_reg
emit_insert(const vec4_builder & bld,const src_reg & src,unsigned n,bool has_simd4x2)61       emit_insert(const vec4_builder &bld, const src_reg &src,
62                   unsigned n, bool has_simd4x2)
63       {
64          if (src.file == BAD_FILE || n == 0) {
65             return src_reg();
66 
67          } else {
68             /* Pad unused components with zeroes. */
69             const unsigned mask = (1 << n) - 1;
70             const dst_reg tmp = bld.vgrf(src.type);
71 
72             bld.MOV(writemask(tmp, mask), src);
73             if (n < 4)
74                bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
75 
76             return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
77          }
78       }
79    }
80 }
81 
82 namespace brw {
83    namespace surface_access {
84       namespace {
85          using namespace array_utils;
86 
87          /**
88           * Generate a send opcode for a surface message and return the
89           * result.
90           */
91          src_reg
emit_send(const vec4_builder & bld,enum opcode op,const src_reg & header,const src_reg & addr,unsigned addr_sz,const src_reg & src,unsigned src_sz,const src_reg & surface,unsigned arg,unsigned ret_sz,brw_predicate pred=BRW_PREDICATE_NONE)92          emit_send(const vec4_builder &bld, enum opcode op,
93                    const src_reg &header,
94                    const src_reg &addr, unsigned addr_sz,
95                    const src_reg &src, unsigned src_sz,
96                    const src_reg &surface,
97                    unsigned arg, unsigned ret_sz,
98                    brw_predicate pred = BRW_PREDICATE_NONE)
99          {
100             /* Calculate the total number of components of the payload. */
101             const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
102             const unsigned sz = header_sz + addr_sz + src_sz;
103 
104             /* Construct the payload. */
105             const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
106             unsigned n = 0;
107 
108             if (header_sz)
109                bld.exec_all().MOV(offset(payload, 8, n++),
110                                   retype(header, BRW_REGISTER_TYPE_UD));
111 
112             for (unsigned i = 0; i < addr_sz; i++)
113                bld.MOV(offset(payload, 8, n++),
114                        offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
115 
116             for (unsigned i = 0; i < src_sz; i++)
117                bld.MOV(offset(payload, 8, n++),
118                        offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
119 
120             /* Reduce the dynamically uniform surface index to a single
121              * scalar.
122              */
123             const src_reg usurface = bld.emit_uniformize(surface);
124 
125             /* Emit the message send instruction. */
126             const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
127             vec4_instruction *inst =
128                bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
129             inst->mlen = sz;
130             inst->size_written = ret_sz * REG_SIZE;
131             inst->header_size = header_sz;
132             inst->predicate = pred;
133 
134             return src_reg(dst);
135          }
136       }
137 
138       /**
139        * Emit an untyped surface read opcode.  \p dims determines the number
140        * of components of the address and \p size the number of components of
141        * the returned value.
142        */
143       src_reg
emit_untyped_read(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,unsigned dims,unsigned size,brw_predicate pred)144       emit_untyped_read(const vec4_builder &bld,
145                         const src_reg &surface, const src_reg &addr,
146                         unsigned dims, unsigned size,
147                         brw_predicate pred)
148       {
149          return emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
150                           emit_insert(bld, addr, dims, true), 1,
151                           src_reg(), 0,
152                           surface, size, 1, pred);
153       }
154 
155       /**
156        * Emit an untyped surface write opcode.  \p dims determines the number
157        * of components of the address and \p size the number of components of
158        * the argument.
159        */
160       void
emit_untyped_write(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src,unsigned dims,unsigned size,brw_predicate pred)161       emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
162                          const src_reg &addr, const src_reg &src,
163                          unsigned dims, unsigned size,
164                          brw_predicate pred)
165       {
166          const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
167          emit_send(bld, VEC4_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
168                    emit_insert(bld, addr, dims, has_simd4x2),
169                    has_simd4x2 ? 1 : dims,
170                    emit_insert(bld, src, size, has_simd4x2),
171                    has_simd4x2 ? 1 : size,
172                    surface, size, 0, pred);
173       }
174 
175       /**
176        * Emit an untyped surface atomic opcode.  \p dims determines the number
177        * of components of the address and \p rsize the number of components of
178        * the returned value (either zero or one).
179        */
180       src_reg
emit_untyped_atomic(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src0,const src_reg & src1,unsigned dims,unsigned rsize,unsigned op,brw_predicate pred)181       emit_untyped_atomic(const vec4_builder &bld,
182                           const src_reg &surface, const src_reg &addr,
183                           const src_reg &src0, const src_reg &src1,
184                           unsigned dims, unsigned rsize, unsigned op,
185                           brw_predicate pred)
186       {
187          const bool has_simd4x2 = bld.shader->devinfo->is_haswell;
188 
189          /* Zip the components of both sources, they are represented as the X
190           * and Y components of the same vector.
191           */
192          const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
193          const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
194 
195          if (size >= 1) {
196             bld.MOV(writemask(srcs, WRITEMASK_X),
197                     swizzle(src0, BRW_SWIZZLE_XXXX));
198          }
199 
200          if (size >= 2) {
201             bld.MOV(writemask(srcs, WRITEMASK_Y),
202                     swizzle(src1, BRW_SWIZZLE_XXXX));
203          }
204 
205          return emit_send(bld, VEC4_OPCODE_UNTYPED_ATOMIC, src_reg(),
206                           emit_insert(bld, addr, dims, has_simd4x2),
207                           has_simd4x2 ? 1 : dims,
208                           emit_insert(bld, src_reg(srcs), size, has_simd4x2),
209                           has_simd4x2 && size ? 1 : size,
210                           surface, op, rsize, pred);
211       }
212    }
213 }
214