1 /* 2 * Copyright © 2013-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_vec4_surface_builder.h" 25 26 using namespace brw; 27 28 namespace { 29 namespace array_utils { 30 /** 31 * Copy one every \p src_stride logical components of the argument into 32 * one every \p dst_stride logical components of the result. 33 */ 34 static src_reg emit_stride(const vec4_builder & bld,const src_reg & src,unsigned size,unsigned dst_stride,unsigned src_stride)35 emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size, 36 unsigned dst_stride, unsigned src_stride) 37 { 38 if (src_stride == 1 && dst_stride == 1) { 39 return src; 40 } else { 41 const dst_reg dst = bld.vgrf(src.type, 42 DIV_ROUND_UP(size * dst_stride, 4)); 43 44 for (unsigned i = 0; i < size; ++i) 45 bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4), 46 1 << (i * dst_stride % 4)), 47 swizzle(offset(src, 8, i * src_stride / 4), 48 brw_swizzle_for_mask(1 << (i * src_stride % 4)))); 49 50 return src_reg(dst); 51 } 52 } 53 54 /** 55 * Convert a VEC4 into an array of registers with the layout expected by 56 * the recipient shared unit. If \p has_simd4x2 is true the argument is 57 * left unmodified in SIMD4x2 form, otherwise it will be rearranged into 58 * a SIMD8 vector. 59 */ 60 static src_reg emit_insert(const vec4_builder & bld,const src_reg & src,unsigned n,bool has_simd4x2)61 emit_insert(const vec4_builder &bld, const src_reg &src, 62 unsigned n, bool has_simd4x2) 63 { 64 if (src.file == BAD_FILE || n == 0) { 65 return src_reg(); 66 67 } else { 68 /* Pad unused components with zeroes. */ 69 const unsigned mask = (1 << n) - 1; 70 const dst_reg tmp = bld.vgrf(src.type); 71 72 bld.MOV(writemask(tmp, mask), src); 73 if (n < 4) 74 bld.MOV(writemask(tmp, ~mask), brw_imm_d(0)); 75 76 return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1); 77 } 78 } 79 80 /** 81 * Convert an array of registers back into a VEC4 according to the 82 * layout expected from some shared unit. If \p has_simd4x2 is true the 83 * argument is left unmodified in SIMD4x2 form, otherwise it will be 84 * rearranged from SIMD8 form. 85 */ 86 static src_reg emit_extract(const vec4_builder & bld,const src_reg src,unsigned n,bool has_simd4x2)87 emit_extract(const vec4_builder &bld, const src_reg src, 88 unsigned n, bool has_simd4x2) 89 { 90 if (src.file == BAD_FILE || n == 0) { 91 return src_reg(); 92 93 } else { 94 return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4); 95 } 96 } 97 } 98 } 99 100 namespace brw { 101 namespace surface_access { 102 namespace { 103 using namespace array_utils; 104 105 /** 106 * Generate a send opcode for a surface message and return the 107 * result. 108 */ 109 src_reg emit_send(const vec4_builder & bld,enum opcode op,const src_reg & header,const src_reg & addr,unsigned addr_sz,const src_reg & src,unsigned src_sz,const src_reg & surface,unsigned arg,unsigned ret_sz,brw_predicate pred=BRW_PREDICATE_NONE)110 emit_send(const vec4_builder &bld, enum opcode op, 111 const src_reg &header, 112 const src_reg &addr, unsigned addr_sz, 113 const src_reg &src, unsigned src_sz, 114 const src_reg &surface, 115 unsigned arg, unsigned ret_sz, 116 brw_predicate pred = BRW_PREDICATE_NONE) 117 { 118 /* Calculate the total number of components of the payload. */ 119 const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1); 120 const unsigned sz = header_sz + addr_sz + src_sz; 121 122 /* Construct the payload. */ 123 const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); 124 unsigned n = 0; 125 126 if (header_sz) 127 bld.exec_all().MOV(offset(payload, 8, n++), 128 retype(header, BRW_REGISTER_TYPE_UD)); 129 130 for (unsigned i = 0; i < addr_sz; i++) 131 bld.MOV(offset(payload, 8, n++), 132 offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i)); 133 134 for (unsigned i = 0; i < src_sz; i++) 135 bld.MOV(offset(payload, 8, n++), 136 offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i)); 137 138 /* Reduce the dynamically uniform surface index to a single 139 * scalar. 140 */ 141 const src_reg usurface = bld.emit_uniformize(surface); 142 143 /* Emit the message send instruction. */ 144 const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz); 145 vec4_instruction *inst = 146 bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg)); 147 inst->mlen = sz; 148 inst->size_written = ret_sz * REG_SIZE; 149 inst->header_size = header_sz; 150 inst->predicate = pred; 151 152 return src_reg(dst); 153 } 154 } 155 156 /** 157 * Emit an untyped surface read opcode. \p dims determines the number 158 * of components of the address and \p size the number of components of 159 * the returned value. 160 */ 161 src_reg emit_untyped_read(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,unsigned dims,unsigned size,brw_predicate pred)162 emit_untyped_read(const vec4_builder &bld, 163 const src_reg &surface, const src_reg &addr, 164 unsigned dims, unsigned size, 165 brw_predicate pred) 166 { 167 return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(), 168 emit_insert(bld, addr, dims, true), 1, 169 src_reg(), 0, 170 surface, size, 1, pred); 171 } 172 173 /** 174 * Emit an untyped surface write opcode. \p dims determines the number 175 * of components of the address and \p size the number of components of 176 * the argument. 177 */ 178 void emit_untyped_write(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src,unsigned dims,unsigned size,brw_predicate pred)179 emit_untyped_write(const vec4_builder &bld, const src_reg &surface, 180 const src_reg &addr, const src_reg &src, 181 unsigned dims, unsigned size, 182 brw_predicate pred) 183 { 184 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || 185 bld.shader->devinfo->is_haswell); 186 emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(), 187 emit_insert(bld, addr, dims, has_simd4x2), 188 has_simd4x2 ? 1 : dims, 189 emit_insert(bld, src, size, has_simd4x2), 190 has_simd4x2 ? 1 : size, 191 surface, size, 0, pred); 192 } 193 194 /** 195 * Emit an untyped surface atomic opcode. \p dims determines the number 196 * of components of the address and \p rsize the number of components of 197 * the returned value (either zero or one). 198 */ 199 src_reg emit_untyped_atomic(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src0,const src_reg & src1,unsigned dims,unsigned rsize,unsigned op,brw_predicate pred)200 emit_untyped_atomic(const vec4_builder &bld, 201 const src_reg &surface, const src_reg &addr, 202 const src_reg &src0, const src_reg &src1, 203 unsigned dims, unsigned rsize, unsigned op, 204 brw_predicate pred) 205 { 206 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || 207 bld.shader->devinfo->is_haswell); 208 209 /* Zip the components of both sources, they are represented as the X 210 * and Y components of the same vector. 211 */ 212 const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); 213 const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); 214 215 if (size >= 1) { 216 bld.MOV(writemask(srcs, WRITEMASK_X), 217 swizzle(src0, BRW_SWIZZLE_XXXX)); 218 } 219 220 if (size >= 2) { 221 bld.MOV(writemask(srcs, WRITEMASK_Y), 222 swizzle(src1, BRW_SWIZZLE_XXXX)); 223 } 224 225 return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(), 226 emit_insert(bld, addr, dims, has_simd4x2), 227 has_simd4x2 ? 1 : dims, 228 emit_insert(bld, src_reg(srcs), size, has_simd4x2), 229 has_simd4x2 && size ? 1 : size, 230 surface, op, rsize, pred); 231 } 232 233 namespace { 234 /** 235 * Initialize the header present in typed surface messages. 236 */ 237 src_reg emit_typed_message_header(const vec4_builder & bld)238 emit_typed_message_header(const vec4_builder &bld) 239 { 240 const vec4_builder ubld = bld.exec_all(); 241 const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); 242 243 ubld.MOV(dst, brw_imm_d(0)); 244 245 if (bld.shader->devinfo->gen == 7 && 246 !bld.shader->devinfo->is_haswell) { 247 /* The sample mask is used on IVB for the SIMD8 messages that 248 * have no SIMD4x2 variant. We only use the two X channels 249 * in that case, mask everything else out. 250 */ 251 ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11)); 252 } 253 254 return src_reg(dst); 255 } 256 } 257 258 /** 259 * Emit a typed surface read opcode. \p dims determines the number of 260 * components of the address and \p size the number of components of the 261 * returned value. 262 */ 263 src_reg emit_typed_read(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,unsigned dims,unsigned size)264 emit_typed_read(const vec4_builder &bld, const src_reg &surface, 265 const src_reg &addr, unsigned dims, unsigned size) 266 { 267 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || 268 bld.shader->devinfo->is_haswell); 269 const src_reg tmp = 270 emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ, 271 emit_typed_message_header(bld), 272 emit_insert(bld, addr, dims, has_simd4x2), 273 has_simd4x2 ? 1 : dims, 274 src_reg(), 0, 275 surface, size, 276 has_simd4x2 ? 1 : size); 277 278 return emit_extract(bld, tmp, size, has_simd4x2); 279 } 280 281 /** 282 * Emit a typed surface write opcode. \p dims determines the number of 283 * components of the address and \p size the number of components of the 284 * argument. 285 */ 286 void emit_typed_write(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src,unsigned dims,unsigned size)287 emit_typed_write(const vec4_builder &bld, const src_reg &surface, 288 const src_reg &addr, const src_reg &src, 289 unsigned dims, unsigned size) 290 { 291 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || 292 bld.shader->devinfo->is_haswell); 293 emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE, 294 emit_typed_message_header(bld), 295 emit_insert(bld, addr, dims, has_simd4x2), 296 has_simd4x2 ? 1 : dims, 297 emit_insert(bld, src, size, has_simd4x2), 298 has_simd4x2 ? 1 : size, 299 surface, size, 0); 300 } 301 302 /** 303 * Emit a typed surface atomic opcode. \p dims determines the number of 304 * components of the address and \p rsize the number of components of 305 * the returned value (either zero or one). 306 */ 307 src_reg emit_typed_atomic(const vec4_builder & bld,const src_reg & surface,const src_reg & addr,const src_reg & src0,const src_reg & src1,unsigned dims,unsigned rsize,unsigned op,brw_predicate pred)308 emit_typed_atomic(const vec4_builder &bld, 309 const src_reg &surface, const src_reg &addr, 310 const src_reg &src0, const src_reg &src1, 311 unsigned dims, unsigned rsize, unsigned op, 312 brw_predicate pred) 313 { 314 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || 315 bld.shader->devinfo->is_haswell); 316 317 /* Zip the components of both sources, they are represented as the X 318 * and Y components of the same vector. 319 */ 320 const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); 321 const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); 322 323 if (size >= 1) 324 bld.MOV(writemask(srcs, WRITEMASK_X), src0); 325 if (size >= 2) 326 bld.MOV(writemask(srcs, WRITEMASK_Y), src1); 327 328 return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC, 329 emit_typed_message_header(bld), 330 emit_insert(bld, addr, dims, has_simd4x2), 331 has_simd4x2 ? 1 : dims, 332 emit_insert(bld, src_reg(srcs), size, has_simd4x2), 333 has_simd4x2 ? 1 : size, 334 surface, op, rsize, pred); 335 } 336 } 337 } 338