/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "compiler/nir/nir.h"
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "main/teximage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"
#include "main/shaderapi.h"

#include "isl/isl.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

static const uint32_t wb_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_WB,
   [9] = SKL_MOCS_WB,
   [10] = CNL_MOCS_WB,
};

static const uint32_t pte_mocs[] = {
   [7] = GEN7_MOCS_L3,
   [8] = BDW_MOCS_PTE,
   [9] = SKL_MOCS_PTE,
   [10] = CNL_MOCS_PTE,
};

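/**
 * Pick the MOCS (memory object control state) entry for a buffer on this
 * gen: buffers shared externally (e.g. for scanout) get the PTE-based
 * entry so the kernel's page-table setting decides cacheability, while
 * driver-private buffers use plain write-back caching.
 */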
uint32_t
brw_get_bo_mocs(const struct gen_device_info *devinfo, struct brw_bo *bo)
{
   return (bo && bo->external ? pte_mocs : wb_mocs)[devinfo->gen];
}

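/**
 * Fill *surf with an isl_surf describing the miptree as seen through the
 * given GL target, rewriting the view and tile offsets when the target's
 * dimension layout differs from the miptree's actual layout in memory.
 */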
78 static void
get_isl_surf(struct brw_context * brw,struct intel_mipmap_tree * mt,GLenum target,struct isl_view * view,uint32_t * tile_x,uint32_t * tile_y,uint32_t * offset,struct isl_surf * surf)79 get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
80 GLenum target, struct isl_view *view,
81 uint32_t *tile_x, uint32_t *tile_y,
82 uint32_t *offset, struct isl_surf *surf)
83 {
84 *surf = mt->surf;
85
86 const struct gen_device_info *devinfo = &brw->screen->devinfo;
87 const enum isl_dim_layout dim_layout =
88 get_isl_dim_layout(devinfo, mt->surf.tiling, target);
89
90 surf->dim = get_isl_surf_dim(target);
91
92 if (surf->dim_layout == dim_layout)
93 return;
94
95 /* The layout of the specified texture target is not compatible with the
96 * actual layout of the miptree structure in memory -- You're entering
97 * dangerous territory, this can only possibly work if you only intended
98 * to access a single level and slice of the texture, and the hardware
99 * supports the tile offset feature in order to allow non-tile-aligned
100 * base offsets, since we'll have to point the hardware to the first
101 * texel of the level instead of relying on the usual base level/layer
102 * controls.
103 */
104 assert(devinfo->has_surface_tile_offset);
105 assert(view->levels == 1 && view->array_len == 1);
106 assert(*tile_x == 0 && *tile_y == 0);
107
108 *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
109 view->base_array_layer,
110 tile_x, tile_y);
111
112 /* Minify the logical dimensions of the texture. */
113 const unsigned l = view->base_level - mt->first_level;
114 surf->logical_level0_px.width = minify(surf->logical_level0_px.width, l);
115 surf->logical_level0_px.height = surf->dim <= ISL_SURF_DIM_1D ? 1 :
116 minify(surf->logical_level0_px.height, l);
117 surf->logical_level0_px.depth = surf->dim <= ISL_SURF_DIM_2D ? 1 :
118 minify(surf->logical_level0_px.depth, l);
119
120 /* Only the base level and layer can be addressed with the overridden
121 * layout.
122 */
123 surf->logical_level0_px.array_len = 1;
124 surf->levels = 1;
125 surf->dim_layout = dim_layout;
126
127 /* The requested slice of the texture is now at the base level and
128 * layer.
129 */
130 view->base_level = 0;
131 view->base_array_layer = 0;
132 }
133
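/**
 * Allocate a SURFACE_STATE in the batch's state area and fill it for the
 * given miptree view via isl, emitting relocations for the main surface
 * and, if present, the auxiliary (MCS/CCS/HiZ) surface.
 */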
static void
brw_emit_surface_state(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       GLenum target, struct isl_view view,
                       enum isl_aux_usage aux_usage,
                       uint32_t *surf_offset, int surf_index,
                       unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t tile_x = mt->level[0].level_x;
   uint32_t tile_y = mt->level[0].level_y;
   uint32_t offset = mt->offset;

   struct isl_surf surf;

   get_isl_surf(brw, mt, target, &view, &tile_x, &tile_y, &offset, &surf);

   union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };

   struct brw_bo *aux_bo = NULL;
   struct isl_surf *aux_surf = NULL;
   uint64_t aux_offset = 0;
   switch (aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_D:
   case ISL_AUX_USAGE_CCS_E:
      aux_surf = &mt->mcs_buf->surf;
      aux_bo = mt->mcs_buf->bo;
      aux_offset = mt->mcs_buf->offset;
      break;

   case ISL_AUX_USAGE_HIZ:
      aux_surf = &mt->hiz_buf->surf;
      aux_bo = mt->hiz_buf->bo;
      aux_offset = 0;
      break;

   case ISL_AUX_USAGE_NONE:
      break;
   }

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface.  Without one, it does nothing.
       */
      clear_color = mt->fast_clear_color;
   }

   void *state = brw_state_batch(brw,
                                 brw->isl_dev.ss.size,
                                 brw->isl_dev.ss.align,
                                 surf_offset);

   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
                       .address = brw_state_reloc(&brw->batch,
                                                  *surf_offset + brw->isl_dev.ss.addr_offset,
                                                  mt->bo, offset, reloc_flags),
                       .aux_surf = aux_surf, .aux_usage = aux_usage,
                       .aux_address = aux_offset,
                       .mocs = brw_get_bo_mocs(devinfo, mt->bo),
                       .clear_color = clear_color,
                       .x_offset_sa = tile_x, .y_offset_sa = tile_y);
   if (aux_surf) {
      /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
       * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
       * contain other control information.  Since buffer addresses are always
       * on 4k boundaries (and thus have their lower 12 bits zero), we can use
       * an ordinary reloc to do the necessary address translation.
       *
       * FIXME: move to the point of assignment.
       */
      assert((aux_offset & 0xfff) == 0);
      uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
      *aux_addr = brw_state_reloc(&brw->batch,
                                  *surf_offset +
                                  brw->isl_dev.ss.aux_addr_offset,
                                  aux_bo, *aux_addr,
                                  reloc_flags);
   }
}

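/**
 * Emit the SURFACE_STATE for a color renderbuffer on Gen6+ via isl and
 * return its offset within the batch's state area.
 */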
static uint32_t
gen6_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

   struct isl_view view = {
      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
      .array_len = MAX2(irb->layer_count, 1),
      .swizzle = ISL_SWIZZLE_IDENTITY,
      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
   };

   uint32_t offset;
   brw_emit_surface_state(brw, mt, mt->target, view,
                          brw->draw_aux_usage[unit],
                          &offset, surf_index,
                          RELOC_WRITE);
   return offset;
}

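/**
 * Translate a GL texture target to the BRW_SURFACE_* type used in the
 * older, hand-packed (pre-isl) SURFACE_STATE layout.
 */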
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(enum isl_tiling tiling)
{
   switch (tiling) {
   case ISL_TILING_X:
      return BRW_SURFACE_TILED;
   case ISL_TILING_Y0:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


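/**
 * The pre-Gen7 SURFACE_STATE multisample count field only distinguishes
 * 1x from 4x, so any multisampled surface is programmed as 4x here.
 */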
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0.  Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0.  This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or anding with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static unsigned
swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
   unsigned scs = (swizzle + 4) & 7;

   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}

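/**
 * Emit the SURFACE_STATE for one bound texture unit of a stage, applying
 * format overrides for gather workarounds, stencil sampling and YUV plane
 * access.
 */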
static void brw_update_texture_surface(struct gl_context *ctx,
                                       unsigned unit,
                                       uint32_t *surf_offset,
                                       bool for_gather,
                                       bool for_txf,
                                       uint32_t plane)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;

   if (obj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);

   } else {
      struct intel_texture_object *intel_obj = intel_texture_object(obj);
      struct intel_mipmap_tree *mt = intel_obj->mt;

      if (plane > 0) {
         if (mt->plane[plane - 1] == NULL)
            return;
         mt = mt->plane[plane - 1];
      }

      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
      /* If this is a view with restricted NumLayers, then our effective depth
       * is not just the miptree depth.
       */
      unsigned view_num_layers;
      if (obj->Immutable && obj->Target != GL_TEXTURE_3D) {
         view_num_layers = obj->NumLayers;
      } else {
         view_num_layers = mt->surf.dim == ISL_SURF_DIM_3D ?
                              mt->surf.logical_level0_px.depth :
                              mt->surf.logical_level0_px.array_len;
      }

      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
       * texturing functions that return a float, as our code generation always
       * selects the .x channel (which would always be 0).
       */
      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
                                brw_get_texture_swizzle(&brw->ctx, obj));

      mesa_format mesa_fmt = plane == 0 ? intel_obj->_Format : mt->format;
      enum isl_format format = translate_tex_format(brw, mesa_fmt,
                                                    for_txf ? GL_DECODE_EXT :
                                                    sampler->sRGBDecode);

      /* Implement gen6 and gen7 gather work-around */
      bool need_green_to_blue = false;
      if (for_gather) {
         if (devinfo->gen == 7 && (format == ISL_FORMAT_R32G32_FLOAT ||
                                   format == ISL_FORMAT_R32G32_SINT ||
                                   format == ISL_FORMAT_R32G32_UINT)) {
            format = ISL_FORMAT_R32G32_FLOAT_LD;
            need_green_to_blue = devinfo->is_haswell;
         } else if (devinfo->gen == 6) {
            /* Sandybridge's gather4 message is broken for integer formats.
             * To work around this, we pretend the surface is UNORM for
             * 8 or 16-bit formats, and emit shader instructions to recover
             * the real INT/UINT value.  For 32-bit formats, we pretend
             * the surface is FLOAT, and simply reinterpret the resulting
             * bits.
             */
            switch (format) {
            case ISL_FORMAT_R8_SINT:
            case ISL_FORMAT_R8_UINT:
               format = ISL_FORMAT_R8_UNORM;
               break;

            case ISL_FORMAT_R16_SINT:
            case ISL_FORMAT_R16_UINT:
               format = ISL_FORMAT_R16_UNORM;
               break;

            case ISL_FORMAT_R32_SINT:
            case ISL_FORMAT_R32_UINT:
               format = ISL_FORMAT_R32_FLOAT;
               break;

            default:
               break;
            }
         }
      }

      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
         if (devinfo->gen <= 7) {
            assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
            mt = mt->r8stencil_mt;
         } else {
            mt = mt->stencil_mt;
         }
         format = ISL_FORMAT_R8_UINT;
      } else if (devinfo->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
         assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
         mt = mt->r8stencil_mt;
         format = ISL_FORMAT_R8_UINT;
      }

      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];

      struct isl_view view = {
         .format = format,
         .base_level = obj->MinLevel + obj->BaseLevel,
         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
         .base_array_layer = obj->MinLayer,
         .array_len = view_num_layers,
         .swizzle = {
            .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
            .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
            .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
            .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
         },
         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
      };

      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
         view.usage |= ISL_SURF_USAGE_CUBE_BIT;

      enum isl_aux_usage aux_usage =
         intel_miptree_texture_aux_usage(brw, mt, format);

      brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                             surf_offset, surf_index,
                             0);
   }
}

void
brw_emit_buffer_surface_state(struct brw_context *brw,
                              uint32_t *out_offset,
                              struct brw_bo *bo,
                              unsigned buffer_offset,
                              unsigned surface_format,
                              unsigned buffer_size,
                              unsigned pitch,
                              unsigned reloc_flags)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *dw = brw_state_batch(brw,
                                  brw->isl_dev.ss.size,
                                  brw->isl_dev.ss.align,
                                  out_offset);

   isl_buffer_fill_state(&brw->isl_dev, dw,
                         .address = !bo ? buffer_offset :
                                    brw_state_reloc(&brw->batch,
                                                    *out_offset + brw->isl_dev.ss.addr_offset,
                                                    bo, buffer_offset,
                                                    reloc_flags),
                         .size = buffer_size,
                         .format = surface_format,
                         .stride = pitch,
                         .mocs = brw_get_bo_mocs(devinfo, bo));
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   struct brw_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
                                  false);
   }

   /* The ARB_texture_buffer_object specification says:
    *
    *    "The number of texels in the buffer texture's texel array is given by
    *
    *       floor(<buffer_size> / (<components> * sizeof(<base_type>)),
    *
    *     where <buffer_size> is the size of the buffer object, in basic
    *     machine units and <components> and <base_type> are the element count
    *     and base data type for elements, as specified in Table X.1.  The
    *     number of texels in the texel array is then clamped to the
    *     implementation-dependent limit MAX_TEXTURE_BUFFER_SIZE_ARB."
    *
    * We need to clamp the size in bytes to MAX_TEXTURE_BUFFER_SIZE * stride,
    * so that when ISL divides by stride to obtain the number of texels, that
    * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
    */
   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);

   if (isl_format == ISL_FORMAT_UNSUPPORTED) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw_emit_buffer_surface_state(brw, surf_offset, bo,
                                 tObj->BufferOffset,
                                 isl_format,
                                 size,
                                 texel_size,
                                 0);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   struct brw_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                              offset_bytes,
                                              buffer_obj->Size - offset_bytes,
                                              true);
   uint32_t *surf = brw_state_batch(brw, 6 * 4, 32, out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
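   /* The buffer is described to the hardware as a pseudo-3D surface, so
    * the (up to 27-bit) element count minus one is split across the width,
    * height and depth fields: width takes bits [6:0], height bits [19:7]
    * and depth bits [26:20].  For example, a hypothetical
    * buffer_size_minus_1 of 0x123456 would be programmed as width = 0x56,
    * height = 0x468 and depth = 0x1.
    */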
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = ISL_FORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = ISL_FORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = ISL_FORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = ISL_FORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = brw_state_reloc(&brw->batch,
                             *out_offset + 4, bo, offset_bytes, RELOC_WRITE);
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_program *fp =
      (struct brw_program *) brw->programs[MESA_SHADER_FRAGMENT];

   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;

   _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
emit_null_surface_state(struct brw_context *brw,
                        const struct gl_framebuffer *fb,
                        uint32_t *out_offset)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t *surf = brw_state_batch(brw,
                                    brw->isl_dev.ss.size,
                                    brw->isl_dev.ss.align,
                                    out_offset);

   /* Use the fb dimensions or 1x1x1 */
   const unsigned width = fb ? _mesa_geometric_width(fb) : 1;
   const unsigned height = fb ? _mesa_geometric_height(fb) : 1;
   const unsigned samples = fb ? _mesa_geometric_samples(fb) : 1;

   if (devinfo->gen != 6 || samples <= 1) {
      isl_null_fill_state(&brw->isl_dev, surf,
                          isl_extent3d(width, height, 1));
      return;
   }

   /* On Gen6, null render targets seem to cause GPU hangs when multisampling.
    * So work around this problem by rendering into a dummy color buffer.
    *
    * To decrease the amount of memory needed by the workaround buffer, we
    * set its pitch to 128 bytes (the width of a Y tile).  This means that
    * the amount of memory needed for the workaround buffer is
    * (width_in_tiles + height_in_tiles - 1) tiles.
    *
    * Note that since the workaround buffer will be interpreted by the
    * hardware as an interleaved multisampled buffer, we need to compute
    * width_in_tiles and height_in_tiles by dividing the width and height
    * by 16 rather than the normal Y-tile size of 32.
    */
   unsigned width_in_tiles = ALIGN(width, 16) / 16;
   unsigned height_in_tiles = ALIGN(height, 16) / 16;
   unsigned pitch_minus_1 = 127;
   unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
   brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                      size_needed);

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              ISL_FORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   surf[1] = brw_state_reloc(&brw->batch, *out_offset + 4,
                             brw->wm.multisampled_null_render_target_bo,
                             0, RELOC_WRITE);

   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = BRW_SURFACE_MULTISAMPLECOUNT_4;
   surf[5] = 0;
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
                                 struct gl_renderbuffer *rb,
                                 unsigned unit,
                                 uint32_t surf_index)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   enum isl_format format;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   if (rb->TexImage && !devinfo->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         assert(irb->align_wa_mt);
         mt = irb->align_wa_mt;
      }
   }

   surf = brw_state_batch(brw, 6 * 4, 32, &offset);

   format = brw->mesa_to_isl_render_format[rb_format];
   if (unlikely(!brw->mesa_format_supports_render[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = brw_state_reloc(&brw->batch, offset + 4, mt->bo,
                             mt->offset +
                             intel_renderbuffer_get_tile_offsets(irb,
                                                                 &tile_x,
                                                                 &tile_y),
                             RELOC_WRITE);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->surf.tiling) |
              (mt->surf.row_pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->surf.samples);

   assert(devinfo->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->surf.image_alignment_el.height == 4 ?
               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (devinfo->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   return offset;
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* Render targets always start at binding table index 0. */
   const unsigned rt_start = 0;

   uint32_t *surf_offsets = brw->wm.base.surf_offset;

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (intel_renderbuffer(rb)) {
            surf_offsets[rt_start + i] = devinfo->gen >= 6 ?
               gen6_update_renderbuffer_surface(brw, rb, i, rt_start + i) :
               gen4_update_renderbuffer_surface(brw, rb, i, rt_start + i);
         } else {
            emit_null_surface_state(brw, fb, &surf_offsets[rt_start + i]);
         }
      }
   } else {
      emit_null_surface_state(brw, fb, &surf_offsets[rt_start]);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE,
   },
   .emit = update_renderbuffer_surfaces,
};

static void
update_renderbuffer_read_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *wm_prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);

   if (wm_prog_data->has_render_target_reads &&
       !ctx->Extensions.MESA_shader_framebuffer_fetch) {
      /* _NEW_BUFFERS */
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
         const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         const unsigned surf_index =
            wm_prog_data->binding_table.render_target_read_start + i;
         uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];

         if (irb) {
            const enum isl_format format = brw->mesa_to_isl_render_format[
               _mesa_get_render_format(ctx, intel_rb_format(irb))];
            assert(isl_format_supports_sampling(&brw->screen->devinfo,
                                                format));

            /* Override the target of the texture if the render buffer is a
             * single slice of a 3D texture (since the minimum array element
             * field of the surface state structure is ignored by the sampler
             * unit for 3D textures on some hardware), or if the render buffer
             * is a 1D array (since shaders always provide the array index
             * coordinate at the Z component to avoid state-dependent
             * recompiles when changing the texture target of the
             * framebuffer).
             */
            const GLenum target =
               (irb->mt->target == GL_TEXTURE_3D &&
                irb->layer_count == 1) ? GL_TEXTURE_2D :
               irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
               irb->mt->target;

            const struct isl_view view = {
               .format = format,
               .base_level = irb->mt_level - irb->mt->first_level,
               .levels = 1,
               .base_array_layer = irb->mt_layer,
               .array_len = irb->layer_count,
               .swizzle = ISL_SWIZZLE_IDENTITY,
               .usage = ISL_SURF_USAGE_TEXTURE_BIT,
            };

            enum isl_aux_usage aux_usage =
               intel_miptree_texture_aux_usage(brw, irb->mt, format);
            if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
               aux_usage = ISL_AUX_USAGE_NONE;

            brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
                                   surf_offset, surf_index,
                                   0);

         } else {
            emit_null_surface_state(brw, fb, surf_offset);
         }
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_read_surfaces,
};

static bool
is_depth_texture(struct intel_texture_object *iobj)
{
   GLenum base_format = _mesa_get_format_base_format(iobj->_Format);
   return base_format == GL_DEPTH_COMPONENT ||
          (base_format == GL_DEPTH_STENCIL && !iobj->base.StencilSampling);
}

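/**
 * Walk a stage's used samplers and emit a SURFACE_STATE for each bound
 * texture, writing the offsets into the binding table section selected by
 * for_gather/plane.
 */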
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather, uint32_t plane)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.plane_start[plane];

   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];
         const bool used_by_txf = prog->info.textures_used_by_txf & (1 << s);
         struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
         struct intel_texture_object *iobj = intel_texture_object(obj);

         /* _NEW_TEXTURE */
         if (!obj)
            continue;

         if ((prog->ShadowSamplers & (1 << s)) && !is_depth_texture(iobj)) {
            /* A programming note for the sample_c message says:
             *
             *    "The Surface Format of the associated surface must be
             *     indicated as supporting shadow mapping as indicated in the
             *     surface format table."
             *
             * Accessing non-depth textures via a sampler*Shadow type is
             * undefined.  GLSL 4.50 page 162 says:
             *
             *    "If a shadow texture call is made to a sampler that does not
             *     represent a depth texture, then results are undefined."
             *
             * We give them a null surface (zeros) for undefined.  We've seen
             * GPU hangs with color buffers and sample_c, so we try and avoid
             * those with this hack.
             */
            emit_null_surface_state(brw, NULL, surf_offset + s);
         } else {
            brw_update_texture_surface(ctx, unit, surf_offset + s, for_gather,
                                       used_by_txf, plane);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
   struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (vs && vs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
      if (tcs && tcs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
      if (tes && tes->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
      if (gs && gs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
      if (fs && fs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
   }

   if (fs) {
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
      update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

static void
brw_update_cs_texture_surfaces(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);

   /* Emit an alternate set of surface states for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (devinfo->gen < 8) {
      if (cs && cs->info.uses_texture_gather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_cs_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_AUX_STATE,
   },
   .emit = brw_update_cs_texture_surfaces,
};

static void
upload_buffer_surface(struct brw_context *brw,
                      struct gl_buffer_binding *binding,
                      uint32_t *out_offset,
                      enum isl_format format,
                      unsigned reloc_flags)
{
   struct gl_context *ctx = &brw->ctx;

   if (binding->BufferObject == ctx->Shared->NullBufferObj) {
      emit_null_surface_state(brw, NULL, out_offset);
   } else {
      ptrdiff_t size = binding->BufferObject->Size - binding->Offset;
      if (!binding->AutomaticSize)
         size = MIN2(size, binding->Size);

      struct intel_buffer_object *iobj =
         intel_buffer_object(binding->BufferObject);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, iobj, binding->Offset, size,
                                (reloc_flags & RELOC_WRITE) != 0);

      brw_emit_buffer_surface_state(brw, out_offset, bo, binding->Offset,
                                    format, size, 1, reloc_flags);
   }
}

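/**
 * Emit buffer surfaces for a stage's UBOs, atomic counter buffers and
 * SSBOs.  UBO surfaces start at binding_table.ubo_start; ABO surfaces
 * start at binding_table.ssbo_start, with the SSBO surfaces placed
 * immediately after them.
 */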
void
brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!prog || (prog->info.num_ubos == 0 &&
                 prog->info.num_ssbos == 0 &&
                 prog->info.num_abos == 0))
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < prog->info.num_ubos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->UniformBufferBindings[prog->sh.UniformBlocks[i]->Binding];
      upload_buffer_surface(brw, binding, &ubo_surf_offsets[i],
                            ISL_FORMAT_R32G32B32A32_FLOAT, 0);
   }

   uint32_t *abo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
   uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos;

   for (int i = 0; i < prog->info.num_abos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->sh.AtomicBuffers[i]->Binding];
      upload_buffer_surface(brw, binding, &abo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   for (int i = 0; i < prog->info.num_ssbos; i++) {
      struct gl_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[prog->sh.ShaderStorageBlocks[i]->Binding];

      upload_buffer_surface(brw, binding, &ssbo_surf_offsets[i],
                            ISL_FORMAT_RAW, RELOC_WRITE);
   }

   stage_state->push_constants_dirty = true;
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog = ctx->FragmentProgram._Current;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->wm.base, brw->wm.base.prog_data);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog, &brw->cs.base, brw->cs.base.prog_data);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   /* _NEW_PROGRAM */
   const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];

   if (cp) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, cp, &brw->cs.base,
                                brw->cs.base.prog_data);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_AUX_STATE |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

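/**
 * Pick the surface format for a shader image unit: write-only access can
 * use the requested format directly, reads need a typed format the
 * hardware can handle, and anything else falls back to raw untyped
 * messages.
 */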
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   enum isl_format hw_format = brw_isl_format_for_mesa_format(format);
   if (access == GL_WRITE_ONLY) {
      return hw_format;
   } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      return isl_lower_storage_image_format(devinfo, hw_format);
   } else {
      /* The hardware doesn't actually support a typed format that we can use
       * so we have to fall back to untyped read/write messages.
       */
      return ISL_FORMAT_RAW;
   }
}

static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * see emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;
   const uint32_t size = MIN2((uint32_t) u->TexObj->BufferSize, obj->Size);
   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

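/**
 * Number of layers covered by a layered image binding: all six cube
 * faces, the (minified) depth for 3D targets, or the array length
 * otherwise.
 */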
static unsigned
get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
                     unsigned level)
{
   if (target == GL_TEXTURE_CUBE_MAP)
      return 6;

   return target == GL_TEXTURE_3D ?
      minify(mt->surf.logical_level0_px.depth, level) :
      mt->surf.logical_level0_px.array_len;
}

static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw_emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size, texel_size,
            access != GL_READ_ONLY ? RELOC_WRITE : 0);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
         const unsigned num_layers = u->Layered ?
            get_image_num_layers(mt, obj->Target, u->Level) : 1;

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
            .base_array_layer = obj->MinLayer + u->_Layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
         };

         if (format == ISL_FORMAT_RAW) {
            brw_emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY ? RELOC_WRITE : 0);

         } else {
            const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
            assert(!intel_miptree_has_color_unresolved(mt,
                                                       view.base_level, 1,
                                                       view.base_array_layer,
                                                       view.array_len));
            brw_emit_surface_state(brw, mt, mt->target, view,
                                   ISL_AUX_USAGE_NONE,
                                   surf_offset, surf_index,
                                   access == GL_READ_ONLY ? 0 : RELOC_WRITE);
         }

         isl_surf_fill_image_param(&brw->isl_dev, param, &mt->surf, &view);
         param->surface_idx = surface_idx;
      }

   } else {
      emit_null_surface_state(brw, NULL, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

void
brw_upload_image_surfaces(struct brw_context *brw,
                          const struct gl_program *prog,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   assert(prog);
   struct gl_context *ctx = &brw->ctx;

   if (prog->info.num_images) {
      for (unsigned i = 0; i < prog->info.num_images; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[prog->sh.ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, prog->sh.ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &stage_state->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
      /* This may have changed the image metadata dependent on the context
       * image unit state and passed to the program as uniforms, make sure
       * that push and pull constants are reuploaded.
       */
      brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_program *wm = brw->programs[MESA_SHADER_FRAGMENT];

   if (wm) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, wm, &brw->wm.base,
                                brw->wm.base.prog_data);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_AUX_STATE |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

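/**
 * Emit a raw buffer surface holding the dispatch's work-group count for
 * compute shaders that read gl_NumWorkGroups.  For a direct dispatch the
 * three GLuints are uploaded on the spot; when a buffer was already
 * provided (e.g. an indirect dispatch), the surface points at it instead.
 */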
static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
   /* BRW_NEW_CS_PROG_DATA */
   const struct brw_cs_prog_data *cs_prog_data =
      brw_cs_prog_data(brw->cs.base.prog_data);

   if (prog && cs_prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         cs_prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      struct brw_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *) brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw_emit_buffer_surface_state(brw, surf_offset,
                                    bo, bo_offset,
                                    ISL_FORMAT_RAW,
                                    3 * sizeof(GLuint), 1,
                                    RELOC_WRITE);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_PROG_DATA |
             BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};