• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/ralloc.h"
25 
26 #include "util/macros.h" /* Needed for MAX3 and MAX2 for format_rgb9e5 */
27 #include "util/format_rgb9e5.h"
28 #include "util/format_srgb.h"
29 #include "util/u_math.h"
30 
31 #include "blorp_priv.h"
32 #include "dev/intel_debug.h"
33 #include "dev/intel_device_info.h"
34 
35 #include "blorp_nir_builder.h"
36 
37 #define FILE_DEBUG_FLAG DEBUG_BLORP
38 
39 #pragma pack(push, 1)
40 struct blorp_const_color_prog_key
41 {
42    struct blorp_base_key base;
43    bool use_simd16_replicated_data;
44    bool clear_rgb_as_red;
45    uint8_t local_y;
46 };
47 #pragma pack(pop)
48 
49 static bool
blorp_params_get_clear_kernel_fs(struct blorp_batch * batch,struct blorp_params * params,bool want_replicated_data,bool clear_rgb_as_red)50 blorp_params_get_clear_kernel_fs(struct blorp_batch *batch,
51                                  struct blorp_params *params,
52                                  bool want_replicated_data,
53                                  bool clear_rgb_as_red)
54 {
55    const bool use_replicated_data = want_replicated_data &&
56       batch->blorp->isl_dev->info->ver < 20;
57    struct blorp_context *blorp = batch->blorp;
58 
59    const struct blorp_const_color_prog_key blorp_key = {
60       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
61       .base.shader_pipeline = BLORP_SHADER_PIPELINE_RENDER,
62       .use_simd16_replicated_data = use_replicated_data,
63       .clear_rgb_as_red = clear_rgb_as_red,
64       .local_y = 0,
65    };
66 
67    params->shader_type = blorp_key.base.shader_type;
68    params->shader_pipeline = blorp_key.base.shader_pipeline;
69 
70    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
71                             &params->wm_prog_kernel, &params->wm_prog_data))
72       return true;
73 
74    void *mem_ctx = ralloc_context(NULL);
75 
76    nir_builder b;
77    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_FRAGMENT,
78                          blorp_shader_type_to_name(blorp_key.base.shader_type));
79 
80    nir_variable *v_color =
81       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
82    nir_def *color = nir_load_var(&b, v_color);
83 
84    if (clear_rgb_as_red) {
85       nir_def *pos = nir_f2i32(&b, nir_load_frag_coord(&b));
86       nir_def *comp = nir_umod_imm(&b, nir_channel(&b, pos, 0), 3);
87       color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
88    }
89 
90    nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out,
91                                                   glsl_vec4_type(),
92                                                   "gl_FragColor");
93    frag_color->data.location = FRAG_RESULT_COLOR;
94    nir_store_var(&b, frag_color, color, 0xf);
95 
96    const bool multisample_fbo = false;
97    struct blorp_program p =
98       blorp_compile_fs(blorp, mem_ctx, b.shader, multisample_fbo, use_replicated_data);
99 
100    bool result =
101       blorp->upload_shader(batch, MESA_SHADER_FRAGMENT,
102                            &blorp_key, sizeof(blorp_key),
103                            p.kernel, p.kernel_size,
104                            p.prog_data, p.prog_data_size,
105                            &params->wm_prog_kernel, &params->wm_prog_data);
106 
107    ralloc_free(mem_ctx);
108    return result;
109 }
110 
111 static bool
blorp_params_get_clear_kernel_cs(struct blorp_batch * batch,struct blorp_params * params,bool clear_rgb_as_red)112 blorp_params_get_clear_kernel_cs(struct blorp_batch *batch,
113                                  struct blorp_params *params,
114                                  bool clear_rgb_as_red)
115 {
116    struct blorp_context *blorp = batch->blorp;
117 
118    const struct blorp_const_color_prog_key blorp_key = {
119       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_CLEAR),
120       .base.shader_pipeline = BLORP_SHADER_PIPELINE_COMPUTE,
121       .use_simd16_replicated_data = false,
122       .clear_rgb_as_red = clear_rgb_as_red,
123       .local_y = blorp_get_cs_local_y(params),
124    };
125 
126    params->shader_type = blorp_key.base.shader_type;
127    params->shader_pipeline = blorp_key.base.shader_pipeline;
128 
129    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
130                             &params->cs_prog_kernel, &params->cs_prog_data))
131       return true;
132 
133    void *mem_ctx = ralloc_context(NULL);
134 
135    nir_builder b;
136    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_COMPUTE,
137                          "BLORP-gpgpu-clear");
138    blorp_set_cs_dims(b.shader, blorp_key.local_y);
139 
140    nir_def *dst_pos = nir_load_global_invocation_id(&b, 32);
141 
142    nir_variable *v_color =
143       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
144    nir_def *color = nir_load_var(&b, v_color);
145 
146    nir_variable *v_bounds_rect =
147       BLORP_CREATE_NIR_INPUT(b.shader, bounds_rect, glsl_vec4_type());
148    nir_def *bounds_rect = nir_load_var(&b, v_bounds_rect);
149    nir_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect, dst_pos);
150 
151    if (clear_rgb_as_red) {
152       nir_def *comp = nir_umod_imm(&b, nir_channel(&b, dst_pos, 0), 3);
153       color = nir_pad_vec4(&b, nir_vector_extract(&b, color, comp));
154    }
155 
156    nir_push_if(&b, in_bounds);
157 
158    nir_image_store(&b, nir_imm_int(&b, 0),
159                    nir_pad_vector_imm_int(&b, dst_pos, 0, 4),
160                    nir_imm_int(&b, 0),
161                    nir_pad_vector_imm_int(&b, color, 0, 4),
162                    nir_imm_int(&b, 0),
163                    .image_dim = GLSL_SAMPLER_DIM_2D,
164                    .image_array = true,
165                    .access = ACCESS_NON_READABLE);
166 
167    nir_pop_if(&b, NULL);
168 
169    const struct blorp_program p =
170       blorp_compile_cs(blorp, mem_ctx, b.shader);
171 
172    bool result =
173       blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
174                            &blorp_key, sizeof(blorp_key),
175                            p.kernel, p.kernel_size,
176                            p.prog_data, p.prog_data_size,
177                            &params->cs_prog_kernel, &params->cs_prog_data);
178 
179    ralloc_free(mem_ctx);
180    return result;
181 }
182 
183 static bool
blorp_params_get_clear_kernel(struct blorp_batch * batch,struct blorp_params * params,bool use_replicated_data,bool clear_rgb_as_red)184 blorp_params_get_clear_kernel(struct blorp_batch *batch,
185                               struct blorp_params *params,
186                               bool use_replicated_data,
187                               bool clear_rgb_as_red)
188 {
189    if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
190       assert(!use_replicated_data);
191       return blorp_params_get_clear_kernel_cs(batch, params, clear_rgb_as_red);
192    } else {
193       return blorp_params_get_clear_kernel_fs(batch, params,
194                                               use_replicated_data,
195                                               clear_rgb_as_red);
196    }
197 }
198 
199 /* The x0, y0, x1, and y1 parameters must already be populated with the render
200  * area of the framebuffer to be cleared.
201  */
202 static void
get_fast_clear_rect(const struct isl_device * dev,const struct isl_surf * surf,const struct isl_surf * aux_surf,unsigned * x0,unsigned * y0,unsigned * x1,unsigned * y1)203 get_fast_clear_rect(const struct isl_device *dev,
204                     const struct isl_surf *surf,
205                     const struct isl_surf *aux_surf,
206                     unsigned *x0, unsigned *y0,
207                     unsigned *x1, unsigned *y1)
208 {
209    unsigned int x_align, y_align;
210    unsigned int x_scaledown, y_scaledown;
211 
212    /* Only single sampled surfaces need to (and actually can) be resolved. */
213    if (surf->samples == 1) {
214       const uint32_t bs = isl_format_get_layout(surf->format)->bpb / 8;
215       if (dev->info->ver >= 20) {
216          /* From Bspec 57340, "MCS/CCS Buffers, Fast Clear for Render Target(s)":
217           *
218           *    Table "Tile4/Tile64 2D/2D Array/Cube Surface"
219           *    Table "Tile64 3D/Volumetric"
220           *
221           * The below calculation is derived from these tables.
222           */
223          assert(surf->tiling == ISL_TILING_4 ||
224                 surf->tiling == ISL_TILING_64_XE2);
225          x_align = x_scaledown = 64 / bs;
226          y_align = y_scaledown = 4;
227       } else if (dev->info->verx10 >= 125) {
228          /* From Bspec 47709, "MCS/CCS Buffer for Render Target(s)":
229           *
230           *    SW must ensure that clearing rectangle dimensions cover the
231           *    entire area desired, to accomplish this task initial X/Y
232           *    dimensions need to be rounded up to next multiple of scaledown
233           *    factor before dividing by scale down factor:
234           *
235           * The X and Y scale down factors in the table that follows are used
236           * for both alignment and scaling down.
237           */
238          if (surf->tiling == ISL_TILING_4) {
239             x_align = x_scaledown = 1024 / bs;
240             y_align = y_scaledown = 16;
241          } else if (surf->tiling == ISL_TILING_64) {
242             switch (bs) {
243             case 1:
244                x_align = x_scaledown = 128;
245                y_align = y_scaledown = 128;
246                break;
247             case 2:
248                x_align = x_scaledown = 128;
249                y_align = y_scaledown = 64;
250                break;
251             case 4:
252                x_align = x_scaledown = 64;
253                y_align = y_scaledown = 64;
254                break;
255             case 8:
256                x_align = x_scaledown = 64;
257                y_align = y_scaledown = 32;
258                break;
259             case 16:
260                x_align = x_scaledown = 32;
261                y_align = y_scaledown = 32;
262                break;
263             default:
264                unreachable("unsupported bpp");
265             }
266          } else {
267             unreachable("Unsupported tiling format");
268          }
269       } else {
270          /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
271           * Target(s)", beneath the "Fast Color Clear" bullet (p327):
272           *
273           *     Clear pass must have a clear rectangle that must follow
274           *     alignment rules in terms of pixels and lines as shown in the
275           *     table below. Further, the clear-rectangle height and width
276           *     must be multiple of the following dimensions. If the height
277           *     and width of the render target being cleared do not meet these
278           *     requirements, an MCS buffer can be created such that it
279           *     follows the requirement and covers the RT.
280           *
281           * The alignment size in the table that follows is a multiple of the
282           * alignment size that is baked into the CCS surface format.
283           */
284          enum isl_format ccs_format;
285          if (ISL_GFX_VERX10(dev) == 120) {
286             assert(surf->tiling == ISL_TILING_Y0);
287             switch (isl_format_get_layout(surf->format)->bpb) {
288             case   8: ccs_format = ISL_FORMAT_GFX12_CCS_8BPP_Y0;   break;
289             case  16: ccs_format = ISL_FORMAT_GFX12_CCS_16BPP_Y0;  break;
290             case  32: ccs_format = ISL_FORMAT_GFX12_CCS_32BPP_Y0;  break;
291             case  64: ccs_format = ISL_FORMAT_GFX12_CCS_64BPP_Y0;  break;
292             case 128: ccs_format = ISL_FORMAT_GFX12_CCS_128BPP_Y0; break;
293             default:  unreachable("Invalid surface bpb for fast clearing");
294             }
295          } else {
296             assert(aux_surf->usage == ISL_SURF_USAGE_CCS_BIT);
297             ccs_format = aux_surf->format;
298          }
299 
300          x_align = isl_format_get_layout(ccs_format)->bw * 16;
301          y_align = isl_format_get_layout(ccs_format)->bh * 32 /
302                    isl_format_get_layout(ccs_format)->bpb;
303 
304          /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
305           * Target(s)", beneath the "Fast Color Clear" bullet (p327):
306           *
307           *     In order to optimize the performance MCS buffer (when bound to
308           *     1X RT) clear similarly to MCS buffer clear for MSRT case,
309           *     clear rect is required to be scaled by the following factors
310           *     in the horizontal and vertical directions:
311           *
312           * The X and Y scale down factors in the table that follows are each
313           * equal to half the alignment value computed above.
314           */
315          x_scaledown = x_align / 2;
316          y_scaledown = y_align / 2;
317       }
318 
319       if (ISL_DEV_IS_HASWELL(dev)) {
320          /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
321           * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
322           * Clear of Non-MultiSampled Render Target Restrictions":
323           *
324           *   Clear rectangle must be aligned to two times the number of
325           *   pixels in the table shown below due to 16x16 hashing across the
326           *   slice.
327           *
328           * This restriction is only documented to exist on HSW GT3 but
329           * empirical evidence suggests that it's also needed GT2.
330           */
331          x_align *= 2;
332          y_align *= 2;
333       }
334    } else {
335       assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT);
336 
337       /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
338        * Target(s)", beneath the "MSAA Compression" bullet (p326):
339        *
340        *     Clear pass for this case requires that scaled down primitive
341        *     is sent down with upper left coordinate to coincide with
342        *     actual rectangle being cleared. For MSAA, clear rectangle’s
343        *     height and width need to as show in the following table in
344        *     terms of (width,height) of the RT.
345        *
346        *     MSAA  Width of Clear Rect  Height of Clear Rect
347        *      2X     Ceil(1/8*width)      Ceil(1/2*height)
348        *      4X     Ceil(1/8*width)      Ceil(1/2*height)
349        *      8X     Ceil(1/2*width)      Ceil(1/2*height)
350        *     16X         width            Ceil(1/2*height)
351        *
352        * The text "with upper left coordinate to coincide with actual
353        * rectangle being cleared" is a little confusing--it seems to imply
354        * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
355        * feed the pipeline using the rectangle (x,y) to
356        * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on
357        * the number of samples.  Experiments indicate that this is not
358        * quite correct; actually, what the hardware appears to do is to
359        * align whatever rectangle is sent down the pipeline to the nearest
360        * multiple of 2x2 blocks, and then scale it up by a factor of N
361        * horizontally and 2 vertically.  So the resulting alignment is 4
362        * vertically and either 4 or 16 horizontally, and the scaledown
363        * factor is 2 vertically and either 2 or 8 horizontally.
364        *
365        * On Xe2+:
366        * Bspec 57340 (r59562):
367        *
368        *    Fast Clear MCS Surface
369        *    (Table)
370        *
371        * The scaled down values in the Xe2 table are different from what's in
372        * the previous platforms.
373        */
374       switch (aux_surf->format) {
375       case ISL_FORMAT_MCS_2X:
376       case ISL_FORMAT_MCS_4X:
377          x_scaledown = dev->info->ver >= 20 ? 64 : 8;
378          break;
379       case ISL_FORMAT_MCS_8X:
380          x_scaledown = dev->info->ver >= 20 ? 16 : 2;
381          break;
382       case ISL_FORMAT_MCS_16X:
383          x_scaledown = dev->info->ver >= 20 ? 8 : 1;
384          break;
385       default:
386          unreachable("Unexpected MCS format for fast clear");
387       }
388       y_scaledown = dev->info->ver >= 20 ? 4 : 2;
389       x_align = x_scaledown * 2;
390       y_align = y_scaledown * 2;
391    }
392 
393    *x0 = ROUND_DOWN_TO(*x0,  x_align) / x_scaledown;
394    *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown;
395    *x1 = ALIGN(*x1, x_align) / x_scaledown;
396    *y1 = ALIGN(*y1, y_align) / y_scaledown;
397 }
398 
399 static void
convert_rt_from_3d_to_2d(const struct isl_device * isl_dev,struct blorp_surface_info * info)400 convert_rt_from_3d_to_2d(const struct isl_device *isl_dev,
401                          struct blorp_surface_info *info)
402 {
403    assert(info->surf.dim == ISL_SURF_DIM_3D);
404    assert(info->surf.dim_layout == ISL_DIM_LAYOUT_GFX4_2D);
405 
406    /* Some tilings have different swizzling between 2D/3D images. So,
407     * conversion would not be possible.
408     */
409    assert(!isl_tiling_is_std_y(info->surf.tiling));
410    assert(!isl_tiling_is_64(info->surf.tiling));
411 
412    /* Convert from 3D to 2D-array. */
413    uint32_t array_pitch_el_rows = info->surf.array_pitch_el_rows;
414    uint64_t size_B = info->surf.size_B;
415    bool ok = isl_surf_init(isl_dev, &info->surf,
416                            .dim = ISL_SURF_DIM_2D,
417                            .format = info->surf.format,
418                            .width = info->surf.logical_level0_px.w,
419                            .height = info->surf.logical_level0_px.h,
420                            .depth = 1,
421                            .levels = info->surf.levels,
422                            .array_len = info->surf.logical_level0_px.d,
423                            .samples = 1,
424                            .row_pitch_B = info->surf.row_pitch_B,
425                            .usage = info->surf.usage,
426                            .tiling_flags = (1 << info->surf.tiling));
427    assert(ok);
428 
429    /* Fix up the array-pitch and size. */
430    info->surf.array_pitch_el_rows = array_pitch_el_rows;
431    info->surf.size_B = size_B;
432 }
433 
434 void
blorp_fast_clear(struct blorp_batch * batch,const struct blorp_surf * surf,enum isl_format format,struct isl_swizzle swizzle,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1)435 blorp_fast_clear(struct blorp_batch *batch,
436                  const struct blorp_surf *surf,
437                  enum isl_format format, struct isl_swizzle swizzle,
438                  uint32_t level, uint32_t start_layer, uint32_t num_layers,
439                  uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
440 {
441    struct blorp_params params;
442    blorp_params_init(&params);
443    params.num_layers = num_layers;
444    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
445 
446    params.x0 = x0;
447    params.y0 = y0;
448    params.x1 = x1;
449    params.y1 = y1;
450 
451    if (batch->blorp->isl_dev->info->ver >= 20) {
452       union isl_color_value clear_color =
453          isl_color_value_swizzle_inv(surf->clear_color, swizzle);
454       if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) {
455          clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32);
456          format = ISL_FORMAT_R32_UINT;
457       } else if (format == ISL_FORMAT_L8_UNORM_SRGB) {
458          clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]);
459          format = ISL_FORMAT_R8_UNORM;
460       }
461 
462       /* Bspec 57340 (r59562):
463        *
464        *   Overview of Fast Clear:
465        *      Pixel shader's color output is treated as Clear Value, value
466        *      should be a constant.
467        */
468       memcpy(&params.wm_inputs.clear_color, &clear_color, 4 * sizeof(float));
469    } else {
470       /* BSpec: 2423 (r153658):
471        *
472        *   The pixel shader kernel requires no attributes, and delivers a
473        *   value of 0xFFFFFFFF in all channels of the render target write
474        *   message The replicated color message should be used.
475        */
476       memset(&params.wm_inputs.clear_color, 0xff, 4 * sizeof(float));
477    }
478 
479    params.fast_clear_op = ISL_AUX_OP_FAST_CLEAR;
480 
481    get_fast_clear_rect(batch->blorp->isl_dev, surf->surf, surf->aux_surf,
482                        &params.x0, &params.y0, &params.x1, &params.y1);
483 
484    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
485       return;
486 
487    blorp_surface_info_init(batch, &params.dst, surf, level,
488                                start_layer, format, true);
489 
490    /* BSpec: 46969 (r45602):
491     *
492     *   3D/Volumetric surfaces do not support Fast Clear operation.
493     */
494    if (ISL_GFX_VERX10(batch->blorp->isl_dev) == 120 &&
495        params.dst.surf.dim == ISL_SURF_DIM_3D) {
496       convert_rt_from_3d_to_2d(batch->blorp->isl_dev, &params.dst);
497    }
498 
499    params.num_samples = params.dst.surf.samples;
500 
501    assert(params.num_samples != 0);
502    if (params.num_samples == 1)
503       params.op = BLORP_OP_CCS_COLOR_CLEAR;
504    else
505       params.op = BLORP_OP_MCS_COLOR_CLEAR;
506 
507    /* If a swizzle was provided, we need to swizzle the clear color so that
508     * the hardware color format conversion will work properly.
509     */
510    params.dst.clear_color =
511       isl_color_value_swizzle_inv(params.dst.clear_color, swizzle);
512 
513    batch->blorp->exec(batch, &params);
514 }
515 
516 bool
blorp_clear_supports_blitter(struct blorp_context * blorp,const struct blorp_surf * surf,uint8_t color_write_disable,bool blend_enabled)517 blorp_clear_supports_blitter(struct blorp_context *blorp,
518                              const struct blorp_surf *surf,
519                              uint8_t color_write_disable,
520                              bool blend_enabled)
521 {
522    const struct intel_device_info *devinfo = blorp->isl_dev->info;
523 
524    if (devinfo->ver < 12)
525       return false;
526 
527    if (surf->surf->samples > 1)
528       return false;
529 
530    if (color_write_disable != 0 || blend_enabled)
531       return false;
532 
533    if (!blorp_blitter_supports_aux(devinfo, surf->aux_usage))
534       return false;
535 
536    const struct isl_format_layout *fmtl =
537       isl_format_get_layout(surf->surf->format);
538 
539    /* We can only support linear mode for 96bpp. */
540    if (fmtl->bpb == 96 && surf->surf->tiling != ISL_TILING_LINEAR)
541       return false;
542 
543    return true;
544 }
545 
546 bool
blorp_clear_supports_compute(struct blorp_context * blorp,uint8_t color_write_disable,bool blend_enabled,enum isl_aux_usage aux_usage)547 blorp_clear_supports_compute(struct blorp_context *blorp,
548                              uint8_t color_write_disable, bool blend_enabled,
549                              enum isl_aux_usage aux_usage)
550 {
551    if (blorp->isl_dev->info->ver < 7)
552       return false;
553    if (color_write_disable != 0 || blend_enabled)
554       return false;
555    if (blorp->isl_dev->info->ver >= 12) {
556       return aux_usage == ISL_AUX_USAGE_FCV_CCS_E ||
557              aux_usage == ISL_AUX_USAGE_CCS_E ||
558              aux_usage == ISL_AUX_USAGE_NONE;
559    } else {
560       return aux_usage == ISL_AUX_USAGE_NONE;
561    }
562 }
563 
564 void
blorp_clear(struct blorp_batch * batch,const struct blorp_surf * surf,enum isl_format format,struct isl_swizzle swizzle,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,union isl_color_value clear_color,uint8_t color_write_disable)565 blorp_clear(struct blorp_batch *batch,
566             const struct blorp_surf *surf,
567             enum isl_format format, struct isl_swizzle swizzle,
568             uint32_t level, uint32_t start_layer, uint32_t num_layers,
569             uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
570             union isl_color_value clear_color,
571             uint8_t color_write_disable)
572 {
573    struct blorp_params params;
574    blorp_params_init(&params);
575    params.op = BLORP_OP_SLOW_COLOR_CLEAR;
576 
577    const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
578    if (compute) {
579       assert(blorp_clear_supports_compute(batch->blorp, color_write_disable,
580                                           false, surf->aux_usage));
581    } else if (batch->flags & BLORP_BATCH_USE_BLITTER) {
582       assert(blorp_clear_supports_blitter(batch->blorp, surf,
583                                           color_write_disable, false));
584    }
585 
586    /* Manually apply the clear destination swizzle.  This way swizzled clears
587     * will work for swizzles which we can't normally use for rendering and it
588     * also ensures that they work on pre-Haswell hardware which can't swizzle
589     * at all.
590     */
591    clear_color = isl_color_value_swizzle_inv(clear_color, swizzle);
592    swizzle = ISL_SWIZZLE_IDENTITY;
593 
594    bool clear_rgb_as_red = false;
595    if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) {
596       clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32);
597       format = ISL_FORMAT_R32_UINT;
598    } else if (format == ISL_FORMAT_L8_UNORM_SRGB) {
599       clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]);
600       format = ISL_FORMAT_R8_UNORM;
601    } else if (format == ISL_FORMAT_A4B4G4R4_UNORM) {
602       /* Broadwell and earlier cannot render to this format so we need to work
603        * around it by swapping the colors around and using B4G4R4A4 instead.
604        */
605       const struct isl_swizzle ARGB = ISL_SWIZZLE(ALPHA, RED, GREEN, BLUE);
606       clear_color = isl_color_value_swizzle_inv(clear_color, ARGB);
607       format = ISL_FORMAT_B4G4R4A4_UNORM;
608    } else if (isl_format_get_layout(format)->bpb % 3 == 0) {
609       clear_rgb_as_red = true;
610       if (format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
611          clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]);
612          clear_color.f32[1] = util_format_linear_to_srgb_float(clear_color.f32[1]);
613          clear_color.f32[2] = util_format_linear_to_srgb_float(clear_color.f32[2]);
614       }
615    }
616 
617    memcpy(&params.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4);
618 
619    bool use_simd16_replicated_data = true;
620 
621    /* From the SNB PRM (Vol4_Part1):
622     *
623     *     "Replicated data (Message Type = 111) is only supported when
624     *      accessing tiled memory.  Using this Message Type to access linear
625     *      (untiled) memory is UNDEFINED."
626     */
627    if (surf->surf->tiling == ISL_TILING_LINEAR)
628       use_simd16_replicated_data = false;
629 
630    /* Replicated clears don't work before gfx6 */
631    if (batch->blorp->isl_dev->info->ver < 6)
632       use_simd16_replicated_data = false;
633 
634    /* From the BSpec: 47719 (TGL/DG2/MTL) Replicate Data:
635     *
636     * "Replicate Data Render Target Write message should not be used
637     *  on all projects TGL+."
638     *
639     * Xe2 spec (57350) does not mention this restriction.
640     *
641     *  See 14017879046, 14017880152 for additional information.
642     */
643    if (batch->blorp->isl_dev->info->ver >= 12 &&
644        batch->blorp->isl_dev->info->ver < 20)
645       use_simd16_replicated_data = false;
646 
647    if (compute)
648       use_simd16_replicated_data = false;
649 
650    /* Constant color writes ignore everything in blend and color calculator
651     * state.  This is not documented.
652     */
653    params.color_write_disable = color_write_disable & BITFIELD_MASK(4);
654    if (color_write_disable)
655       use_simd16_replicated_data = false;
656 
657    if (!blorp_params_get_clear_kernel(batch, &params,
658                                       use_simd16_replicated_data,
659                                       clear_rgb_as_red))
660       return;
661 
662    if (!compute && !blorp_ensure_sf_program(batch, &params))
663       return;
664 
665    assert(num_layers > 0);
666    while (num_layers > 0) {
667       blorp_surface_info_init(batch, &params.dst, surf, level,
668                                   start_layer, format, true);
669       params.dst.view.swizzle = swizzle;
670 
671       params.x0 = x0;
672       params.y0 = y0;
673       params.x1 = x1;
674       params.y1 = y1;
675 
676       if (compute) {
677          params.wm_inputs.bounds_rect.x0 = x0;
678          params.wm_inputs.bounds_rect.y0 = y0;
679          params.wm_inputs.bounds_rect.x1 = x1;
680          params.wm_inputs.bounds_rect.y1 = y1;
681       }
682 
683       if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
684          assert(params.dst.surf.samples == 1);
685          assert(num_layers == 1);
686          params.x0 += params.dst.tile_x_sa;
687          params.y0 += params.dst.tile_y_sa;
688          params.x1 += params.dst.tile_x_sa;
689          params.y1 += params.dst.tile_y_sa;
690       }
691 
692       /* The MinLOD and MinimumArrayElement don't work properly for cube maps.
693        * Convert them to a single slice on gfx4.
694        */
695       if (batch->blorp->isl_dev->info->ver == 4 &&
696           (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) {
697          blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params.dst);
698       }
699 
700       if (clear_rgb_as_red) {
701          surf_fake_rgb_with_red(batch->blorp->isl_dev, &params.dst);
702          params.x0 *= 3;
703          params.x1 *= 3;
704       }
705 
706       if (isl_format_is_compressed(params.dst.surf.format)) {
707          blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, &params.dst,
708                                             NULL, NULL, NULL, NULL);
709                                             //&dst_x, &dst_y, &dst_w, &dst_h);
710       }
711 
712       if (params.dst.tile_x_sa || params.dst.tile_y_sa) {
713          /* Either we're on gfx4 where there is no multisampling or the
714           * surface is compressed which also implies no multisampling.
715           * Therefore, sa == px and we don't need to do a conversion.
716           */
717          assert(params.dst.surf.samples == 1);
718          params.x0 += params.dst.tile_x_sa;
719          params.y0 += params.dst.tile_y_sa;
720          params.x1 += params.dst.tile_x_sa;
721          params.y1 += params.dst.tile_y_sa;
722       }
723 
724       params.num_samples = params.dst.surf.samples;
725 
726       /* We may be restricted on the number of layers we can bind at any one
727        * time.  In particular, Sandy Bridge has a maximum number of layers of
728        * 512 but a maximum 3D texture size is much larger.
729        */
730       params.num_layers = MIN2(params.dst.view.array_len, num_layers);
731 
732       const unsigned max_image_width = 16 * 1024;
733       if (params.dst.surf.logical_level0_px.width > max_image_width) {
734          /* Clearing an RGB image as red multiplies the surface width by 3
735           * so it may now be too wide for the hardware surface limits.  We
736           * have to break the clear up into pieces in order to clear wide
737           * images.
738           */
739          assert(clear_rgb_as_red);
740          assert(params.dst.surf.dim == ISL_SURF_DIM_2D);
741          assert(params.dst.surf.tiling == ISL_TILING_LINEAR);
742          assert(params.dst.surf.logical_level0_px.depth == 1);
743          assert(params.dst.surf.logical_level0_px.array_len == 1);
744          assert(params.dst.surf.levels == 1);
745          assert(params.dst.surf.samples == 1);
746          assert(params.dst.tile_x_sa == 0 || params.dst.tile_y_sa == 0);
747          assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE);
748 
749          /* max_image_width rounded down to a multiple of 3 */
750          const unsigned max_fake_rgb_width = (max_image_width / 3) * 3;
751          const unsigned cpp =
752             isl_format_get_layout(params.dst.surf.format)->bpb / 8;
753 
754          params.dst.surf.logical_level0_px.width = max_fake_rgb_width;
755          params.dst.surf.phys_level0_sa.width = max_fake_rgb_width;
756 
757          uint32_t orig_x0 = params.x0, orig_x1 = params.x1;
758          uint64_t orig_offset = params.dst.addr.offset;
759          for (uint32_t x = orig_x0; x < orig_x1; x += max_fake_rgb_width) {
760             /* Offset to the surface.  It's easy because we're linear */
761             params.dst.addr.offset = orig_offset + x * cpp;
762 
763             params.x0 = 0;
764             params.x1 = MIN2(orig_x1 - x, max_image_width);
765 
766             batch->blorp->exec(batch, &params);
767          }
768       } else {
769          batch->blorp->exec(batch, &params);
770       }
771 
772       start_layer += params.num_layers;
773       num_layers -= params.num_layers;
774    }
775 }
776 
777 static bool
blorp_clear_stencil_as_rgba(struct blorp_batch * batch,const struct blorp_surf * surf,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,uint8_t stencil_mask,uint8_t stencil_value)778 blorp_clear_stencil_as_rgba(struct blorp_batch *batch,
779                             const struct blorp_surf *surf,
780                             uint32_t level, uint32_t start_layer,
781                             uint32_t num_layers,
782                             uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
783                             uint8_t stencil_mask, uint8_t stencil_value)
784 {
785    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
786 
787    /* Stencil mask support would require piles of shader magic */
788    if (stencil_mask != 0xff)
789       return false;
790 
791    /* We only support separate W-tiled stencil for now */
792    if (surf->surf->format != ISL_FORMAT_R8_UINT ||
793        surf->surf->tiling != ISL_TILING_W)
794       return false;
795 
796    if (surf->surf->samples > 1) {
797       /* Adjust x0, y0, x1, and y1 to be in units of samples */
798       assert(surf->surf->msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED);
799       struct isl_extent2d msaa_px_size_sa =
800          isl_get_interleaved_msaa_px_size_sa(surf->surf->samples);
801 
802       x0 *= msaa_px_size_sa.w;
803       y0 *= msaa_px_size_sa.h;
804       x1 *= msaa_px_size_sa.w;
805       y1 *= msaa_px_size_sa.h;
806    }
807 
808    /* W-tiles and Y-tiles have the same layout as far as cache lines are
809     * concerned: both are 8x8 cache lines laid out Y-major.  The difference is
810     * entirely in how the data is arranged within the cache line.  W-tiling
811     * is 8x8 pixels in a swizzled pattern while Y-tiling is 16B by 4 rows
812     * regardless of image format size.  As long as everything is aligned to 8,
813     * we can just treat the W-tiled image as Y-tiled, ignore the layout
814     * difference within a cache line, and blast out data.
815     */
816    if (x0 % 8 != 0 || y0 % 8 != 0 || x1 % 8 != 0 || y1 % 8 != 0)
817       return false;
818 
819    struct blorp_params params;
820    blorp_params_init(&params);
821    params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
822 
823    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
824       return false;
825 
826    memset(&params.wm_inputs.clear_color, stencil_value,
827           sizeof(params.wm_inputs.clear_color));
828 
829    /* The Sandy Bridge PRM Vol. 4 Pt. 2, section 2.11.2.1.1 has the
830     * following footnote to the format table:
831     *
832     *    128 BPE Formats cannot be Tiled Y when used as render targets
833     *
834     * We have to use RGBA16_UINT on SNB.
835     */
836    enum isl_format wide_format;
837    if (ISL_GFX_VER(batch->blorp->isl_dev) <= 6) {
838       wide_format = ISL_FORMAT_R16G16B16A16_UINT;
839 
840       /* For RGBA16_UINT, we need to mask the stencil value otherwise, we risk
841        * clamping giving us the wrong values
842        */
843       for (unsigned i = 0; i < 4; i++)
844          params.wm_inputs.clear_color[i] &= 0xffff;
845    } else {
846       wide_format = ISL_FORMAT_R32G32B32A32_UINT;
847    }
848 
849    for (uint32_t a = 0; a < num_layers; a++) {
850       uint32_t layer = start_layer + a;
851 
852       blorp_surface_info_init(batch, &params.dst, surf, level,
853                                   layer, ISL_FORMAT_UNSUPPORTED, true);
854 
855       if (surf->surf->samples > 1)
856          blorp_surf_fake_interleaved_msaa(batch->blorp->isl_dev, &params.dst);
857 
858       /* Make it Y-tiled */
859       blorp_surf_retile_w_to_y(batch->blorp->isl_dev, &params.dst);
860 
861       unsigned wide_Bpp =
862          isl_format_get_layout(wide_format)->bpb / 8;
863 
864       params.dst.view.format = params.dst.surf.format = wide_format;
865       assert(params.dst.surf.logical_level0_px.width % wide_Bpp == 0);
866       params.dst.surf.logical_level0_px.width /= wide_Bpp;
867       assert(params.dst.tile_x_sa % wide_Bpp == 0);
868       params.dst.tile_x_sa /= wide_Bpp;
869 
870       params.x0 = params.dst.tile_x_sa + x0 / (wide_Bpp / 2);
871       params.y0 = params.dst.tile_y_sa + y0 / 2;
872       params.x1 = params.dst.tile_x_sa + x1 / (wide_Bpp / 2);
873       params.y1 = params.dst.tile_y_sa + y1 / 2;
874 
875       batch->blorp->exec(batch, &params);
876    }
877 
878    return true;
879 }
880 
881 void
blorp_clear_depth_stencil(struct blorp_batch * batch,const struct blorp_surf * depth,const struct blorp_surf * stencil,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,float depth_value,uint8_t stencil_mask,uint8_t stencil_value)882 blorp_clear_depth_stencil(struct blorp_batch *batch,
883                           const struct blorp_surf *depth,
884                           const struct blorp_surf *stencil,
885                           uint32_t level, uint32_t start_layer,
886                           uint32_t num_layers,
887                           uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
888                           bool clear_depth, float depth_value,
889                           uint8_t stencil_mask, uint8_t stencil_value)
890 {
891    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
892    assert(num_layers > 0);
893 
894    if (!clear_depth && blorp_clear_stencil_as_rgba(batch, stencil, level,
895                                                    start_layer, num_layers,
896                                                    x0, y0, x1, y1,
897                                                    stencil_mask,
898                                                    stencil_value))
899       return;
900 
901    struct blorp_params params;
902    blorp_params_init(&params);
903    params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
904 
905    params.x0 = x0;
906    params.y0 = y0;
907    params.x1 = x1;
908    params.y1 = y1;
909 
910    if (ISL_GFX_VER(batch->blorp->isl_dev) == 6) {
911       /* For some reason, Sandy Bridge gets occlusion queries wrong if we
912        * don't have a shader.  In particular, it records samples even though
913        * we disable statistics in 3DSTATE_WM.  Give it the usual clear shader
914        * to work around the issue.
915        */
916       if (!blorp_params_get_clear_kernel(batch, &params, false, false))
917          return;
918    }
919 
920    while (num_layers > 0) {
921       params.num_layers = num_layers;
922 
923       if (stencil_mask) {
924          blorp_surface_info_init(batch, &params.stencil, stencil,
925                                      level, start_layer,
926                                      ISL_FORMAT_UNSUPPORTED, true);
927          params.stencil_mask = stencil_mask;
928          params.stencil_ref = stencil_value;
929 
930          params.dst.surf.samples = params.stencil.surf.samples;
931          params.dst.surf.logical_level0_px =
932             params.stencil.surf.logical_level0_px;
933          params.dst.view = params.stencil.view;
934 
935          params.num_samples = params.stencil.surf.samples;
936 
937          /* We may be restricted on the number of layers we can bind at any
938           * one time.  In particular, Sandy Bridge has a maximum number of
939           * layers of 512 but a maximum 3D texture size is much larger.
940           */
941          if (params.stencil.view.array_len < params.num_layers)
942             params.num_layers = params.stencil.view.array_len;
943       }
944 
945       if (clear_depth) {
946          blorp_surface_info_init(batch, &params.depth, depth,
947                                      level, start_layer,
948                                      ISL_FORMAT_UNSUPPORTED, true);
949          params.z = depth_value;
950          params.depth_format =
951             isl_format_get_depth_format(depth->surf->format, false);
952 
953          params.dst.surf.samples = params.depth.surf.samples;
954          params.dst.surf.logical_level0_px =
955             params.depth.surf.logical_level0_px;
956          params.dst.view = params.depth.view;
957 
958          params.num_samples = params.depth.surf.samples;
959 
960          /* We may be restricted on the number of layers we can bind at any
961           * one time.  In particular, Sandy Bridge has a maximum number of
962           * layers of 512 but a maximum 3D texture size is much larger.
963           */
964          if (params.depth.view.array_len < params.num_layers)
965             params.num_layers = params.depth.view.array_len;
966       }
967 
968       batch->blorp->exec(batch, &params);
969 
970       start_layer += params.num_layers;
971       num_layers -= params.num_layers;
972    }
973 }
974 
975 static bool
blorp_can_clear_full_surface(const struct blorp_surf * depth,const struct blorp_surf * stencil,uint32_t level,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,bool clear_stencil)976 blorp_can_clear_full_surface(const struct blorp_surf *depth,
977                              const struct blorp_surf *stencil,
978                              uint32_t level,
979                              uint32_t x0, uint32_t y0,
980                              uint32_t x1, uint32_t y1,
981                              bool clear_depth,
982                              bool clear_stencil)
983 {
984    uint32_t width = 0, height = 0;
985    if (clear_stencil) {
986       width = u_minify(stencil->surf->logical_level0_px.width, level);
987       height = u_minify(stencil->surf->logical_level0_px.height, level);
988    }
989 
990    if (clear_depth && !(width || height)) {
991       width = u_minify(depth->surf->logical_level0_px.width, level);
992       height = u_minify(depth->surf->logical_level0_px.height, level);
993    }
994 
995    return x0 == 0 && y0 == 0 && width == x1 && height == y1;
996 }
997 
998 void
blorp_hiz_clear_depth_stencil(struct blorp_batch * batch,const struct blorp_surf * depth,const struct blorp_surf * stencil,uint32_t level,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,float depth_value,bool clear_stencil,uint8_t stencil_value)999 blorp_hiz_clear_depth_stencil(struct blorp_batch *batch,
1000                               const struct blorp_surf *depth,
1001                               const struct blorp_surf *stencil,
1002                               uint32_t level,
1003                               uint32_t start_layer, uint32_t num_layers,
1004                               uint32_t x0, uint32_t y0,
1005                               uint32_t x1, uint32_t y1,
1006                               bool clear_depth, float depth_value,
1007                               bool clear_stencil, uint8_t stencil_value)
1008 {
1009    struct blorp_params params;
1010    blorp_params_init(&params);
1011    params.op = BLORP_OP_HIZ_CLEAR;
1012 
1013    /* This requires WM_HZ_OP which only exists on gfx8+ */
1014    assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 8);
1015 
1016    params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
1017    /* From BSpec: 3DSTATE_WM_HZ_OP_BODY >> Full Surface Depth and Stencil Clear
1018     *
1019     *    "Software must set this only when the APP requires the entire Depth
1020     *    surface to be cleared."
1021     */
1022    params.full_surface_hiz_op =
1023       blorp_can_clear_full_surface(depth, stencil, level, x0, y0, x1, y1,
1024                                    clear_depth, clear_stencil);
1025    params.num_layers = 1;
1026 
1027    params.x0 = x0;
1028    params.y0 = y0;
1029    params.x1 = x1;
1030    params.y1 = y1;
1031 
1032    for (uint32_t l = 0; l < num_layers; l++) {
1033       const uint32_t layer = start_layer + l;
1034       if (clear_stencil) {
1035          blorp_surface_info_init(batch, &params.stencil, stencil,
1036                                      level, layer,
1037                                      ISL_FORMAT_UNSUPPORTED, true);
1038          params.stencil_mask = 0xff;
1039          params.stencil_ref = stencil_value;
1040          params.num_samples = params.stencil.surf.samples;
1041       }
1042 
1043       if (clear_depth) {
1044          /* If we're clearing depth, we must have HiZ */
1045          assert(depth && isl_aux_usage_has_hiz(depth->aux_usage));
1046 
1047          blorp_surface_info_init(batch, &params.depth, depth,
1048                                      level, layer,
1049                                      ISL_FORMAT_UNSUPPORTED, true);
1050          params.depth.clear_color.f32[0] = depth_value;
1051          params.depth_format =
1052             isl_format_get_depth_format(depth->surf->format, false);
1053          params.num_samples = params.depth.surf.samples;
1054       }
1055 
1056       batch->blorp->exec(batch, &params);
1057    }
1058 }
1059 
1060 /* Given a depth stencil attachment, this function performs a fast depth clear
1061  * on a depth portion and a regular clear on the stencil portion. When
1062  * performing a fast depth clear on the depth portion, the HiZ buffer is simply
1063  * tagged as cleared so the depth clear value is not actually needed.
1064  */
1065 void
blorp_gfx8_hiz_clear_attachments(struct blorp_batch * batch,uint32_t num_samples,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_depth,bool clear_stencil,uint8_t stencil_value)1066 blorp_gfx8_hiz_clear_attachments(struct blorp_batch *batch,
1067                                  uint32_t num_samples,
1068                                  uint32_t x0, uint32_t y0,
1069                                  uint32_t x1, uint32_t y1,
1070                                  bool clear_depth, bool clear_stencil,
1071                                  uint8_t stencil_value)
1072 {
1073    assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1074 
1075    struct blorp_params params;
1076    blorp_params_init(&params);
1077    params.op = BLORP_OP_HIZ_CLEAR;
1078    params.num_layers = 1;
1079    params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
1080    params.x0 = x0;
1081    params.y0 = y0;
1082    params.x1 = x1;
1083    params.y1 = y1;
1084    params.num_samples = num_samples;
1085    params.depth.enabled = clear_depth;
1086    params.stencil.enabled = clear_stencil;
1087    params.stencil_ref = stencil_value;
1088    batch->blorp->exec(batch, &params);
1089 }
1090 
1091 /** Clear active color/depth/stencili attachments
1092  *
1093  * This function performs a clear operation on the currently bound
1094  * color/depth/stencil attachments.  It is assumed that any information passed
1095  * in here is valid, consistent, and in-bounds relative to the currently
1096  * attached depth/stencil.  The binding_table_offset parameter is the 32-bit
1097  * offset relative to surface state base address where pre-baked binding table
1098  * that we are to use lives.  If clear_color is false, binding_table_offset
1099  * must point to a binding table with one entry which is a valid null surface
1100  * that matches the currently bound depth and stencil.
1101  */
1102 void
blorp_clear_attachments(struct blorp_batch * batch,uint32_t binding_table_offset,enum isl_format depth_format,uint32_t num_samples,uint32_t start_layer,uint32_t num_layers,uint32_t x0,uint32_t y0,uint32_t x1,uint32_t y1,bool clear_color,union isl_color_value color_value,bool clear_depth,float depth_value,uint8_t stencil_mask,uint8_t stencil_value)1103 blorp_clear_attachments(struct blorp_batch *batch,
1104                         uint32_t binding_table_offset,
1105                         enum isl_format depth_format,
1106                         uint32_t num_samples,
1107                         uint32_t start_layer, uint32_t num_layers,
1108                         uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1,
1109                         bool clear_color, union isl_color_value color_value,
1110                         bool clear_depth, float depth_value,
1111                         uint8_t stencil_mask, uint8_t stencil_value)
1112 {
1113    struct blorp_params params;
1114    blorp_params_init(&params);
1115 
1116    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1117    assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
1118 
1119    params.x0 = x0;
1120    params.y0 = y0;
1121    params.x1 = x1;
1122    params.y1 = y1;
1123 
1124    params.use_pre_baked_binding_table = true;
1125    params.pre_baked_binding_table_offset = binding_table_offset;
1126 
1127    params.num_layers = num_layers;
1128    params.num_samples = num_samples;
1129 
1130    if (clear_color) {
1131       params.dst.enabled = true;
1132       params.op = BLORP_OP_SLOW_COLOR_CLEAR;
1133 
1134       memcpy(&params.wm_inputs.clear_color, color_value.f32, sizeof(float) * 4);
1135 
1136       /* Unfortunately, without knowing whether or not our destination surface
1137        * is tiled or not, we have to assume it may be linear.  This means no
1138        * SIMD16_REPDATA for us. :-(
1139        */
1140       if (!blorp_params_get_clear_kernel(batch, &params, false, false))
1141          return;
1142    }
1143 
1144    if (clear_depth) {
1145       params.depth.enabled = true;
1146       params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
1147 
1148       params.z = depth_value;
1149       params.depth_format = isl_format_get_depth_format(depth_format, false);
1150    }
1151 
1152    if (stencil_mask) {
1153       params.stencil.enabled = true;
1154       params.op = BLORP_OP_SLOW_DEPTH_CLEAR;
1155 
1156       params.stencil_mask = stencil_mask;
1157       params.stencil_ref = stencil_value;
1158    }
1159 
1160    if (!blorp_params_get_layer_offset_vs(batch, &params))
1161       return;
1162 
1163    params.vs_inputs.base_layer = start_layer;
1164 
1165    batch->blorp->exec(batch, &params);
1166 }
1167 
1168 void
blorp_ccs_resolve(struct blorp_batch * batch,struct blorp_surf * surf,uint32_t level,uint32_t start_layer,uint32_t num_layers,enum isl_format format,enum isl_aux_op resolve_op)1169 blorp_ccs_resolve(struct blorp_batch *batch,
1170                   struct blorp_surf *surf, uint32_t level,
1171                   uint32_t start_layer, uint32_t num_layers,
1172                   enum isl_format format,
1173                   enum isl_aux_op resolve_op)
1174 {
1175    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1176    struct blorp_params params;
1177 
1178    blorp_params_init(&params);
1179    switch(resolve_op) {
1180    case ISL_AUX_OP_AMBIGUATE:
1181       params.op = BLORP_OP_CCS_AMBIGUATE;
1182       break;
1183    case ISL_AUX_OP_FULL_RESOLVE:
1184       params.op = BLORP_OP_CCS_RESOLVE;
1185       break;
1186    case ISL_AUX_OP_PARTIAL_RESOLVE:
1187       params.op = BLORP_OP_CCS_PARTIAL_RESOLVE;
1188       break;
1189    default:
1190       assert(false);
1191    }
1192    blorp_surface_info_init(batch, &params.dst, surf,
1193                                level, start_layer, format, true);
1194 
1195    /* From the TGL PRM, Volume 2d: 3DSTATE_PS_BODY,
1196     *
1197     *    3D/Volumetric surfaces do not support Fast Clear operation.
1198     *
1199     *    [...]
1200     *
1201     *    3D/Volumetric surfaces do not support in-place resolve pass
1202     *    operation.
1203     *
1204     * HSD 1406738321 suggests a more limited scope of restrictions, but
1205     * there should be no harm in complying with the Bspec restrictions.
1206     */
1207    if (ISL_GFX_VERX10(batch->blorp->isl_dev) == 120 &&
1208        params.dst.surf.dim == ISL_SURF_DIM_3D) {
1209       convert_rt_from_3d_to_2d(batch->blorp->isl_dev, &params.dst);
1210    }
1211 
1212    params.x0 = params.y0 = 0;
1213    params.x1 = u_minify(params.dst.surf.logical_level0_px.width, level);
1214    params.y1 = u_minify(params.dst.surf.logical_level0_px.height, level);
1215    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 9) {
1216       /* From Bspec 2424, "Render Target Resolve":
1217        *
1218        *    The Resolve Rectangle size is same as Clear Rectangle size from
1219        *    SKL+.
1220        *
1221        * Note that this differs from Vol7 of the Sky Lake PRM, which only
1222        * specifies aligning by the scaledown factors.
1223        */
1224       get_fast_clear_rect(batch->blorp->isl_dev, surf->surf, surf->aux_surf,
1225                           &params.x0, &params.y0, &params.x1, &params.y1);
1226    } else {
1227       /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
1228        *
1229        *    A rectangle primitive must be scaled down by the following factors
1230        *    with respect to render target being resolved.
1231        *
1232        * The scaledown factors in the table that follows are related to the
1233        * block size of the CCS format. For IVB and HSW, we divide by two, for
1234        * BDW we multiply by 8 and 16.
1235        */
1236       const struct isl_format_layout *aux_fmtl =
1237          isl_format_get_layout(params.dst.aux_surf.format);
1238       assert(aux_fmtl->txc == ISL_TXC_CCS);
1239 
1240       unsigned x_scaledown, y_scaledown;
1241       if (ISL_GFX_VER(batch->blorp->isl_dev) >= 8) {
1242          x_scaledown = aux_fmtl->bw * 8;
1243          y_scaledown = aux_fmtl->bh * 16;
1244       } else {
1245          x_scaledown = aux_fmtl->bw / 2;
1246          y_scaledown = aux_fmtl->bh / 2;
1247       }
1248       params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown;
1249       params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
1250    }
1251 
1252    if (batch->blorp->isl_dev->info->ver >= 10) {
1253       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
1254              resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE ||
1255              resolve_op == ISL_AUX_OP_AMBIGUATE);
1256    } else if (batch->blorp->isl_dev->info->ver >= 9) {
1257       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
1258              resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
1259    } else {
1260       /* Broadwell and earlier do not have a partial resolve */
1261       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE);
1262    }
1263    params.fast_clear_op = resolve_op;
1264    params.num_layers = num_layers;
1265 
1266    /* Note: there is no need to initialize push constants because it doesn't
1267     * matter what data gets dispatched to the render target.  However, we must
1268     * ensure that the fragment shader delivers the data using the "replicated
1269     * color" message.
1270     */
1271 
1272    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
1273       return;
1274 
1275    batch->blorp->exec(batch, &params);
1276 
1277    if (batch->blorp->isl_dev->info->ver <= 8) {
1278       assert(surf->aux_usage == ISL_AUX_USAGE_CCS_D);
1279       assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE);
1280       /* ISL's state-machine of CCS_D describes full resolves as leaving the
1281        * aux buffer in the pass-through state. Hardware doesn't behave this
1282        * way on Broadwell however. On that platform, full resolves transition
1283        * the aux buffer to the resolved state. We assume that gfx7 behaves the
1284        * same. Use an ambiguate to match driver expectations.
1285        */
1286       for (int l = 0; l < num_layers; l++)
1287          blorp_ccs_ambiguate(batch, surf, level, start_layer + l);
1288    }
1289 }
1290 
1291 static nir_def *
blorp_nir_bit(nir_builder * b,nir_def * src,unsigned bit)1292 blorp_nir_bit(nir_builder *b, nir_def *src, unsigned bit)
1293 {
1294    return nir_iand_imm(b, nir_ushr_imm(b, src, bit), 1);
1295 }
1296 
1297 #pragma pack(push, 1)
1298 struct blorp_mcs_partial_resolve_key
1299 {
1300    struct blorp_base_key base;
1301    bool indirect_clear_color;
1302    bool int_format;
1303    uint32_t num_samples;
1304 };
1305 #pragma pack(pop)
1306 
1307 static bool
blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch * batch,struct blorp_params * params)1308 blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch,
1309                                             struct blorp_params *params)
1310 {
1311    struct blorp_context *blorp = batch->blorp;
1312    const struct blorp_mcs_partial_resolve_key blorp_key = {
1313       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE),
1314       .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL,
1315       .int_format = isl_format_has_int_channel(params->dst.view.format),
1316       .num_samples = params->num_samples,
1317    };
1318 
1319    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
1320                             &params->wm_prog_kernel, &params->wm_prog_data))
1321       return true;
1322 
1323    void *mem_ctx = ralloc_context(NULL);
1324 
1325    nir_builder b;
1326    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_FRAGMENT,
1327                          blorp_shader_type_to_name(blorp_key.base.shader_type));
1328 
1329    nir_variable *v_color =
1330       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
1331 
1332    nir_variable *frag_color =
1333       nir_variable_create(b.shader, nir_var_shader_out,
1334                           glsl_vec4_type(), "gl_FragColor");
1335    frag_color->data.location = FRAG_RESULT_COLOR;
1336 
1337    /* Do an MCS fetch and check if it is equal to the magic clear value */
1338    nir_def *mcs =
1339       blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, nir_load_frag_coord(&b)),
1340                                nir_load_layer_id(&b));
1341    nir_def *is_clear =
1342       blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples);
1343 
1344    /* If we aren't the clear value, discard. */
1345    nir_discard_if(&b, nir_inot(&b, is_clear));
1346 
1347    nir_def *clear_color = nir_load_var(&b, v_color);
1348    if (blorp_key.indirect_clear_color && blorp->isl_dev->info->ver <= 8) {
1349       /* Gfx7-8 clear colors are stored as single 0/1 bits */
1350       clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
1351                                  blorp_nir_bit(&b, clear_color, 30),
1352                                  blorp_nir_bit(&b, clear_color, 29),
1353                                  blorp_nir_bit(&b, clear_color, 28));
1354 
1355       if (!blorp_key.int_format)
1356          clear_color = nir_i2f32(&b, clear_color);
1357    }
1358    nir_store_var(&b, frag_color, clear_color, 0xf);
1359 
1360    const bool multisample_fbo = true;
1361    const struct blorp_program p =
1362       blorp_compile_fs(blorp, mem_ctx, b.shader, multisample_fbo, false);
1363 
1364    bool result =
1365       blorp->upload_shader(batch, MESA_SHADER_FRAGMENT,
1366                            &blorp_key, sizeof(blorp_key),
1367                            p.kernel, p.kernel_size,
1368                            p.prog_data, p.prog_data_size,
1369                            &params->wm_prog_kernel, &params->wm_prog_data);
1370 
1371    ralloc_free(mem_ctx);
1372    return result;
1373 }
1374 
1375 void
blorp_mcs_partial_resolve(struct blorp_batch * batch,struct blorp_surf * surf,enum isl_format format,uint32_t start_layer,uint32_t num_layers)1376 blorp_mcs_partial_resolve(struct blorp_batch *batch,
1377                           struct blorp_surf *surf,
1378                           enum isl_format format,
1379                           uint32_t start_layer, uint32_t num_layers)
1380 {
1381    struct blorp_params params;
1382    blorp_params_init(&params);
1383    params.op = BLORP_OP_MCS_PARTIAL_RESOLVE;
1384 
1385    assert(batch->blorp->isl_dev->info->ver >= 7);
1386 
1387    params.x0 = 0;
1388    params.y0 = 0;
1389    params.x1 = surf->surf->logical_level0_px.width;
1390    params.y1 = surf->surf->logical_level0_px.height;
1391 
1392    blorp_surface_info_init(batch, &params.src, surf, 0,
1393                                start_layer, format, false);
1394    blorp_surface_info_init(batch, &params.dst, surf, 0,
1395                                start_layer, format, true);
1396 
1397    params.num_samples = params.dst.surf.samples;
1398    params.num_layers = num_layers;
1399    params.dst_clear_color_as_input = surf->clear_color_addr.buffer != NULL;
1400 
1401    memcpy(&params.wm_inputs.clear_color,
1402           surf->clear_color.f32, sizeof(float) * 4);
1403 
1404    if (!blorp_params_get_mcs_partial_resolve_kernel(batch, &params))
1405       return;
1406 
1407    batch->blorp->exec(batch, &params);
1408 }
1409 
/* Build the MCS element value that encodes the "ambiguated" state for a
 * surface with the given sample count.
 *
 * See the Broadwell PRM, Volume 5 "Memory Views", Section "Compressed
 * Multisample Surfaces".  sample_count must be in [2, 16].
 */
static uint64_t
get_mcs_ambiguate_pixel(int sample_count)
{
   assert(sample_count >= 2);
   assert(sample_count <= 16);

   /* An MCS element is an array of sample slice (SS) entries, one per
    * sample, and each entry is just wide enough to index every slice.
    */
   const int ss_count = sample_count;
   const int ss_bits = util_logbase2(ss_count);

   /* In the ambiguated encoding every sample slice stores its own index
    * (SS[0] = 0, SS[1] = 1, ...), packed in little endian order.  The unused
    * bits are defined as either Reserved or Reserved (MBZ); both are
    * interpreted as MBZ here, so they are left at zero.
    */
   uint64_t pixel = 0;
   for (uint64_t slice = 0; slice < ss_count; slice++) {
      const uint64_t shift = slice * ss_bits;

      pixel |= slice << shift;
   }

   return pixel;
}
1438 
1439 /** Clear an MCS to the "uncompressed" state
1440  *
1441  * This pass is the MCS equivalent of a "HiZ resolve".  It sets the MCS values
1442  * for a given layer of a surface to a sample-count dependent value which is
1443  * the "uncompressed" state which tells the sampler to go look at the main
1444  * surface.
1445  */
1446 void
blorp_mcs_ambiguate(struct blorp_batch * batch,struct blorp_surf * surf,uint32_t start_layer,uint32_t num_layers)1447 blorp_mcs_ambiguate(struct blorp_batch *batch,
1448                     struct blorp_surf *surf,
1449                     uint32_t start_layer, uint32_t num_layers)
1450 {
1451    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1452 
1453    struct blorp_params params;
1454    blorp_params_init(&params);
1455    params.op = BLORP_OP_MCS_AMBIGUATE;
1456 
1457    assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 7);
1458 
1459    enum isl_format renderable_format;
1460    switch (isl_format_get_layout(surf->aux_surf->format)->bpb) {
1461    case 8:  renderable_format = ISL_FORMAT_R8_UINT;     break;
1462    case 32: renderable_format = ISL_FORMAT_R32_UINT;    break;
1463    case 64: renderable_format = ISL_FORMAT_R32G32_UINT; break;
1464    default: unreachable("Unexpected MCS format size for ambiguate");
1465    }
1466 
1467    /* From Bspec 57340 (r59562):
1468     *
1469     *   To the calculated MCS size we add 4kb page to be used as clear value
1470     *   storage.
1471     *
1472     * and
1473     *
1474     *   When allocating memory, MCS buffer size is extended by 4KB over its
1475     *   original calculated size. First 4KB page of the MCS is reserved for
1476     *   internal HW usage.
1477     *
1478     * We shift aux buffer's start address by 4KB, accordingly.
1479     */
1480    struct blorp_address aux_addr = surf->aux_addr;
1481    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 20)
1482       aux_addr.offset += 4096;
1483 
1484    params.dst = (struct blorp_surface_info) {
1485       .enabled = true,
1486       .surf = *surf->aux_surf,
1487       .addr = aux_addr,
1488       .view = {
1489          .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1490          .format = renderable_format,
1491          .base_level = 0,
1492          .base_array_layer = start_layer,
1493          .levels = 1,
1494          .array_len = num_layers,
1495          .swizzle = ISL_SWIZZLE_IDENTITY,
1496       },
1497    };
1498 
1499    params.x0 = 0;
1500    params.y0 = 0;
1501    params.x1 = params.dst.surf.logical_level0_px.width;
1502    params.y1 = params.dst.surf.logical_level0_px.height;
1503    params.num_layers = params.dst.view.array_len;
1504 
1505    const uint64_t pixel = get_mcs_ambiguate_pixel(surf->surf->samples);
1506    params.wm_inputs.clear_color[0] = pixel & 0xFFFFFFFF;
1507    params.wm_inputs.clear_color[1] = pixel >> 32;
1508 
1509    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
1510       return;
1511 
1512    batch->blorp->exec(batch, &params);
1513 }
1514 
1515 /** Clear a CCS to the "uncompressed" state
1516  *
1517  * This pass is the CCS equivalent of a "HiZ resolve".  It sets the CCS values
1518  * for a given layer/level of a surface to 0x0 which is the "uncompressed"
1519  * state which tells the sampler to go look at the main surface.
1520  */
1521 void
blorp_ccs_ambiguate(struct blorp_batch * batch,struct blorp_surf * surf,uint32_t level,uint32_t layer)1522 blorp_ccs_ambiguate(struct blorp_batch *batch,
1523                     struct blorp_surf *surf,
1524                     uint32_t level, uint32_t layer)
1525 {
1526    assert((batch->flags & BLORP_BATCH_USE_COMPUTE) == 0);
1527 
1528    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 10) {
1529       /* On gfx10 and above, we have a hardware resolve op for this */
1530       return blorp_ccs_resolve(batch, surf, level, layer, 1,
1531                                surf->surf->format, ISL_AUX_OP_AMBIGUATE);
1532    }
1533 
1534    struct blorp_params params;
1535    blorp_params_init(&params);
1536    params.op = BLORP_OP_CCS_AMBIGUATE;
1537 
1538    assert(ISL_GFX_VER(batch->blorp->isl_dev) >= 7);
1539 
1540    const struct isl_format_layout *aux_fmtl =
1541       isl_format_get_layout(surf->aux_surf->format);
1542    assert(aux_fmtl->txc == ISL_TXC_CCS);
1543 
1544    params.dst = (struct blorp_surface_info) {
1545       .enabled = true,
1546       .addr = surf->aux_addr,
1547       .view = {
1548          .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1549          .format = ISL_FORMAT_R32G32B32A32_UINT,
1550          .base_level = 0,
1551          .base_array_layer = 0,
1552          .levels = 1,
1553          .array_len = 1,
1554          .swizzle = ISL_SWIZZLE_IDENTITY,
1555       },
1556    };
1557 
1558    uint32_t z = 0;
1559    if (surf->surf->dim == ISL_SURF_DIM_3D) {
1560       z = layer;
1561       layer = 0;
1562    }
1563 
1564    uint64_t offset_B;
1565    uint32_t x_offset_el, y_offset_el;
1566    isl_surf_get_image_offset_B_tile_el(surf->aux_surf, level, layer, z,
1567                                        &offset_B, &x_offset_el, &y_offset_el);
1568    params.dst.addr.offset += offset_B;
1569 
1570    const uint32_t width_px =
1571       u_minify(surf->aux_surf->logical_level0_px.width, level);
1572    const uint32_t height_px =
1573       u_minify(surf->aux_surf->logical_level0_px.height, level);
1574    const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
1575    const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
1576 
1577    struct isl_tile_info ccs_tile_info;
1578    isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
1579 
1580    /* We're going to map it as a regular RGBA32_UINT surface.  We need to
1581     * downscale a good deal.  We start by computing the area on the CCS to
1582     * clear in units of Y-tiled cache lines.
1583     */
1584    uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl;
1585    if (ISL_GFX_VER(batch->blorp->isl_dev) >= 8) {
1586       /* From the Sky Lake PRM Vol. 12 in the section on planes:
1587        *
1588        *    "The Color Control Surface (CCS) contains the compression status
1589        *    of the cache-line pairs. The compression state of the cache-line
1590        *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
1591        *    represents an area on the main surface of 16x16 sets of 128 byte
1592        *    Y-tiled cache-line-pairs. CCS is always Y tiled."
1593        *
1594        * Each 2-bit surface element in the CCS corresponds to a single
1595        * cache-line pair in the main surface.  This means that 16x16 el block
1596        * in the CCS maps to a Y-tiled cache line.  Fortunately, CCS layouts
1597        * are calculated with a very large alignment so we can round up to a
1598        * whole cache line without worrying about overdraw.
1599        */
1600 
1601       /* On Broadwell and above, a CCS tile is the same as a Y tile when
1602        * viewed at the cache-line granularity.  Fortunately, the horizontal
1603        * and vertical alignment requirements of the CCS are such that we can
1604        * align to an entire cache line without worrying about crossing over
1605        * from one LOD to another.
1606        */
1607       const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8;
1608       const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8;
1609       assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0);
1610       assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0);
1611 
1612       assert(x_offset_el % x_el_per_cl == 0);
1613       assert(y_offset_el % y_el_per_cl == 0);
1614       x_offset_cl = x_offset_el / x_el_per_cl;
1615       y_offset_cl = y_offset_el / y_el_per_cl;
1616       width_cl = DIV_ROUND_UP(width_el, x_el_per_cl);
1617       height_cl = DIV_ROUND_UP(height_el, y_el_per_cl);
1618    } else {
1619       /* On gfx7, the CCS tiling is not so nice.  However, there we are
1620        * guaranteed that we only have a single level and slice so we don't
1621        * have to worry about it and can just align to a whole tile.
1622        */
1623       assert(surf->aux_surf->logical_level0_px.depth == 1);
1624       assert(surf->aux_surf->logical_level0_px.array_len == 1);
1625       assert(x_offset_el == 0 && y_offset_el == 0);
1626       const uint32_t width_tl =
1627          DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w);
1628       const uint32_t height_tl =
1629          DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h);
1630       x_offset_cl = 0;
1631       y_offset_cl = 0;
1632       width_cl = width_tl * 8;
1633       height_cl = height_tl * 8;
1634    }
1635 
1636    /* We're going to use a RGBA32 format so as to write data as quickly as
1637     * possible.  A y-tiled cache line will then be 1x4 px.
1638     */
1639    const uint32_t x_offset_rgba_px = x_offset_cl;
1640    const uint32_t y_offset_rgba_px = y_offset_cl * 4;
1641    const uint32_t width_rgba_px = width_cl;
1642    const uint32_t height_rgba_px = height_cl * 4;
1643 
1644    ASSERTED bool ok =
1645       isl_surf_init(batch->blorp->isl_dev, &params.dst.surf,
1646                     .dim = ISL_SURF_DIM_2D,
1647                     .format = ISL_FORMAT_R32G32B32A32_UINT,
1648                     .width = width_rgba_px + x_offset_rgba_px,
1649                     .height = height_rgba_px + y_offset_rgba_px,
1650                     .depth = 1,
1651                     .levels = 1,
1652                     .array_len = 1,
1653                     .samples = 1,
1654                     .row_pitch_B = surf->aux_surf->row_pitch_B,
1655                     .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1656                     .tiling_flags = ISL_TILING_Y0_BIT);
1657    assert(ok);
1658 
1659    params.x0 = x_offset_rgba_px;
1660    params.y0 = y_offset_rgba_px;
1661    params.x1 = x_offset_rgba_px + width_rgba_px;
1662    params.y1 = y_offset_rgba_px + height_rgba_px;
1663 
1664    /* A CCS value of 0 means "uncompressed." */
1665    memset(&params.wm_inputs.clear_color, 0,
1666           sizeof(params.wm_inputs.clear_color));
1667 
1668    if (!blorp_params_get_clear_kernel(batch, &params, true, false))
1669       return;
1670 
1671    batch->blorp->exec(batch, &params);
1672 }
1673