• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  */
25 
26 /* This file implements randomized texture blit tests. */
27 
28 #include "si_pipe.h"
29 #include "util/rand_xor.h"
30 #include "util/u_surface.h"
31 #include "amd/addrlib/inc/addrtypes.h"
32 
/* State of the xorshift128+ generator that produces the random pixel data. */
static uint64_t seed_xorshift128plus[2];

/* Size in bytes of one generated random number (a uint64_t). CPU row strides
 * are padded to a multiple of this so rows can be filled in whole numbers.
 */
#define RAND_NUM_SIZE 8
36 
37 /* The GPU blits are emulated on the CPU using these CPU textures. */
38 
/* CPU-side shadow copy of one mip level of a texture. */
struct cpu_texture {
   uint8_t *ptr;          /* start of the malloc'ed pixel storage */
   uint64_t size;         /* total size in bytes (layer_stride * number of layers) */
   uint64_t layer_stride; /* distance in bytes between two consecutive layers */
   unsigned stride;       /* distance in bytes between two rows of blocks,
                           * padded to a multiple of RAND_NUM_SIZE */
};
45 
alloc_cpu_texture(struct cpu_texture * tex,struct pipe_resource * templ,unsigned level)46 static void alloc_cpu_texture(struct cpu_texture *tex, struct pipe_resource *templ, unsigned level)
47 {
48    unsigned width = u_minify(templ->width0, level);
49    unsigned height = u_minify(templ->height0, level);
50 
51    tex->stride = align(util_format_get_stride(templ->format, width), RAND_NUM_SIZE);
52    tex->layer_stride = util_format_get_2d_size(templ->format, tex->stride, height);
53    tex->size = tex->layer_stride * util_num_layers(templ, level);
54    tex->ptr = malloc(tex->size);
55    assert(tex->ptr);
56 }
57 
set_random_pixels(struct pipe_context * ctx,struct pipe_resource * tex,struct cpu_texture * cpu,unsigned level)58 static void set_random_pixels(struct pipe_context *ctx, struct pipe_resource *tex,
59                               struct cpu_texture *cpu, unsigned level)
60 {
61    struct pipe_transfer *t;
62    uint8_t *map;
63    int x, y, z;
64    unsigned width = u_minify(tex->width0, level);
65    unsigned height = u_minify(tex->height0, level);
66    unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
67    unsigned num_layers = util_num_layers(tex, level);
68 
69    map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_WRITE, 0, 0, 0, width, height,
70                              num_layers, &t);
71    assert(map);
72 
73    for (z = 0; z < num_layers; z++) {
74       for (y = 0; y < num_y_blocks; y++) {
75          uint64_t *ptr = (uint64_t *)(map + t->layer_stride * z + t->stride * y);
76          uint64_t *ptr_cpu = (uint64_t *)(cpu->ptr + cpu->layer_stride * z + cpu->stride * y);
77          unsigned size = cpu->stride / RAND_NUM_SIZE;
78 
79          assert(t->stride % RAND_NUM_SIZE == 0);
80          assert(cpu->stride % RAND_NUM_SIZE == 0);
81 
82          for (x = 0; x < size; x++) {
83             *ptr++ = *ptr_cpu++ = rand_xorshift128plus(seed_xorshift128plus);
84          }
85       }
86    }
87 
88    pipe_texture_unmap(ctx, t);
89 }
90 
set_random_pixels_for_2_textures(struct pipe_context * ctx,struct pipe_resource * tex1,struct pipe_resource * tex2)91 static void set_random_pixels_for_2_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
92                                              struct pipe_resource *tex2)
93 {
94    /* tex1 and tex2 are assumed to be the same size, format, and layout */
95    for (unsigned level = 0; level <= tex1->last_level; level++) {
96       for (unsigned sample = 0; sample < MAX2(tex1->nr_samples, 1); sample++) {
97          struct pipe_transfer *t1, *t2;
98          uint8_t *map1, *map2;
99          int x, y, z;
100          unsigned width = align(u_minify(tex1->width0, level), util_format_get_blockwidth(tex1->format));
101          unsigned height = align(u_minify(tex1->height0, level), util_format_get_blockheight(tex1->format));
102          unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
103          unsigned num_layers = util_num_layers(tex1, level);
104          /* If we set level to sample + 1, we will only upload that sample instead of
105           * overwriting all samples.
106           */
107          unsigned level_or_sample = tex1->nr_samples > 1 ? sample + 1 : level;
108 
109          map1 = pipe_texture_map_3d(ctx, tex1, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
110                                     num_layers, &t1);
111          map2 = pipe_texture_map_3d(ctx, tex2, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
112                                     num_layers, &t2);
113          assert(map1 && map2);
114          assert(t1->stride == t2->stride);
115 
116          for (z = 0; z < num_layers; z++) {
117             for (y = 0; y < num_y_blocks; y++) {
118                uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
119                uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
120                unsigned size = t1->stride / 8;
121 
122                assert(t1->stride % 8 == 0);
123                assert(t2->stride % 8 == 0);
124 
125                for (x = 0; x < size; x++) {
126                   *ptr1++ = *ptr2++ = rand_xorshift128plus(seed_xorshift128plus);
127                }
128             }
129          }
130 
131          pipe_texture_unmap(ctx, t1);
132          pipe_texture_unmap(ctx, t2);
133       }
134    }
135 }
136 
compare_textures(struct pipe_context * ctx,struct pipe_resource * tex,struct cpu_texture * cpu,unsigned level)137 static bool compare_textures(struct pipe_context *ctx, struct pipe_resource *tex,
138                              struct cpu_texture *cpu, unsigned level)
139 {
140    struct pipe_transfer *t;
141    uint8_t *map;
142    int y, z;
143    bool pass = true;
144    unsigned width = u_minify(tex->width0, level);
145    unsigned height = u_minify(tex->height0, level);
146    unsigned stride = util_format_get_stride(tex->format, width);
147    unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
148    unsigned num_layers = util_num_layers(tex, level);
149 
150    map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_READ, 0, 0, 0, width, height,
151                              num_layers, &t);
152    assert(map);
153 
154    for (z = 0; z < num_layers; z++) {
155       for (y = 0; y < num_y_blocks; y++) {
156          uint8_t *ptr = map + t->layer_stride * z + t->stride * y;
157          uint8_t *cpu_ptr = cpu->ptr + cpu->layer_stride * z + cpu->stride * y;
158 
159          if (memcmp(ptr, cpu_ptr, stride)) {
160             pass = false;
161             goto done;
162          }
163       }
164    }
165 done:
166    pipe_texture_unmap(ctx, t);
167    return pass;
168 }
169 
compare_gpu_textures(struct pipe_context * ctx,struct pipe_resource * tex1,struct pipe_resource * tex2)170 static bool compare_gpu_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
171                                  struct pipe_resource *tex2)
172 {
173    /* tex1 and tex2 are assumed to be the same size, format, and layout */
174    for (unsigned level = 0; level <= tex1->last_level; level++) {
175       struct pipe_transfer *t1, *t2;
176       uint8_t *map1, *map2;
177       unsigned width = u_minify(tex1->width0, level);
178       unsigned height = u_minify(tex1->height0, level);
179       unsigned stride = util_format_get_stride(tex1->format, width);
180       unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
181       unsigned num_layers = util_num_layers(tex1, level);
182 
183       map1 = pipe_texture_map_3d(ctx, tex1, level, PIPE_MAP_READ, 0, 0, 0, width, height,
184                                  num_layers, &t1);
185       map2 = pipe_texture_map_3d(ctx, tex2, level, PIPE_MAP_READ, 0, 0, 0, width, height,
186                                  num_layers, &t2);
187       assert(map1 && map2);
188       assert(t1->stride == t2->stride);
189 
190       for (unsigned z = 0; z < num_layers; z++) {
191          for (unsigned y = 0; y < num_y_blocks; y++) {
192             uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
193             uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
194 
195             assert(t1->stride % 8 == 0);
196             assert(t2->stride % 8 == 0);
197 
198             if (memcmp(ptr1, ptr2, stride)) {
199                pipe_texture_unmap(ctx, t1);
200                pipe_texture_unmap(ctx, t2);
201                return false;
202             }
203          }
204       }
205 
206       pipe_texture_unmap(ctx, t1);
207       pipe_texture_unmap(ctx, t2);
208    }
209 
210    return true;
211 }
212 
/* Filters applied by get_random_format() when picking a random format. */
struct si_format_options {
   bool only_resolve;     /* reject Z/S and pure-integer formats */
   bool allow_float;      /* if false, reject float and 128-bit color formats */
   bool allow_unorm16;    /* if false, reject color formats whose first channel is 16-bit UNORM */
   bool allow_srgb;       /* if false, reject formats in the sRGB colorspace */
   bool allow_x_channels; /* if false, reject plain formats that have a void (X) channel */
   bool allow_subsampled; /* if false, reject formats with the subsampled layout */
   bool allow_compressed; /* if false, reject formats whose block width is 4 or more */
};
222 
get_random_format(struct si_screen * sscreen,bool render_target,enum pipe_format color_or_zs,enum pipe_format res_format,enum pipe_format integer_or_not,const struct si_format_options * options)223 static enum pipe_format get_random_format(struct si_screen *sscreen, bool render_target,
224                                           enum pipe_format color_or_zs, /* must be color or Z/S */
225                                           enum pipe_format res_format,  /* must have the same bpp */
226                                           enum pipe_format integer_or_not, /* must be integer or non-integer */
227                                           const struct si_format_options *options)
228 {
229    /* Depth/stencil formats can only select Z/S using the blit mask, not via the view format. */
230    if (res_format != PIPE_FORMAT_NONE && util_format_is_depth_or_stencil(res_format))
231       return res_format;
232 
233    /* Keep generating formats until we get a supported one. */
234    while (1) {
235       /* Skip one format: PIPE_FORMAT_NONE */
236       enum pipe_format format = (rand() % (PIPE_FORMAT_COUNT - 1)) + 1;
237       const struct util_format_description *desc = util_format_description(format);
238 
239       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV ||
240           format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)
241          continue;
242 
243       if (!options->allow_srgb && desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
244          continue;
245 
246       if (!options->allow_subsampled && desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
247          continue;
248 
249       if (!options->allow_compressed && util_format_get_blockwidth(format) >= 4)
250          continue;
251 
252       if (color_or_zs != PIPE_FORMAT_NONE &&
253           (util_format_is_depth_or_stencil(color_or_zs) !=
254            util_format_is_depth_or_stencil(format)))
255          continue;
256 
257       if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
258          if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
259             /* Don't select stencil-only formats - we don't support them for rendering. */
260             if (util_format_has_stencil(desc) && !util_format_has_depth(desc))
261                continue;
262          }
263 
264          if (!options->allow_x_channels) {
265             unsigned i;
266 
267             /* Don't test formats with X channels because cpu_texture doesn't emulate them. */
268             for (i = 0; i < desc->nr_channels; i++) {
269                if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID)
270                   break;
271             }
272             if (i != desc->nr_channels)
273                continue;
274          }
275       }
276 
277       if (res_format != PIPE_FORMAT_NONE) {
278          /* If the resource format is Z/S, we handle it at the beginning of this function,
279           * so here res_format can only be a color format.
280           */
281          if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
282             continue;
283 
284          if (util_format_get_blocksize(res_format) != util_format_get_blocksize(format) ||
285              util_format_get_blockwidth(res_format) != util_format_get_blockwidth(format) ||
286              util_format_get_blockheight(res_format) != util_format_get_blockheight(format))
287             continue;
288       }
289 
290       if (integer_or_not != PIPE_FORMAT_NONE) {
291          /* The integer property must match between blit src/dst. */
292          if (util_format_is_pure_integer(integer_or_not) != util_format_is_pure_integer(format))
293             continue;
294       }
295 
296       if (options->only_resolve &&
297           (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS || util_format_is_pure_integer(format)))
298          continue;
299 
300       if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
301          /* Every integer format should have an equivalent non-integer format, but 128-bit integer
302           * formats don't have that if floats are disallowed, which can cause an infinite loop later
303           * if compat_type is non-integer.
304           */
305          if (!options->allow_float &&
306              (util_format_is_float(format) || util_format_get_blocksizebits(format) == 128))
307             continue;
308 
309          if (!options->allow_unorm16 &&
310              desc->channel[0].size == 16 && desc->channel[0].normalized &&
311              desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
312             continue;
313       }
314 
315       unsigned bind = PIPE_BIND_SAMPLER_VIEW;
316       if (render_target) {
317          if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
318             bind = PIPE_BIND_DEPTH_STENCIL;
319          else
320             bind = PIPE_BIND_RENDER_TARGET;
321       }
322 
323       if (sscreen->b.is_format_supported(&sscreen->b, format, PIPE_TEXTURE_2D, 1, 1, bind))
324          return format;
325    }
326 }
327 
328 #define MAX_ALLOC_SIZE (64 * 1024 * 1024)
329 
set_random_image_attrs(struct pipe_resource * templ,bool allow_msaa,bool only_cb_resolve)330 static void set_random_image_attrs(struct pipe_resource *templ, bool allow_msaa,
331                                    bool only_cb_resolve)
332 {
333    unsigned target_index;
334 
335    if (only_cb_resolve) {
336       target_index = 6; /* CB resolving doesn't support array textures. */
337    } else {
338       target_index = rand() % (allow_msaa ? 8 : 6);
339    }
340 
341    switch (target_index) {
342    case 0:
343       templ->target = PIPE_TEXTURE_1D;
344       break;
345    case 1:
346       templ->target = PIPE_TEXTURE_2D;
347       break;
348    case 2:
349       if (util_format_is_depth_or_stencil(templ->format))
350          templ->target = PIPE_TEXTURE_2D_ARRAY; /* 3D doesn't support Z/S */
351       else
352          templ->target = PIPE_TEXTURE_3D;
353       break;
354    case 3:
355       templ->target = PIPE_TEXTURE_RECT;
356       break;
357    case 4:
358       templ->target = PIPE_TEXTURE_1D_ARRAY;
359       break;
360    case 5:
361       templ->target = PIPE_TEXTURE_2D_ARRAY;
362       break;
363    case 6:
364       templ->target = PIPE_TEXTURE_2D;
365       templ->nr_samples = 2 << (rand() % 3);
366       break;
367    case 7:
368       templ->target = PIPE_TEXTURE_2D_ARRAY;
369       templ->nr_samples = 2 << (rand() % 3);
370       break;
371    default:
372       unreachable("invalid path");
373    }
374 
375    templ->usage = PIPE_USAGE_DEFAULT;
376 
377    templ->height0 = 1;
378    templ->depth0 = 1;
379    templ->array_size = 1;
380    templ->nr_storage_samples = templ->nr_samples;
381 
382    /* Try to hit microtiling in 1/2 of the cases. */
383    unsigned max_tex_size = rand() & 1 ? 128 : 1024;
384 
385    templ->width0 = (rand() % max_tex_size) + 1;
386 
387    if (templ->target != PIPE_TEXTURE_1D &&
388        templ->target != PIPE_TEXTURE_1D_ARRAY)
389       templ->height0 = (rand() % max_tex_size) + 1;
390 
391    if (templ->target == PIPE_TEXTURE_3D)
392       templ->depth0 = (rand() % max_tex_size) + 1;
393 
394    if (templ->target == PIPE_TEXTURE_1D_ARRAY ||
395        templ->target == PIPE_TEXTURE_2D_ARRAY)
396       templ->array_size = (rand() % max_tex_size) + 1;
397 
398    /* Keep reducing the size until it we get a small enough size. */
399    while ((uint64_t)util_format_get_nblocks(templ->format, templ->width0, templ->height0) *
400           templ->depth0 * templ->array_size * util_format_get_blocksize(templ->format) >
401           MAX_ALLOC_SIZE) {
402       switch (rand() % 3) {
403       case 0:
404          if (templ->width0 > 1)
405             templ->width0 /= 2;
406          break;
407       case 1:
408          if (templ->height0 > 1)
409             templ->height0 /= 2;
410          break;
411       case 2:
412          if (templ->depth0 > 1)
413             templ->depth0 /= 2;
414          else if (templ->array_size > 1)
415             templ->array_size /= 2;
416          break;
417       }
418    }
419 
420    if (util_format_get_blockwidth(templ->format) == 2)
421       templ->width0 = align(templ->width0, 2);
422 
423    if (templ->target != PIPE_TEXTURE_RECT &&
424        util_format_description(templ->format)->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
425       unsigned max_dim = MAX3(templ->width0, templ->height0, templ->depth0);
426 
427       if (templ->nr_samples <= 1)
428          templ->last_level = rand() % (util_logbase2(max_dim) + 1);
429    }
430 }
431 
print_image_attrs(struct si_screen * sscreen,struct si_texture * tex)432 static void print_image_attrs(struct si_screen *sscreen, struct si_texture *tex)
433 {
434    const char *mode;
435 
436    if (sscreen->info.gfx_level >= GFX9) {
437       static const char *modes[32] = {
438          [ADDR_SW_LINEAR] = "LINEAR",
439          [ADDR_SW_4KB_S_X] = "4KB_S_X",
440          [ADDR_SW_4KB_D_X] = "4KB_D_X",
441          [ADDR_SW_64KB_Z_X] = "64KB_Z_X",
442          [ADDR_SW_64KB_S_X] = "64KB_S_X",
443          [ADDR_SW_64KB_D_X] = "64KB_D_X",
444          [ADDR_SW_64KB_R_X] = "64KB_R_X",
445       };
446       mode = modes[tex->surface.u.gfx9.swizzle_mode];
447    } else {
448       static const char *modes[32] = {
449          [RADEON_SURF_MODE_LINEAR_ALIGNED] = "LINEAR",
450          [RADEON_SURF_MODE_1D] = "1D_TILED",
451          [RADEON_SURF_MODE_2D] = "2D_TILED",
452       };
453       mode = modes[tex->surface.u.legacy.level[0].mode];
454    }
455 
456    if (!mode)
457       mode = "UNKNOWN";
458 
459    static const char *targets[PIPE_MAX_TEXTURE_TYPES] = {
460       [PIPE_TEXTURE_1D] = "1D",
461       [PIPE_TEXTURE_2D] = "2D",
462       [PIPE_TEXTURE_3D] = "3D",
463       [PIPE_TEXTURE_RECT] = "RECT",
464       [PIPE_TEXTURE_1D_ARRAY] = "1D_ARRAY",
465       [PIPE_TEXTURE_2D_ARRAY] = "2D_ARRAY",
466    };
467 
468    char size[64];
469    if (tex->buffer.b.b.target == PIPE_TEXTURE_1D)
470       snprintf(size, sizeof(size), "%u", tex->buffer.b.b.width0);
471    else if (tex->buffer.b.b.target == PIPE_TEXTURE_2D ||
472             tex->buffer.b.b.target == PIPE_TEXTURE_RECT)
473       snprintf(size, sizeof(size), "%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0);
474    else
475       snprintf(size, sizeof(size), "%ux%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0,
476                util_num_layers(&tex->buffer.b.b, 0));
477 
478    printf("%8s, %14s, %2u %7s, %8s", targets[tex->buffer.b.b.target], size,
479           tex->buffer.b.b.nr_samples > 1 ? tex->buffer.b.b.nr_samples : tex->buffer.b.b.last_level + 1,
480           tex->buffer.b.b.nr_samples > 1 ? "samples" : "levels", mode);
481 }
482 
si_test_image_copy_region(struct si_screen * sscreen)483 void si_test_image_copy_region(struct si_screen *sscreen)
484 {
485    struct pipe_screen *screen = &sscreen->b;
486    struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
487    struct si_context *sctx = (struct si_context *)ctx;
488    unsigned i, iterations, num_partial_copies;
489    unsigned num_pass = 0, num_fail = 0;
490 
491    /* the seed for random test parameters */
492    srand(0x9b47d95b);
493    /* the seed for random pixel data */
494    s_rand_xorshift128plus(seed_xorshift128plus, false);
495 
496    iterations = 1000000000; /* just kill it when you are bored */
497    num_partial_copies = 30;
498 
499    /* These parameters are randomly generated per test:
500     * - which texture dimensions to use
501     * - random initial pixels in src
502     * - execute multiple subrectangle copies for partial blits
503     */
504    for (i = 0; i < iterations; i++) {
505       struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
506       struct si_texture *sdst;
507       struct si_texture *ssrc;
508       struct cpu_texture src_cpu[RADEON_SURF_MAX_LEVELS], dst_cpu[RADEON_SURF_MAX_LEVELS];
509       unsigned max_width, max_height, max_depth, j;
510       unsigned gfx_blits = 0, cs_blits = 0;
511       bool pass;
512 
513       /* generate a random test case */
514       struct si_format_options format_options = {
515          .only_resolve = false,
516          .allow_float = true,
517          .allow_unorm16 = true,
518          .allow_x_channels = false, /* cpu_texture doesn't implement X channels */
519          .allow_subsampled = false, /* TODO: fix subsampled formats */
520          .allow_compressed = false, /* TODO: fix compressed formats */
521       };
522 
523       tsrc.format = tdst.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);
524 
525       /* MSAA copy testing not implemented and might be too difficult because of how
526        * cpu_texture works.
527        */
528       set_random_image_attrs(&tsrc, false, false);
529       set_random_image_attrs(&tdst, false, false);
530 
531       /* Allocate textures (both the GPU and CPU copies).
532        * The CPU will emulate what the GPU should be doing.
533        */
534       src = screen->resource_create(screen, &tsrc);
535       dst = screen->resource_create(screen, &tdst);
536       assert(src);
537       assert(dst);
538       sdst = (struct si_texture *)dst;
539       ssrc = (struct si_texture *)src;
540 
541       printf("%4u: dst = (", i);
542       print_image_attrs(sscreen, sdst);
543       printf("), src = (");
544       print_image_attrs(sscreen, ssrc);
545       printf("), format = %20s, ", util_format_description(tsrc.format)->short_name);
546       fflush(stdout);
547 
548       for (unsigned level = 0; level <= tsrc.last_level; level++) {
549          alloc_cpu_texture(&src_cpu[level], &tsrc, level);
550          set_random_pixels(ctx, src, &src_cpu[level], level);
551       }
552       for (unsigned level = 0; level <= tdst.last_level; level++) {
553          alloc_cpu_texture(&dst_cpu[level], &tdst, level);
554          memset(dst_cpu[level].ptr, 0, dst_cpu[level].layer_stride * util_num_layers(&tdst, level));
555       }
556 
557       /* clear dst pixels */
558       uint32_t zero = 0;
559       si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, SI_OP_SYNC_BEFORE_AFTER,
560                       SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
561 
562       for (j = 0; j < num_partial_copies; j++) {
563          int width, height, depth;
564          int srcx, srcy, srcz, dstx, dsty, dstz;
565          struct pipe_box box;
566          unsigned old_num_draw_calls = sctx->num_draw_calls;
567          unsigned old_num_cs_calls = sctx->num_compute_calls;
568 
569          unsigned src_level = j % (tsrc.last_level + 1);
570          unsigned dst_level = j % (tdst.last_level + 1);
571 
572          max_width = MIN2(u_minify(tsrc.width0, src_level), u_minify(tdst.width0, dst_level));
573          max_height = MIN2(u_minify(tsrc.height0, src_level), u_minify(tdst.height0, dst_level));
574          max_depth = MIN2(util_num_layers(&tsrc, src_level), util_num_layers(&tdst, dst_level));
575 
576          /* random sub-rectangle copies from src to dst */
577          depth = (rand() % max_depth) + 1;
578          srcz = rand() % (util_num_layers(&tsrc, src_level) - depth + 1);
579          dstz = rand() % (util_num_layers(&tdst, dst_level) - depth + 1);
580 
581          /* just make sure that it doesn't divide by zero */
582          assert(max_width > 0 && max_height > 0);
583 
584          width = (rand() % max_width) + 1;
585          height = (rand() % max_height) + 1;
586 
587          srcx = rand() % (u_minify(tsrc.width0, src_level) - width + 1);
588          srcy = rand() % (u_minify(tsrc.height0, src_level) - height + 1);
589 
590          dstx = rand() % (u_minify(tdst.width0, dst_level) - width + 1);
591          dsty = rand() % (u_minify(tdst.height0, dst_level) - height + 1);
592 
593          /* Align the box to the format block size. */
594          srcx &= ~(util_format_get_blockwidth(src->format) - 1);
595          srcy &= ~(util_format_get_blockheight(src->format) - 1);
596 
597          dstx &= ~(util_format_get_blockwidth(dst->format) - 1);
598          dsty &= ~(util_format_get_blockheight(dst->format) - 1);
599 
600          width = align(width, util_format_get_blockwidth(src->format));
601          height = align(height, util_format_get_blockheight(src->format));
602 
603          /* GPU copy */
604          u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
605          si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, &box);
606 
607          /* See which engine was used. */
608          gfx_blits += sctx->num_draw_calls > old_num_draw_calls;
609          cs_blits += sctx->num_compute_calls > old_num_cs_calls;
610 
611          /* CPU copy */
612          util_copy_box(dst_cpu[dst_level].ptr, tdst.format, dst_cpu[dst_level].stride,
613                        dst_cpu[dst_level].layer_stride, dstx, dsty, dstz,
614                        width, height, depth, src_cpu[src_level].ptr, src_cpu[src_level].stride,
615                        src_cpu[src_level].layer_stride, srcx, srcy, srcz);
616       }
617 
618       pass = true;
619       for (unsigned level = 0; level <= tdst.last_level; level++)
620          pass &= compare_textures(ctx, dst, &dst_cpu[level], level);
621 
622       if (pass)
623          num_pass++;
624       else
625          num_fail++;
626 
627       printf("BLITs: GFX = %2u, CS = %2u, %s [%u/%u]\n", gfx_blits, cs_blits,
628              pass ? "pass" : "fail", num_pass, num_pass + num_fail);
629 
630       /* cleanup */
631       pipe_resource_reference(&src, NULL);
632       pipe_resource_reference(&dst, NULL);
633       for (unsigned level = 0; level <= tsrc.last_level; level++)
634          free(src_cpu[level].ptr);
635       for (unsigned level = 0; level <= tdst.last_level; level++)
636          free(dst_cpu[level].ptr);
637    }
638 
639    ctx->destroy(ctx);
640    exit(0);
641 }
642 
si_test_blit(struct si_screen * sscreen,unsigned test_flags)643 void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
644 {
645    struct pipe_screen *screen = &sscreen->b;
646    struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
647    struct si_context *sctx = (struct si_context *)ctx;
648    unsigned iterations;
649    unsigned num_pass = 0, num_fail = 0;
650    bool only_cb_resolve = test_flags == DBG(TEST_CB_RESOLVE);
651 
652    bool allow_float = false;
653    bool allow_unorm16_dst = false;
654    bool allow_srgb_dst = false;
655    bool allow_filter = false;
656    bool allow_scaled_min = false;
657    bool allow_scaled_mag = false;
658    bool allow_out_of_bounds_dst = false;
659    bool allow_out_of_bounds_src = false;
660    bool allow_scissor = false;
661    bool allow_flip = false;
662 
663    /* The following tests always compare the tested operation with the gfx blit (u_blitter). */
664    switch (test_flags) {
665    case DBG(TEST_CB_RESOLVE):
666       /* This is mostly failing because the precision of CB_RESOLVE is very different
667        * from the gfx blit. FP32 and FP16 are the only formats that mostly pass.
668        */
669       allow_float = true;
670       allow_unorm16_dst = true;
671       allow_srgb_dst = true;
672       break;
673 
674    case DBG(TEST_COMPUTE_BLIT):
675       //allow_float = true;      /* precision difference: NaNs not preserved by CB (u_blitter) */
676       allow_unorm16_dst = true;
677       //allow_srgb_dst = true;   /* precision difference: sRGB is less precise in CB (u_blitter) */
678       //allow_filter = true;     /* not implemented by compute blits, lots of precision differences */
679       //allow_scaled_min = true; /* not implemented by compute blits, lots of precision differences */
680       //allow_scaled_mag = true; /* not implemented by compute blits, lots of precision differences */
681       allow_out_of_bounds_dst = true;
682       allow_out_of_bounds_src = true;
683       //allow_scissor = true;    /* not implemented by compute blits */
684       allow_flip = true;
685       break;
686 
687    default:
688       assert(0);
689    }
690 
691    /* the seed for random test parameters */
692    srand(0x9b47d95b);
693    /* the seed for random pixel data */
694    s_rand_xorshift128plus(seed_xorshift128plus, false);
695 
696    iterations = 10000000; /* just kill it when you are bored */
697 
698    /* These parameters are randomly generated per test:
699     * - which texture dimensions to use
700     * - random initial pixels in src
701     * - random pipe_blit_info
702     */
703    for (unsigned i = 0; i < iterations; i++) {
704       struct pipe_resource tsrc = {}, tdst = {}, *gfx_src, *gfx_dst, *comp_src, *comp_dst;
705 
706       /* Generate a random test case. */
707       {
708          struct si_format_options format_options = {
709             .only_resolve = only_cb_resolve,
710             .allow_float = allow_float,
711             .allow_unorm16 = true,
712             .allow_srgb = true,
713             .allow_x_channels = true,
714             .allow_subsampled = false, /* TODO: fix subsampled formats */
715             .allow_compressed = false, /* TODO: fix compressed formats */
716          };
717 
718          tsrc.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);
719          tdst.format = get_random_format(sscreen, true, tsrc.format, 0, 0, &format_options);
720       }
721 
722       set_random_image_attrs(&tsrc, true, only_cb_resolve);
723       set_random_image_attrs(&tdst, !only_cb_resolve, false);
724 
725       /* MSAA blits must have matching sample counts. */
726       if (tsrc.nr_samples > 1 && tdst.nr_samples > 1)
727          tdst.nr_samples = tdst.nr_storage_samples = tsrc.nr_samples;
728 
729       /* Allocate textures. */
730       gfx_src = screen->resource_create(screen, &tsrc);
731       gfx_dst = screen->resource_create(screen, &tdst);
732       comp_src = screen->resource_create(screen, &tsrc);
733       comp_dst = screen->resource_create(screen, &tdst);
734 
735       /* clear dst pixels */
736       uint32_t zero = 0;
737       si_clear_buffer(sctx, gfx_dst, 0, ((struct si_texture *)gfx_dst)->surface.surf_size, &zero,
738                       4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
739       si_clear_buffer(sctx, comp_dst, 0, ((struct si_texture *)comp_dst)->surface.surf_size, &zero,
740                       4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
741 
742       /* TODO: These two fix quite a lot of BCn cases. */
743       /*si_clear_buffer(sctx, gfx_src, 0, ((struct si_texture *)gfx_src)->surface.surf_size, &zero,
744                       4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
745       si_clear_buffer(sctx, comp_src, 0, ((struct si_texture *)comp_src)->surface.surf_size, &zero,
746                       4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);*/
747 
748       set_random_pixels_for_2_textures(ctx, gfx_src, comp_src);
749 
750       struct pipe_blit_info info;
751       memset(&info, 0, sizeof(info));
752 
753       {
754          struct si_format_options format_options = {
755             .only_resolve = only_cb_resolve,
756             .allow_float = allow_float,
757             .allow_unorm16 = true,
758             .allow_srgb = true,
759             .allow_x_channels = true,
760             .allow_subsampled = false, /* TODO: fix subsampled formats */
761             .allow_compressed = false, /* TODO: fix compressed formats */
762          };
763 
764          info.src.format = get_random_format(sscreen, false, 0, tsrc.format, 0, &format_options);
765          format_options.allow_unorm16 = allow_unorm16_dst;
766          format_options.allow_srgb = allow_srgb_dst;
767          info.dst.format = get_random_format(sscreen, true, 0, tdst.format, info.src.format,
768                                              &format_options);
769       }
770 
771       printf("%4u: dst = (", i);
772       print_image_attrs(sscreen, (struct si_texture *)gfx_dst);
773       printf(", %20s as %20s), src = (",
774              util_format_description(tdst.format)->short_name,
775              util_format_short_name(info.dst.format));
776       print_image_attrs(sscreen, (struct si_texture *)gfx_src);
777       printf(", %20s as %20s)",
778              util_format_description(tsrc.format)->short_name,
779              util_format_short_name(info.src.format));
780       fflush(stdout);
781 
782       int src_width, src_height, src_depth, dst_width, dst_height, dst_depth;
783       int srcx, srcy, srcz, dstx, dsty, dstz;
784 
785       unsigned src_level = rand() % (tsrc.last_level + 1);
786       unsigned dst_level = rand() % (tdst.last_level + 1);
787 
788       unsigned max_src_width = u_minify(tsrc.width0, src_level);
789       unsigned max_src_height = u_minify(tsrc.height0, src_level);
790       unsigned max_src_depth = util_num_layers(&tsrc, src_level);
791 
792       unsigned max_dst_width = u_minify(tdst.width0, dst_level);
793       unsigned max_dst_height = u_minify(tdst.height0, dst_level);
794       unsigned max_dst_depth = util_num_layers(&tdst, dst_level);
795 
796       /* make sure that it doesn't divide by zero */
797       assert(max_src_width && max_src_height && max_src_depth &&
798              max_dst_width && max_dst_height && max_dst_depth);
799 
800       /* random sub-rectangle copies from src to dst */
801       src_width = (rand() % max_src_width) + 1;
802       src_height = (rand() % max_src_height) + 1;
803       src_depth = (rand() % max_src_depth) + 1;
804 
805       dst_width = (rand() % max_dst_width) + 1;
806       dst_height = (rand() % max_dst_height) + 1;
807       dst_depth = (rand() % max_dst_depth) + 1;
808 
809       srcx = rand() % (u_minify(tsrc.width0, src_level) - src_width + 1);
810       srcy = rand() % (u_minify(tsrc.height0, src_level) - src_height + 1);
811       srcz = rand() % (util_num_layers(&tsrc, src_level) - src_depth + 1);
812 
813       dstx = rand() % (u_minify(tdst.width0, dst_level) - dst_width + 1);
814       dsty = rand() % (u_minify(tdst.height0, dst_level) - dst_height + 1);
815       dstz = rand() % (util_num_layers(&tdst, dst_level) - dst_depth + 1);
816 
817       /* Test out-of-bounds boxes. Add -dim/10 .. +dim/10 */
818       if (allow_out_of_bounds_src) {
819          if (max_src_width / 5 >= 2)
820             srcx += rand() % (max_src_width / 5) - max_src_width / 10;
821          if (max_src_height / 5 >= 2)
822             srcy += rand() % (max_src_height / 5) - max_src_height / 10;
823       }
824 
825       if (allow_out_of_bounds_dst) {
826          if (max_dst_width / 5 >= 2)
827             dstx += rand() % (max_dst_width / 5) - max_dst_width / 10;
828          if (max_dst_height / 5 >= 2)
829             dsty += rand() % (max_dst_height / 5) - max_dst_height / 10;
830       }
831 
832       /* Align the box to the format block size. */
833       srcx &= ~(util_format_get_blockwidth(tsrc.format) - 1);
834       srcy &= ~(util_format_get_blockheight(tsrc.format) - 1);
835 
836       dstx &= ~(util_format_get_blockwidth(tdst.format) - 1);
837       dsty &= ~(util_format_get_blockheight(tdst.format) - 1);
838 
839       src_width = align(src_width, util_format_get_blockwidth(tsrc.format));
840       src_height = align(src_height, util_format_get_blockheight(tsrc.format));
841 
842       dst_width = align(dst_width, util_format_get_blockwidth(tdst.format));
843       dst_height = align(dst_height, util_format_get_blockheight(tdst.format));
844 
845       if (!allow_scaled_min) {
846          if (src_width > dst_width)
847             src_width = dst_width;
848          if (src_height > dst_height)
849             src_height = dst_height;
850          if (src_depth > dst_depth)
851             src_depth = dst_depth;
852       }
853 
854       if (!allow_scaled_mag) {
855          if (src_width < dst_width)
856             dst_width = src_width;
857          if (src_height < dst_height)
858             dst_height = src_height;
859          if (src_depth < dst_depth)
860             dst_depth = src_depth;
861       }
862 
863       /* Flips */
864       if (allow_flip) {
865          if (rand() % 2) {
866             srcx += src_width;
867             src_width = -src_width;
868          }
869          if (rand() % 2) {
870             srcy += src_height;
871             src_height = -src_height;
872          }
873       }
874 
875       info.src.level = src_level;
876       info.dst.level = dst_level;
877 
878       u_box_3d(srcx, srcy, srcz, src_width, src_height, src_depth, &info.src.box);
879       u_box_3d(dstx, dsty, dstz, dst_width, dst_height, dst_depth, &info.dst.box);
880 
881       if (util_format_is_depth_and_stencil(tsrc.format)) {
882          switch (rand() % 3) {
883          case 0:
884             info.mask = PIPE_MASK_ZS;
885             break;
886          case 1:
887             info.mask = PIPE_MASK_Z;
888             break;
889          case 2:
890             info.mask = PIPE_MASK_S;
891             break;
892          }
893       } else {
894          /* RGBA, Z, or S */
895          info.mask = util_format_get_mask(tdst.format);
896       }
897 
898       /* Don't filter MSAA and integer sources. */
899       if (allow_filter && tsrc.nr_samples <= 1 &&
900           !util_format_is_pure_integer(info.src.format) && rand() % 2)
901          info.filter = PIPE_TEX_FILTER_LINEAR;
902       else
903          info.filter = PIPE_TEX_FILTER_NEAREST;
904 
905       info.scissor_enable = allow_scissor ? rand() % 2 : false;
906 
907       if (info.scissor_enable) {
908          info.scissor.minx = MAX2(MIN2(info.dst.box.x, info.dst.box.x + info.dst.box.width), 0);
909          info.scissor.miny = MAX2(MIN2(info.dst.box.y, info.dst.box.y + info.dst.box.height), 0);
910          info.scissor.maxx = MIN2(MAX2(info.dst.box.x, info.dst.box.x + info.dst.box.width), UINT16_MAX);
911          info.scissor.maxy = MIN2(MAX2(info.dst.box.y, info.dst.box.y + info.dst.box.height), UINT16_MAX);
912 
913          if (abs(info.dst.box.width) / 2 >= 2) {
914             info.scissor.minx += rand() % (abs(info.dst.box.width) / 2);
915             info.scissor.maxx -= rand() % (abs(info.dst.box.width) / 2);
916          }
917          if (abs(info.dst.box.height) / 2 >= 2) {
918             info.scissor.miny += rand() % (abs(info.dst.box.height) / 2);
919             info.scissor.maxy -= rand() % (abs(info.dst.box.height) / 2);
920          }
921       }
922 
923       char dstbox_s[128], srcbox_s[128], scissor[128];
924 
925       snprintf(dstbox_s, sizeof(dstbox_s), "{%ix%ix%i .. %ix%ix%i}",
926                info.dst.box.x, info.dst.box.y, info.dst.box.z,
927                info.dst.box.width, info.dst.box.height, info.dst.box.depth);
928       snprintf(srcbox_s, sizeof(srcbox_s), "{%ix%ix%i .. %ix%ix%i}",
929                info.src.box.x, info.src.box.y, info.src.box.z,
930                info.src.box.width, info.src.box.height, info.src.box.depth);
931       if (info.scissor_enable) {
932          snprintf(scissor, sizeof(scissor), "(%u..%u, %u..%u)",
933                   info.scissor.minx, info.scissor.maxx, info.scissor.miny, info.scissor.maxy);
934       } else {
935          snprintf(scissor, sizeof(scissor), "(none)");
936       }
937 
938       printf(", filter %u, mask 0x%02x, ", info.filter, info.mask);
939       printf("dst(level %u, box = %-28s), ", info.dst.level, dstbox_s);
940       printf("src(level %u, box = %-28s), ", info.src.level, srcbox_s);
941       printf("scissor%-20s", scissor);
942 
943       /* Blits. */
944       info.src.resource = gfx_src;
945       info.dst.resource = gfx_dst;
946       si_gfx_blit(ctx, &info);
947 
948       info.src.resource = comp_src;
949       info.dst.resource = comp_dst;
950 
951       bool success;
952       if (only_cb_resolve)
953          success = si_msaa_resolve_blit_via_CB(ctx, &info);
954       else
955          success = false;
956 
957       if (success) {
958          printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
959       } else {
960          si_gfx_blit(ctx, &info);
961          printf(" %-7s", "gfx");
962       }
963 
964       bool pass = compare_gpu_textures(ctx, gfx_dst, comp_dst);
965       if (pass)
966          num_pass++;
967       else
968          num_fail++;
969 
970       printf(" %s [%u/%u]\n", pass ? "pass" : "fail", num_pass, num_pass + num_fail);
971 
972       /* cleanup */
973       pipe_resource_reference(&gfx_src, NULL);
974       pipe_resource_reference(&gfx_dst, NULL);
975       pipe_resource_reference(&comp_src, NULL);
976       pipe_resource_reference(&comp_dst, NULL);
977    }
978 
979    ctx->destroy(ctx);
980    exit(0);
981 }
982