/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/* This file implements randomized texture blit tests. */

#include "si_pipe.h"
#include "util/rand_xor.h"
#include "util/u_surface.h"
#include "amd/addrlib/inc/addrtypes.h"

static uint64_t seed_xorshift128plus[2];

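/* Each rand_xorshift128plus() call yields 8 random bytes, so CPU texture strides
 * are aligned to this size and pixels are written 64 bits at a time.
 */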
#define RAND_NUM_SIZE 8

/* The GPU blits are emulated on the CPU using these CPU textures. */

struct cpu_texture {
   uint8_t *ptr;
   uint64_t size;
   uint64_t layer_stride;
   unsigned stride;
};
static void alloc_cpu_texture(struct cpu_texture *tex, struct pipe_resource *templ, unsigned level)
{
   unsigned width = u_minify(templ->width0, level);
   unsigned height = u_minify(templ->height0, level);

   tex->stride = align(util_format_get_stride(templ->format, width), RAND_NUM_SIZE);
   tex->layer_stride = util_format_get_2d_size(templ->format, tex->stride, height);
   tex->size = tex->layer_stride * util_num_layers(templ, level);
   tex->ptr = malloc(tex->size);
   assert(tex->ptr);
}
static void set_random_pixels(struct pipe_context *ctx, struct pipe_resource *tex,
                              struct cpu_texture *cpu, unsigned level)
{
   struct pipe_transfer *t;
   uint8_t *map;
   int x, y, z;
   unsigned width = u_minify(tex->width0, level);
   unsigned height = u_minify(tex->height0, level);
   unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
   unsigned num_layers = util_num_layers(tex, level);

   map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_WRITE, 0, 0, 0, width, height,
                             num_layers, &t);
   assert(map);

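   /* Fill the GPU texture and its CPU copy with identical random data, 8 bytes at a time. */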
   for (z = 0; z < num_layers; z++) {
      for (y = 0; y < num_y_blocks; y++) {
         uint64_t *ptr = (uint64_t *)(map + t->layer_stride * z + t->stride * y);
         uint64_t *ptr_cpu = (uint64_t *)(cpu->ptr + cpu->layer_stride * z + cpu->stride * y);
         unsigned size = cpu->stride / RAND_NUM_SIZE;

         assert(t->stride % RAND_NUM_SIZE == 0);
         assert(cpu->stride % RAND_NUM_SIZE == 0);

         for (x = 0; x < size; x++) {
            *ptr++ = *ptr_cpu++ = rand_xorshift128plus(seed_xorshift128plus);
         }
      }
   }

   pipe_texture_unmap(ctx, t);
}

static void set_random_pixels_for_2_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
                                             struct pipe_resource *tex2)
{
   /* tex1 and tex2 are assumed to be the same size, format, and layout */
   for (unsigned level = 0; level <= tex1->last_level; level++) {
      for (unsigned sample = 0; sample < MAX2(tex1->nr_samples, 1); sample++) {
         struct pipe_transfer *t1, *t2;
         uint8_t *map1, *map2;
         int x, y, z;
         unsigned width = align(u_minify(tex1->width0, level), util_format_get_blockwidth(tex1->format));
         unsigned height = align(u_minify(tex1->height0, level), util_format_get_blockheight(tex1->format));
         unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
         unsigned num_layers = util_num_layers(tex1, level);
         /* If we set the level to sample + 1, we only upload that sample instead of
          * overwriting all samples.
          */
         unsigned level_or_sample = tex1->nr_samples > 1 ? sample + 1 : level;

         map1 = pipe_texture_map_3d(ctx, tex1, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
                                    num_layers, &t1);
         map2 = pipe_texture_map_3d(ctx, tex2, level_or_sample, PIPE_MAP_WRITE, 0, 0, 0, width, height,
                                    num_layers, &t2);
         assert(map1 && map2);
         assert(t1->stride == t2->stride);

         for (z = 0; z < num_layers; z++) {
            for (y = 0; y < num_y_blocks; y++) {
               uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
               uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);
               unsigned size = t1->stride / 8;

               assert(t1->stride % 8 == 0);
               assert(t2->stride % 8 == 0);

               for (x = 0; x < size; x++) {
                  *ptr1++ = *ptr2++ = rand_xorshift128plus(seed_xorshift128plus);
               }
            }
         }

         pipe_texture_unmap(ctx, t1);
         pipe_texture_unmap(ctx, t2);
      }
   }
}

static bool compare_textures(struct pipe_context *ctx, struct pipe_resource *tex,
                             struct cpu_texture *cpu, unsigned level)
{
   struct pipe_transfer *t;
   uint8_t *map;
   int y, z;
   bool pass = true;
   unsigned width = u_minify(tex->width0, level);
   unsigned height = u_minify(tex->height0, level);
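   /* Compare only the unpadded row contents; the GPU and CPU row strides may differ. */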
   unsigned stride = util_format_get_stride(tex->format, width);
   unsigned num_y_blocks = util_format_get_nblocksy(tex->format, height);
   unsigned num_layers = util_num_layers(tex, level);

   map = pipe_texture_map_3d(ctx, tex, level, PIPE_MAP_READ, 0, 0, 0, width, height,
                             num_layers, &t);
   assert(map);

   for (z = 0; z < num_layers; z++) {
      for (y = 0; y < num_y_blocks; y++) {
         uint8_t *ptr = map + t->layer_stride * z + t->stride * y;
         uint8_t *cpu_ptr = cpu->ptr + cpu->layer_stride * z + cpu->stride * y;

         if (memcmp(ptr, cpu_ptr, stride)) {
            pass = false;
            goto done;
         }
      }
   }
done:
   pipe_texture_unmap(ctx, t);
   return pass;
}

static bool compare_gpu_textures(struct pipe_context *ctx, struct pipe_resource *tex1,
                                 struct pipe_resource *tex2)
{
   /* tex1 and tex2 are assumed to be the same size, format, and layout */
   for (unsigned level = 0; level <= tex1->last_level; level++) {
      struct pipe_transfer *t1, *t2;
      uint8_t *map1, *map2;
      unsigned width = u_minify(tex1->width0, level);
      unsigned height = u_minify(tex1->height0, level);
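      /* Only the unpadded row contents are compared, so row padding is ignored. */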
      unsigned stride = util_format_get_stride(tex1->format, width);
      unsigned num_y_blocks = util_format_get_nblocksy(tex1->format, height);
      unsigned num_layers = util_num_layers(tex1, level);

      map1 = pipe_texture_map_3d(ctx, tex1, level, PIPE_MAP_READ, 0, 0, 0, width, height,
                                 num_layers, &t1);
      map2 = pipe_texture_map_3d(ctx, tex2, level, PIPE_MAP_READ, 0, 0, 0, width, height,
                                 num_layers, &t2);
      assert(map1 && map2);
      assert(t1->stride == t2->stride);

      for (unsigned z = 0; z < num_layers; z++) {
         for (unsigned y = 0; y < num_y_blocks; y++) {
            uint64_t *ptr1 = (uint64_t *)(map1 + t1->layer_stride * z + t1->stride * y);
            uint64_t *ptr2 = (uint64_t *)(map2 + t2->layer_stride * z + t2->stride * y);

            assert(t1->stride % 8 == 0);
            assert(t2->stride % 8 == 0);

            if (memcmp(ptr1, ptr2, stride)) {
               pipe_texture_unmap(ctx, t1);
               pipe_texture_unmap(ctx, t2);
               return false;
            }
         }
      }

      pipe_texture_unmap(ctx, t1);
      pipe_texture_unmap(ctx, t2);
   }

   return true;
}

struct si_format_options {
   bool only_resolve;
   bool allow_float;
   bool allow_unorm16;
   bool allow_srgb;
   bool allow_x_channels;
   bool allow_subsampled;
   bool allow_compressed;
};

static enum pipe_format get_random_format(struct si_screen *sscreen, bool render_target,
                                          enum pipe_format color_or_zs, /* must be color or Z/S */
                                          enum pipe_format res_format, /* must have the same bpp */
                                          enum pipe_format integer_or_not, /* must be integer or non-integer */
                                          const struct si_format_options *options)
{
   /* Depth/stencil formats can only select Z/S using the blit mask, not via the view format. */
   if (res_format != PIPE_FORMAT_NONE && util_format_is_depth_or_stencil(res_format))
      return res_format;

   /* Keep generating formats until we get a supported one. */
   while (1) {
      /* Skip one format: PIPE_FORMAT_NONE */
      enum pipe_format format = (rand() % (PIPE_FORMAT_COUNT - 1)) + 1;
      const struct util_format_description *desc = util_format_description(format);

      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV ||
          format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)
         continue;

      if (!options->allow_srgb && desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
         continue;

      if (!options->allow_subsampled && desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
         continue;

      if (!options->allow_compressed && util_format_get_blockwidth(format) >= 4)
         continue;

      if (color_or_zs != PIPE_FORMAT_NONE &&
          (util_format_is_depth_or_stencil(color_or_zs) !=
           util_format_is_depth_or_stencil(format)))
         continue;

      if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
            /* Don't select stencil-only formats - we don't support them for rendering. */
            if (util_format_has_stencil(desc) && !util_format_has_depth(desc))
               continue;
         }

         if (!options->allow_x_channels) {
            unsigned i;

            /* Don't test formats with X channels because cpu_texture doesn't emulate them. */
            for (i = 0; i < desc->nr_channels; i++) {
               if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID)
                  break;
            }
            if (i != desc->nr_channels)
               continue;
         }
      }

      if (res_format != PIPE_FORMAT_NONE) {
         /* If the resource format is Z/S, we handle it at the beginning of this function,
          * so here res_format can only be a color format.
          */
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
            continue;

         if (util_format_get_blocksize(res_format) != util_format_get_blocksize(format) ||
             util_format_get_blockwidth(res_format) != util_format_get_blockwidth(format) ||
             util_format_get_blockheight(res_format) != util_format_get_blockheight(format))
            continue;
      }

      if (integer_or_not != PIPE_FORMAT_NONE) {
         /* The integer property must match between blit src/dst. */
         if (util_format_is_pure_integer(integer_or_not) != util_format_is_pure_integer(format))
            continue;
      }

      if (options->only_resolve &&
          (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS || util_format_is_pure_integer(format)))
         continue;

      if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
         /* Every integer format should have an equivalent non-integer format, but 128-bit
          * integer formats don't have one if floats are disallowed, which can cause an
          * infinite loop later if integer_or_not is a non-integer format.
          */
         if (!options->allow_float &&
             (util_format_is_float(format) || util_format_get_blocksizebits(format) == 128))
            continue;

         if (!options->allow_unorm16 &&
             desc->channel[0].size == 16 && desc->channel[0].normalized &&
             desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
            continue;
      }

      unsigned bind = PIPE_BIND_SAMPLER_VIEW;
      if (render_target) {
         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
            bind = PIPE_BIND_DEPTH_STENCIL;
         else
            bind = PIPE_BIND_RENDER_TARGET;
      }

      if (sscreen->b.is_format_supported(&sscreen->b, format, PIPE_TEXTURE_2D, 1, 1, bind))
         return format;
   }
}

#define MAX_ALLOC_SIZE (64 * 1024 * 1024)

static void set_random_image_attrs(struct pipe_resource *templ, bool allow_msaa,
                                   bool only_cb_resolve)
{
   unsigned target_index;

   if (only_cb_resolve) {
      target_index = 6; /* CB resolving doesn't support array textures. */
   } else {
      target_index = rand() % (allow_msaa ? 8 : 6);
   }

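   /* Pick a random texture target; indices 6 and 7 select the MSAA targets (2, 4, or 8 samples). */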
   switch (target_index) {
   case 0:
      templ->target = PIPE_TEXTURE_1D;
      break;
   case 1:
      templ->target = PIPE_TEXTURE_2D;
      break;
   case 2:
      if (util_format_is_depth_or_stencil(templ->format))
         templ->target = PIPE_TEXTURE_2D_ARRAY; /* 3D doesn't support Z/S */
      else
         templ->target = PIPE_TEXTURE_3D;
      break;
   case 3:
      templ->target = PIPE_TEXTURE_RECT;
      break;
   case 4:
      templ->target = PIPE_TEXTURE_1D_ARRAY;
      break;
   case 5:
      templ->target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case 6:
      templ->target = PIPE_TEXTURE_2D;
      templ->nr_samples = 2 << (rand() % 3);
      break;
   case 7:
      templ->target = PIPE_TEXTURE_2D_ARRAY;
      templ->nr_samples = 2 << (rand() % 3);
      break;
   default:
      unreachable("invalid path");
   }

   templ->usage = PIPE_USAGE_DEFAULT;

   templ->height0 = 1;
   templ->depth0 = 1;
   templ->array_size = 1;
   templ->nr_storage_samples = templ->nr_samples;

   /* Try to hit microtiling in 1/2 of the cases. */
   unsigned max_tex_size = rand() & 1 ? 128 : 1024;

   templ->width0 = (rand() % max_tex_size) + 1;

   if (templ->target != PIPE_TEXTURE_1D &&
       templ->target != PIPE_TEXTURE_1D_ARRAY)
      templ->height0 = (rand() % max_tex_size) + 1;

   if (templ->target == PIPE_TEXTURE_3D)
      templ->depth0 = (rand() % max_tex_size) + 1;

   if (templ->target == PIPE_TEXTURE_1D_ARRAY ||
       templ->target == PIPE_TEXTURE_2D_ARRAY)
      templ->array_size = (rand() % max_tex_size) + 1;

   /* Keep reducing the size until the allocation is small enough. */
   while ((uint64_t)util_format_get_nblocks(templ->format, templ->width0, templ->height0) *
             templ->depth0 * templ->array_size * util_format_get_blocksize(templ->format) >
          MAX_ALLOC_SIZE) {
      switch (rand() % 3) {
      case 0:
         if (templ->width0 > 1)
            templ->width0 /= 2;
         break;
      case 1:
         if (templ->height0 > 1)
            templ->height0 /= 2;
         break;
      case 2:
         if (templ->depth0 > 1)
            templ->depth0 /= 2;
         else if (templ->array_size > 1)
            templ->array_size /= 2;
         break;
      }
   }

   if (util_format_get_blockwidth(templ->format) == 2)
      templ->width0 = align(templ->width0, 2);

   if (templ->target != PIPE_TEXTURE_RECT &&
       util_format_description(templ->format)->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      unsigned max_dim = MAX3(templ->width0, templ->height0, templ->depth0);

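      /* MSAA resources can't have mipmaps, so only single-sample textures get
       * a random number of mip levels.
       */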
      if (templ->nr_samples <= 1)
         templ->last_level = rand() % (util_logbase2(max_dim) + 1);
   }
}

static void print_image_attrs(struct si_screen *sscreen, struct si_texture *tex)
{
   const char *mode;

   if (sscreen->info.gfx_level >= GFX9) {
      static const char *modes[32] = {
         [ADDR_SW_LINEAR] = "LINEAR",
         [ADDR_SW_4KB_S_X] = "4KB_S_X",
         [ADDR_SW_4KB_D_X] = "4KB_D_X",
         [ADDR_SW_64KB_Z_X] = "64KB_Z_X",
         [ADDR_SW_64KB_S_X] = "64KB_S_X",
         [ADDR_SW_64KB_D_X] = "64KB_D_X",
         [ADDR_SW_64KB_R_X] = "64KB_R_X",
      };
      mode = modes[tex->surface.u.gfx9.swizzle_mode];
   } else {
      static const char *modes[32] = {
         [RADEON_SURF_MODE_LINEAR_ALIGNED] = "LINEAR",
         [RADEON_SURF_MODE_1D] = "1D_TILED",
         [RADEON_SURF_MODE_2D] = "2D_TILED",
      };
      mode = modes[tex->surface.u.legacy.level[0].mode];
   }

   if (!mode)
      mode = "UNKNOWN";

   static const char *targets[PIPE_MAX_TEXTURE_TYPES] = {
      [PIPE_TEXTURE_1D] = "1D",
      [PIPE_TEXTURE_2D] = "2D",
      [PIPE_TEXTURE_3D] = "3D",
      [PIPE_TEXTURE_RECT] = "RECT",
      [PIPE_TEXTURE_1D_ARRAY] = "1D_ARRAY",
      [PIPE_TEXTURE_2D_ARRAY] = "2D_ARRAY",
   };

   char size[64];
   if (tex->buffer.b.b.target == PIPE_TEXTURE_1D)
      snprintf(size, sizeof(size), "%u", tex->buffer.b.b.width0);
   else if (tex->buffer.b.b.target == PIPE_TEXTURE_2D ||
            tex->buffer.b.b.target == PIPE_TEXTURE_RECT)
      snprintf(size, sizeof(size), "%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0);
   else
      snprintf(size, sizeof(size), "%ux%ux%u", tex->buffer.b.b.width0, tex->buffer.b.b.height0,
               util_num_layers(&tex->buffer.b.b, 0));

   printf("%8s, %14s, %2u %7s, %8s", targets[tex->buffer.b.b.target], size,
          tex->buffer.b.b.nr_samples > 1 ? tex->buffer.b.b.nr_samples : tex->buffer.b.b.last_level + 1,
          tex->buffer.b.b.nr_samples > 1 ? "samples" : "levels", mode);
}

void si_test_image_copy_region(struct si_screen *sscreen)
{
   struct pipe_screen *screen = &sscreen->b;
   struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned i, iterations, num_partial_copies;
   unsigned num_pass = 0, num_fail = 0;

   /* the seed for random test parameters */
   srand(0x9b47d95b);
   /* the seed for random pixel data */
   s_rand_xorshift128plus(seed_xorshift128plus, false);

   iterations = 1000000000; /* just kill it when you are bored */
   num_partial_copies = 30;

   /* These parameters are randomly generated per test:
    * - which texture dimensions to use
    * - random initial pixels in src
    * - execute multiple subrectangle copies for partial blits
    */
   for (i = 0; i < iterations; i++) {
      struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
      struct si_texture *sdst;
      struct si_texture *ssrc;
      struct cpu_texture src_cpu[RADEON_SURF_MAX_LEVELS], dst_cpu[RADEON_SURF_MAX_LEVELS];
      unsigned max_width, max_height, max_depth, j;
      unsigned gfx_blits = 0, cs_blits = 0;
      bool pass;

      /* generate a random test case */
      struct si_format_options format_options = {
         .only_resolve = false,
         .allow_float = true,
         .allow_unorm16 = true,
         .allow_x_channels = false, /* cpu_texture doesn't implement X channels */
         .allow_subsampled = false, /* TODO: fix subsampled formats */
         .allow_compressed = false, /* TODO: fix compressed formats */
      };

      tsrc.format = tdst.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);

      /* MSAA copy testing isn't implemented; it might be too difficult because of how
       * cpu_texture works.
       */
      set_random_image_attrs(&tsrc, false, false);
      set_random_image_attrs(&tdst, false, false);

      /* Allocate textures (both the GPU and CPU copies).
       * The CPU will emulate what the GPU should be doing.
       */
      src = screen->resource_create(screen, &tsrc);
      dst = screen->resource_create(screen, &tdst);
      assert(src);
      assert(dst);
      sdst = (struct si_texture *)dst;
      ssrc = (struct si_texture *)src;

      printf("%4u: dst = (", i);
      print_image_attrs(sscreen, sdst);
      printf("), src = (");
      print_image_attrs(sscreen, ssrc);
      printf("), format = %20s, ", util_format_description(tsrc.format)->short_name);
      fflush(stdout);

      for (unsigned level = 0; level <= tsrc.last_level; level++) {
         alloc_cpu_texture(&src_cpu[level], &tsrc, level);
         set_random_pixels(ctx, src, &src_cpu[level], level);
      }
      for (unsigned level = 0; level <= tdst.last_level; level++) {
         alloc_cpu_texture(&dst_cpu[level], &tdst, level);
         memset(dst_cpu[level].ptr, 0, dst_cpu[level].layer_stride * util_num_layers(&tdst, level));
      }

      /* clear dst pixels */
      uint32_t zero = 0;
      si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, SI_OP_SYNC_BEFORE_AFTER,
                      SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);

      for (j = 0; j < num_partial_copies; j++) {
         int width, height, depth;
         int srcx, srcy, srcz, dstx, dsty, dstz;
         struct pipe_box box;
         unsigned old_num_draw_calls = sctx->num_draw_calls;
         unsigned old_num_cs_calls = sctx->num_compute_calls;

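         /* Cycle through the mip levels of src and dst across the partial copies. */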
         unsigned src_level = j % (tsrc.last_level + 1);
         unsigned dst_level = j % (tdst.last_level + 1);

         max_width = MIN2(u_minify(tsrc.width0, src_level), u_minify(tdst.width0, dst_level));
         max_height = MIN2(u_minify(tsrc.height0, src_level), u_minify(tdst.height0, dst_level));
         max_depth = MIN2(util_num_layers(&tsrc, src_level), util_num_layers(&tdst, dst_level));

         /* random sub-rectangle copies from src to dst */
         depth = (rand() % max_depth) + 1;
         srcz = rand() % (util_num_layers(&tsrc, src_level) - depth + 1);
         dstz = rand() % (util_num_layers(&tdst, dst_level) - depth + 1);

         /* just make sure that it doesn't divide by zero */
         assert(max_width > 0 && max_height > 0);

         width = (rand() % max_width) + 1;
         height = (rand() % max_height) + 1;

         srcx = rand() % (u_minify(tsrc.width0, src_level) - width + 1);
         srcy = rand() % (u_minify(tsrc.height0, src_level) - height + 1);

         dstx = rand() % (u_minify(tdst.width0, dst_level) - width + 1);
         dsty = rand() % (u_minify(tdst.height0, dst_level) - height + 1);

         /* Align the box to the format block size. */
         srcx &= ~(util_format_get_blockwidth(src->format) - 1);
         srcy &= ~(util_format_get_blockheight(src->format) - 1);

         dstx &= ~(util_format_get_blockwidth(dst->format) - 1);
         dsty &= ~(util_format_get_blockheight(dst->format) - 1);

         width = align(width, util_format_get_blockwidth(src->format));
         height = align(height, util_format_get_blockheight(src->format));

         /* GPU copy */
         u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
         si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, src, src_level, &box);

         /* See which engine was used. */
         gfx_blits += sctx->num_draw_calls > old_num_draw_calls;
         cs_blits += sctx->num_compute_calls > old_num_cs_calls;

         /* CPU copy */
         util_copy_box(dst_cpu[dst_level].ptr, tdst.format, dst_cpu[dst_level].stride,
                       dst_cpu[dst_level].layer_stride, dstx, dsty, dstz,
                       width, height, depth, src_cpu[src_level].ptr, src_cpu[src_level].stride,
                       src_cpu[src_level].layer_stride, srcx, srcy, srcz);
      }

      pass = true;
      for (unsigned level = 0; level <= tdst.last_level; level++)
         pass &= compare_textures(ctx, dst, &dst_cpu[level], level);

      if (pass)
         num_pass++;
      else
         num_fail++;

      printf("BLITs: GFX = %2u, CS = %2u, %s [%u/%u]\n", gfx_blits, cs_blits,
             pass ? "pass" : "fail", num_pass, num_pass + num_fail);

      /* cleanup */
      pipe_resource_reference(&src, NULL);
      pipe_resource_reference(&dst, NULL);
      for (unsigned level = 0; level <= tsrc.last_level; level++)
         free(src_cpu[level].ptr);
      for (unsigned level = 0; level <= tdst.last_level; level++)
         free(dst_cpu[level].ptr);
   }

   ctx->destroy(ctx);
   exit(0);
}

void si_test_blit(struct si_screen *sscreen, unsigned test_flags)
{
   struct pipe_screen *screen = &sscreen->b;
   struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned iterations;
   unsigned num_pass = 0, num_fail = 0;
   bool only_cb_resolve = test_flags == DBG(TEST_CB_RESOLVE);

   bool allow_float = false;
   bool allow_unorm16_dst = false;
   bool allow_srgb_dst = false;
   bool allow_filter = false;
   bool allow_scaled_min = false;
   bool allow_scaled_mag = false;
   bool allow_out_of_bounds_dst = false;
   bool allow_out_of_bounds_src = false;
   bool allow_scissor = false;
   bool allow_flip = false;

   /* The following tests always compare the tested operation with the gfx blit (u_blitter). */
   switch (test_flags) {
   case DBG(TEST_CB_RESOLVE):
      /* This is mostly failing because the precision of CB_RESOLVE is very different
       * from the gfx blit. FP32 and FP16 are the only formats that mostly pass.
       */
      allow_float = true;
      allow_unorm16_dst = true;
      allow_srgb_dst = true;
      break;

   case DBG(TEST_COMPUTE_BLIT):
      //allow_float = true; /* precision difference: NaNs not preserved by CB (u_blitter) */
      allow_unorm16_dst = true;
      //allow_srgb_dst = true; /* precision difference: sRGB is less precise in CB (u_blitter) */
      //allow_filter = true; /* not implemented by compute blits, lots of precision differences */
      //allow_scaled_min = true; /* not implemented by compute blits, lots of precision differences */
      //allow_scaled_mag = true; /* not implemented by compute blits, lots of precision differences */
      allow_out_of_bounds_dst = true;
      allow_out_of_bounds_src = true;
      //allow_scissor = true; /* not implemented by compute blits */
      allow_flip = true;
      break;

   default:
      assert(0);
   }

   /* the seed for random test parameters */
   srand(0x9b47d95b);
   /* the seed for random pixel data */
   s_rand_xorshift128plus(seed_xorshift128plus, false);

   iterations = 10000000; /* just kill it when you are bored */

   /* These parameters are randomly generated per test:
    * - which texture dimensions to use
    * - random initial pixels in src
    * - random pipe_blit_info
    */
   for (unsigned i = 0; i < iterations; i++) {
      struct pipe_resource tsrc = {}, tdst = {}, *gfx_src, *gfx_dst, *comp_src, *comp_dst;

      /* Generate a random test case. */
      {
         struct si_format_options format_options = {
            .only_resolve = only_cb_resolve,
            .allow_float = allow_float,
            .allow_unorm16 = true,
            .allow_srgb = true,
            .allow_x_channels = true,
            .allow_subsampled = false, /* TODO: fix subsampled formats */
            .allow_compressed = false, /* TODO: fix compressed formats */
         };

         tsrc.format = get_random_format(sscreen, false, 0, 0, 0, &format_options);
         tdst.format = get_random_format(sscreen, true, tsrc.format, 0, 0, &format_options);
      }

      set_random_image_attrs(&tsrc, true, only_cb_resolve);
      set_random_image_attrs(&tdst, !only_cb_resolve, false);

      /* MSAA blits must have matching sample counts. */
      if (tsrc.nr_samples > 1 && tdst.nr_samples > 1)
         tdst.nr_samples = tdst.nr_storage_samples = tsrc.nr_samples;

      /* Allocate textures. */
      gfx_src = screen->resource_create(screen, &tsrc);
      gfx_dst = screen->resource_create(screen, &tdst);
      comp_src = screen->resource_create(screen, &tsrc);
      comp_dst = screen->resource_create(screen, &tdst);

      /* clear dst pixels */
      uint32_t zero = 0;
      si_clear_buffer(sctx, gfx_dst, 0, ((struct si_texture *)gfx_dst)->surface.surf_size, &zero,
                      4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
      si_clear_buffer(sctx, comp_dst, 0, ((struct si_texture *)comp_dst)->surface.surf_size, &zero,
                      4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);

      /* TODO: These two fix quite a lot of BCn cases. */
      /*si_clear_buffer(sctx, gfx_src, 0, ((struct si_texture *)gfx_src)->surface.surf_size, &zero,
                      4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);
      si_clear_buffer(sctx, comp_src, 0, ((struct si_texture *)comp_src)->surface.surf_size, &zero,
                      4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD);*/

      set_random_pixels_for_2_textures(ctx, gfx_src, comp_src);

      struct pipe_blit_info info;
      memset(&info, 0, sizeof(info));

      {
         struct si_format_options format_options = {
            .only_resolve = only_cb_resolve,
            .allow_float = allow_float,
            .allow_unorm16 = true,
            .allow_srgb = true,
            .allow_x_channels = true,
            .allow_subsampled = false, /* TODO: fix subsampled formats */
            .allow_compressed = false, /* TODO: fix compressed formats */
         };

         info.src.format = get_random_format(sscreen, false, 0, tsrc.format, 0, &format_options);
         format_options.allow_unorm16 = allow_unorm16_dst;
         format_options.allow_srgb = allow_srgb_dst;
         info.dst.format = get_random_format(sscreen, true, 0, tdst.format, info.src.format,
                                             &format_options);
      }

      printf("%4u: dst = (", i);
      print_image_attrs(sscreen, (struct si_texture *)gfx_dst);
      printf(", %20s as %20s), src = (",
             util_format_description(tdst.format)->short_name,
             util_format_short_name(info.dst.format));
      print_image_attrs(sscreen, (struct si_texture *)gfx_src);
      printf(", %20s as %20s)",
             util_format_description(tsrc.format)->short_name,
             util_format_short_name(info.src.format));
      fflush(stdout);

      int src_width, src_height, src_depth, dst_width, dst_height, dst_depth;
      int srcx, srcy, srcz, dstx, dsty, dstz;

      unsigned src_level = rand() % (tsrc.last_level + 1);
      unsigned dst_level = rand() % (tdst.last_level + 1);

      unsigned max_src_width = u_minify(tsrc.width0, src_level);
      unsigned max_src_height = u_minify(tsrc.height0, src_level);
      unsigned max_src_depth = util_num_layers(&tsrc, src_level);

      unsigned max_dst_width = u_minify(tdst.width0, dst_level);
      unsigned max_dst_height = u_minify(tdst.height0, dst_level);
      unsigned max_dst_depth = util_num_layers(&tdst, dst_level);

      /* make sure that it doesn't divide by zero */
      assert(max_src_width && max_src_height && max_src_depth &&
             max_dst_width && max_dst_height && max_dst_depth);

      /* random sub-rectangle copies from src to dst */
      src_width = (rand() % max_src_width) + 1;
      src_height = (rand() % max_src_height) + 1;
      src_depth = (rand() % max_src_depth) + 1;

      dst_width = (rand() % max_dst_width) + 1;
      dst_height = (rand() % max_dst_height) + 1;
      dst_depth = (rand() % max_dst_depth) + 1;

      srcx = rand() % (u_minify(tsrc.width0, src_level) - src_width + 1);
      srcy = rand() % (u_minify(tsrc.height0, src_level) - src_height + 1);
      srcz = rand() % (util_num_layers(&tsrc, src_level) - src_depth + 1);

      dstx = rand() % (u_minify(tdst.width0, dst_level) - dst_width + 1);
      dsty = rand() % (u_minify(tdst.height0, dst_level) - dst_height + 1);
      dstz = rand() % (util_num_layers(&tdst, dst_level) - dst_depth + 1);

      /* Test out-of-bounds boxes. Add -dim/10 .. +dim/10 */
      if (allow_out_of_bounds_src) {
         if (max_src_width / 5 >= 2)
            srcx += rand() % (max_src_width / 5) - max_src_width / 10;
         if (max_src_height / 5 >= 2)
            srcy += rand() % (max_src_height / 5) - max_src_height / 10;
      }

      if (allow_out_of_bounds_dst) {
         if (max_dst_width / 5 >= 2)
            dstx += rand() % (max_dst_width / 5) - max_dst_width / 10;
         if (max_dst_height / 5 >= 2)
            dsty += rand() % (max_dst_height / 5) - max_dst_height / 10;
      }

      /* Align the box to the format block size. */
      srcx &= ~(util_format_get_blockwidth(tsrc.format) - 1);
      srcy &= ~(util_format_get_blockheight(tsrc.format) - 1);

      dstx &= ~(util_format_get_blockwidth(tdst.format) - 1);
      dsty &= ~(util_format_get_blockheight(tdst.format) - 1);

      src_width = align(src_width, util_format_get_blockwidth(tsrc.format));
      src_height = align(src_height, util_format_get_blockheight(tsrc.format));

      dst_width = align(dst_width, util_format_get_blockwidth(tdst.format));
      dst_height = align(dst_height, util_format_get_blockheight(tdst.format));

      if (!allow_scaled_min) {
         if (src_width > dst_width)
            src_width = dst_width;
         if (src_height > dst_height)
            src_height = dst_height;
         if (src_depth > dst_depth)
            src_depth = dst_depth;
      }

      if (!allow_scaled_mag) {
         if (src_width < dst_width)
            dst_width = src_width;
         if (src_height < dst_height)
            dst_height = src_height;
         if (src_depth < dst_depth)
            dst_depth = src_depth;
      }

      /* Flips */
      if (allow_flip) {
         if (rand() % 2) {
            srcx += src_width;
            src_width = -src_width;
         }
         if (rand() % 2) {
            srcy += src_height;
            src_height = -src_height;
         }
      }

      info.src.level = src_level;
      info.dst.level = dst_level;

      u_box_3d(srcx, srcy, srcz, src_width, src_height, src_depth, &info.src.box);
      u_box_3d(dstx, dsty, dstz, dst_width, dst_height, dst_depth, &info.dst.box);

      if (util_format_is_depth_and_stencil(tsrc.format)) {
         switch (rand() % 3) {
         case 0:
            info.mask = PIPE_MASK_ZS;
            break;
         case 1:
            info.mask = PIPE_MASK_Z;
            break;
         case 2:
            info.mask = PIPE_MASK_S;
            break;
         }
      } else {
         /* RGBA, Z, or S */
         info.mask = util_format_get_mask(tdst.format);
      }

      /* Don't filter MSAA and integer sources. */
      if (allow_filter && tsrc.nr_samples <= 1 &&
          !util_format_is_pure_integer(info.src.format) && rand() % 2)
         info.filter = PIPE_TEX_FILTER_LINEAR;
      else
         info.filter = PIPE_TEX_FILTER_NEAREST;

      info.scissor_enable = allow_scissor ? rand() % 2 : false;

      if (info.scissor_enable) {
         info.scissor.minx = MAX2(MIN2(info.dst.box.x, info.dst.box.x + info.dst.box.width), 0);
         info.scissor.miny = MAX2(MIN2(info.dst.box.y, info.dst.box.y + info.dst.box.height), 0);
         info.scissor.maxx = MIN2(MAX2(info.dst.box.x, info.dst.box.x + info.dst.box.width), UINT16_MAX);
         info.scissor.maxy = MIN2(MAX2(info.dst.box.y, info.dst.box.y + info.dst.box.height), UINT16_MAX);

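         /* Randomly shrink the scissor so it may clip part of the dst box. */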
         if (abs(info.dst.box.width) / 2 >= 2) {
            info.scissor.minx += rand() % (abs(info.dst.box.width) / 2);
            info.scissor.maxx -= rand() % (abs(info.dst.box.width) / 2);
         }
         if (abs(info.dst.box.height) / 2 >= 2) {
            info.scissor.miny += rand() % (abs(info.dst.box.height) / 2);
            info.scissor.maxy -= rand() % (abs(info.dst.box.height) / 2);
         }
      }

      char dstbox_s[128], srcbox_s[128], scissor[128];

      snprintf(dstbox_s, sizeof(dstbox_s), "{%ix%ix%i .. %ix%ix%i}",
               info.dst.box.x, info.dst.box.y, info.dst.box.z,
               info.dst.box.width, info.dst.box.height, info.dst.box.depth);
      snprintf(srcbox_s, sizeof(srcbox_s), "{%ix%ix%i .. %ix%ix%i}",
               info.src.box.x, info.src.box.y, info.src.box.z,
               info.src.box.width, info.src.box.height, info.src.box.depth);
      if (info.scissor_enable) {
         snprintf(scissor, sizeof(scissor), "(%u..%u, %u..%u)",
                  info.scissor.minx, info.scissor.maxx, info.scissor.miny, info.scissor.maxy);
      } else {
         snprintf(scissor, sizeof(scissor), "(none)");
      }

      printf(", filter %u, mask 0x%02x, ", info.filter, info.mask);
      printf("dst(level %u, box = %-28s), ", info.dst.level, dstbox_s);
      printf("src(level %u, box = %-28s), ", info.src.level, srcbox_s);
      printf("scissor%-20s", scissor);

      /* Blits. */
      info.src.resource = gfx_src;
      info.dst.resource = gfx_dst;
      si_gfx_blit(ctx, &info);

      info.src.resource = comp_src;
      info.dst.resource = comp_dst;

      bool success;
      if (only_cb_resolve)
         success = si_msaa_resolve_blit_via_CB(ctx, &info);
      else
         success = false;

      if (success) {
         printf(" %-7s", only_cb_resolve ? "resolve" : "comp");
      } else {
         si_gfx_blit(ctx, &info);
         printf(" %-7s", "gfx");
      }

      bool pass = compare_gpu_textures(ctx, gfx_dst, comp_dst);
      if (pass)
         num_pass++;
      else
         num_fail++;

      printf(" %s [%u/%u]\n", pass ? "pass" : "fail", num_pass, num_pass + num_fail);

      /* cleanup */
      pipe_resource_reference(&gfx_src, NULL);
      pipe_resource_reference(&gfx_dst, NULL);
      pipe_resource_reference(&comp_src, NULL);
      pipe_resource_reference(&comp_dst, NULL);
   }

   ctx->destroy(ctx);
   exit(0);
}