1 /*
2 * Copyright © 2021 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 */
26
27 /* Make the test not meaningless when asserts are disabled. */
28 #undef NDEBUG
29
30 #include <assert.h>
31 #include <inttypes.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34
35 #include <amdgpu.h>
36 #include "drm-uapi/amdgpu_drm.h"
37 #include "drm-uapi/drm_fourcc.h"
38
39 #include "ac_surface.h"
40 #include "util/macros.h"
41 #include "util/u_atomic.h"
42 #include "util/u_math.h"
43 #include "util/u_vector.h"
44 #include "util/mesa-sha1.h"
45 #include "addrlib/inc/addrinterface.h"
46
47 #include "ac_surface_test_common.h"
48
49 /*
50 * The main goal of this test is to validate that our dcc/htile addressing
51 * functions match addrlib behavior.
52 */
53
54 /* DCC address computation without mipmapping.
55 * CMASK address computation without mipmapping and without multisampling.
56 */
gfx9_meta_addr_from_coord(const struct radeon_info * info,const struct gfx9_addr_meta_equation * eq,unsigned meta_block_width,unsigned meta_block_height,unsigned meta_block_depth,unsigned meta_pitch,unsigned meta_height,unsigned x,unsigned y,unsigned z,unsigned sample,unsigned pipe_xor,unsigned * bit_position)57 static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info,
58 /* Shader key inputs: */
59 /* equation varies with resource_type, swizzle_mode,
60 * bpp, number of fragments, pipe_aligned, rb_aligned */
61 const struct gfx9_addr_meta_equation *eq,
62 unsigned meta_block_width, unsigned meta_block_height,
63 unsigned meta_block_depth,
64 /* Shader inputs: */
65 unsigned meta_pitch, unsigned meta_height,
66 unsigned x, unsigned y, unsigned z,
67 unsigned sample, unsigned pipe_xor,
68 /* Shader outputs (CMASK only): */
69 unsigned *bit_position)
70 {
71 /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
72 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
73 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
74 unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);
75
76 unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
77 unsigned numPipeBits = eq->numPipeBits;
78 unsigned pitchInBlock = meta_pitch >> meta_block_width_log2;
79 unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock;
80
81 unsigned xb = x >> meta_block_width_log2;
82 unsigned yb = y >> meta_block_height_log2;
83 unsigned zb = z >> meta_block_depth_log2;
84
85 unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
86 unsigned coords[] = {x, y, z, sample, blockIndex};
87
88 unsigned address = 0;
89 unsigned num_bits = eq->num_bits;
90 assert(num_bits <= 32);
91
92 /* Compute the address up until the last bit that doesn't use the block index. */
93 for (unsigned b = 0; b < num_bits - 1; b++) {
94 unsigned xor = 0;
95 for (unsigned c = 0; c < 5; c++) {
96 if (eq->bit[b].coord[c].dim >= 5)
97 continue;
98
99 assert(eq->bit[b].coord[c].ord < 32);
100 unsigned ison = (coords[eq->bit[b].coord[c].dim] >>
101 eq->bit[b].coord[c].ord) & 0x1;
102
103 xor ^= ison;
104 }
105 address |= xor << b;
106 }
107
108 /* Fill the remaining bits with the block index. */
109 unsigned last = num_bits - 1;
110 address |= (blockIndex >> eq->bit[last].coord[0].ord) << last;
111
112 if (bit_position)
113 *bit_position = (address & 1) << 2;
114
115 unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);
116 return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
117 }
118
119 /* DCC/CMASK/HTILE address computation for GFX10. */
gfx10_meta_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned blkSizeLog2,unsigned meta_pitch,unsigned meta_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)120 static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,
121 /* Shader key inputs: */
122 const uint16_t *equation,
123 unsigned meta_block_width, unsigned meta_block_height,
124 unsigned blkSizeLog2,
125 /* Shader inputs: */
126 unsigned meta_pitch, unsigned meta_slice_size,
127 unsigned x, unsigned y, unsigned z,
128 unsigned pipe_xor,
129 /* Shader outputs: (CMASK only) */
130 unsigned *bit_position)
131 {
132 /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
133 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
134 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
135
136 unsigned coord[] = {x, y, z, 0};
137 unsigned address = 0;
138
139 for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {
140 unsigned v = 0;
141
142 for (unsigned c = 0; c < 4; c++) {
143 if (equation[i*4+c] != 0) {
144 unsigned mask = equation[i*4+c];
145 unsigned bits = coord[c];
146
147 while (mask)
148 v ^= (bits >> u_bit_scan(&mask)) & 0x1;
149 }
150 }
151
152 address |= v << i;
153 }
154
155 unsigned blkMask = (1 << blkSizeLog2) - 1;
156 unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
157 unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
158 unsigned xb = x >> meta_block_width_log2;
159 unsigned yb = y >> meta_block_height_log2;
160 unsigned pb = meta_pitch >> meta_block_width_log2;
161 unsigned blkIndex = (yb * pb) + xb;
162 unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
163
164 if (bit_position)
165 *bit_position = (address & 1) << 2;
166
167 return (meta_slice_size * z) +
168 (blkIndex * (1 << blkSizeLog2)) +
169 ((address >> 1) ^ pipeXor);
170 }
171
172 /* DCC address computation without mipmapping and MSAA. */
gfx10_dcc_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned dcc_pitch,unsigned dcc_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)173 static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,
174 /* Shader key inputs: */
175 /* equation varies with bpp and pipe_aligned */
176 const uint16_t *equation, unsigned bpp,
177 unsigned meta_block_width, unsigned meta_block_height,
178 /* Shader inputs: */
179 unsigned dcc_pitch, unsigned dcc_slice_size,
180 unsigned x, unsigned y, unsigned z,
181 unsigned pipe_xor)
182 {
183 unsigned bpp_log2 = util_logbase2(bpp >> 3);
184 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
185 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
186 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8;
187
188 return gfx10_meta_addr_from_coord(info, equation,
189 meta_block_width, meta_block_height,
190 blkSizeLog2,
191 dcc_pitch, dcc_slice_size,
192 x, y, z, pipe_xor, NULL);
193 }
194
one_dcc_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned samples,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z,unsigned start_sample)195 static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
196 const struct radeon_info *info, unsigned width, unsigned height,
197 unsigned depth, unsigned samples, unsigned bpp,
198 unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,
199 unsigned mrt_index,
200 unsigned start_x, unsigned start_y, unsigned start_z,
201 unsigned start_sample)
202 {
203 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};
204 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};
205 ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};
206 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};
207 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};
208 ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
209 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
210
211 dout.pMipInfo = meta_mip_info;
212
213 /* Compute DCC info. */
214 in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;
215 in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;
216 xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;
217 xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;
218 in.bpp = din.bpp = bpp;
219 xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;
220 in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */
221 din.unalignedWidth = width;
222 din.unalignedHeight = height;
223 din.numSlices = depth;
224 din.firstMipIdInTail = 1;
225
226 int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
227 assert(ret == ADDR_OK);
228
229 /* Compute xor. */
230 static AddrFormat format[] = {
231 ADDR_FMT_8,
232 ADDR_FMT_16,
233 ADDR_FMT_32,
234 ADDR_FMT_32_32,
235 ADDR_FMT_32_32_32_32,
236 };
237 xin.flags.color = 1;
238 xin.flags.texture = 1;
239 xin.flags.opt4space = 1;
240 xin.flags.metaRbUnaligned = !rb_aligned;
241 xin.flags.metaPipeUnaligned = !pipe_aligned;
242 xin.format = format[util_logbase2(bpp / 8)];
243 xin.surfIndex = mrt_index;
244
245 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
246 assert(ret == ADDR_OK);
247
248 /* Compute addresses */
249 in.compressBlkWidth = dout.compressBlkWidth;
250 in.compressBlkHeight = dout.compressBlkHeight;
251 in.compressBlkDepth = dout.compressBlkDepth;
252 in.metaBlkWidth = dout.metaBlkWidth;
253 in.metaBlkHeight = dout.metaBlkHeight;
254 in.metaBlkDepth = dout.metaBlkDepth;
255 in.dccRamSliceSize = dout.dccRamSliceSize;
256
257 in.mipId = 0;
258 in.pitch = dout.pitch;
259 in.height = dout.height;
260 in.pipeXor = xout.pipeBankXor;
261
262 /* Validate that the packed gfx9_meta_equation structure can fit all fields. */
263 const struct gfx9_meta_equation eq;
264 if (info->gfx_level == GFX9) {
265 /* The bit array is smaller in gfx9_meta_equation than in addrlib. */
266 assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
267 } else {
268 /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
269 for (unsigned i = 0; i < 4; i++)
270 assert(dout.equation.gfx10_bits[i] == 0);
271
272 for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)
273 assert(dout.equation.gfx10_bits[i] == 0);
274 }
275
276 for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {
277 for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {
278 for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {
279 for (in.sample = start_sample; in.sample < samples; in.sample++) {
280 int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);
281 if (r != ADDR_OK) {
282 printf("%s addrlib error: %s\n", name, test);
283 abort();
284 }
285
286 unsigned addr;
287 if (info->gfx_level == GFX9) {
288 addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
289 dout.metaBlkDepth, dout.pitch, dout.height,
290 in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
291 if (in.sample == 1) {
292 /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */
293 assert(addr - 1 ==
294 gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
295 dout.metaBlkDepth, dout.pitch, dout.height,
296 in.x, in.y, in.slice, 0, in.pipeXor, NULL));
297 }
298 } else {
299 addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,
300 in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,
301 dout.pitch, dout.dccRamSliceSize,
302 in.x, in.y, in.slice, in.pipeXor);
303 }
304
305 if (out.addr != addr) {
306 printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",
307 name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);
308 return false;
309 }
310 }
311 }
312 }
313 }
314 return true;
315 }
316
run_dcc_address_test(const char * name,const struct radeon_info * info,bool full)317 static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)
318 {
319 unsigned total = 0;
320 unsigned fails = 0;
321 unsigned last_size, max_samples, min_bpp, max_bpp;
322 unsigned swizzle_modes[2], num_swizzle_modes = 0;
323
324 switch (info->gfx_level) {
325 case GFX9:
326 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_S_X;
327 break;
328 case GFX10:
329 case GFX10_3:
330 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
331 break;
332 case GFX11:
333 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
334 swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_R_X;
335 break;
336 default:
337 unreachable("unhandled gfx level");
338 }
339
340 if (full) {
341 last_size = 6*6 - 1;
342 max_samples = 8;
343 min_bpp = 8;
344 max_bpp = 128;
345 } else {
346 /* The test coverage is reduced for Gitlab CI because it timeouts. */
347 last_size = 0;
348 max_samples = 2;
349 min_bpp = 32;
350 max_bpp = 64;
351 }
352
353 #ifdef HAVE_OPENMP
354 #pragma omp parallel for
355 #endif
356 for (unsigned size = 0; size <= last_size; size++) {
357 unsigned width = 8 + 379 * (size % 6);
358 unsigned height = 8 + 379 * ((size / 6) % 6);
359
360 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
361 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
362
363 unsigned local_fails = 0;
364 unsigned local_total = 0;
365
366 for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
367 for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
368 /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
369 for (unsigned samples = 1; samples <= (info->gfx_level == GFX9 ? max_samples : 1); samples *= 2) {
370 for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
371 for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
372 for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
373 unsigned depth = 2;
374 char test[256];
375
376 snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",
377 width, height, depth, bpp, samples, rb_aligned, pipe_aligned);
378
379 if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,
380 bpp, swizzle_modes[swizzle_mode], pipe_aligned,
381 rb_aligned, mrt_index, 0, 0, 0, 0)) {
382 } else {
383 local_fails++;
384 }
385 local_total++;
386 }
387 }
388 }
389 }
390 }
391 }
392
393 ac_addrlib_destroy(ac_addrlib);
394 p_atomic_add(&fails, local_fails);
395 p_atomic_add(&total, local_total);
396 }
397 printf("%16s total: %u, fail: %u\n", name, total, fails);
398 }
399
400 /* HTILE address computation without mipmapping. */
gfx10_htile_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned htile_pitch,unsigned htile_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)401 static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,
402 const uint16_t *equation,
403 unsigned meta_block_width,
404 unsigned meta_block_height,
405 unsigned htile_pitch, unsigned htile_slice_size,
406 unsigned x, unsigned y, unsigned z,
407 unsigned pipe_xor)
408 {
409 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
410 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
411 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4;
412
413 return gfx10_meta_addr_from_coord(info, equation,
414 meta_block_width, meta_block_height,
415 blkSizeLog2,
416 htile_pitch, htile_slice_size,
417 x, y, z, pipe_xor, NULL);
418 }
419
one_htile_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,unsigned start_x,unsigned start_y,unsigned start_z)420 static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
421 const struct radeon_info *info,
422 unsigned width, unsigned height, unsigned depth,
423 unsigned bpp, unsigned swizzle_mode,
424 unsigned start_x, unsigned start_y, unsigned start_z)
425 {
426 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
427 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
428 ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
429 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
430 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};
431 ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};
432 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
433
434 hout.pMipInfo = meta_mip_info;
435
436 /* Compute HTILE info. */
437 hin.hTileFlags.pipeAligned = 1;
438 hin.hTileFlags.rbAligned = 1;
439 hin.depthFlags.depth = 1;
440 hin.depthFlags.texture = 1;
441 hin.depthFlags.opt4space = 1;
442 hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;
443 hin.unalignedWidth = in.unalignedWidth = width;
444 hin.unalignedHeight = in.unalignedHeight = height;
445 hin.numSlices = in.numSlices = depth;
446 hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */
447 hin.firstMipIdInTail = 1;
448
449 int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
450 assert(ret == ADDR_OK);
451
452 /* Compute xor. */
453 static AddrFormat format[] = {
454 ADDR_FMT_8, /* unused */
455 ADDR_FMT_16,
456 ADDR_FMT_32,
457 };
458 xin.flags = hin.depthFlags;
459 xin.resourceType = ADDR_RSRC_TEX_2D;
460 xin.format = format[util_logbase2(bpp / 8)];
461 xin.numFrags = xin.numSamples = in.numSamples = 1;
462
463 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
464 assert(ret == ADDR_OK);
465
466 in.hTileFlags = hin.hTileFlags;
467 in.depthflags = xin.flags;
468 in.bpp = bpp;
469 in.pipeXor = xout.pipeBankXor;
470
471 for (in.x = start_x; in.x < width; in.x++) {
472 for (in.y = start_y; in.y < height; in.y++) {
473 for (in.slice = start_z; in.slice < depth; in.slice++) {
474 int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);
475 if (r != ADDR_OK) {
476 printf("%s addrlib error: %s\n", name, test);
477 abort();
478 }
479
480 unsigned addr =
481 gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,
482 hout.metaBlkWidth, hout.metaBlkHeight,
483 hout.pitch, hout.sliceSize,
484 in.x, in.y, in.slice, in.pipeXor);
485 if (out.addr != addr) {
486 printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",
487 name, test, in.x, in.y, in.slice, out.addr, addr);
488 return false;
489 }
490 }
491 }
492 }
493
494 return true;
495 }
496
run_htile_address_test(const char * name,const struct radeon_info * info,bool full)497 static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)
498 {
499 unsigned total = 0;
500 unsigned fails = 0;
501 unsigned first_size = 0, last_size = 6*6 - 1;
502 unsigned swizzle_modes[2], num_swizzle_modes = 0;
503
504 switch (info->gfx_level) {
505 case GFX9:
506 case GFX10:
507 case GFX10_3:
508 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
509 break;
510 case GFX11:
511 swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
512 swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_Z_X;
513 break;
514 default:
515 unreachable("unhandled gfx level");
516 }
517
518 /* The test coverage is reduced for Gitlab CI because it timeouts. */
519 if (!full) {
520 first_size = last_size = 0;
521 }
522
523 #ifdef HAVE_OPENMP
524 #pragma omp parallel for
525 #endif
526 for (unsigned size = first_size; size <= last_size; size++) {
527 unsigned width = 8 + 379 * (size % 6);
528 unsigned height = 8 + 379 * (size / 6);
529
530 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
531 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
532
533 for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
534 for (unsigned depth = 1; depth <= 2; depth *= 2) {
535 for (unsigned bpp = 16; bpp <= 32; bpp *= 2) {
536 if (one_htile_address_test(name, name, addrlib, info, width, height, depth,
537 bpp, swizzle_modes[swizzle_mode], 0, 0, 0)) {
538 } else {
539 p_atomic_inc(&fails);
540 }
541 p_atomic_inc(&total);
542 }
543 }
544 }
545
546 ac_addrlib_destroy(ac_addrlib);
547 }
548 printf("%16s total: %u, fail: %u\n", name, total, fails);
549 }
550
551 /* CMASK address computation without mipmapping and MSAA. */
gfx10_cmask_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned cmask_pitch,unsigned cmask_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)552 static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info,
553 /* Shader key inputs: */
554 /* equation varies with bpp and pipe_aligned */
555 const uint16_t *equation, unsigned bpp,
556 unsigned meta_block_width, unsigned meta_block_height,
557 /* Shader inputs: */
558 unsigned cmask_pitch, unsigned cmask_slice_size,
559 unsigned x, unsigned y, unsigned z,
560 unsigned pipe_xor,
561 /* Shader outputs: */
562 unsigned *bit_position)
563
564 {
565 unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
566 unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
567 unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 7;
568
569 return gfx10_meta_addr_from_coord(info, equation,
570 meta_block_width, meta_block_height,
571 blkSizeLog2,
572 cmask_pitch, cmask_slice_size,
573 x, y, z, pipe_xor, bit_position);
574 }
575
one_cmask_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z)576 static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
577 const struct radeon_info *info,
578 unsigned width, unsigned height, unsigned depth,
579 unsigned bpp, unsigned swizzle_mode,
580 bool pipe_aligned, bool rb_aligned, unsigned mrt_index,
581 unsigned start_x, unsigned start_y, unsigned start_z)
582 {
583 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)};
584 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)};
585 ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)};
586 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)};
587 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)};
588 ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
589
590 /* Compute CMASK info. */
591 cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D;
592 cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode;
593 cin.unalignedWidth = in.unalignedWidth = width;
594 cin.unalignedHeight = in.unalignedHeight = height;
595 cin.numSlices = in.numSlices = depth;
596 cin.numMipLevels = 1;
597 cin.firstMipIdInTail = 1;
598 cin.cMaskFlags.pipeAligned = pipe_aligned;
599 cin.cMaskFlags.rbAligned = rb_aligned;
600 cin.cMaskFlags.linear = false;
601 cin.colorFlags.color = 1;
602 cin.colorFlags.texture = 1;
603 cin.colorFlags.opt4space = 1;
604 cin.colorFlags.metaRbUnaligned = !rb_aligned;
605 cin.colorFlags.metaPipeUnaligned = !pipe_aligned;
606
607 int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout);
608 assert(ret == ADDR_OK);
609
610 /* Compute xor. */
611 static AddrFormat format[] = {
612 ADDR_FMT_8,
613 ADDR_FMT_16,
614 ADDR_FMT_32,
615 ADDR_FMT_32_32,
616 ADDR_FMT_32_32_32_32,
617 };
618 xin.flags = cin.colorFlags;
619 xin.format = format[util_logbase2(bpp / 8)];
620 xin.surfIndex = mrt_index;
621 xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1;
622
623 ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
624 assert(ret == ADDR_OK);
625
626 in.cMaskFlags = cin.cMaskFlags;
627 in.colorFlags = cin.colorFlags;
628 in.pipeXor = xout.pipeBankXor;
629
630 for (in.x = start_x; in.x < width; in.x++) {
631 for (in.y = start_y; in.y < height; in.y++) {
632 for (in.slice = start_z; in.slice < depth; in.slice++) {
633 int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out);
634 if (r != ADDR_OK) {
635 printf("%s addrlib error: %s\n", name, test);
636 abort();
637 }
638
639 unsigned addr, bit_position;
640
641 if (info->gfx_level == GFX9) {
642 addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
643 cout.metaBlkWidth, cout.metaBlkHeight, 1,
644 cout.pitch, cout.height,
645 in.x, in.y, in.slice, 0, in.pipeXor,
646 &bit_position);
647 } else {
648 addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits,
649 bpp, cout.metaBlkWidth,
650 cout.metaBlkHeight,
651 cout.pitch, cout.sliceSize,
652 in.x, in.y, in.slice,
653 in.pipeXor,
654 &bit_position);
655 }
656
657 if (out.addr != addr || out.bitPosition != bit_position) {
658 printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, "
659 "expected (bit_position) = %u, got = %u\n",
660 name, test, in.x, in.y, in.slice, out.addr, addr,
661 out.bitPosition, bit_position);
662 return false;
663 }
664 }
665 }
666 }
667
668 return true;
669 }
670
run_cmask_address_test(const char * name,const struct radeon_info * info,bool full)671 static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full)
672 {
673 unsigned total = 0;
674 unsigned fails = 0;
675 unsigned swizzle_mode = info->gfx_level == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
676 unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
677
678 /* GFX11 doesn't have CMASK. */
679 if (info->gfx_level >= GFX11)
680 return;
681
682 /* The test coverage is reduced for Gitlab CI because it timeouts. */
683 if (!full) {
684 first_size = last_size = 0;
685 }
686
687 #ifdef HAVE_OPENMP
688 #pragma omp parallel for
689 #endif
690 for (unsigned size = first_size; size <= last_size; size++) {
691 unsigned width = 8 + 379 * (size % 6);
692 unsigned height = 8 + 379 * (size / 6);
693
694 struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
695 ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
696
697 for (unsigned depth = 1; depth <= 2; depth *= 2) {
698 for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
699 for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) {
700 for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) {
701 if (one_cmask_address_test(name, name, addrlib, info,
702 width, height, depth, bpp,
703 swizzle_mode,
704 pipe_aligned, rb_aligned,
705 0, 0, 0, 0)) {
706 } else {
707 p_atomic_inc(&fails);
708 }
709 p_atomic_inc(&total);
710 }
711 }
712 }
713 }
714
715 ac_addrlib_destroy(ac_addrlib);
716 }
717 printf("%16s total: %u, fail: %u\n", name, total, fails);
718 }
719
main(int argc,char ** argv)720 int main(int argc, char **argv)
721 {
722 bool full = false;
723
724 if (argc == 2 && !strcmp(argv[1], "--full"))
725 full = true;
726 else
727 puts("Specify --full to run the full test.");
728
729 puts("DCC:");
730 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
731 struct radeon_info info = get_radeon_info(&testcases[i]);
732
733 run_dcc_address_test(testcases[i].name, &info, full);
734 }
735
736 puts("HTILE:");
737 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
738 struct radeon_info info = get_radeon_info(&testcases[i]);
739
740 /* Only GFX10+ is currently supported. */
741 if (info.gfx_level < GFX10)
742 continue;
743
744 run_htile_address_test(testcases[i].name, &info, full);
745 }
746
747 puts("CMASK:");
748 for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
749 struct radeon_info info = get_radeon_info(&testcases[i]);
750
751 run_cmask_address_test(testcases[i].name, &info, full);
752 }
753
754 return 0;
755 }
756