• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining
6  * a copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  */
26 
27 /* Make the test not meaningless when asserts are disabled. */
28 #undef NDEBUG
29 
30 #include <assert.h>
31 #include <inttypes.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 
35 #include <amdgpu.h>
36 #include "drm-uapi/amdgpu_drm.h"
37 #include "drm-uapi/drm_fourcc.h"
38 
39 #include "ac_surface.h"
40 #include "util/macros.h"
41 #include "util/u_atomic.h"
42 #include "util/u_math.h"
43 #include "util/u_vector.h"
44 #include "util/mesa-sha1.h"
45 #include "addrlib/inc/addrinterface.h"
46 
47 #include "ac_surface_test_common.h"
48 
49 /*
50  * The main goal of this test is to validate that our dcc/htile addressing
51  * functions match addrlib behavior.
52  */
53 
54 /* DCC address computation without mipmapping.
55  * CMASK address computation without mipmapping and without multisampling.
56  */
gfx9_meta_addr_from_coord(const struct radeon_info * info,const struct gfx9_addr_meta_equation * eq,unsigned meta_block_width,unsigned meta_block_height,unsigned meta_block_depth,unsigned meta_pitch,unsigned meta_height,unsigned x,unsigned y,unsigned z,unsigned sample,unsigned pipe_xor,unsigned * bit_position)57 static unsigned gfx9_meta_addr_from_coord(const struct radeon_info *info,
58                                           /* Shader key inputs: */
59                                           /* equation varies with resource_type, swizzle_mode,
60                                            * bpp, number of fragments, pipe_aligned, rb_aligned */
61                                           const struct gfx9_addr_meta_equation *eq,
62                                           unsigned meta_block_width, unsigned meta_block_height,
63                                           unsigned meta_block_depth,
64                                           /* Shader inputs: */
65                                           unsigned meta_pitch, unsigned meta_height,
66                                           unsigned x, unsigned y, unsigned z,
67                                           unsigned sample, unsigned pipe_xor,
68                                           /* Shader outputs (CMASK only): */
69                                           unsigned *bit_position)
70 {
71    /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
72    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
73    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
74    unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);
75 
76    unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
77    unsigned numPipeBits = eq->numPipeBits;
78    unsigned pitchInBlock = meta_pitch >> meta_block_width_log2;
79    unsigned sliceSizeInBlock = (meta_height >> meta_block_height_log2) * pitchInBlock;
80 
81    unsigned xb = x >> meta_block_width_log2;
82    unsigned yb = y >> meta_block_height_log2;
83    unsigned zb = z >> meta_block_depth_log2;
84 
85    unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;
86    unsigned coords[] = {x, y, z, sample, blockIndex};
87 
88    unsigned address = 0;
89    unsigned num_bits = eq->num_bits;
90    assert(num_bits <= 32);
91 
92    /* Compute the address up until the last bit that doesn't use the block index. */
93    for (unsigned b = 0; b < num_bits - 1; b++) {
94       unsigned xor = 0;
95       for (unsigned c = 0; c < 5; c++) {
96          if (eq->bit[b].coord[c].dim >= 5)
97             continue;
98 
99          assert(eq->bit[b].coord[c].ord < 32);
100          unsigned ison = (coords[eq->bit[b].coord[c].dim] >>
101                                  eq->bit[b].coord[c].ord) & 0x1;
102 
103          xor ^= ison;
104       }
105       address |= xor << b;
106    }
107 
108    /* Fill the remaining bits with the block index. */
109    unsigned last = num_bits - 1;
110    address |= (blockIndex >> eq->bit[last].coord[0].ord) << last;
111 
112    if (bit_position)
113       *bit_position = (address & 1) << 2;
114 
115    unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);
116    return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);
117 }
118 
119 /* DCC/CMASK/HTILE address computation for GFX10. */
gfx10_meta_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned blkSizeLog2,unsigned meta_pitch,unsigned meta_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)120 static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,
121                                            /* Shader key inputs: */
122                                            const uint16_t *equation,
123                                            unsigned meta_block_width, unsigned meta_block_height,
124                                            unsigned blkSizeLog2,
125                                            /* Shader inputs: */
126                                            unsigned meta_pitch, unsigned meta_slice_size,
127                                            unsigned x, unsigned y, unsigned z,
128                                            unsigned pipe_xor,
129                                            /* Shader outputs: (CMASK only) */
130                                            unsigned *bit_position)
131 {
132    /* The compiled shader shouldn't be complicated considering there are a lot of constants here. */
133    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
134    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
135 
136    unsigned coord[] = {x, y, z, 0};
137    unsigned address = 0;
138 
139    for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {
140       unsigned v = 0;
141 
142       for (unsigned c = 0; c < 4; c++) {
143          if (equation[i*4+c] != 0) {
144             unsigned mask = equation[i*4+c];
145             unsigned bits = coord[c];
146 
147             while (mask)
148                v ^= (bits >> u_bit_scan(&mask)) & 0x1;
149          }
150       }
151 
152       address |= v << i;
153    }
154 
155    unsigned blkMask = (1 << blkSizeLog2) - 1;
156    unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
157    unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
158    unsigned xb = x >> meta_block_width_log2;
159    unsigned yb = y >> meta_block_height_log2;
160    unsigned pb = meta_pitch >> meta_block_width_log2;
161    unsigned blkIndex = (yb * pb) + xb;
162    unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;
163 
164    if (bit_position)
165       *bit_position = (address & 1) << 2;
166 
167    return (meta_slice_size * z) +
168           (blkIndex * (1 << blkSizeLog2)) +
169           ((address >> 1) ^ pipeXor);
170 }
171 
172 /* DCC address computation without mipmapping and MSAA. */
gfx10_dcc_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned dcc_pitch,unsigned dcc_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)173 static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,
174                                           /* Shader key inputs: */
175                                           /* equation varies with bpp and pipe_aligned */
176                                           const uint16_t *equation, unsigned bpp,
177                                           unsigned meta_block_width, unsigned meta_block_height,
178                                           /* Shader inputs: */
179                                           unsigned dcc_pitch, unsigned dcc_slice_size,
180                                           unsigned x, unsigned y, unsigned z,
181                                           unsigned pipe_xor)
182 {
183    unsigned bpp_log2 = util_logbase2(bpp >> 3);
184    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
185    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
186    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8;
187 
188    return gfx10_meta_addr_from_coord(info, equation,
189                                      meta_block_width, meta_block_height,
190                                      blkSizeLog2,
191                                      dcc_pitch, dcc_slice_size,
192                                      x, y, z, pipe_xor, NULL);
193 }
194 
one_dcc_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned samples,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z,unsigned start_sample)195 static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
196                                  const struct radeon_info *info, unsigned width, unsigned height,
197                                  unsigned depth, unsigned samples, unsigned bpp,
198                                  unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,
199                                  unsigned mrt_index,
200                                  unsigned start_x, unsigned start_y, unsigned start_z,
201                                  unsigned start_sample)
202 {
203    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};
204    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};
205    ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};
206    ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};
207    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};
208    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
209    ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
210 
211    dout.pMipInfo = meta_mip_info;
212 
213    /* Compute DCC info. */
214    in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;
215    in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;
216    xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;
217    xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;
218    in.bpp = din.bpp = bpp;
219    xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;
220    in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */
221    din.unalignedWidth = width;
222    din.unalignedHeight = height;
223    din.numSlices = depth;
224    din.firstMipIdInTail = 1;
225 
226    int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);
227    assert(ret == ADDR_OK);
228 
229    /* Compute xor. */
230    static AddrFormat format[] = {
231       ADDR_FMT_8,
232       ADDR_FMT_16,
233       ADDR_FMT_32,
234       ADDR_FMT_32_32,
235       ADDR_FMT_32_32_32_32,
236    };
237    xin.flags.color = 1;
238    xin.flags.texture = 1;
239    xin.flags.opt4space = 1;
240    xin.flags.metaRbUnaligned = !rb_aligned;
241    xin.flags.metaPipeUnaligned = !pipe_aligned;
242    xin.format = format[util_logbase2(bpp / 8)];
243    xin.surfIndex = mrt_index;
244 
245    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
246    assert(ret == ADDR_OK);
247 
248    /* Compute addresses */
249    in.compressBlkWidth = dout.compressBlkWidth;
250    in.compressBlkHeight = dout.compressBlkHeight;
251    in.compressBlkDepth = dout.compressBlkDepth;
252    in.metaBlkWidth = dout.metaBlkWidth;
253    in.metaBlkHeight = dout.metaBlkHeight;
254    in.metaBlkDepth = dout.metaBlkDepth;
255    in.dccRamSliceSize = dout.dccRamSliceSize;
256 
257    in.mipId = 0;
258    in.pitch = dout.pitch;
259    in.height = dout.height;
260    in.pipeXor = xout.pipeBankXor;
261 
262    /* Validate that the packed gfx9_meta_equation structure can fit all fields. */
263    const struct gfx9_meta_equation eq;
264    if (info->gfx_level == GFX9) {
265       /* The bit array is smaller in gfx9_meta_equation than in addrlib. */
266       assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));
267    } else {
268       /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
269       for (unsigned i = 0; i < 4; i++)
270          assert(dout.equation.gfx10_bits[i] == 0);
271 
272       for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)
273          assert(dout.equation.gfx10_bits[i] == 0);
274    }
275 
276    for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {
277       for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {
278          for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {
279             for (in.sample = start_sample; in.sample < samples; in.sample++) {
280                int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);
281                if (r != ADDR_OK) {
282                   printf("%s addrlib error: %s\n", name, test);
283                   abort();
284                }
285 
286                unsigned addr;
287                if (info->gfx_level == GFX9) {
288                   addr = gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
289                                                    dout.metaBlkDepth, dout.pitch, dout.height,
290                                                    in.x, in.y, in.slice, in.sample, in.pipeXor, NULL);
291                   if (in.sample == 1) {
292                      /* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */
293                      assert(addr - 1 ==
294                             gfx9_meta_addr_from_coord(info, &dout.equation.gfx9, dout.metaBlkWidth, dout.metaBlkHeight,
295                                                       dout.metaBlkDepth, dout.pitch, dout.height,
296                                                       in.x, in.y, in.slice, 0, in.pipeXor, NULL));
297                   }
298                } else {
299                   addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,
300                                                    in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,
301                                                    dout.pitch, dout.dccRamSliceSize,
302                                                    in.x, in.y, in.slice, in.pipeXor);
303                }
304 
305                if (out.addr != addr) {
306                   printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",
307                          name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);
308                   return false;
309                }
310             }
311          }
312       }
313    }
314    return true;
315 }
316 
run_dcc_address_test(const char * name,const struct radeon_info * info,bool full)317 static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)
318 {
319    unsigned total = 0;
320    unsigned fails = 0;
321    unsigned last_size, max_samples, min_bpp, max_bpp;
322    unsigned swizzle_modes[2], num_swizzle_modes = 0;
323 
324    switch (info->gfx_level) {
325    case GFX9:
326       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_S_X;
327       break;
328    case GFX10:
329    case GFX10_3:
330       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
331       break;
332    case GFX11:
333       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_R_X;
334       swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_R_X;
335       break;
336    default:
337       unreachable("unhandled gfx level");
338    }
339 
340    if (full) {
341       last_size = 6*6 - 1;
342       max_samples = 8;
343       min_bpp = 8;
344       max_bpp = 128;
345    } else {
346       /* The test coverage is reduced for Gitlab CI because it timeouts. */
347       last_size = 0;
348       max_samples = 2;
349       min_bpp = 32;
350       max_bpp = 64;
351    }
352 
353 #ifdef HAVE_OPENMP
354 #pragma omp parallel for
355 #endif
356    for (unsigned size = 0; size <= last_size; size++) {
357       unsigned width = 8 + 379 * (size % 6);
358       unsigned height = 8 + 379 * ((size / 6) % 6);
359 
360       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
361       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
362 
363       unsigned local_fails = 0;
364       unsigned local_total = 0;
365 
366       for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
367          for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {
368             /* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */
369             for (unsigned samples = 1; samples <= (info->gfx_level == GFX9 ? max_samples : 1); samples *= 2) {
370                for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {
371                   for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {
372                      for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {
373                         unsigned depth = 2;
374                         char test[256];
375 
376                         snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",
377                                  width, height, depth, bpp, samples, rb_aligned, pipe_aligned);
378 
379                         if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,
380                                                  bpp, swizzle_modes[swizzle_mode], pipe_aligned,
381                                                  rb_aligned, mrt_index, 0, 0, 0, 0)) {
382                         } else {
383                            local_fails++;
384                         }
385                         local_total++;
386                      }
387                   }
388                }
389             }
390          }
391       }
392 
393       ac_addrlib_destroy(ac_addrlib);
394       p_atomic_add(&fails, local_fails);
395       p_atomic_add(&total, local_total);
396    }
397    printf("%16s total: %u, fail: %u\n", name, total, fails);
398 }
399 
400 /* HTILE address computation without mipmapping. */
gfx10_htile_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned meta_block_width,unsigned meta_block_height,unsigned htile_pitch,unsigned htile_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor)401 static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,
402                                             const uint16_t *equation,
403                                             unsigned meta_block_width,
404                                             unsigned meta_block_height,
405                                             unsigned htile_pitch, unsigned htile_slice_size,
406                                             unsigned x, unsigned y, unsigned z,
407                                             unsigned pipe_xor)
408 {
409    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
410    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
411    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4;
412 
413    return gfx10_meta_addr_from_coord(info, equation,
414                                      meta_block_width, meta_block_height,
415                                      blkSizeLog2,
416                                      htile_pitch, htile_slice_size,
417                                      x, y, z, pipe_xor, NULL);
418 }
419 
one_htile_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,unsigned start_x,unsigned start_y,unsigned start_z)420 static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
421                                    const struct radeon_info *info,
422                                    unsigned width, unsigned height, unsigned depth,
423                                    unsigned bpp, unsigned swizzle_mode,
424                                    unsigned start_x, unsigned start_y, unsigned start_z)
425 {
426    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
427    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
428    ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
429    ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
430    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};
431    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};
432    ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
433 
434    hout.pMipInfo = meta_mip_info;
435 
436    /* Compute HTILE info. */
437    hin.hTileFlags.pipeAligned = 1;
438    hin.hTileFlags.rbAligned = 1;
439    hin.depthFlags.depth = 1;
440    hin.depthFlags.texture = 1;
441    hin.depthFlags.opt4space = 1;
442    hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;
443    hin.unalignedWidth = in.unalignedWidth = width;
444    hin.unalignedHeight = in.unalignedHeight = height;
445    hin.numSlices = in.numSlices = depth;
446    hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */
447    hin.firstMipIdInTail = 1;
448 
449    int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
450    assert(ret == ADDR_OK);
451 
452    /* Compute xor. */
453    static AddrFormat format[] = {
454       ADDR_FMT_8, /* unused */
455       ADDR_FMT_16,
456       ADDR_FMT_32,
457    };
458    xin.flags = hin.depthFlags;
459    xin.resourceType = ADDR_RSRC_TEX_2D;
460    xin.format = format[util_logbase2(bpp / 8)];
461    xin.numFrags = xin.numSamples = in.numSamples = 1;
462 
463    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
464    assert(ret == ADDR_OK);
465 
466    in.hTileFlags = hin.hTileFlags;
467    in.depthflags = xin.flags;
468    in.bpp = bpp;
469    in.pipeXor = xout.pipeBankXor;
470 
471    for (in.x = start_x; in.x < width; in.x++) {
472       for (in.y = start_y; in.y < height; in.y++) {
473          for (in.slice = start_z; in.slice < depth; in.slice++) {
474             int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);
475             if (r != ADDR_OK) {
476                printf("%s addrlib error: %s\n", name, test);
477                abort();
478             }
479 
480             unsigned addr =
481                gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,
482                                            hout.metaBlkWidth, hout.metaBlkHeight,
483                                            hout.pitch, hout.sliceSize,
484                                            in.x, in.y, in.slice, in.pipeXor);
485             if (out.addr != addr) {
486                printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",
487                       name, test, in.x, in.y, in.slice, out.addr, addr);
488                return false;
489             }
490          }
491       }
492    }
493 
494    return true;
495 }
496 
run_htile_address_test(const char * name,const struct radeon_info * info,bool full)497 static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)
498 {
499    unsigned total = 0;
500    unsigned fails = 0;
501    unsigned first_size = 0, last_size = 6*6 - 1;
502    unsigned swizzle_modes[2], num_swizzle_modes = 0;
503 
504    switch (info->gfx_level) {
505    case GFX9:
506    case GFX10:
507    case GFX10_3:
508       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
509       break;
510    case GFX11:
511       swizzle_modes[num_swizzle_modes++] = ADDR_SW_64KB_Z_X;
512       swizzle_modes[num_swizzle_modes++] = ADDR_SW_256KB_Z_X;
513       break;
514    default:
515       unreachable("unhandled gfx level");
516    }
517 
518    /* The test coverage is reduced for Gitlab CI because it timeouts. */
519    if (!full) {
520       first_size = last_size = 0;
521    }
522 
523 #ifdef HAVE_OPENMP
524 #pragma omp parallel for
525 #endif
526    for (unsigned size = first_size; size <= last_size; size++) {
527       unsigned width = 8 + 379 * (size % 6);
528       unsigned height = 8 + 379 * (size / 6);
529 
530       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
531       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
532 
533       for (unsigned swizzle_mode = 0; swizzle_mode < num_swizzle_modes; swizzle_mode++) {
534          for (unsigned depth = 1; depth <= 2; depth *= 2) {
535             for (unsigned bpp = 16; bpp <= 32; bpp *= 2) {
536                if (one_htile_address_test(name, name, addrlib, info, width, height, depth,
537                                           bpp, swizzle_modes[swizzle_mode], 0, 0, 0)) {
538                } else {
539                   p_atomic_inc(&fails);
540                }
541                p_atomic_inc(&total);
542             }
543          }
544       }
545 
546       ac_addrlib_destroy(ac_addrlib);
547    }
548    printf("%16s total: %u, fail: %u\n", name, total, fails);
549 }
550 
551 /* CMASK address computation without mipmapping and MSAA. */
gfx10_cmask_addr_from_coord(const struct radeon_info * info,const uint16_t * equation,unsigned bpp,unsigned meta_block_width,unsigned meta_block_height,unsigned cmask_pitch,unsigned cmask_slice_size,unsigned x,unsigned y,unsigned z,unsigned pipe_xor,unsigned * bit_position)552 static unsigned gfx10_cmask_addr_from_coord(const struct radeon_info *info,
553                                             /* Shader key inputs: */
554                                             /* equation varies with bpp and pipe_aligned */
555                                             const uint16_t *equation, unsigned bpp,
556                                             unsigned meta_block_width, unsigned meta_block_height,
557                                             /* Shader inputs: */
558                                             unsigned cmask_pitch, unsigned cmask_slice_size,
559                                             unsigned x, unsigned y, unsigned z,
560                                             unsigned pipe_xor,
561                                             /* Shader outputs: */
562                                             unsigned *bit_position)
563 
564 {
565    unsigned meta_block_width_log2 = util_logbase2(meta_block_width);
566    unsigned meta_block_height_log2 = util_logbase2(meta_block_height);
567    unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 7;
568 
569    return gfx10_meta_addr_from_coord(info, equation,
570                                      meta_block_width, meta_block_height,
571                                      blkSizeLog2,
572                                      cmask_pitch, cmask_slice_size,
573                                      x, y, z, pipe_xor, bit_position);
574 }
575 
one_cmask_address_test(const char * name,const char * test,ADDR_HANDLE addrlib,const struct radeon_info * info,unsigned width,unsigned height,unsigned depth,unsigned bpp,unsigned swizzle_mode,bool pipe_aligned,bool rb_aligned,unsigned mrt_index,unsigned start_x,unsigned start_y,unsigned start_z)576 static bool one_cmask_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,
577                                    const struct radeon_info *info,
578                                    unsigned width, unsigned height, unsigned depth,
579                                    unsigned bpp, unsigned swizzle_mode,
580                                    bool pipe_aligned, bool rb_aligned, unsigned mrt_index,
581                                    unsigned start_x, unsigned start_y, unsigned start_z)
582 {
583    ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(xin)};
584    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(xout)};
585    ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {sizeof(cin)};
586    ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {sizeof(cout)};
587    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT in = {sizeof(in)};
588    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};
589 
590    /* Compute CMASK info. */
591    cin.resourceType = xin.resourceType = in.resourceType = ADDR_RSRC_TEX_2D;
592    cin.swizzleMode = xin.swizzleMode = in.swizzleMode = swizzle_mode;
593    cin.unalignedWidth = in.unalignedWidth = width;
594    cin.unalignedHeight = in.unalignedHeight = height;
595    cin.numSlices = in.numSlices = depth;
596    cin.numMipLevels = 1;
597    cin.firstMipIdInTail = 1;
598    cin.cMaskFlags.pipeAligned = pipe_aligned;
599    cin.cMaskFlags.rbAligned = rb_aligned;
600    cin.cMaskFlags.linear = false;
601    cin.colorFlags.color = 1;
602    cin.colorFlags.texture = 1;
603    cin.colorFlags.opt4space = 1;
604    cin.colorFlags.metaRbUnaligned = !rb_aligned;
605    cin.colorFlags.metaPipeUnaligned = !pipe_aligned;
606 
607    int ret = Addr2ComputeCmaskInfo(addrlib, &cin, &cout);
608    assert(ret == ADDR_OK);
609 
610    /* Compute xor. */
611    static AddrFormat format[] = {
612       ADDR_FMT_8,
613       ADDR_FMT_16,
614       ADDR_FMT_32,
615       ADDR_FMT_32_32,
616       ADDR_FMT_32_32_32_32,
617    };
618    xin.flags = cin.colorFlags;
619    xin.format = format[util_logbase2(bpp / 8)];
620    xin.surfIndex = mrt_index;
621    xin.numSamples = in.numSamples = xin.numFrags = in.numFrags = 1;
622 
623    ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);
624    assert(ret == ADDR_OK);
625 
626    in.cMaskFlags = cin.cMaskFlags;
627    in.colorFlags = cin.colorFlags;
628    in.pipeXor = xout.pipeBankXor;
629 
630    for (in.x = start_x; in.x < width; in.x++) {
631       for (in.y = start_y; in.y < height; in.y++) {
632          for (in.slice = start_z; in.slice < depth; in.slice++) {
633             int r = Addr2ComputeCmaskAddrFromCoord(addrlib, &in, &out);
634             if (r != ADDR_OK) {
635                printf("%s addrlib error: %s\n", name, test);
636                abort();
637             }
638 
639             unsigned addr, bit_position;
640 
641             if (info->gfx_level == GFX9) {
642                addr = gfx9_meta_addr_from_coord(info, &cout.equation.gfx9,
643                                                 cout.metaBlkWidth, cout.metaBlkHeight, 1,
644                                                 cout.pitch, cout.height,
645                                                 in.x, in.y, in.slice, 0, in.pipeXor,
646                                                 &bit_position);
647             } else {
648                addr = gfx10_cmask_addr_from_coord(info, cout.equation.gfx10_bits,
649                                                   bpp, cout.metaBlkWidth,
650                                                   cout.metaBlkHeight,
651                                                   cout.pitch, cout.sliceSize,
652                                                   in.x, in.y, in.slice,
653                                                   in.pipeXor,
654                                                   &bit_position);
655             }
656 
657             if (out.addr != addr || out.bitPosition != bit_position) {
658                printf("%s fail (%s) at %ux%ux%u: expected (addr) = %llu, got = %u, "
659                       "expected (bit_position) = %u, got = %u\n",
660                       name, test, in.x, in.y, in.slice, out.addr, addr,
661                       out.bitPosition, bit_position);
662                return false;
663             }
664          }
665       }
666    }
667 
668    return true;
669 }
670 
run_cmask_address_test(const char * name,const struct radeon_info * info,bool full)671 static void run_cmask_address_test(const char *name, const struct radeon_info *info, bool full)
672 {
673    unsigned total = 0;
674    unsigned fails = 0;
675    unsigned swizzle_mode = info->gfx_level == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_Z_X;
676    unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;
677 
678    /* GFX11 doesn't have CMASK. */
679    if (info->gfx_level >= GFX11)
680       return;
681 
682    /* The test coverage is reduced for Gitlab CI because it timeouts. */
683    if (!full) {
684       first_size = last_size = 0;
685    }
686 
687 #ifdef HAVE_OPENMP
688 #pragma omp parallel for
689 #endif
690    for (unsigned size = first_size; size <= last_size; size++) {
691       unsigned width = 8 + 379 * (size % 6);
692       unsigned height = 8 + 379 * (size / 6);
693 
694       struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);
695       ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);
696 
697       for (unsigned depth = 1; depth <= 2; depth *= 2) {
698          for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {
699             for (int rb_aligned = true; rb_aligned >= true; rb_aligned--) {
700                for (int pipe_aligned = true; pipe_aligned >= true; pipe_aligned--) {
701                   if (one_cmask_address_test(name, name, addrlib, info,
702                                              width, height, depth, bpp,
703                                              swizzle_mode,
704                                              pipe_aligned, rb_aligned,
705                                              0, 0, 0, 0)) {
706                   } else {
707                      p_atomic_inc(&fails);
708                   }
709                   p_atomic_inc(&total);
710                }
711             }
712          }
713       }
714 
715       ac_addrlib_destroy(ac_addrlib);
716    }
717    printf("%16s total: %u, fail: %u\n", name, total, fails);
718 }
719 
main(int argc,char ** argv)720 int main(int argc, char **argv)
721 {
722    bool full = false;
723 
724    if (argc == 2 && !strcmp(argv[1], "--full"))
725       full = true;
726    else
727       puts("Specify --full to run the full test.");
728 
729    puts("DCC:");
730    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
731       struct radeon_info info = get_radeon_info(&testcases[i]);
732 
733       run_dcc_address_test(testcases[i].name, &info, full);
734    }
735 
736    puts("HTILE:");
737    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
738       struct radeon_info info = get_radeon_info(&testcases[i]);
739 
740       /* Only GFX10+ is currently supported. */
741       if (info.gfx_level < GFX10)
742          continue;
743 
744       run_htile_address_test(testcases[i].name, &info, full);
745    }
746 
747    puts("CMASK:");
748    for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {
749       struct radeon_info info = get_radeon_info(&testcases[i]);
750 
751       run_cmask_address_test(testcases[i].name, &info, full);
752    }
753 
754    return 0;
755 }
756