#!/bin/bash

# This tests for the size of the register file. We do this by launching a
# lot of workgroups with only one invocation, which causes the GPU to be
# saturated with in-flight waves. Each thread records its wave id using "getwid"
# (only available in a6xx+!) and stores it in the buffer. We then vary the
# register footprint by introducing uses of higher and higher registers. This
# lets us determine:
# 1. The total number of waves available (always 16 for known models)
# 2. The wave granularity (how many waves are always launched together, always 2
#    for known models).
# 3. The total size of the register file that is divvied up between the waves.
#
# Usage: run from a directory containing the `computerator` binary. The script
# writes reference.log and new.log into the current directory.

set -e

#######################################
# Emit the probe shader on stdout.
# Arguments: $1 - number of the highest register to touch; the shader reads
#                 r<N>.w once so that its register footprint reaches r<N>.
# Outputs:   shader source text for computerator.
#######################################
gen_shader() {
  local n=$1
  cat <<EOF
@localsize 1, 1, 1
@buf 128 ; g[0]
@wgid(r48.x)
getwid.u32 r1.x
mov.u32u32 r0.x, r48.x

; busy loop to make sure it actually uses all possible waves
mov.u32u32 r0.y, 16
(rpt2)nop
loop:
cmps.u.gt p0.x, r0.y, 0
sub.u r0.y, r0.y, 1
(rpt5)nop
br p0.x, #loop
add.f r1.y, r1.x, r${n}.w

(ss)(sy)(rpt5)nop
stib.b.untyped.1d.u32.1.imm r1.x, r0.x, 0
end
nop
EOF
}

#######################################
# Show the differences between two log files.
# Arguments: $1 - reference log, $2 - log to compare against it
# Outputs:   diff's output on stdout
# Returns:   0 if diff exits non-zero (the logs differ, or diff itself
#            failed), 1 if the logs are identical.
#######################################
logs_differ() {
  # The negation keeps diff's non-zero status from tripping `set -e`; callers
  # must still use this function as a condition (if/while/&&/||).
  ! diff -- "$1" "$2"
}

#######################################
# Record a reference run that only touches r1, then raise the highest register
# used one step at a time until the recorded output changes.
# Outputs: computerator output for every run, progress messages, and the diff
#          for the first register count whose output differs.
#######################################
main() {
  local n

  # generate reference:
  # NOTE(review): -g 128,1,1 is presumably the workgroup grid size - confirm
  # against computerator's option handling.
  gen_shader 1 | ./computerator -g 128,1,1 | tee reference.log

  for ((n = 2; n <= 32; n++)); do
    echo "Trying max reg: r$n"
    gen_shader "$n" | ./computerator -g 128,1,1 | tee new.log
    # diff must be evaluated as a condition: as a plain command under `set -e`
    # its exit status 1 ("files differ") would terminate the script before the
    # result could be reported.
    if logs_differ reference.log new.log; then
      echo "Changes at r$n"
      break
    fi
  done
}

main "$@"