• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/bin/bash

# This tests for the size of the register file. We do this by launching a
# lot of workgroups with only one invocation each, which saturates the GPU
# with in-flight waves. Each thread records its wave id using "getwid"
# (only available on a6xx+!) and stores it in the buffer. We then vary the
# register footprint by introducing uses of higher and higher registers.
# This lets us determine:
# 1. The total number of waves available (always 16 for known models).
# 2. The wave granularity (how many waves are always launched together;
#    always 2 for known models).
# 3. The total size of the register file that is divvied up between the waves.

# Abort as soon as any command fails.
set -e
15
# gen_shader N
#
# Print to stdout the compute shader assembly that is piped into
# ./computerator. The shader uses a 1x1x1 local size, reads its wave id with
# getwid, spins in a short busy loop, and stores the wave id into the buffer
# at the index given by its workgroup id.
#
# Arguments:
#   $1 - register number N; the shader reads rN.w so that registers up to rN
#        count towards its register footprint. Required: the function aborts
#        with an error message if it is missing or empty.
# Outputs:
#   The shader source on stdout.
gen_shader() {
	# local keeps this from clobbering the caller's own loop variable "n".
	local n=${1:?gen_shader: register number required}
	cat <<EOF
@localsize 1, 1, 1
@buf 128  ; g[0]
@wgid(r48.x)
getwid.u32 r1.x
mov.u32u32 r0.x, r48.x

; busy loop to make sure it actually uses all possible waves
mov.u32u32 r0.y, 16
(rpt2)nop
loop:
cmps.u.gt p0.x, r0.y, 0
sub.u r0.y, r0.y, 1
(rpt5)nop
br p0.x, #loop
add.f r1.y, r1.x, r${n}.w

(ss)(sy)(rpt5)nop
stib.b.untyped.1d.u32.1.imm r1.x, r0.x, 0
end
nop
EOF
}
41
# Without pipefail a pipeline's status is that of its last stage (tee), so a
# failing or missing ./computerator would go unnoticed and the empty logs
# would simply compare equal on every iteration.
set -o pipefail

# Generate the reference output using the lowest register (r1).
gen_shader 1 | ./computerator -g 128,1,1 | tee reference.log

# Raise the highest register used one step at a time and stop at the first
# register number whose output differs from the reference.
for n in {2..32}; do
	echo "Trying max reg: r$n"
	gen_shader "$n" | ./computerator -g 128,1,1 | tee new.log
	# diff must be tested directly in the condition: under "set -e" a bare
	# diff that finds differences would terminate the script before its exit
	# status could be inspected, and the message below would never print.
	if ! diff reference.log new.log; then
		echo "Changes at r$n"
		break
	fi
done
54