#!/bin/bash
#
# Test various instructions to check whether half<->full widening/narrowing
# works.  The basic premise is to perform the same instruction with and
# without the widening/narrowing folded in and check if the results match.
#
# Note this doesn't currently differentiate between signed/unsigned/bool,
# and just assumes int is signed (since unsigned is basically(ish) like
# signed but without sign extension)
#
# TODO probably good to pick numeric src values that are better at triggering
# edge cases, while still not losing precision in a full->half->full
# sequence.. but some instructions like absneg don't even appear to be
# subtly wrong when you try to fold in a precision conversion.
#
# Usage: run from the directory containing the ./computerator binary.
# Add '-v' arg to see the result values.

set -e

#
# Templates for float->float instructions.
#
# Templates are expanded by expand() via eval, so $dst/$src1/$src2/$src3 are
# placeholders and literal parentheses must be backslash-escaped.
#
f2f_instrs=(
  'add.f $dst, $src1, $src2'
  'min.f $dst, $src1, $src2'
  'min.f $dst, $src2, $src1'
  'max.f $dst, $src1, $src2'
  'max.f $dst, $src2, $src1'
  'mul.f $dst, $src1, $src2'
  'sign.f $dst, $src1'
  'absneg.f $dst, \(neg\)$src1'
  'absneg.f $dst, \(abs\)$src1'
  'floor.f $dst, $src1'
  'ceil.f $dst, $src1'
  'rndne.f $dst, $src1'
  'rndaz.f $dst, $src1'
  'trunc.f $dst, $src1'
)

#
# Templates for float->int instructions:
#
f2i_instrs=(
  'cmps.f.gt $dst, $src1, $src2'
  'cmps.f.lt $dst, $src1, $src2'
  'cmpv.f.gt $dst, $src1, $src2'
  'cmpv.f.lt $dst, $src1, $src2'
)

#
# Templates for int->int instructions:
#
# NOTE(review): cmps.f.gt/cmps.f.lt look like float compares but are listed
# here and therefore run against the integer setup -- confirm this is intended.
#
i2i_instrs=(
  'add.u $dst, $src1, $src2'
  'add.s $dst, $src1, $src2'
  'sub.u $dst, $src1, $src2'
  'sub.s $dst, $src1, $src2'
  'cmps.f.gt $dst, $src1, $src2'
  'cmps.f.lt $dst, $src1, $src2'
  'min.u $dst, $src1, $src2'
  'min.u $dst, $src2, $src1'
  'min.s $dst, $src1, $src2'
  'min.s $dst, $src2, $src1'
  'max.u $dst, $src1, $src2'
  'max.u $dst, $src2, $src1'
  'max.s $dst, $src1, $src2'
  'max.s $dst, $src2, $src1'
  'absneg.s $dst, \(neg\)$src1'
  'absneg.s $dst, \(abs\)$src1'
  'and.b $dst, $src2, $src3'
  'or.b $dst, $src1, $src2'
  'not.b $dst, $src1'
  'xor.b $dst, $src1, $src2'
  'cmpv.u.gt $dst, $src1, $src2'
  'cmpv.u.lt $dst, $src1, $src2'
  'cmpv.s.gt $dst, $src1, $src2'
  'cmpv.s.lt $dst, $src1, $src2'
  'mul.u24 $dst, $src1, $src2'
  'mul.s24 $dst, $src1, $src2'
  'mull.u $dst, $src1, $src2'
  'bfrev.b $dst, $src1'
  'clz.s $dst, $src2'
  'clz.b $dst, $src2'
  'shl.b $dst, $src1, $src2'
  'shr.b $dst, $src3, $src1'
  'ashr.b $dst, $src3, $src1'
  'mgen.b $dst, $src1, $src2'
  'getbit.b $dst, $src3, $src2'
  'setrm $dst, $src1'
  'cbits.b $dst, $src3'
  'shb $dst, $src1, $src2'
  'msad $dst, $src1, $src2'
)

#
# Helper to expand instruction template.
#
# Arguments: $1 - template, $2 - dst register, $3..$5 - src1..src3 registers
# Outputs:   the template with $dst/$src1/$src2/$src3 substituted, on stdout
#
expand() {
  local instr=$1
  local dst=$2
  local src1=$3
  local src2=$4
  local src3=$5

  # eval re-parses the template so the locals above are substituted into it.
  # Templates only ever come from the arrays in this file; never pass
  # untrusted text here.
  eval echo "$instr"
}

#
# Emit the four variants of one instruction template: a half->half and a
# full->full control, plus the two variants with the conversion folded in
# (full->half and half->full).
#
# Arguments: $1 - instruction template
#
expand_test() {
  local instr=$1

  echo '; control, half->half:'
  expand "$instr" "hr1.x" "hr0.x" "hr0.y" "hr0.z"
  echo '; test, full->half:'
  expand "$instr" "hr1.y" "r1.x" "r1.y" "r1.z"

  echo '; control, full->full:'
  expand "$instr" "r2.x" "r1.x" "r1.y" "r1.z"
  echo '; test, half->full:'
  expand "$instr" "r2.y" "hr0.x" "hr0.y" "hr0.z"

  echo "(rpt5)nop"
}

#
# Helpers to construct test program assembly:
#

# Program header: local size and the result buffer declaration.
header_asm() {
  cat <<EOF
@localsize 1, 1, 1
@buf 4  ; g[0]
EOF
}

# Program footer: store r2.xyzw into g[0] at offsets 0..3 and end.
footer_asm() {
  cat <<EOF
; dest offsets:
mov.u32u32 r3.x, 0
mov.u32u32 r3.y, 1
mov.u32u32 r3.z, 2
mov.u32u32 r3.w, 3
(rpt5)nop

; and store results:
stib.untyped.1d.u32.1 g[0] + r3.x, r2.x   ; control: full->full
stib.untyped.1d.u32.1 g[0] + r3.y, r2.y   ; test:    half->full
stib.untyped.1d.u32.1 g[0] + r3.z, r2.z   ; control: half->half
stib.untyped.1d.u32.1 g[0] + r3.w, r2.w   ; test:    full->half
(sy)nop
end
EOF
}

# Load the float source operands (1.0, 2.0, 3.0) into hr0.xyz and r1.xyz.
setup_asm_float() {
  cat <<EOF
; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
; r1->r2 avail for full, r1 for src, r2 for dst
cov.f32f16 hr0.x, (1.0)
cov.f32f16 hr0.y, (2.0)
cov.f32f16 hr0.z, (3.0)
mov.f32f32 r1.x, (1.0)
mov.f32f32 r1.y, (2.0)
mov.f32f32 r1.z, (3.0)
(rpt5)nop
EOF
}

# Load the integer source operands (1, -2, 3) into hr0.xyz and r1.xyz.
setup_asm_int() {
  cat <<EOF
; hr0->hr1 (r0) avail for half, hr0 for src, hr1 for dst
; r1->r2 avail for full, r1 for src, r2 for dst
cov.s32s16 hr0.x, 1
cov.s32s16 hr0.y, -2
cov.s32s16 hr0.z, 3
mov.s32s32 r1.x, 1
mov.s32s32 r1.y, -2
mov.s32s32 r1.z, 3
(rpt5)nop
EOF
}

#
# Shared body of the f2f/f2i/i2i generators.
#
# Arguments: $1 - setup function that loads the source registers
#            $2 - cov opcode used to convert the half results back to full
#            $3 - instruction template
#
_test_program_asm() {
  local setup=$1
  local cov=$2
  local instr=$3

  header_asm
  "$setup"
  expand_test "$instr"

  cat <<EOF
; convert half results back to full:
$cov r2.z, hr1.x
$cov r2.w, hr1.y
EOF

  footer_asm
}

#
# Generate assembly code to test float->float opcode
#
f2f_asm() {
  _test_program_asm setup_asm_float cov.f16f32 "$1"
}

#
# Generate assembly code to test float->int opcode
#
f2i_asm() {
  _test_program_asm setup_asm_float cov.s16s32 "$1"
}

#
# Generate assembly code to test int->int opcode
#
i2i_asm() {
  _test_program_asm setup_asm_int cov.s16s32 "$1"
}

#
# Helper to parse computerator output (on stdin) and print results.
#
# Takes the first input line that contains a space and treats its first four
# whitespace-separated fields as, in order: full control, half->full test,
# half control, full->half test (the order footer_asm stores them).
# NOTE(review): assumes computerator prints the buffer words on one line with
# no leading offset column -- confirm against its output format.
#
# Globals: verbose (read)
# Returns: always 0; failures are only reported on stdout so that the
#          remaining tests still run under 'set -e'.
#
check_results() {
  local str cf tf ch th

  # xargs collapses runs of whitespace and trims the line.
  str=$(grep " " | head -1 | xargs)

  if [[ "$verbose" == "true" ]]; then
    printf '%s\n' "$str"
  fi

  # Split components of result buffer (any extra fields are ignored):
  IFS=' ' read -r cf tf ch th _ <<<"$str"

  if [[ -z "$cf" || -z "$tf" || -z "$ch" || -z "$th" ]]; then
    # No (or truncated) output, e.g. computerator is missing or crashed.
    # Previously this case fell through the comparisons without any report.
    echo " FAIL: could not read four result values from computerator!"
    echo " output='$str'"
  else
    # Sanity test, make sure the control results match:
    if [[ "$cf" != "$ch" ]]; then
      echo " FAIL: control results do not match! Half vs full op is not equivalent!"
      echo " full=$cf half=$ch"
    fi

    # Compare test (with conversion folded) to control:
    if [[ "$cf" != "$tf" ]]; then
      echo " FAIL: half -> full widening result does not match control!"
      echo " control=$cf result=$tf"
    fi
    if [[ "$ch" != "$th" ]]; then
      echo " FAIL: full -> half narrowing result does not match control!"
      echo " control=$ch result=$th"
    fi
  fi

  # HACK without a delay different invocations
  # of computerator seem to somehow clobber each
  # other.. which isn't great..
  sleep 0.1
}

#
# Run every template through one generator and check the results.
#
# Arguments: $1 - generator function (f2f_asm, f2i_asm or i2i_asm)
#            $2.. - instruction templates
#
run_tests() {
  local gen=$1
  local instr
  shift

  for instr in "$@"; do
    echo "TEST: $instr"
    "$gen" "$instr" | ./computerator -g 1,1,1 | check_results
  done
}

#
# Run the tests!
#

verbose="false"
if [[ "${1:-}" == "-v" ]]; then
  verbose="true"
fi

run_tests f2f_asm "${f2f_instrs[@]}"
run_tests f2i_asm "${f2i_instrs[@]}"
run_tests i2i_asm "${i2i_instrs[@]}"