-- Exercises the scalar register (s0), an ARF that first appears on Xe3.
-- The scalar register accepts only a restricted set of operations; the
-- kernel below goes through the ones used here: MOV into s0, broadcast
-- from s0 into GRFs, and s0 as a register list for SEND.

-- NOTE(review): presumably gates the script on hardware version 30;
-- check_ver is provided by the host tool -- confirm its exact behavior.
check_ver(30)

-- Assembly for the kernel.  The text is handed to the host unchanged, so
-- the comments inside the string use the assembler's // syntax.
local kernel_src = [[
    // The new scalar register s0 has 32 bytes.
    //
    // Only MOV can have scalar as destination. Offsets in scalar
    // must be aligned to word boundary. This non-overlapping MOVs
    // will fill the first 16 bytes.

    mov(1) s0<1>UW 0x1111UW;

    mov(1) r2<1>UW 0x2222UW;
    mov(1) s0.1<1>UW r2<0,1,0>UW {I@1};

    mov(1) s0.1<1>UD 0x33333333UD {I@1};

    mov(4) r4<1>UW 0x4444UW;
    mov(1) s0.1<1>UQ r4<0,1,0>UQ {I@1};


    // Scalar content can be broadcasted back into GRFs.

    mov(16) r20<1>UD s0.0<0,1,0>UD;
    mov(16) r32<1>UD s0.1<0,1,0>UD;
    mov(16) r96<1>UD s0.2<0,1,0>UD;
    mov(16) r64<1>UD s0.3<0,1,0>UD;


    // Scalar can store a list of register numbers to be used as source for
    // SEND. Note the scalar will contain the hex value for r11 and r20.
    // And the scalar register is indexed as bytes in r[...] syntax, unlike
    // in the MOV.

    mov(16) r11<1>UD 0x30000000UD;
    mov(1) s0.4<1>UD 0x0000140bUD;

    send(16) nullUD r[s0.16] 0x04000504 0x00000000 ugm MsgDesc: () { I@1 $1 };


    // A larger SEND, note that registers for the payload don't need to be
    // contiguous, the hardware will gather them together.

    add(16) r11<1>UD r11<1,1,0>UD 0x4UD {A@1, $1.src};
    mov(1) s0.4<1>UD 0x4060200bUD;

    send(16) nullUD r[s0.16] 0x08002504 0x00000000 ugm MsgDesc: ( ) { I@1 $1 };

    @eot
]]

-- Run the kernel; `result` is whatever the host's execute() hands back and
-- is indexed from 0 below.
local result = execute({ src = kernel_src })

-- The four dwords the kernel MOVs into the first 16 bytes of s0
-- (two UW halves, one UD, one UQ).  Presumably the two SENDs are what
-- store them into the buffer returned by execute() -- confirm against the
-- host tool.
-- NOTE(review): `expected` is assigned as a global here; confirm nothing
-- in the host reads it before turning it into a local.
expected = {
  [0] = 0x22221111,
  0x33333333,
  0x44444444,
  0x44444444,
}

print("result")
dump(result, 4)

print("expected")
dump(expected, 4)

-- Compare dwords 0..3 and stop at the first difference; exactly one of
-- "FAIL" / "OK" is printed.
local all_match = true
for idx = 0, 3 do
  if result[idx] ~= expected[idx] then
    all_match = false
    break
  end
end

if all_match then
  print("OK")
else
  print("FAIL")
end