-- Xe3 adds a new ARF to store scalar values.  It supports only a limited
-- set of operations.
--
-- This script fills the scalar register s0 with a few MOVs, broadcasts its
-- contents back into GRFs, and then uses s0 as a list of register numbers
-- for two gathering SENDs.  The four dwords read back are compared against
-- the values the MOVs are expected to have produced.

-- NOTE(review): check_ver is provided by the host tool; presumably it stops
-- the script unless the device is at least version 30 (Xe3) -- confirm.
check_ver(30)

-- NOTE(review): execute is provided by the host tool; presumably it
-- assembles and runs `src` and returns the resulting data buffer, indexed
-- from 0 in dwords -- confirm against the tool's documentation.
local r = execute {
  src=[[
    // The new scalar register s0 has 32 bytes.
    //
    // Only MOV can have scalar as destination. Offsets in scalar
    // must be aligned to word boundary. This non-overlapping MOVs
    // will fill the first 16 bytes.

    mov(1)   s0<1>UW     0x1111UW;

    mov(1)   r2<1>UW     0x2222UW;
    mov(1)   s0.1<1>UW   r2<0,1,0>UW   {I@1};

    mov(1)   s0.1<1>UD   0x33333333UD  {I@1};

    mov(4)   r4<1>UW     0x4444UW;
    mov(1)   s0.1<1>UQ   r4<0,1,0>UQ   {I@1};

    // Scalar content can be broadcasted back into GRFs.

    mov(16)  r20<1>UD    s0.0<0,1,0>UD;
    mov(16)  r32<1>UD    s0.1<0,1,0>UD;
    mov(16)  r96<1>UD    s0.2<0,1,0>UD;
    mov(16)  r64<1>UD    s0.3<0,1,0>UD;

    // Scalar can store a list of register numbers to be used as source for
    // SEND. Note the scalar will contain the hex value for r11 and r20.
    // And the scalar register is indexed as bytes in r[...] syntax, unlike
    // in the MOV.

    mov(16)  r11<1>UD    0x30000000UD;
    mov(1)   s0.4<1>UD   0x0000140bUD;
    send(16) nullUD r[s0.16] 0x04000504 0x00000000 ugm MsgDesc: () { I@1 $1 };

    // A larger SEND, note that registers for the payload don't need to be
    // contiguous, the hardware will gather them together.

    add(16)  r11<1>UD    r11<1,1,0>UD  0x4UD  {A@1, $1.src};
    mov(1)   s0.4<1>UD   0x4060200bUD;
    send(16) nullUD r[s0.16] 0x08002504 0x00000000 ugm MsgDesc: ( ) { I@1 $1 };

    @eot
  ]]
}

-- Expected contents of the first four result dwords, following the MOVs
-- into s0 above:
--   [0] 0x22221111  two UW writes (0x1111 at word 0, 0x2222 at word 1)
--   [1] 0x33333333  one UD write at dword 1
--   [2] 0x44444444  low half of the UQ write at qword 1
--   [3] 0x44444444  high half of the same UQ write
-- Kept local so the table does not leak into the global environment.
local expected = {[0] = 0x22221111, 0x33333333, 0x44444444, 0x44444444}

-- NOTE(review): dump is provided by the host tool; presumably it prints the
-- first 4 entries of the given table -- confirm.
print("result")
dump(r, 4)

print("expected")
dump(expected, 4)

-- Compare dword by dword; the first mismatch reports FAIL and ends the
-- script without printing OK.
for i=0,3 do
  if r[i] ~= expected[i] then
    print("FAIL")
    return
  end
end

print("OK")