#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_NV_integer_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader exercising signed (icoopmatNV) and unsigned (ucoopmatNV)
// integer cooperative matrices: declarations with constant, specialization-
// constant and constant-expression dimensions, arithmetic, conversion to
// fcoopmatNV, element access, load/store against several kinds of storage,
// multiply-add, and passing matrices through user functions.
// NOTE(review): the statements look like deliberate compiler-coverage cases;
// avoid "simplifying" them without checking what each one is meant to cover.

// One workgroup is 64x1x1 invocations.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Z is derived from specialization constant Y (default 2), so the row count
// used by the global matrices below depends on a specialization constant.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global cooperative matrices, scalar and arrayed, in signed and unsigned
// flavors. Template arguments are <8, gl_ScopeSubgroup, Z, 8>; per
// GL_NV_cooperative_matrix these are component bits, scope, rows, columns.
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];

// Arrays whose sizes come from length() on the matrices above (both directly
// and through an array element).
int iarr[miC.length()];
int iarr2[miC2[1].length()];
int uarr[muC.length()];
int uarr2[muC2[1].length()];

// Constant matrices built from the scalar 1: a 32-bit signed one that uses the
// Z dimension, and an 8-bit unsigned 8x8 one.
const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);

// Plain struct and a constant instance of it; neither is referenced anywhere
// else in this shader.
struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// Block of 32-bit uints: a fixed-size array y and an unsized array x.
// It carries the buffer_reference qualifier and is also used as the type of
// member b in Block16 below.
// NOTE(review): Block and Block16 both specify set = 0, binding = 0 - confirm
// that this is intended.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    uint y[1024*1024];
    uint x[];
} block;

// Block of 8-bit signed integers plus a Block member b.
// NOTE(review): the block is named Block16 although its arrays are int8_t, and
// the unsized array x is not the last member - confirm both are intended.
layout(set = 0, binding = 0) coherent buffer Block16 {
    int8_t y[1024*1024];
    int8_t x[];

    Block b;
} block8;

// Cooperative matrices as function parameters and return values:
// ineg returns the negation of m, umul returns m scaled by uint8_t(2).
icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }

// Specialization constant SC (default 1) supplies both matrix dimensions and
// both array sizes of scm.
layout(constant_id = 2) const int SC = 1;
ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
// NOTE(review): 16*16*2/16 is 32 uvec4 (512 bytes), while a 16x16 matrix of
// 8-bit components is 256 bytes, so the array is twice that size - confirm
// whether the factor of 2 is intentional.
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // The column dimension is the constant expression (2>1?8:4), which
    // evaluates to 8; both matrices are filled with the scalar 2.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);

    // Arithmetic: matrix + matrix, matrix - matrix, unary minus, and
    // matrix * scalar.
    mu = mu + mu;
    mu = mu - mu;
    mi = -mi;
    mi = mi * int8_t(2);

    // Conversions from unsigned (mu) and signed (mi) integer matrices to
    // 16-bit and 32-bit float cooperative matrices of the same 16x8 shape.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);

    // Element read from the unsigned matrix, converted and written into the
    // signed matrix.
    uint8_t x = mu[1];
    mi[0] = int8_t(x);

    // Load/store through the unsized arrays: 32-bit block.x, 8-bit block8.x,
    // and block8.b.x reached through the Block member. Per
    // GL_NV_cooperative_matrix the trailing arguments are element offset (16),
    // stride (128) and the column-major flag (false).
    coopMatLoadNV(mi, block.x, 16, 128, false);
    coopMatStoreNV(mi, block.x, 16, 128, false);
    coopMatLoadNV(mu, block8.x, 16, 128, false);
    coopMatStoreNV(mu, block8.x, 16, 128, false);
    coopMatLoadNV(mi, block8.b.x, 16, 128, false);
    coopMatStoreNV(mi, block8.b.x, 16, 128, false);

    // Multiply-add with A (16x8), B (8x8) and C (16x8) giving D (16x8).
    // A, B and C are never initialized, so the values in D are unspecified.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    // length() on a local matrix; l is not used afterwards.
    int l = D.length();


    // Local array of cooperative matrices and a write to one component of
    // one array element.
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = int8_t(1);

    // Component read from the constant matrix mD.
    int md1 = mD[1];

    // Indexes the result of a compound assignment with the constant 1234.
    // NOTE(review): whether 1234 is within mi.length() cannot be determined
    // from this file - presumably a deliberate large-index case; confirm.
    md1 += (mi += mi)[1234];

    // Whole-matrix copy between elements of a global matrix array, then one
    // component taken from the signed array and stored into the unsigned one.
    muC2[0] = muC2[1];
    muC2[1][0] = (miC2[2][0]);

    // Same load/store calls as above, now through the fixed-size arrays y.
    coopMatLoadNV(mi, block.y, 16, 128, false);
    coopMatStoreNV(mi, block.y, 16, 128, false);
    coopMatLoadNV(mu, block8.y, 16, 128, false);
    coopMatStoreNV(mu, block8.y, 16, 128, false);

    // 8x8 matrices matching the parameter types of ineg and umul; both are
    // uninitialized when first passed in.
    icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;

    // Pass matrices to the user functions and assign the returned matrices.
    p1 = ineg(p1);
    p2 = umul(p2);

    // Compound division by a matrix.
    p1 /= p1;
    p2 /= p2;

    // Compound multiplication by a scalar.
    p1 *= int8_t(2);
    p2 *= uint8_t(4);

    // Load/store against the shared uvec4 array, with element offset 1 and
    // stride 2.
    icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);

}
