#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader that exercises the NV cooperative-matrix type (fcoopmatNV):
// declarations at global and local scope, dimensions taken from literals,
// constant expressions and specialization constants, arithmetic, conversion
// between component widths, element indexing, load/store against storage
// buffers and shared memory, multiply-add, and passing matrices through
// user-defined functions.
//
// NOTE(review): many locals below are intentionally never read; this looks
// like a feature-coverage shader, so statements should not be pruned or
// reordered without checking what consumes it.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Z is derived from the specialization constant Y, so any matrix dimension
// written in terms of Z is itself specialization-dependent.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global cooperative matrices (single and arrayed) with a Z-dependent row count.
fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC;
fcoopmatNV<16, gl_ScopeSubgroup, Z, 8> mC2[3];

// Arrays sized by the matrix .length() of the globals above.
int arr[mC.length()];
int arr2[mC2[1].length()];

layout(constant_id = 1) const float F = 3.0;

// Constant matrices built with the single-scalar constructor
// (a float literal for mD, an integer literal for mD2).
const fcoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = fcoopmatNV<32, gl_ScopeSubgroup, Z, 8>(0.0);
const fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> mD2 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(1);

struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// 32-bit float storage: a fixed-size array followed by a runtime-sized array.
// Declared with buffer_reference, so Block is also usable as a member type
// (see Block16.b below).
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    float y[1024*1024];
    float x[];
} block;

// 16-bit float storage with the same y/x layout, plus a reference to a Block.
layout(set = 0, binding = 0) coherent buffer Block16 {
    float16_t y[1024*1024];
    float16_t x[];

    Block b;
} block16;

// Return the negation of an 8x8 matrix, for 16-bit and 32-bit components
// respectively. Used in main() to pass matrices as parameters and results.
fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> f16(fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> m) { return -m; }
fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> f32(fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> m) { return -m; }

// Matrix whose dimensions AND enclosing array sizes all come from one
// specialization constant.
layout(constant_id = 2) const int SC = 1;
fcoopmatNV<16, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for fcoopmatNV<16, gl_ScopeSubgroup, 16, 16>
// (presumably 16*16 elements * 2 bytes each, divided by 16 bytes per uvec4)
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // Column count is written as a constant expression rather than a literal.
    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> m = fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)>(0.0);

    // Component-wise arithmetic and scalar scaling (scalar on either side).
    m = m + m;
    m = m - m;
    m = -m;
    m = 2.0*m;
    m = m*2.0;

    // Conversion constructor: 32-bit components -> 16-bit components, same shape.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> m2 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(m);

    // Element read and write through operator[].
    float x = m[1];
    m[0] = x;

    // Load/store against the runtime-sized arrays. Per the extension the
    // trailing arguments are (element offset, stride, colMajor).
    // The 32-bit matrix m is also loaded from/stored to the float array
    // reached through the buffer reference block16.b.
    coopMatLoadNV(m, block.x, 16, 128, false);
    coopMatStoreNV(m, block.x, 16, 128, false);
    coopMatLoadNV(m2, block16.x, 16, 128, false);
    coopMatStoreNV(m2, block16.x, 16, 128, false);
    coopMatLoadNV(m, block16.b.x, 16, 128, false);
    coopMatStoreNV(m, block16.b.x, 16, 128, false);

    // D = A * B + C with 16-bit inputs (16x8 times 8x8) and a 32-bit 16x8
    // accumulator/result. A, B and C are left uninitialized here.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> A;
    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> B;
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> C;
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    int l = D.length();

    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> E;

    // This local named F hides the file-scope specialization constant F for
    // the remainder of main().
    fcoopmatNV<16, gl_ScopeSubgroup, Z, Z> F = fcoopmatNV<16, gl_ScopeSubgroup, Z, Z>(0.0);

    // Array of matrices; index the array first, then the matrix element.
    fcoopmatNV<32, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = 1.0;

    float md1 = mD[1];

    // Indexes the result of a compound assignment.
    // NOTE(review): 1234 is presumably past the per-invocation length of m;
    // confirm this out-of-range constant index is intentional.
    md1 += (m += m)[1234];

    // Whole-matrix assignment between elements of the global array.
    mC2[1] = mC2[2];

    // Same load/store pairs as above, now against the fixed-size arrays.
    coopMatLoadNV(m, block.y, 16, 128, false);
    coopMatStoreNV(m, block.y, 16, 128, false);
    coopMatLoadNV(m2, block16.y, 16, 128, false);
    coopMatStoreNV(m2, block16.y, 16, 128, false);

    fcoopmatNV<16, gl_ScopeSubgroup, 8, 8> p1;
    fcoopmatNV<32, gl_ScopeSubgroup, 8, 8> p2;

    // Matrices as function arguments and return values.
    p1 = f16(p1);
    p2 = f32(p2);

    p1 = fcoopmatNV<16, gl_ScopeSubgroup, 8, 8>(0.0);
    p2 = fcoopmatNV<32, gl_ScopeSubgroup, 8, 8>(0.0);

    // Compound component-wise divide; p1 was just set to all zeros, so this
    // divides zero by zero per component.
    p1 /= p1;

    // Compound scalar multiply with a scalar matching each component width.
    p1 *= float16_t(2.0);
    p2 *= 4.0;

    // Load/store through the shared uvec4 array instead of a storage buffer.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);

}