#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_NV_cooperative_matrix2 : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader that exercises the tensor-layout / tensor-view overloads of
// coopMatLoadTensorNV and coopMatStoreTensorNV, with and without a decode
// callback. Nothing is computed from the loaded data; the shader only issues
// the declarations and calls below.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Unsized fp16 array used as the source/destination of most loads and stores.
buffer BufType {
   float16_t x[];
} Buf;

// Buffer-reference type handed to the decode callback (single fp16 member).
layout(buffer_reference, std430, buffer_reference_align = 2) buffer fp16Buf {
   float16_t f;
};


// Decode callback passed to coopMatLoadTensorNV.
// Returns the fp16 value that `b` refers to; `blockCoords` and `coordInBlock`
// are accepted but not used.
float16_t decode(const in fp16Buf b, const in uint32_t blockCoords[2], const in uint32_t coordInBlock[2])
{
   return b.f;
}

// Element type of SBuf: a struct wrapping one f16vec2.
struct S {
   f16vec2 x;
};


// Unsized array of S at binding 0, used by the final load in main().
layout(std430, binding = 0) buffer SBuf {
   S s[];
} sbuf;

// Specialization constant (constant_id = 0) whose default is
// gl_CooperativeMatrixClampModeConstantNV; it is used as the second
// tensorLayoutNV argument at the end of main().
layout(constant_id = 0) const uint32_t Clamp = gl_CooperativeMatrixClampModeConstantNV;

void main()
{
    // 64x32 fp16 workgroup-scope matrix with use A; target of every load/store.
    coopmat<float16_t, gl_ScopeWorkgroup, 64, 32, gl_MatrixUseA> A;

    // Tensor layouts: one with a single template argument, one with two.
    tensorLayoutNV<2> t = createTensorLayoutNV(2);
    tensorLayoutNV<3, 1> t2 = createTensorLayoutNV(3, 1);

    // Each setter returns a new layout value, which is assigned back to `t`.
    t = setTensorLayoutBlockSizeNV(t, 4, 8);
    t = setTensorLayoutDimensionNV(t, 256, 512);
    t = sliceTensorLayoutNV(t, 128, 32, 256, 32);

    // 5-argument tensor view; the setters likewise return the updated view.
    tensorViewNV<5> v = createTensorViewNV(5);
    v = setTensorViewDimensionsNV(v, 10, 11, 12, 13, 14);
    v = setTensorViewStrideNV(v, 10, 11, 12, 13, 15);
    v = setTensorViewClipNV(v, 0, 16, 0, 16);

    // Tensor views declared with additional template arguments.
    tensorViewNV<5, true> v2 = createTensorViewNV(5, true);
    tensorViewNV<2, true, 1, 0> v3 = createTensorViewNV(2, true, 1, 0);

    // Load/store with a layout only.
    coopMatLoadTensorNV(A, Buf.x, 0, t);
    coopMatStoreTensorNV(A, Buf.x, 0, t);

    // Load/store with a layout and a view.
    coopMatLoadTensorNV(A, Buf.x, 0, t, v);
    coopMatStoreTensorNV(A, Buf.x, 0, t, v);

    // Loads that pass the decode callback, without and with a view.
    coopMatLoadTensorNV(A, Buf.x, 0, t, decode);
    coopMatLoadTensorNV(A, Buf.x, 0, t, v, decode);

    // Same load form, but from the struct array and with 1 as the third argument.
    coopMatLoadTensorNV(A, sbuf.s, 1, t, v, decode);

    // Layout whose second template argument is the specialization constant.
    tensorLayoutNV<2, Clamp> tc = createTensorLayoutNV(2, Clamp);
}