• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#version 450 core
2#extension GL_KHR_memory_scope_semantics : enable
3#extension GL_NV_cooperative_matrix : enable
4#extension GL_NV_integer_cooperative_matrix : enable
5#extension GL_EXT_shader_explicit_arithmetic_types : enable
6#extension GL_EXT_buffer_reference : enable
7
// Workgroup size: 64 invocations along x, 1 along y and 1 along z.
8layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
9
// X is an ordinary constant; Y is a specialization constant (constant_id 0, default 2).
// Z = X*Y therefore depends on a specialization constant; it is used below as a
// cooperative-matrix size parameter.
10const int X = 8;
11layout(constant_id = 0) const int Y = 2;
12const int Z = X*Y;
13
// File-scope cooperative matrices with 8-bit components, signed (icoopmatNV) and
// unsigned (ucoopmatNV). Their first size parameter is Z, which depends on the
// specialization constant Y. Each signedness has a single variable and a 3-element array.
14icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
15icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
16ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
17ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];
18
// Arrays whose sizes come from .length() of the matrices above, taken both on a
// matrix variable and on an element of a matrix array.
// Note: uarr and uarr2 are declared int even though they are sized from the unsigned matrices.
19int iarr[miC.length()];
20int iarr2[miC2[1].length()];
21int uarr[muC.length()];
22int uarr2[muC2[1].length()];
23
// Constant matrices built with the single-scalar constructor: mD has 32-bit signed
// components and sizes (Z, 8); mD2 has 8-bit unsigned components and sizes (8, 8).
24const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
25const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);
26
// Plain struct of three ints and a constant instance of it.
// Neither S nor s is referenced by the rest of the shader as shown here.
27struct S { int a; int b; int c; };
28
29const S s = S(12, 23, 34);
30
// Coherent buffer_reference block of uints: a fixed array y of 1024*1024 elements
// followed by an unsized array x. It is declared with the instance name `block`,
// and the type Block is also used for member b of Block16.
31layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
32    uint y[1024*1024];
33    uint x[];
34} block;
35
// Coherent buffer block at set 0, binding 0, instance name `block8`.
// Despite the "16" in the block name, its arrays hold int8_t: a fixed array y of
// 1024*1024 elements and an unsized array x. Member b is a Block (the
// buffer_reference block type declared with instance name `block`).
36layout(set = 0, binding = 0) coherent buffer Block16 {
37    int8_t y[1024*1024];
38    int8_t x[];
39
40    Block b;
41} block8;
42
// Takes an 8-bit signed cooperative matrix with sizes (8, 8) by value and returns its unary negation.
43icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
// Takes an 8-bit unsigned cooperative matrix with sizes (8, 8) by value and returns it multiplied by the scalar uint8_t(2).
44ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }
45
// SC is a specialization constant (constant_id 2, default 1). It supplies both size
// parameters of the 32-bit unsigned matrix type and both dimensions of the array scm.
46layout(constant_id = 2) const int SC = 1;
47ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];
48
49// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
// The element count 16*16*2/16 evaluates to 32 uvec4 values (a uvec4 is 16 bytes).
// NOTE(review): the *2 factor looks like a two-bytes-per-element allowance, which is
// more than 8-bit components need - confirm whether the extra space is intended.
50shared uvec4 shmatrix[16*16*2/16];
51
// Entry point. Runs a sequence of integer cooperative-matrix operations: construction,
// arithmetic, conversion to float matrices, component indexing, loads and stores
// through buffer and shared arrays, multiply-add, .length(), arrays of matrices,
// function calls and compound assignment. No results are consumed after the final
// store; presumably each statement exists to exercise the compiler - confirm against
// the test harness.
52void main()
53{
    // The last size parameter is written as the constant expression (2>1?8:4), which evaluates to 8.
54    ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
55    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
56
    // Matrix arithmetic operators: add, subtract, unary negate, multiply by an 8-bit scalar.
57    mu = mu + mu;
58    mu = mu - mu;
59    mi = -mi;
60    mi = mi * int8_t(2);
61
    // Construct 16-bit and 32-bit float cooperative matrices from the unsigned and the signed integer matrix.
62    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
63    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
64    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
65    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);
66
    // Read one component of mu with [] and write it into mi after an explicit int8_t conversion.
67    uint8_t x = mu[1];
68    mi[0] = int8_t(x);
69
    // Load/store pairs through the unsized array x of each block: the uint block,
    // the int8_t block, and the uint block reached through member b of block8.
    // Per GL_NV_cooperative_matrix the trailing arguments are element offset, stride
    // and a column-major flag - verify against the extension spec.
70    coopMatLoadNV(mi, block.x, 16, 128, false);
71    coopMatStoreNV(mi, block.x, 16, 128, false);
72    coopMatLoadNV(mu, block8.x, 16, 128, false);
73    coopMatStoreNV(mu, block8.x, 16, 128, false);
74    coopMatLoadNV(mi, block8.b.x, 16, 128, false);
75    coopMatStoreNV(mi, block8.b.x, 16, 128, false);
76
    // Multiply-add with A of sizes (16, 8), B of sizes (8, 8), and C and D of sizes (16, 8).
    // A, B and C are declared without initializers.
77    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
78    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
79    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
80    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
81    D = coopMatMulAddNV(A, B, C);
82
    // .length() on a local matrix, stored in an int.
83    int l = D.length();
84
85
    // Local array of five matrices; write a single component of element 3.
86    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
87    a[3][0] = int8_t(1);
88
    // Index the constant 32-bit matrix mD.
89    int md1 = mD[1];
90
    // Index the result of a compound assignment expression.
    // NOTE(review): index 1234 is a literal that is probably larger than the
    // per-invocation component count - presumably deliberate for this test; confirm.
91    md1 += (mi += mi)[1234];
92
    // Whole-matrix assignment between elements of a global matrix array, then a
    // component of the signed array miC2 assigned to a component of the unsigned array muC2.
93    muC2[0] = muC2[1];
94    muC2[1][0] = (miC2[2][0]);
95
    // Same load/store pairs as above, this time through the fixed-size arrays y.
96    coopMatLoadNV(mi, block.y, 16, 128, false);
97    coopMatStoreNV(mi, block.y, 16, 128, false);
98    coopMatLoadNV(mu, block8.y, 16, 128, false);
99    coopMatStoreNV(mu, block8.y, 16, 128, false);
100
    // Pass matrices to and from the helper functions ineg and umul (p1 and p2 have no initializers).
101    icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
102    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;
103
104    p1 = ineg(p1);
105    p2 = umul(p2);
106
    // Compound division of a matrix by itself, for both signednesses.
107    p1 /= p1;
108    p2 /= p2;
109
    // Compound multiplication by an 8-bit scalar.
110    p1 *= int8_t(2);
111    p2 *= uint8_t(4);
112
    // Load and store through the shared uvec4 array shmatrix, with arguments 1 and 2
    // where the buffer accesses above use 16 and 128.
113    icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
114    coopMatLoadNV(ms, shmatrix, 1, 2, false);
115    coopMatStoreNV(ms, shmatrix, 1, 2, false);
116
117}
118