#version 450

#extension GL_KHR_shader_subgroup_quad: enable
#extension GL_EXT_shader_subgroup_extended_types_int8: enable
#extension GL_EXT_shader_subgroup_extended_types_int16: enable
#extension GL_EXT_shader_subgroup_extended_types_int64: enable
#extension GL_EXT_shader_subgroup_extended_types_float16: enable

// Compute shader that applies every subgroup quad built-in
// (subgroupQuadBroadcast, subgroupQuadSwapHorizontal,
// subgroupQuadSwapVertical, subgroupQuadSwapDiagonal) to 8-bit, 16-bit and
// 64-bit integer vectors and to 16-bit float vectors, at component counts
// 1 through 4.
//
// NOTE(review): each store below overwrites the previous store to the same
// member, so this looks like a compile-coverage shader rather than a
// meaningful computation -- confirm against whatever consumes it.

layout (local_size_x = 8) in;

// One vec4 per extended type; the block is instantiated four times.
layout(binding = 0) buffer Buffers
{
    i8vec4  i8;
    u8vec4  u8;
    i16vec4 i16;
    u16vec4 u16;
    i64vec4 i64;
    u64vec4 u64;
    f16vec4 f16;
} data[4];

// Broadcast from quad index 1 at every vector width of `member`.
// The scalar operand is read from data[0], the 2-component operand from
// data[1], the 3-component operand from data[2] and the full vec4 from
// data[3]; every result is stored into data[dst].
#define QUAD_BROADCAST_WIDTHS(member) \
    data[dst].member.x   = subgroupQuadBroadcast(data[0].member.x,   1); \
    data[dst].member.xy  = subgroupQuadBroadcast(data[1].member.xy,  1); \
    data[dst].member.xyz = subgroupQuadBroadcast(data[2].member.xyz, 1); \
    data[dst].member     = subgroupQuadBroadcast(data[3].member,     1);

// Apply the single-argument quad swap `op` at every vector width of
// `member`, using the same source/destination layout as the broadcast macro.
#define QUAD_SWAP_WIDTHS(op, member) \
    data[dst].member.x   = op(data[0].member.x);   \
    data[dst].member.xy  = op(data[1].member.xy);  \
    data[dst].member.xyz = op(data[2].member.xyz); \
    data[dst].member     = op(data[3].member);

// Full sequence for one buffer member: broadcast, then horizontal,
// vertical and diagonal swaps, in that order.
#define QUAD_ALL_OPS(member) \
    QUAD_BROADCAST_WIDTHS(member) \
    QUAD_SWAP_WIDTHS(subgroupQuadSwapHorizontal, member) \
    QUAD_SWAP_WIDTHS(subgroupQuadSwapVertical,   member) \
    QUAD_SWAP_WIDTHS(subgroupQuadSwapDiagonal,   member)

void main()
{
    // Index of the data[] element this invocation writes to (0..3).
    uint dst = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;

    // 8-bit integers
    QUAD_ALL_OPS(i8)
    QUAD_ALL_OPS(u8)

    // 16-bit integers
    QUAD_ALL_OPS(i16)
    QUAD_ALL_OPS(u16)

    // 64-bit integers
    QUAD_ALL_OPS(i64)
    QUAD_ALL_OPS(u64)

    // 16-bit floats
    QUAD_ALL_OPS(f16)
}