• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#version 450 core
// PRECISION and FORMAT are defined from ${...} placeholders; presumably the
// shader build step substitutes them before compilation (that step is not
// visible in this file).
#define PRECISION ${PRECISION}
// NOTE(review): FORMAT is not referenced anywhere in the visible shader; the
// output image format is written out literally as rgba8ui.
#define FORMAT ${FORMAT}

// Default layout for buffer blocks. The visible shader declares no buffer
// blocks (uBlock is a uniform block).
layout(std430) buffer;

/* Qualifiers: layout - storage - precision - memory */

// binding 0: write-only output image, four unsigned 8-bit components per texel.
// bindings 1 and 2: integer samplers holding the two quantized operands;
// main() computes uInput0 / uInput1.
layout(set = 0, binding = 0, rgba8ui) uniform PRECISION restrict writeonly uimage3D   uOutput;
layout(set = 0, binding = 1)          uniform PRECISION                    isampler3D uInput0; //quantized input
layout(set = 0, binding = 2)          uniform PRECISION                    isampler3D uInput1; //quantized input
// Parameters for the quantized division, as consumed by main():
//   size           xyz: output extents used for the bounds check;
//                  w:   element count used to mask the lanes of the last
//                       4-wide slice (presumably the channel count - confirm
//                       against the host code).
//   isize0/isize1  xyz: extents of each input; the output position is wrapped
//                       modulo these, so an extent of 1 broadcasts that axis.
//                  w:   when equal to 1, the .x lane of the fetched texel is
//                       replicated to all four lanes.
//   in_scale       x / y: scale of uInput0 / uInput1.
//   in_zero_point  x / y: zero point of uInput0 / uInput1.
//   out_scale      x: output scale (y is not read by main()).
//   out_zero_point x: output zero point (y is not read by main()).
layout(set = 0, binding = 3)          uniform PRECISION restrict           Block {
  ivec4 size;
  ivec4 isize0;
  ivec4 isize1;
  vec2 in_scale;
  ivec2 in_zero_point;
  vec2 out_scale;
  ivec2 out_zero_point;
} uBlock;

// Workgroup dimensions are taken from specialization constants 0 (x), 1 (y)
// and 2 (z).
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

/*
 * Quantized element-wise division: one invocation produces one output texel.
 *
 * Both operands are dequantized with their own scale / zero point, divided,
 * and the quotient is re-quantized with the output scale / zero point:
 *
 *   out = clamp(roundEven((s0 * (a - z0)) / (s1 * (b - z1)) / s_out) + z_out,
 *               0, 255)
 *
 * Invocations whose position lies outside uBlock.size.xyz write nothing.
 */
void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  if (all(lessThan(pos, uBlock.size.xyz))) {
    // Wrap the output position into each input's extents so that an input
    // with extent 1 along an axis is broadcast along that axis.
    const ivec3 input0_pos = pos % uBlock.isize0.xyz;
    const ivec3 input1_pos = pos % uBlock.isize1.xyz;

    const ivec4 texel0 = texelFetch(uInput0, input0_pos, 0);
    const ivec4 texel1 = texelFetch(uInput1, input1_pos, 0);

    // When an input's w size is 1, replicate its .x lane across all four lanes.
    const vec4 v0 = vec4(uBlock.isize0.w == 1 ? texel0.xxxx : texel0);
    vec4 v1 = vec4(uBlock.isize1.w == 1 ? texel1.xxxx : texel1);

    // Index of the first of the four elements covered by this texel.
    // NOTE(review): assumes z enumerates 4-wide slices of size.w elements,
    // repeating every ceil(size.w / 4) slices - confirm against the host code.
    const int c_index = (pos.z % ((uBlock.size.w + 3) / 4)) * 4;
    if (uBlock.isize1.w != 1 && c_index + 3 >= uBlock.size.w) {
      // Lanes at or beyond size.w are padding. Replace the divisor there with
      // zero_point + 1 so it dequantizes to in_scale.y rather than to zero,
      // which avoids a division by zero in lanes that carry no data.
      const ivec4 c_ind = ivec4(c_index) + ivec4(0, 1, 2, 3);
      const vec4 mask = vec4(lessThan(c_ind, ivec4(uBlock.size.w)));
      v1 = v1 * mask + (vec4(1, 1, 1, 1) - mask) * (uBlock.in_zero_point.y + 1);
    }

    const vec4 deq_in_0 = uBlock.in_scale.x * (v0 - uBlock.in_zero_point.x);
    const vec4 deq_in_1 = uBlock.in_scale.y * (v1 - uBlock.in_zero_point.y);

    const vec4 res = deq_in_0 / deq_in_1;

    // Saturate to the range of the rgba8ui output before converting to
    // unsigned: a quotient can be negative or exceed 255, and converting a
    // negative float to uint is undefined in GLSL.
    // NOTE(review): a real (non-padding) divisor equal to its zero point still
    // divides by zero; an infinite result saturates here, but a NaN result
    // (0 / 0) is not given a defined value by clamp().
    const vec4 q_res = clamp(
        roundEven(res / uBlock.out_scale.x) + uBlock.out_zero_point.x,
        0.0,
        255.0);

    const uvec4 ret = uvec4(q_res);

    imageStore(
        uOutput,
        pos,
        ret);
  }
}
