#version 450 core
#define PRECISION ${PRECISION}
#define FORMAT ${FORMAT}

layout(std430) buffer;

/*
 * Element-wise addition of two quantized 3D textures.
 *
 * Each invocation handles the texel at gl_GlobalInvocationID: both inputs are
 * dequantized to float, summed, requantized with the output parameters and
 * written to uOutput. Invocations outside uBlock.size.xyz do nothing.
 */

/* Qualifiers: layout - storage - precision - memory */

layout(set = 0, binding = 0, rgba8ui) uniform PRECISION restrict writeonly uimage3D uOutput;
layout(set = 0, binding = 1) uniform PRECISION isampler3D uInput0; // quantized input
layout(set = 0, binding = 2) uniform PRECISION isampler3D uInput1; // quantized input
layout(set = 0, binding = 3) uniform PRECISION restrict Block {
  ivec4 size;           // .xyz: extents used for the bounds check
  ivec4 isize0;         // .xyz: wrap period for uInput0 coordinates; .w == 1 replicates its x channel
  ivec4 isize1;         // .xyz: wrap period for uInput1 coordinates; .w == 1 replicates its x channel
  vec2 in_scale;        // .x: scale for uInput0, .y: scale for uInput1
  ivec2 in_zero_point;  // .x: zero point for uInput0, .y: zero point for uInput1
  vec2 out_scale;       // only .x is read by this shader
  ivec2 out_zero_point; // only .x is read by this shader
} uBlock;

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

void main() {
  const ivec3 pos = ivec3(gl_GlobalInvocationID);

  // Skip invocations that fall outside the requested extents.
  if (all(lessThan(pos, uBlock.size.xyz))) {
    // Wrap the output position into each input's own extents.
    const ivec3 input0_pos = pos % uBlock.isize0.xyz;
    const ivec3 input1_pos = pos % uBlock.isize1.xyz;

    // When isize.w == 1 the x channel is replicated into all four lanes.
    const vec4 v0 = uBlock.isize0.w == 1
        ? texelFetch(uInput0, input0_pos, 0).xxxx
        : texelFetch(uInput0, input0_pos, 0);
    const vec4 v1 = uBlock.isize1.w == 1
        ? texelFetch(uInput1, input1_pos, 0).xxxx
        : texelFetch(uInput1, input1_pos, 0);

    // Dequantize: real = scale * (quantized - zero_point).
    vec4 deq_in_0 = uBlock.in_scale.x * (v0 - uBlock.in_zero_point.x);
    vec4 deq_in_1 = uBlock.in_scale.y * (v1 - uBlock.in_zero_point.y);

    vec4 res = deq_in_0 + deq_in_1;

    // Requantize: quantized = round(real / scale) + zero_point.
    vec4 q_res = roundEven(res / uBlock.out_scale.x) + uBlock.out_zero_point.x;

    // Saturate to the 8-bit unsigned range of the rgba8ui output image.
    // Converting a negative float to uint is undefined in GLSL, and storing
    // an integer that does not fit the image format yields an undefined
    // texel, so the clamp must happen before the conversion.
    uvec4 ret = uvec4(clamp(q_res, 0.0, 255.0));

    imageStore(
        uOutput,
        pos,
        ret);
  }
}