/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#version 450 core

#define PRECISION ${PRECISION}
#define VEC4_T ${texel_type(DTYPE)}

layout(std430) buffer;

// A (slot 0) is the sampled texture that main() fetches texels from.
// B (slot 1) is the buffer that main() stores its final value into.
${layout_declare_sampler(0, "r", "A", DTYPE)}
${layout_declare_buffer(1, "w", "B", DTYPE, "PRECISION", False)}

// The workgroup dimensions come from specialization constants 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Number of iterations of the outer loop in main(). Each iteration performs
// NUNROLL texel fetches (the unrolling is done by the template generator).
layout(constant_id = 3) const int niter = 1;
// NOTE(review): nvec is declared but not referenced by the code visible in
// this file - presumably kept so the constant ids line up with the host side;
// confirm before removing.
layout(constant_id = 4) const int nvec = 1;
// Stride added to the fetch offset after every fetch.
layout(constant_id = 5) const int local_group_size = 1;
// The address mask works as a modulo because x % 2^n == x & (2^n - 1).
// This will help us limit address accessing to a specific set of unique
// addresses depending on the access size we want to measure.
layout(constant_id = 6) const int addr_mask = 1;
// Multiplied by the workgroup index to give each workgroup its own starting
// offset (before masking).
layout(constant_id = 7) const int workgroup_width = 1;

// Repeatedly fetches texels from A along a single axis (selected at template
// time by DIM: 0 = x, 1 = y, 2 = z), folds every fetched texel into an
// accumulator, and finally stores the accumulator into B so that the fetches
// contribute to an observable result.
void main() {
  // NOTE(review): the accumulator starts at zero and is only ever multiplied,
  // so it stays zero for finite texel values. Presumably the multiply exists
  // purely to make the stored result depend on every fetch - confirm.
  vec4 sum = vec4(0);
  // Starting offset for this invocation: its index along x within the
  // dispatch (workgroup index * workgroup_width + local index), wrapped into
  // the masked address range.
  uint offset = (gl_WorkGroupID[0] * workgroup_width + gl_LocalInvocationID[0]) & addr_mask;

  // The counter is declared outside the loop because it is read again after
  // the loop finishes.
  int i = 0;
  for (; i < niter; ++i){
    VEC4_T in_texel;
    // Template-time unrolling: the generator emits the fetch / accumulate /
    // advance sequence below NUNROLL times, keeping only the fetch variant
    // that matches DIM.
    $for j in range(int(NUNROLL)):
      $if DIM == 0:
        in_texel = texelFetch(A, ivec3(offset, 0, 0), 0);
      $elif DIM == 1:
        in_texel = texelFetch(A, ivec3(0, offset, 0), 0);
      $elif DIM == 2:
        in_texel = texelFetch(A, ivec3(0, 0, offset), 0);

      sum *= in_texel;

      // On each unroll, a new unique address will be accessed through the offset,
      // limited by the address mask to a specific set of unique addresses
      offset = (offset + local_group_size) & addr_mask;
  }

  // This is to ensure no compiler optimizations occur
  // (i starts at 0 and only counts upwards, so it is not negative here and
  // i>>31 evaluates to 0; the stored value is therefore numerically unchanged.)
  vec4 zero = vec4(i>>31);

  // The index uses only the local invocation id, so invocations with the same
  // local id in different workgroups write to the same element of B.
  B[gl_LocalInvocationID[0]] = sum + zero;
}