• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9#version 450 core
10
// PRECISION and VEC4_T are filled in by the shader template system before
// compilation. VEC4_T is the type main() uses to hold one fetched texel.
11#define PRECISION ${PRECISION}
12#define VEC4_T ${texel_type(DTYPE)}
13
// Default every buffer block declared in this shader to the std430 layout.
14layout(std430) buffer;
15
// Resource bindings, generated by template helpers:
//   A - the texture that main() samples with texelFetch (3-component integer
//       coordinates, LOD 0); presumably read-only at binding 0 - confirm
//       against the helper.
//   B - the buffer that main() writes its result into; presumably write-only
//       at binding 1 - confirm against the helper.
16${layout_declare_sampler(0, "r", "A", DTYPE)}
17${layout_declare_buffer(1, "w", "B", DTYPE, "PRECISION", False)}
18
// The workgroup dimensions come from specialization constants 0, 1 and 2.
19layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
20
// Specialization constants; each defaults to 1 when the host does not
// override it.
//   niter            - trip count of the outer loop in main().
//   nvec             - not referenced anywhere in the visible shader code.
//   local_group_size - stride added to the texel offset after every fetch.
//   workgroup_width  - multiplier applied to the x workgroup ID when the
//                      initial texel offset is computed.
21layout(constant_id = 3) const int niter = 1;
22layout(constant_id = 4) const int nvec = 1;
23layout(constant_id = 5) const int local_group_size = 1;
24// The address mask works as a modulo because x % 2^n == x & (2^n - 1).
25// This will help us limit address accessing to a specific set of unique
26// addresses depending on the access size we want to measure.
27layout(constant_id = 6) const int addr_mask = 1;
28layout(constant_id = 7) const int workgroup_width = 1;
29
// Texture-read kernel: every invocation repeatedly fetches texels from A along
// one axis, folds them into an accumulator, and finally stores the accumulator
// in B so the fetches feed an observable result.
//
// Per invocation:
//   1. The starting offset is (x workgroup ID * workgroup_width + x local ID),
//      wrapped by addr_mask.
//   2. The outer loop runs niter times. Its body is the fetch / multiply /
//      advance sequence repeated NUNROLL times by the template "for" directive.
//   3. The template-time DIM value picks which coordinate (x, y or z) carries
//      the offset; the other two coordinates are fixed at 0.
//   4. The result goes to B at the x local invocation ID. The workgroup ID is
//      not part of that index.
30void main() {
    // NOTE(review): the accumulator starts at zero and is only ever multiplied,
    // so its numeric value does not look meaningful - presumably only the
    // memory traffic matters here. Confirm before changing the arithmetic.
31    vec4 sum = vec4(0);
32    uint offset = (gl_WorkGroupID[0] * workgroup_width  + gl_LocalInvocationID[0]) & addr_mask;
33
    // The counter is declared outside the loop because it is read again after
    // the loop ends (see the computation of zero below).
34    int i = 0;
35    for (; i < niter; ++i){
36      VEC4_T in_texel;
37      $for j in range(int(NUNROLL)):
38        $if DIM == 0:
39            in_texel = texelFetch(A, ivec3(offset, 0, 0), 0);
40        $elif DIM == 1:
41            in_texel = texelFetch(A, ivec3(0, offset, 0), 0);
42        $elif DIM == 2:
43            in_texel = texelFetch(A, ivec3(0, 0, offset), 0);
44
45        sum *= in_texel;
46
47        // On each unroll, a new unique address will be accessed through the offset,
48        // limited by the address mask to a specific set of unique addresses
49        offset = (offset + local_group_size) & addr_mask;
50    }
51
52    // This is to ensure no compiler optimizations occur
    // (i >> 31 is 0 for any non-negative i, so adding it leaves the stored
    // value unchanged while making the result depend on the loop counter.)
53    vec4 zero = vec4(i>>31);
54
55    B[gl_LocalInvocationID[0]] = sum + zero;
56}
57