/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#version 450 core

#define PRECISION ${PRECISION}

#define VEC4_T ${texel_type(DTYPE)}

layout(std430) buffer;

#include "indexing_utils.h"

// Destination image (write-only) and source texture.
// NOTE(review): image_in is hard-coded as sampler3D while image_out is
// templated on NDIM/DTYPE - confirm this is intentional for all variants.
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;

layout(set = 0, binding = 2) uniform PRECISION restrict RepeatArgs {
  // With input_size (n, c_i, h, w) and repeat r
  // out_size == (n, c_i * r, h, w)
  ivec4 out_sizes;
  ivec4 in_sizes;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

layout(constant_id = 3) const int packed_dim = C_DIM;

/*
 * Entry point: each invocation assembles one output texel at its global
 * invocation position and stores it to image_out. Every lane of the texel is
 * read from image_in at the same tensor index as the output, except that the
 * .z component is advanced by the lane number and wrapped modulo in_sizes.z.
 */
void main() {
  const ivec3 out_pos = ivec3(gl_GlobalInvocationID);

  // Tensor-space index for this texel, as produced by to_tensor_idx
  // (presumably declared in indexing_utils.h - confirm).
  const ivec4 out_whcn = to_tensor_idx(out_pos, out_sizes, packed_dim);

  // Skip invocations whose index is not strictly inside out_sizes in every
  // component.
  if (!all(lessThan(out_whcn, out_sizes))) {
    return;
  }

  VEC4_T out_texel;

  // Only the .z component changes from lane to lane, so start from the output
  // index once and overwrite .z on each iteration.
  ivec4 src_whcn = out_whcn;

  // Fill the texel lane by lane. This is not the most efficient approach,
  // since the same input texel is likely fetched more than once in this loop.
  for (int lane = 0; lane < 4; lane++) {
    src_whcn.z = (out_whcn.z + lane) % in_sizes.z;

    // .xyz addresses the input texel, .w selects the component within it.
    const ivec4 src_elem_pos =
        to_texture_elem_pos(src_whcn, in_sizes, packed_dim);

    const VEC4_T src_texel = VEC4_T(texelFetch(image_in, src_elem_pos.xyz, 0));
    out_texel[lane] = src_texel[src_elem_pos.w];
  }

  imageStore(image_out, out_pos, out_texel);
}