• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
10 
11 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>
12 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h>
13 #include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
14 #include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
15 
16 namespace vkcompute {
17 
18 using utils::ivec3;
19 using utils::uvec3;
20 
add_copy_offset_node(ComputeGraph & graph,const ValueRef in,const ivec3 & range,const ivec3 & src_offset,const ivec3 & dst_offset,const ValueRef out)21 void add_copy_offset_node(
22     ComputeGraph& graph,
23     const ValueRef in,
24     const ivec3& range,
25     const ivec3& src_offset,
26     const ivec3& dst_offset,
27     const ValueRef out) {
28   vTensorPtr t_in = graph.get_tensor(in);
29   vTensorPtr t_out = graph.get_tensor(out);
30 
31   std::string kernel_name = "copy_offset";
32   kernel_name.reserve(kShaderNameReserve);
33   add_dtype_suffix(kernel_name, *t_out);
34   add_storage_type_suffix(kernel_name, *t_out);
35 
36   const struct Block final {
37     alignas(16) ivec3 range;
38     alignas(16) ivec3 src_offset;
39     alignas(16) ivec3 dst_offset;
40   } offset_params{
41       range,
42       src_offset,
43       dst_offset,
44   };
45 
46   auto shader = VK_KERNEL_FROM_STR(kernel_name);
47 
48   graph.execute_nodes().emplace_back(new DispatchNode(
49       graph,
50       VK_KERNEL_FROM_STR(kernel_name),
51       graph.create_global_wg_size(out),
52       graph.create_local_wg_size(out),
53       // Inputs and Outputs
54       {
55           {out, vkapi::kWrite},
56           {in, vkapi::kRead},
57       },
58       // Parameter buffers
59       {
60           graph.create_params_buffer(offset_params),
61       },
62       // Specialization Constants
63       {graph.hashed_layout_of(out), graph.hashed_layout_of(in)}));
64 }
65 
add_copy_channel_offset_node(ComputeGraph & graph,const ValueRef in,int32_t channel_range,int32_t src_channel_offset,int32_t dst_channel_offset,const ValueRef out)66 void add_copy_channel_offset_node(
67     ComputeGraph& graph,
68     const ValueRef in,
69     int32_t channel_range,
70     int32_t src_channel_offset,
71     int32_t dst_channel_offset,
72     const ValueRef out) {
73   vTensorPtr t_in = graph.get_tensor(in);
74   vTensorPtr t_out = graph.get_tensor(out);
75 
76   // Likely need to prepad these numbers.
77   std::vector<int64_t> in_sizes = t_in->sizes();
78   std::vector<int64_t> out_sizes = t_out->sizes();
79 
80   VK_CHECK_COND(check_packed_dim_is(*t_in, WHCN::kChannelsDim));
81   VK_CHECK_COND(check_packed_dim_is(*t_out, WHCN::kChannelsDim));
82 
83   // NOTE: This function should be able to support 1d and 2d tensors when
84   // range=1, src_offset=dst_offset=1.
85   VK_CHECK_COND(t_in->dim() >= 3, "Src dim should be at least 3");
86   VK_CHECK_COND(t_out->dim() >= 3, "Dst dim should be at least 3");
87 
88   VK_CHECK_COND(
89       dim_at<kChannel4D>(in_sizes) >= src_channel_offset + channel_range,
90       "Src channel (",
91       src_channel_offset,
92       ") and range (",
93       channel_range,
94       ") should be less than or equal to input tensor's channel size (",
95       dim_at<kChannel4D>(in_sizes),
96       ")");
97 
98   VK_CHECK_COND(
99       dim_at<kChannel4D>(out_sizes) >= dst_channel_offset + channel_range,
100       "Dst channel (",
101       dst_channel_offset,
102       ") and range (",
103       channel_range,
104       ") should be less than or equal to input tensor's channel size (",
105       dim_at<kChannel4D>(out_sizes),
106       ")");
107 
108   VK_CHECK_COND(channel_range >= 0, "Channel range must be non-negative");
109   VK_CHECK_COND(
110       src_channel_offset >= 0, "Src channel offset must be non-negative");
111   VK_CHECK_COND(
112       dst_channel_offset >= 0, "Dst channel offset must be non-negative");
113 
114   std::string kernel_name = "copy_channel_offset";
115   kernel_name.reserve(kShaderNameReserve);
116   add_dtype_suffix(kernel_name, *t_out);
117 
118   int32_t out_channels = dim_at<kChannel4D>(out_sizes);
119 
120   // Copy one batch at a time.
121   for (int batch_idx = 0; batch_idx < dim_at<kBatch4D>(in_sizes); batch_idx++) {
122     // Mapping the tensor NCHW coordinates into texture XYZ coordinates
123     int32_t dst_first_z = dst_channel_offset / 4;
124     int32_t dst_last_z = (dst_channel_offset + channel_range - 1) / 4;
125 
126     // We copy the entire width and height dimension. For the channel dimension,
127     // we use the z-dimension of the global_size to specify the texture range.
128     // The shader combines the global invocation id and the dst_offset to get
129     // the actual coordinate.
130 
131     ivec3 dst_offset{
132         0, 0, dst_first_z + batch_idx * utils::div_up_4(out_channels)};
133 
134     uvec3 global_size{
135         utils::safe_downcast<uint32_t>(dim_at<kWidth4D>(in_sizes)),
136         utils::safe_downcast<uint32_t>(dim_at<kHeight4D>(in_sizes)),
137         utils::safe_downcast<uint32_t>(dst_last_z - dst_first_z + 1)};
138     uvec3 local_size = adaptive_work_group_size(global_size);
139 
140     const struct Block final {
141       ivec3 range;
142       int32_t channel_range;
143       ivec3 dst_offset;
144       int32_t dst_channel_offset;
145       int32_t src_channel_offset;
146     } channel_offset_params{
147         utils::make_ivec3(global_size),
148         channel_range,
149         dst_offset,
150         dst_channel_offset,
151         src_channel_offset,
152     };
153 
154     auto shader = VK_KERNEL_FROM_STR(kernel_name);
155 
156     graph.execute_nodes().emplace_back(new DispatchNode(
157         graph,
158         VK_KERNEL_FROM_STR(kernel_name),
159         global_size,
160         local_size,
161         // Inputs and Outputs
162         {
163             {out, vkapi::MemoryAccessType::WRITE},
164             {out, vkapi::MemoryAccessType::READ},
165             {in, vkapi::MemoryAccessType::READ},
166         },
167         // Parameter buffers
168         {
169             t_out->sizes_ubo(),
170             t_in->sizes_ubo(),
171             graph.create_params_buffer(channel_offset_params),
172         },
173         // Specialization Constants
174         {graph.hashed_layout_of(out), graph.hashed_layout_of(in)}));
175   }
176 }
177 
add_copy_offset_node(ComputeGraph & graph,ValueRef in,ValueRef range_ref,ValueRef src_offset_ref,ValueRef dst_offset_ref,ValueRef out)178 void add_copy_offset_node(
179     ComputeGraph& graph,
180     ValueRef in,
181     ValueRef range_ref,
182     ValueRef src_offset_ref,
183     ValueRef dst_offset_ref,
184     ValueRef out) {
185   ivec3 range = utils::make_ivec3(*graph.get_int_list(range_ref));
186   ivec3 src_offset = utils::make_ivec3(*graph.get_int_list(src_offset_ref));
187   ivec3 dst_offset = utils::make_ivec3(*graph.get_int_list(dst_offset_ref));
188 
189   add_copy_offset_node(graph, in, range, src_offset, dst_offset, out);
190 }
191 
copy_offset(ComputeGraph & graph,const std::vector<ValueRef> & args)192 void copy_offset(ComputeGraph& graph, const std::vector<ValueRef>& args) {
193   add_copy_offset_node(graph, args[0], args[1], args[2], args[3], args[4]);
194 }
195 
copy_channel_offset(ComputeGraph & graph,const std::vector<ValueRef> & args)196 void copy_channel_offset(
197     ComputeGraph& graph,
198     const std::vector<ValueRef>& args) {
199   ValueRef in = args[0];
200   ValueRef channel_range_ref = args[1];
201   ValueRef src_channel_offset_ref = args[2];
202   ValueRef dst_channel_offset_ref = args[3];
203   ValueRef out = args[4];
204 
205   auto channel_range = graph.extract_scalar<int64_t>(channel_range_ref);
206   auto src_channel_offset =
207       graph.extract_scalar<int64_t>(src_channel_offset_ref);
208   auto dst_channel_offset =
209       graph.extract_scalar<int64_t>(dst_channel_offset_ref);
210 
211   add_copy_channel_offset_node(
212       graph, in, channel_range, src_channel_offset, dst_channel_offset, out);
213 }
214 
// Operator registration for this file. Each VK_REGISTER_OP line pairs an
// operator name in the `etvk` namespace with the entry-point function in this
// file that implements it.
// NOTE(review): the registration mechanics live in the REGISTER_OPERATORS /
// VK_REGISTER_OP macros (presumably from OperatorRegistry.h) - confirm there.
REGISTER_OPERATORS {
  VK_REGISTER_OP(etvk.copy_offset, copy_offset);
  VK_REGISTER_OP(etvk.copy_channel_offset, copy_channel_offset);
}
219 
220 } // namespace vkcompute
221