1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
18
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22
23 #include "absl/types/span.h"
24 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
25 #include "tensorflow/lite/delegates/gpu/common/shape.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
28 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/types.h"
30 #include "tensorflow/lite/delegates/gpu/common/util.h"
31
32 namespace tflite {
33 namespace gpu {
34
35 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)36 void RearrangeWeightsToOHWIOGroupI4O4(
37 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
38 absl::Span<T> dst) {
39 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
40 const int src_slices = DivideRoundUp(weights.shape.i, 4);
41 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
42
43 int counter = 0;
44 for (int d = 0; d < dst_groups; ++d) {
45 for (int y = 0; y < weights.shape.h; ++y) {
46 for (int x = 0; x < weights.shape.w; ++x) {
47 for (int s = 0; s < src_slices; ++s) {
48 for (int d_group = 0; d_group < out_group_size; ++d_group) {
49 for (int j = 0; j < 4; ++j) {
50 T filter;
51 for (int i = 0; i < 4; ++i) {
52 const int s_ch = s * 4 + j;
53 const int d_ch = (d * out_group_size + d_group) * 4 + i;
54 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
55 const int f_index =
56 weights.shape.LinearIndex({d_ch, y, x, s_ch});
57 filter[i] = weights.data[f_index];
58 } else {
59 filter[i] = 0.0f;
60 }
61 }
62 dst[counter++] = filter;
63 }
64 }
65 }
66 }
67 }
68 }
69 }
70
71 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)72 void RearrangeWeightsToOHWIOGroupO4I4(
73 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
74 absl::Span<T> dst) {
75 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
76 const int src_slices = DivideRoundUp(weights.shape.i, 4);
77 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
78
79 int counter = 0;
80 for (int d = 0; d < dst_groups; ++d) {
81 for (int y = 0; y < weights.shape.h; ++y) {
82 for (int x = 0; x < weights.shape.w; ++x) {
83 for (int s = 0; s < src_slices; ++s) {
84 for (int d_group = 0; d_group < out_group_size; ++d_group) {
85 for (int j = 0; j < 4; ++j) {
86 T filter;
87 for (int i = 0; i < 4; ++i) {
88 const int s_ch = s * 4 + i;
89 const int d_ch = (d * out_group_size + d_group) * 4 + j;
90 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
91 const int f_index =
92 weights.shape.LinearIndex({d_ch, y, x, s_ch});
93 filter[i] = weights.data[f_index];
94 } else {
95 filter[i] = 0.0f;
96 }
97 }
98 dst[counter++] = filter;
99 }
100 }
101 }
102 }
103 }
104 }
105 }
106
107 template <DataType S, typename T>
RearrangeWeightsToODHWIOGroupI4O4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)108 void RearrangeWeightsToODHWIOGroupI4O4(
109 const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
110 absl::Span<T> dst) {
111 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
112 const int src_slices = DivideRoundUp(weights.shape.i, 4);
113 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
114
115 int counter = 0;
116 for (int d = 0; d < dst_groups; ++d) {
117 for (int z = 0; z < weights.shape.d; ++z) {
118 for (int y = 0; y < weights.shape.h; ++y) {
119 for (int x = 0; x < weights.shape.w; ++x) {
120 for (int s = 0; s < src_slices; ++s) {
121 for (int d_group = 0; d_group < out_group_size; ++d_group) {
122 for (int j = 0; j < 4; ++j) {
123 T filter;
124 for (int i = 0; i < 4; ++i) {
125 const int s_ch = s * 4 + j;
126 const int d_ch = (d * out_group_size + d_group) * 4 + i;
127 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
128 const int f_index =
129 weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
130 filter[i] = weights.data[f_index];
131 } else {
132 filter[i] = 0.0f;
133 }
134 }
135 dst[counter++] = filter;
136 }
137 }
138 }
139 }
140 }
141 }
142 }
143 }
144
145 template <DataType S, typename T>
RearrangeWeightsToI4HWIOOGroupO4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)146 void RearrangeWeightsToI4HWIOOGroupO4(
147 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
148 absl::Span<T> dst) {
149 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
150 const int src_slices = DivideRoundUp(weights.shape.i, 4);
151 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
152
153 int counter = 0;
154 for (int j = 0; j < 4; ++j) {
155 for (int y = 0; y < weights.shape.h; ++y) {
156 for (int x = 0; x < weights.shape.w; ++x) {
157 for (int s = 0; s < src_slices; ++s) {
158 for (int d = 0; d < dst_groups; ++d) {
159 for (int d_group = 0; d_group < out_group_size; ++d_group) {
160 T filter;
161 for (int i = 0; i < 4; ++i) {
162 const int s_ch = s * 4 + j;
163 const int d_ch = (d * out_group_size + d_group) * 4 + i;
164 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
165 const int f_index =
166 weights.shape.LinearIndex({d_ch, y, x, s_ch});
167 filter[i] = weights.data[f_index];
168 } else {
169 filter[i] = 0.0f;
170 }
171 }
172 dst[counter++] = filter;
173 }
174 }
175 }
176 }
177 }
178 }
179 }
180
181 template <DataType S, typename T>
RearrangeWeightsToO4HWIOOGroupI4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)182 void RearrangeWeightsToO4HWIOOGroupI4(
183 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
184 absl::Span<T> dst) {
185 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
186 const int src_slices = DivideRoundUp(weights.shape.i, 4);
187 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
188
189 int counter = 0;
190 for (int j = 0; j < 4; ++j) {
191 for (int y = 0; y < weights.shape.h; ++y) {
192 for (int x = 0; x < weights.shape.w; ++x) {
193 for (int s = 0; s < src_slices; ++s) {
194 for (int d = 0; d < dst_groups; ++d) {
195 for (int d_group = 0; d_group < out_group_size; ++d_group) {
196 T filter;
197 for (int i = 0; i < 4; ++i) {
198 const int s_ch = s * 4 + i;
199 const int d_ch = (d * out_group_size + d_group) * 4 + j;
200 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
201 const int f_index =
202 weights.shape.LinearIndex({d_ch, y, x, s_ch});
203 filter[i] = weights.data[f_index];
204 } else {
205 filter[i] = 0.0f;
206 }
207 }
208 dst[counter++] = filter;
209 }
210 }
211 }
212 }
213 }
214 }
215 }
216
217 template <DataType S, typename T>
RearrangeWeightsToI4DHWIOOGroupO4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)218 void RearrangeWeightsToI4DHWIOOGroupO4(
219 const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
220 absl::Span<T> dst) {
221 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
222 const int src_slices = DivideRoundUp(weights.shape.i, 4);
223 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
224
225 int counter = 0;
226 for (int j = 0; j < 4; ++j) {
227 for (int z = 0; z < weights.shape.d; ++z) {
228 for (int y = 0; y < weights.shape.h; ++y) {
229 for (int x = 0; x < weights.shape.w; ++x) {
230 for (int s = 0; s < src_slices; ++s) {
231 for (int d = 0; d < dst_groups; ++d) {
232 for (int d_group = 0; d_group < out_group_size; ++d_group) {
233 T filter;
234 for (int i = 0; i < 4; ++i) {
235 const int s_ch = s * 4 + j;
236 const int d_ch = (d * out_group_size + d_group) * 4 + i;
237 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
238 const int f_index =
239 weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
240 filter[i] = weights.data[f_index];
241 } else {
242 filter[i] = 0.0f;
243 }
244 }
245 dst[counter++] = filter;
246 }
247 }
248 }
249 }
250 }
251 }
252 }
253 }
254
255 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)256 void RearrangeWeightsToOICustomSpatialI4O4(
257 const tflite::gpu::Tensor<OHWI, S>& weights,
258 const std::vector<int>& spatial_remap, absl::Span<T> dst) {
259 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
260 const int src_slices = DivideRoundUp(weights.shape.i, 4);
261
262 int counter = 0;
263 for (int d = 0; d < dst_slices; ++d) {
264 for (int s = 0; s < src_slices; ++s) {
265 for (int y = 0; y < weights.shape.h; ++y) {
266 for (int x = 0; x < weights.shape.w; ++x) {
267 const int kernel_index = spatial_remap[y * weights.shape.w + x];
268 const int kernel_index_x = kernel_index % weights.shape.w;
269 const int kernel_index_y = kernel_index / weights.shape.w;
270 for (int i = 0; i < 4; ++i) {
271 T filter;
272 for (int j = 0; j < 4; ++j) {
273 const int s_ch = s * 4 + i;
274 const int d_ch = d * 4 + j;
275 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
276 const int f_index = weights.shape.LinearIndex(
277 {d_ch, kernel_index_y, kernel_index_x, s_ch});
278 filter[j] = weights.data[f_index];
279 } else {
280 filter[j] = 0.0f;
281 }
282 }
283 dst[counter++] = filter;
284 }
285 }
286 }
287 }
288 }
289 }
290
291 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)292 void RearrangeWeightsToOICustomSpatialO4I4(
293 const tflite::gpu::Tensor<OHWI, S>& weights,
294 const std::vector<int>& spatial_remap, absl::Span<T> dst) {
295 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
296 const int src_slices = DivideRoundUp(weights.shape.i, 4);
297
298 int counter = 0;
299 for (int d = 0; d < dst_slices; ++d) {
300 for (int s = 0; s < src_slices; ++s) {
301 for (int y = 0; y < weights.shape.h; ++y) {
302 for (int x = 0; x < weights.shape.w; ++x) {
303 const int kernel_index = spatial_remap[y * weights.shape.w + x];
304 const int kernel_index_x = kernel_index % weights.shape.w;
305 const int kernel_index_y = kernel_index / weights.shape.w;
306 for (int i = 0; i < 4; ++i) {
307 T filter;
308 for (int j = 0; j < 4; ++j) {
309 const int s_ch = s * 4 + j;
310 const int d_ch = d * 4 + i;
311 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
312 const int f_index = weights.shape.LinearIndex(
313 {d_ch, kernel_index_y, kernel_index_x, s_ch});
314 filter[j] = weights.data[f_index];
315 } else {
316 filter[j] = 0.0f;
317 }
318 }
319 dst[counter++] = filter;
320 }
321 }
322 }
323 }
324 }
325 }
326
327 uint GetTotalElementsCountForLayout(const WeightsDescription& weight_desc,
328 const OHWI& shape);
329
330 void RearrangeWeights(
331 const tflite::gpu::Tensor<OHWI, DataType::FLOAT32>& weights,
332 const WeightsDescription& dst_weight_desc, DataType dst_type,
333 absl::Span<uint8_t> dst);
334
335 } // namespace gpu
336 } // namespace tflite
337
338 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
339