/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/convert.h"

#include <stdint.h>
#include <string.h>

#include <string>
#include <vector>

#include <fp16.h>
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"

namespace tflite {
namespace gpu {
namespace {

constexpr int kPhwc4ChannelsInPlane = 4;
constexpr int kPhwo4i4ChannelsInPlane = 4;
constexpr int kPiohw4ChannelsInPlane = 4;

// Layout is Po,H,W,OI4x4.
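// Each group of 16 output elements holds a 4x4 block of (output, input)
// channel pairs for one spatial position; out-of-range channels are
// zero-filled. When reverse_space is true, the H and W coordinates of the
// source are mirrored while copying.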
absl::Status ConvertToPHWO4I4(absl::Span<const float> in, const OHWI& shape,
                              absl::Span<float> out, bool reverse_space) {
  if (in.size() != shape.DimensionsProduct()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPHWO4I4: Input data size does not match expected size: ",
        in.size(), " != ", shape.DimensionsProduct()));
  }
  if (out.size() != GetElementsSizeForPHWO4I4(shape)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPHWO4I4: Output data size does not match expected size: ",
        out.size(), " != ", GetElementsSizeForPHWO4I4(shape)));
  }

  float* output = out.data();
  for (int p = 0; p < DivideRoundUp(shape.o, kPhwo4i4ChannelsInPlane); ++p) {
    for (int h = 0; h < shape.h; ++h) {
      for (int w = 0; w < shape.w; ++w) {
        for (int c = 0; c < DivideRoundUp(shape.i, kPhwo4i4ChannelsInPlane);
             ++c) {
          for (int co = 0; co < kPhwo4i4ChannelsInPlane; ++co) {
            for (int ci = 0; ci < kPhwo4i4ChannelsInPlane; ++ci) {
              float value = 0;
              if (c * kPhwo4i4ChannelsInPlane + ci < shape.i &&
                  p * kPhwo4i4ChannelsInPlane + co < shape.o) {
                // tensor is in OHWI
                int tensor_o = p * kPhwo4i4ChannelsInPlane + co;
                int tensor_i = c * kPhwo4i4ChannelsInPlane + ci;
                const int in_h = reverse_space ? shape.h - 1 - h : h;
                const int in_w = reverse_space ? shape.w - 1 - w : w;
                value = in[shape.LinearIndex({tensor_o, in_h, in_w, tensor_i})];
              }
              (*output++) = value;
            }
          }
        }
      }
    }
  }
  return absl::OkStatus();
}

}  // namespace

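// Element counts for PHWO4I4 storage: both the input and the output channel
// dimensions are padded up to a multiple of 4.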
uint32_t GetElementsSizeForPHWO4I4(const OHWI& shape) {
  return AlignByN(shape.i, kPhwo4i4ChannelsInPlane) *
         AlignByN(shape.o, kPhwo4i4ChannelsInPlane) * shape.h * shape.w;
}

uint32_t GetElementsSizeForPHWO4I4(const IHWO& shape) {
  return AlignByN(shape.i, kPhwo4i4ChannelsInPlane) *
         AlignByN(shape.o, kPhwo4i4ChannelsInPlane) * shape.h * shape.w;
}

std::vector<float> ConvertToPHWO4I4(
    const Tensor<OHWI, DataType::FLOAT32>& tensor) {
  std::vector<float> transposed(GetElementsSizeForPHWO4I4(tensor.shape));
  ConvertToPHWO4I4(tensor.data, tensor.shape,
                   absl::MakeSpan(transposed.data(), transposed.size()),
                   /*reverse_space=*/false)
      .IgnoreError();
  return transposed;
}

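// Same as ConvertToPHWO4I4 above, but with the spatial (H, W) coordinates
// mirrored (reverse_space = true).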
std::vector<float> ConvertToPHWO4I4Transposed(
    const Tensor<OHWI, DataType::FLOAT32>& tensor) {
  std::vector<float> transposed(GetElementsSizeForPHWO4I4(tensor.shape));
  ConvertToPHWO4I4(tensor.data, tensor.shape,
                   absl::MakeSpan(transposed.data(), transposed.size()),
                   /*reverse_space=*/true)
      .IgnoreError();
  return transposed;
}

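// 3D extent of the PHWO4I4 representation: x = input channels aligned to 4,
// y = flattened spatial size (h * w), z = number of 4-channel output slices.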
uint3 Get3DSizeForPHWO4I4(const OHWI& shape) {
  return uint3(AlignByN(shape.i, 4), shape.h * shape.w,
               DivideRoundUp(shape.o, 4));
}

// Layout is Po,H,W,OI4x4.
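// Same destination layout as the OHWI variant above, but the source tensor
// is in IHWO order.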
absl::Status ConvertToPHWO4I4(absl::Span<const float> in, const IHWO& shape,
                              absl::Span<float> out) {
  if (in.size() != shape.DimensionsProduct()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPHWO4I4: Input data size does not match expected size: ",
        in.size(), " != ", shape.DimensionsProduct()));
  }
  if (out.size() != GetElementsSizeForPHWO4I4(shape)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPHWO4I4: Output data size does not match expected size: ",
        out.size(), " != ", GetElementsSizeForPHWO4I4(shape)));
  }

  const int dst_depth = DivideRoundUp(shape.o, 4);
  const int src_depth = DivideRoundUp(shape.i, 4);

  float* output = out.data();
  for (int f = 0; f < dst_depth; ++f) {
    for (int y = 0; y < shape.h; ++y) {
      for (int x = 0; x < shape.w; ++x) {
        for (int ch = 0; ch < src_depth; ++ch) {
          for (int co = 0; co < 4; ++co) {
            for (int ci = 0; ci < 4; ++ci) {
              const int src_channel = ch * 4 + ci;
              const int dst_channel = f * 4 + co;
              float value = 0;
              if (src_channel < shape.i && dst_channel < shape.o) {
                // tensor is in IHWO
                value = in[shape.LinearIndex({src_channel, y, x, dst_channel})];
              }
              (*output++) = value;
            }
          }
        }
      }
    }
  }
  return absl::OkStatus();
}

std::vector<float> ConvertToPHWO4I4(
    const Tensor<IHWO, DataType::FLOAT32>& tensor) {
  std::vector<float> transposed(GetElementsSizeForPHWO4I4(tensor.shape));
  ConvertToPHWO4I4(tensor.data, tensor.shape,
                   absl::MakeSpan(transposed.data(), transposed.size()))
      .IgnoreError();
  return transposed;
}

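// Element count for PIOHW4 storage: the flattened o * i filter dimension is
// padded up to a multiple of 4.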
uint32_t GetElementsSizeForPIOHW4(const OHWI& shape) {
  return AlignByN(shape.o * shape.i, kPiohw4ChannelsInPlane) * shape.h *
         shape.w;
}

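// Packs the flattened (i, o) channel dimension into planes of 4 values per
// (h, w) position. Within the flattened dimension the output-channel index
// varies fastest; positions past o * i are zero-filled.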
absl::Status ConvertToPIOHW4(absl::Span<const float> in, const OHWI& shape,
                             absl::Span<float> out) {
  if (in.size() != shape.DimensionsProduct()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPIOHW4: Input data size does not match expected size: ",
        in.size(), " != ", shape.DimensionsProduct()));
  }
  if (out.size() != GetElementsSizeForPIOHW4(shape)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPIOHW4: Output data size does not match expected size: ",
        out.size(), " != ", GetElementsSizeForPIOHW4(shape)));
  }

  int32_t output_channels = shape.o * shape.i;
  int32_t num_planes = DivideRoundUp(output_channels, kPiohw4ChannelsInPlane);
  float* output = out.data();
  for (int p = 0; p < num_planes; ++p) {
    for (int h = 0; h < shape.h; ++h) {
      for (int w = 0; w < shape.w; ++w) {
        for (int c = 0; c < kPiohw4ChannelsInPlane; ++c) {
          int output_c = p * kPiohw4ChannelsInPlane + c;
          (*output++) = output_c >= output_channels
                            ? 0
                            : in[shape.LinearIndex({output_c % shape.o, h, w,
                                                    output_c / shape.o})];
        }
      }
    }
  }
  return absl::OkStatus();
}

std::vector<float> ConvertToPIOHW4(
    const Tensor<OHWI, DataType::FLOAT32>& tensor) {
  std::vector<float> transposed(GetElementsSizeForPIOHW4(tensor.shape));
  ConvertToPIOHW4(tensor.data, tensor.shape,
                  absl::MakeSpan(transposed.data(), transposed.size()))
      .IgnoreError();
  return transposed;
}

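// Verifies that |in| holds exactly the dense BHWC element count and |out|
// holds the 4-channel-aligned PHWC4 element count.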
template <typename T>
absl::Status ValidateConvertToPHWC4(absl::Span<const float> in,
                                    const BHWC& shape, absl::Span<T> out) {
  if (in.size() != shape.DimensionsProduct()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPHWC4: Input data size does not match expected size: ",
        in.size(), " != ", shape.DimensionsProduct()));
  }
  if (out.size() != GetElementsSizeForPHWC4(shape)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertToPHWC4: Output data size does not match expected size: ",
        out.size(), " != ", GetElementsSizeForPHWC4(shape)));
  }
  return absl::OkStatus();
}

// Layout is Pc,H,W,C4, where Pc is the number of 4-channel planes.
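// When the channel count is exactly 4, the layouts coincide and the whole
// buffer is copied with a single memcpy. Otherwise full 4-channel planes are
// copied per pixel, and the trailing partial plane is zero-padded to 4
// channels.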
absl::Status ConvertToPHWC4(absl::Span<const float> in, const BHWC& shape,
                            absl::Span<float> out) {
  RETURN_IF_ERROR(ValidateConvertToPHWC4(in, shape, out));
  if (shape.c == 4) {
    std::memcpy(out.data(), in.data(),
                shape.DimensionsProduct() * sizeof(float));
    return absl::OkStatus();
  }
  // Layout is Pc,H,W,C4, where Pc is the number of 4-channel planes.
  int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
  const int num_pixels = shape.h * shape.w;
  // A full plane holds kPhwc4ChannelsInPlane channels per pixel.
  const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
  for (int b = 0; b < shape.b; b++) {
    float* dest =
        out.data() + b * num_pixels * num_planes * kPhwc4ChannelsInPlane;
    for (int p = 0; p < num_full_planes; p++) {
      const float* src =
          in.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
      for (int i = 0; i < num_pixels; i++) {
        std::memcpy(dest, src, kPhwc4ChannelsInPlane * sizeof(float));
        src += shape.c;
        dest += kPhwc4ChannelsInPlane;
      }
    }
  }

  // Pad the channels of the last plane up to a multiple of
  // kPhwc4ChannelsInPlane.
  const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
  const int remaining_channels =
      shape.c - num_full_planes * kPhwc4ChannelsInPlane;
  if (remaining_channels == 0) {
    return absl::OkStatus();
  }
  for (int b = 0; b < shape.b; b++) {
    const float* src =
        in.data() +
        shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
    float* dest = out.data() + b * padded_size +
                  num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
    for (int p = 0; p < num_pixels; p++) {
      std::memcpy(dest, src, remaining_channels * sizeof(float));
      std::memset(dest + remaining_channels, 0,
                  (kPhwc4ChannelsInPlane - remaining_channels) * sizeof(float));
      src += shape.c;
      dest += kPhwc4ChannelsInPlane;
    }
  }
  return absl::OkStatus();
}

// Layout is Pc,H,W,C4, where Pc is the number of 4-channel planes.
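// Same packing as ConvertToPHWC4 above, with each float converted to its
// IEEE fp16 bit pattern via fp16_ieee_from_fp32_value. There is no
// single-memcpy fast path here because every element must be converted.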
absl::Status ConvertToPHWC4Half(absl::Span<const float> in, const BHWC& shape,
                                absl::Span<HalfBits> out) {
  RETURN_IF_ERROR(ValidateConvertToPHWC4(in, shape, out));

  // Layout is Pc,H,W,C4, where Pc is the number of 4-channel planes.
  int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
  const int num_pixels = shape.h * shape.w;
  // A full plane holds kPhwc4ChannelsInPlane channels per pixel.
  const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
  for (int b = 0; b < shape.b; b++) {
    HalfBits* dest =
        out.data() + b * num_pixels * num_planes * kPhwc4ChannelsInPlane;
    for (int p = 0; p < num_full_planes; p++) {
      const float* src =
          in.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
      for (int i = 0; i < num_pixels; i++) {
        dest[0] = fp16_ieee_from_fp32_value(src[0]);
        dest[1] = fp16_ieee_from_fp32_value(src[1]);
        dest[2] = fp16_ieee_from_fp32_value(src[2]);
        dest[3] = fp16_ieee_from_fp32_value(src[3]);
        src += shape.c;
        dest += kPhwc4ChannelsInPlane;
      }
    }
  }

  // Pad the channels of the last plane up to a multiple of
  // kPhwc4ChannelsInPlane.
  const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
  const int remaining_channels =
      shape.c - num_full_planes * kPhwc4ChannelsInPlane;
  if (remaining_channels == 0) {
    return absl::OkStatus();
  }

  for (int b = 0; b < shape.b; b++) {
    const float* src =
        in.data() +
        shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
    HalfBits* dest = out.data() + b * padded_size +
                     num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
    switch (remaining_channels) {
      case 1:
        for (int p = 0; p < num_pixels; p++) {
          dest[0] = fp16_ieee_from_fp32_value(src[0]);
          dest[1] = 0;
          dest[2] = 0;
          dest[3] = 0;
          src += shape.c;
          dest += kPhwc4ChannelsInPlane;
        }
        break;
      case 2:
        for (int p = 0; p < num_pixels; p++) {
          dest[0] = fp16_ieee_from_fp32_value(src[0]);
          dest[1] = fp16_ieee_from_fp32_value(src[1]);
          dest[2] = 0;
          dest[3] = 0;
          src += shape.c;
          dest += kPhwc4ChannelsInPlane;
        }
        break;
      case 3:
        for (int p = 0; p < num_pixels; p++) {
          dest[0] = fp16_ieee_from_fp32_value(src[0]);
          dest[1] = fp16_ieee_from_fp32_value(src[1]);
          dest[2] = fp16_ieee_from_fp32_value(src[2]);
          dest[3] = 0;
          src += shape.c;
          dest += kPhwc4ChannelsInPlane;
        }
        break;
      default:
        return absl::UnimplementedError(
            "ConvertToPHWC4Half: Unsupported number of remaining channels.");
    }
  }
  return absl::OkStatus();
}

std::vector<float> ConvertToPHWC4(
    const Tensor<BHWC, DataType::FLOAT32>& tensor) {
  std::vector<float> transposed(GetElementsSizeForPHWC4(tensor.shape));
  ConvertToPHWC4(tensor.data, tensor.shape,
                 absl::MakeSpan(transposed.data(), transposed.size()))
      .IgnoreError();
  // TODO(akulik): Maybe safer to return Status.
  return transposed;
}

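// Convenience overload for unbatched HWC tensors; the data is treated as a
// single-element batch.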
std::vector<float> ConvertToPHWC4(
    const Tensor<HWC, DataType::FLOAT32>& tensor) {
  const BHWC batched_shape =
      BHWC(1, tensor.shape.h, tensor.shape.w, tensor.shape.c);
  std::vector<float> transposed(GetElementsSizeForPHWC4(batched_shape));
  ConvertToPHWC4(tensor.data, batched_shape,
                 absl::MakeSpan(transposed.data(), transposed.size()))
      .IgnoreError();
  // TODO(akulik): Maybe safer to return Status.
  return transposed;
}

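// Element count for PHWC4 storage: channels are padded up to a multiple of
// kPhwc4ChannelsInPlane.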
uint32_t GetElementsSizeForPHWC4(const BHWC& shape) {
  return shape.b * shape.h * shape.w * AlignByN(shape.c, kPhwc4ChannelsInPlane);
}

template <typename T>
absl::Status ValidateConvertFromPHWC4(absl::Span<const T> in, const BHWC& shape,
                                      absl::Span<float> out) {
  if (in.size() != GetElementsSizeForPHWC4(shape)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertFromPHWC4: Input data size does not match expected size: ",
        in.size(), " != ", GetElementsSizeForPHWC4(shape)));
  }
  if (out.size() != shape.DimensionsProduct()) {
    return absl::InvalidArgumentError(absl::StrCat(
        "ConvertFromPHWC4: Output data size does not match expected size: ",
        out.size(), " != ", shape.DimensionsProduct()));
  }
  return absl::OkStatus();
}

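// Inverse of ConvertToPHWC4: copies each pixel's channels back into dense
// BHWC order and drops the zero padding of the last plane.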
absl::Status ConvertFromPHWC4(absl::Span<const float> in, const BHWC& shape,
                              absl::Span<float> out) {
  RETURN_IF_ERROR(ValidateConvertFromPHWC4(in, shape, out));
  if (shape.c == 4) {
    std::memcpy(out.data(), in.data(),
                shape.DimensionsProduct() * sizeof(float));
    return absl::OkStatus();
  }

  int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
  const int num_pixels = shape.h * shape.w;
  const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
  // A full plane holds kPhwc4ChannelsInPlane channels per pixel.
  const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
  for (int b = 0; b < shape.b; b++) {
    const float* src = in.data() + b * padded_size;
    for (int p = 0; p < num_full_planes; p++) {
      float* dest =
          out.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
      for (int i = 0; i < num_pixels; i++) {
        std::memcpy(dest, src, kPhwc4ChannelsInPlane * sizeof(float));
        src += kPhwc4ChannelsInPlane;
        dest += shape.c;
      }
    }
  }

  // Unpad the last (partial) kPhwc4ChannelsInPlane-channel plane.
  const int remaining_channels =
      shape.c - num_full_planes * kPhwc4ChannelsInPlane;
  if (remaining_channels == 0) {
    return absl::OkStatus();
  }
  for (int b = 0; b < shape.b; b++) {
    const float* src = in.data() + b * padded_size +
                       num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
    float* dest =
        out.data() +
        shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
    for (int p = 0; p < num_pixels; p++) {
      std::memcpy(dest, src, remaining_channels * sizeof(float));
      src += kPhwc4ChannelsInPlane;
      dest += shape.c;
    }
  }
  return absl::OkStatus();
}

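// Half-precision counterpart of ConvertFromPHWC4: each stored fp16 bit
// pattern is expanded back to a float via fp16_ieee_to_fp32_value.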
absl::Status ConvertFromPHWC4Half(absl::Span<const HalfBits> in,
                                  const BHWC& shape, absl::Span<float> out) {
  RETURN_IF_ERROR(ValidateConvertFromPHWC4(in, shape, out));
  int num_planes = DivideRoundUp(shape.c, kPhwc4ChannelsInPlane);
  const int num_pixels = shape.h * shape.w;
  const int padded_size = num_pixels * num_planes * kPhwc4ChannelsInPlane;
  // A full plane holds kPhwc4ChannelsInPlane channels per pixel.
  const int num_full_planes = shape.c / kPhwc4ChannelsInPlane;
  for (int b = 0; b < shape.b; b++) {
    const HalfBits* src = in.data() + b * padded_size;
    for (int p = 0; p < num_full_planes; p++) {
      float* dest =
          out.data() + shape.LinearIndex({b, 0, 0, p * kPhwc4ChannelsInPlane});
      for (int i = 0; i < num_pixels; i++) {
        dest[0] = fp16_ieee_to_fp32_value(src[0]);
        dest[1] = fp16_ieee_to_fp32_value(src[1]);
        dest[2] = fp16_ieee_to_fp32_value(src[2]);
        dest[3] = fp16_ieee_to_fp32_value(src[3]);
        src += kPhwc4ChannelsInPlane;
        dest += shape.c;
      }
    }
  }

  // Unpad the last (partial) kPhwc4ChannelsInPlane-channel plane.
  const int remaining_channels =
      shape.c - num_full_planes * kPhwc4ChannelsInPlane;
  if (remaining_channels == 0) {
    return absl::OkStatus();
  }
  for (int b = 0; b < shape.b; b++) {
    const HalfBits* src = in.data() + b * padded_size +
                          num_pixels * num_full_planes * kPhwc4ChannelsInPlane;
    float* dest =
        out.data() +
        shape.LinearIndex({b, 0, 0, num_full_planes * kPhwc4ChannelsInPlane});
    switch (remaining_channels) {
      case 1:
        for (int p = 0; p < num_pixels; p++) {
          dest[0] = fp16_ieee_to_fp32_value(src[0]);
          src += kPhwc4ChannelsInPlane;
          dest += shape.c;
        }
        break;
      case 2:
        for (int p = 0; p < num_pixels; p++) {
          dest[0] = fp16_ieee_to_fp32_value(src[0]);
          dest[1] = fp16_ieee_to_fp32_value(src[1]);
          src += kPhwc4ChannelsInPlane;
          dest += shape.c;
        }
        break;
      case 3:
        for (int p = 0; p < num_pixels; p++) {
          dest[0] = fp16_ieee_to_fp32_value(src[0]);
          dest[1] = fp16_ieee_to_fp32_value(src[1]);
          dest[2] = fp16_ieee_to_fp32_value(src[2]);
          src += kPhwc4ChannelsInPlane;
          dest += shape.c;
        }
        break;
      default:
        return absl::UnimplementedError(
            "ConvertFromPHWC4Half: Unsupported number of remaining channels.");
    }
  }
  return absl::OkStatus();
}

}  // namespace gpu
}  // namespace tflite