/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/tensor.h"

#include <cstring>
#include <memory>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_image_format.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
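// Allocates device memory for a tensor laid out according to `descriptor`.
// Channels are padded to groups of four ("slices"); the storage type selects
// between a linear buffer and the various image layouts. When `data_ptr` is
// non-null, the host data is copied into the allocation via
// CL_MEM_COPY_HOST_PTR.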
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  const void* data_ptr, CLMemory* result) {
  const int slices = DivideRoundUp(shape.c, 4);
  cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
  if (data_ptr) {
    mem_flags |= CL_MEM_COPY_HOST_PTR;
  }
  switch (descriptor.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER: {
      const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
                               4 * SizeOf(descriptor.data_type);
      cl_int error_code;
      cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
                                     const_cast<void*>(data_ptr), &error_code);
      if (!memory) {
        return absl::UnknownError(
            absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
                         CLErrorCodeToString(error_code)));
      }
      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_2D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h * slices;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_3D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE3D;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 3D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_ARRAY: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_array_size = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          clCreateImage(context.context(), mem_flags, &format, &desc,
                        const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    case TensorStorageType::SINGLE_TEXTURE_2D: {
      if (slices != 1) {
        return absl::InvalidArgumentError(absl::StrCat(
            "SINGLE_TEXTURE_2D supports only channels in range [1-4], but ",
            shape.c, " was provided"));
      }
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) {
        format.image_channel_order = ToChannelOrder(shape.c);
        format.image_channel_data_type =
            ToImageChannelType(descriptor.data_type);
      } else {
        return absl::InvalidArgumentError(absl::StrCat(
            "This device doesn't support ", shape.c, "-channel textures."));
      }

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    default:
      return absl::InternalError("Unsupported tensor storage type");
  }
}

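// Wraps an existing OpenCL buffer in a 1D image buffer object so that kernels
// can read it through the image path. The buffer remains the backing storage;
// no data is copied.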
absl::Status CreateImageBufferFromBuffer(const CLContext& context,
                                         cl_mem memory, DataType data_type,
                                         int width, cl_mem* result) {
  cl_image_format format;
  cl_image_desc desc;
  std::memset(&desc, 0, sizeof(desc));
  desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
  desc.image_width = width;
  desc.mem_object = memory;

  format.image_channel_data_type = ToImageChannelType(data_type);
  format.image_channel_order = CL_RGBA;

  cl_int error_code;
  *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc,
                          nullptr, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}

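// Builds a Tensor around `memory`, allocating fresh device memory when
// `memory` is null. IMAGE_BUFFER tensors additionally get an image view over
// the underlying buffer.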
absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, cl_mem memory,
                          Tensor* result) {
  const bool memory_owner = memory == nullptr;
  if (memory_owner) {
    CLMemory mem;
    RETURN_IF_ERROR(
        AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
    memory = mem.Release();
  }
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

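// Same as CreateTensor above, but never takes ownership of `memory`; used for
// tensors that share externally managed OpenCL objects.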
absl::Status CreateTensorShared(const CLContext& context, const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                cl_mem memory, Tensor* result) {
  const bool memory_owner = false;
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

}  // namespace

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWDC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {}

Tensor::Tensor(Tensor&& tensor)
    : memory_(tensor.memory_),
      image_buffer_memory_(tensor.image_buffer_memory_),
      memory_owner_(tensor.memory_owner_),
      shape_(tensor.shape_),
      descriptor_(tensor.descriptor_) {
  tensor.memory_ = nullptr;
  tensor.image_buffer_memory_ = nullptr;
}

Tensor& Tensor::operator=(Tensor&& tensor) {
  if (this != &tensor) {
    Release();
    std::swap(memory_, tensor.memory_);
    std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
    std::swap(memory_owner_, tensor.memory_owner_);
    std::swap(shape_, tensor.shape_);
    std::swap(descriptor_, tensor.descriptor_);
  }
  return *this;
}

void Tensor::Release() {
  // image_buffer_memory_ is always owned by this object.
  if (image_buffer_memory_) {
    clReleaseMemObject(image_buffer_memory_);
    image_buffer_memory_ = nullptr;
  }
  if (memory_owner_ && memory_) {
    clReleaseMemObject(memory_);
    memory_ = nullptr;
  }
}

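// Translates this tensor into the GPU resources expected by a kernel argument
// descriptor: the raw buffer/image handle plus the integer size uniforms
// (width, height, slices, etc.) that the generated kernel code reads.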
absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                                     GPUResourcesWithValue* resources) const {
  const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(obj_ptr);
  if (buffer_desc) {
    if (descriptor_.storage_type != TensorStorageType::BUFFER) {
      return absl::InvalidArgumentError(
          "Tensor can be used with BufferDescriptor only with "
          "TensorStorageType::BUFFER.");
    }
    resources->buffers.push_back({"buffer", memory_});
    return absl::OkStatus();
  }
  const auto* texture2d_desc =
      dynamic_cast<const Texture2DDescriptor*>(obj_ptr);
  if (texture2d_desc) {
    if (descriptor_.storage_type != TensorStorageType::TEXTURE_2D) {
      return absl::InvalidArgumentError(
          "Tensor can be used with Texture2DDescriptor only with "
          "TensorStorageType::TEXTURE_2D.");
    }
    resources->images2d.push_back({"tex2d", memory_});
    return absl::OkStatus();
  }
  const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(obj_ptr);
  if (!tensor_desc) {
    return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
  }
  resources->ints.push_back(
      {"slice_stride", tensor_desc->GetSliceStrideSize(shape_)});
  if (descriptor_.HasAxis(Axis::WIDTH)) {
    resources->ints.push_back({"width", Width()});
    resources->ints.push_back({"width_div2", Width() / 2});
    resources->ints.push_back({"width_div4", Width() / 4});
    resources->ints.push_back({"width_batched", Width() * Batch()});
    resources->ints.push_back({"width_batched_div2", Width() * Batch() / 2});
    resources->ints.push_back({"width_batched_div4", Width() * Batch() / 4});
  }
  if (descriptor_.HasAxis(Axis::HEIGHT)) {
    resources->ints.push_back({"height", Height()});
  }
  if (descriptor_.HasAxis(Axis::CHANNELS)) {
    resources->ints.push_back({"slices", Slices()});
    resources->ints.push_back({"channels", Channels()});
  }
  if (descriptor_.HasAxis(Axis::BATCH)) {
    resources->ints.push_back({"batch", Batch()});
  }
  if (descriptor_.HasAxis(Axis::DEPTH)) {
    resources->ints.push_back({"depth", Depth()});
  }

  if (descriptor_.storage_type == TensorStorageType::BUFFER) {
    resources->buffers.push_back({"buffer", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D ||
             descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) {
    resources->images2d.push_back({"image2d", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) {
    resources->image2d_arrays.push_back({"image2d_array", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D) {
    resources->images3d.push_back({"image3d", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) {
    if (obj_ptr->GetAccess() == AccessType::READ) {
      resources->image_buffers.push_back(
          {"image_buffer", image_buffer_memory_});
    } else {
      resources->buffers.push_back({"buffer", memory_});
    }
  }

  return absl::OkStatus();
}

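// Returns the extent (in storage coordinates) that covers the whole tensor;
// used as the copy region for image read/write operations.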
int3 Tensor::GetFullTensorRegion() const {
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::IMAGE_BUFFER:
      return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()};
    case TensorStorageType::TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1};
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h, 1};
    case TensorStorageType::UNKNOWN:
      return {-1, -1, -1};
  }
}

absl::Status Tensor::IsValid(const BHWC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

absl::Status Tensor::IsValid(const BHWDC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.d != shape_.d) {
    return absl::InvalidArgumentError(
        "Shape depth does not match tensor depth");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

int Tensor::GetAlignedChannels() const {
  return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
             ? shape_.c
             : AlignByN(shape_.c, 4);
}

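// Total size of the device allocation. All storage types except
// SINGLE_TEXTURE_2D pad channels up to a multiple of four.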
uint64_t Tensor::GetMemorySizeInBytes() const {
  const int flt_size = SizeOf(descriptor_.data_type);
  const int flt4_size = 4 * flt_size;
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
      return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d;
    default:
      return 0;
  }
}

cl_mem Tensor::GetMemoryPtr() const {
  return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER
             ? image_buffer_memory_
             : memory_;
}

cl_mem Tensor::GetMemoryPtrForWriting() const { return memory_; }

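// Converts BHWDC host data into the device layout for this storage type and
// uploads it with a single buffer or image write.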
absl::Status Tensor::WriteDataBHWDC(const float* in, CLCommandQueue* queue) {
  void* data_ptr = nullptr;
  const int aligned_channels = GetAlignedChannels();
  const int elements_count =
      shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;

  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
  std::unique_ptr<float[]> data_f;
  std::unique_ptr<half[]> data_h;
  if (descriptor_.data_type == DataType::FLOAT32) {
    data_f.reset(new float[elements_count]);
    data_ptr = data_f.get();
    DataFromBHWDC(in, shape_, descriptor_, data_f.get());
  } else {
    data_h.reset(new half[elements_count]);
    data_ptr = data_h.get();
    DataFromBHWDC(in, shape_, descriptor_, data_h.get());
  }

  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
      RETURN_IF_ERROR(queue->EnqueueWriteBuffer(memory_, data_size, data_ptr));
      break;
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::SINGLE_TEXTURE_2D:
      RETURN_IF_ERROR(
          queue->EnqueueWriteImage(memory_, GetFullTensorRegion(), data_ptr));
      break;
    default:
      return absl::InternalError("Unsupported tensor storage type");
  }

  return absl::OkStatus();
}

absl::Status Tensor::WriteData(CLCommandQueue* queue,
                               const TensorFloat32& src) {
  RETURN_IF_ERROR(IsValid(src.shape));
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(CLCommandQueue* queue,
                               const Tensor5DFloat32& src) {
  RETURN_IF_ERROR(IsValid(src.shape));
  return WriteDataBHWDC(src.data.data(), queue);
}

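// Downloads the tensor with a single buffer or image read and converts the
// device layout back into BHWDC host order.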
absl::Status Tensor::ReadDataBHWDC(float* out, CLCommandQueue* queue) const {
  void* data_ptr = nullptr;
  const int aligned_channels = GetAlignedChannels();
  const int elements_count =
      shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
  std::unique_ptr<float[]> data_f;
  std::unique_ptr<half[]> data_h;
  if (descriptor_.data_type == DataType::FLOAT32) {
    data_f.reset(new float[elements_count]);
    data_ptr = data_f.get();
  } else {
    data_h.reset(new half[elements_count]);
    data_ptr = data_h.get();
  }

  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
      RETURN_IF_ERROR(queue->EnqueueReadBuffer(memory_, data_size, data_ptr));
      break;
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::SINGLE_TEXTURE_2D:
      RETURN_IF_ERROR(
          queue->EnqueueReadImage(memory_, GetFullTensorRegion(), data_ptr));
      break;
    default:
      return absl::InternalError("Unsupported tensor storage type");
  }

  if (descriptor_.data_type == DataType::FLOAT32) {
    DataToBHWDC(data_f.get(), shape_, descriptor_, out);
  } else {
    DataToBHWDC(data_h.get(), shape_, descriptor_, out);
  }

  return absl::OkStatus();
}

absl::Status Tensor::ReadData(CLCommandQueue* queue, TensorFloat32* dst) const {
  RETURN_IF_ERROR(IsValid(dst->shape));
  return ReadDataBHWDC(dst->data.data(), queue);
}

absl::Status Tensor::ReadData(CLCommandQueue* queue,
                              Tensor5DFloat32* dst) const {
  RETURN_IF_ERROR(IsValid(dst->shape));
  return ReadDataBHWDC(dst->data.data(), queue);
}

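// Recreates the tensor described by `desc`, allocating new device memory and,
// if the descriptor carries initial data, uploading it during allocation.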
absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
                                          CLContext* context) {
  shape_ = desc.shape;
  descriptor_.data_type = desc.data_type;
  descriptor_.storage_type = desc.storage_type;
  descriptor_.layout = desc.layout;
  memory_owner_ = true;
  CLMemory memory;
  uint8_t* data_ptr = desc.data.empty()
                          ? nullptr
                          : const_cast<unsigned char*>(desc.data.data());
  RETURN_IF_ERROR(
      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
  memory_ = memory.Release();
  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        *context, memory_, desc.data_type,
        shape_.b * shape_.w * shape_.h * shape_.d * DivideRoundUp(shape_.c, 4),
        &image_buffer_memory_));
  }
  return absl::OkStatus();
}

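// A minimal usage sketch for the factory functions below (illustrative only;
// assumes an already-initialized CLContext `context` and CLCommandQueue
// `queue`, and the TensorDescriptor{data_type, storage_type, layout} form from
// tensor_desc.h):
//
//   TensorDescriptor desc{DataType::FLOAT32, TensorStorageType::TEXTURE_2D,
//                         Layout::BHWC};
//   Tensor tensor;
//   RETURN_IF_ERROR(CreateTensor(context, BHWC(1, 16, 16, 8), desc, &tensor));
//   TensorFloat32 src;
//   src.shape = BHWC(1, 16, 16, 8);
//   src.data.resize(src.shape.DimensionsProduct());
//   RETURN_IF_ERROR(tensor.WriteData(&queue, src));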
absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensor(context, shape5D, descriptor, nullptr, result);
}

absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  return CreateTensor(context, shape, descriptor, nullptr, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensorShared(context, shape5D, descriptor, memory, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  return CreateTensorShared(context, shape, descriptor, memory, result);
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite