/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/tensor.h"

#include <cstdint>
#include <cstring>
#include <memory>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_image_format.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
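// Allocates device memory for a tensor with the given shape and descriptor.
// Channels are packed into 4-element slices (slices = ceil(c / 4)), so most
// storage types allocate b * w * h * d * slices RGBA elements. If data_ptr is
// non-null, the allocation is initialized from host memory via
// CL_MEM_COPY_HOST_PTR.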
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  const void* data_ptr, CLMemory* result) {
  const int slices = DivideRoundUp(shape.c, 4);
  cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
  if (data_ptr) {
    mem_flags |= CL_MEM_COPY_HOST_PTR;
  }
  switch (descriptor.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER: {
      const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
                               4 * SizeOf(descriptor.data_type);
      cl_int error_code;
      cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
                                     const_cast<void*>(data_ptr), &error_code);
      if (!memory) {
        return absl::UnknownError(
            absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
                         CLErrorCodeToString(error_code)));
      }
      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_2D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h * slices;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type =
          DataTypeToChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_3D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE3D;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type =
          DataTypeToChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 3D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_ARRAY: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_array_size = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type =
          DataTypeToChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          clCreateImage(context.context(), mem_flags, &format, &desc,
                        const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    case TensorStorageType::SINGLE_TEXTURE_2D: {
      if (slices != 1) {
        return absl::InvalidArgumentError(absl::StrCat(
            "SINGLE_TEXTURE_2D supports only channels in range [1-4], but ",
            shape.c, " was provided"));
      }
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) {
        format.image_channel_order = ToChannelOrder(shape.c);
        format.image_channel_data_type =
            DataTypeToChannelType(descriptor.data_type);
      } else {
        return absl::InvalidArgumentError(absl::StrCat(
            "This device doesn't support ", shape.c, "-channel textures."));
      }

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    default:
      return absl::InternalError("Unsupported tensor storage type");
  }
}

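// Wraps an existing buffer in a 1D image-buffer view
// (CL_MEM_OBJECT_IMAGE1D_BUFFER) so it can be sampled as an image. `width` is
// measured in RGBA texels, not bytes.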
absl::Status CreateImageBufferFromBuffer(const CLContext& context,
                                         cl_mem memory, DataType data_type,
                                         int width, cl_mem* result) {
  cl_image_format format;
  cl_image_desc desc;
  std::memset(&desc, 0, sizeof(desc));
  desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
  desc.image_width = width;
  desc.mem_object = memory;

  format.image_channel_data_type = DataTypeToChannelType(data_type);
  format.image_channel_order = CL_RGBA;

  cl_int error_code;
  *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc,
                          nullptr, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}

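// Creates a 2D image view over an existing buffer. With row_bytes_alignment ==
// 0 the driver picks the row pitch; otherwise the pitch is the tightly packed
// row size rounded up to the requested alignment.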
absl::Status CreateImage2DFromBuffer(const CLContext& context, cl_mem memory,
                                     DataType data_type, int width, int height,
                                     int channels, int row_bytes_alignment,
                                     cl_mem* result) {
  if (!context.IsFloatTexture2DSupported(channels, data_type)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "This device doesn't support ", channels, "-channel textures."));
  }

  cl_image_desc desc;
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width;
  desc.image_height = height;
  desc.image_depth = 0;
  if (row_bytes_alignment == 0) {
    desc.image_row_pitch = 0;
  } else {
    const size_t bytes_per_row = width * channels * SizeOf(data_type);
    desc.image_row_pitch = AlignByN(bytes_per_row, row_bytes_alignment);
  }
  desc.image_slice_pitch = 0;
  desc.num_mip_levels = 0;
  desc.num_samples = 0;
  desc.mem_object = memory;

  cl_image_format format;
  format.image_channel_order = ToChannelOrder(channels);
  format.image_channel_data_type = DataTypeToChannelType(data_type);

  cl_int error_code;
  *result = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE, &format,
                                &desc, nullptr, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create Image2D from Buffer (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}

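// Creates a tensor, allocating fresh device memory when `memory` is null. For
// IMAGE_BUFFER storage an additional image view is created on top of the
// underlying buffer; the tensor owns that view.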
absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, cl_mem memory,
                          Tensor* result) {
  const bool memory_owner = memory == nullptr;
  if (memory_owner) {
    CLMemory mem;
    RETURN_IF_ERROR(
        AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
    memory = mem.Release();
  }
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

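// Same as CreateTensor above, but the tensor never owns `memory`; the caller
// must keep the underlying allocation alive for the tensor's lifetime.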
absl::Status CreateTensorShared(const CLContext& context, const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                cl_mem memory, Tensor* result) {
  const bool memory_owner = false;
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

}  // namespace

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {
  if (image_buffer_memory &&
      (descriptor.storage_type == TensorStorageType::TEXTURE_2D ||
       descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D)) {
    buffer_based_ = true;
  }
}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWDC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {
  if (image_buffer_memory &&
      (descriptor.storage_type == TensorStorageType::TEXTURE_2D ||
       descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D)) {
    buffer_based_ = true;
  }
}

Tensor::Tensor(Tensor&& tensor)
    : memory_(tensor.memory_),
      image_buffer_memory_(tensor.image_buffer_memory_),
      memory_owner_(tensor.memory_owner_),
      buffer_based_(tensor.buffer_based_),
      shape_(tensor.shape_),
      descriptor_(tensor.descriptor_) {
  tensor.memory_ = nullptr;
  tensor.image_buffer_memory_ = nullptr;
}

Tensor& Tensor::operator=(Tensor&& tensor) {
  if (this != &tensor) {
    Release();
    std::swap(memory_, tensor.memory_);
    std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
    std::swap(memory_owner_, tensor.memory_owner_);
    std::swap(buffer_based_, tensor.buffer_based_);
    std::swap(shape_, tensor.shape_);
    std::swap(descriptor_, tensor.descriptor_);
  }
  return *this;
}

void Tensor::Release() {
  // image_buffer_memory_ is always owned by this object.
  if (image_buffer_memory_) {
    clReleaseMemObject(image_buffer_memory_);
    image_buffer_memory_ = nullptr;
  }
  if (memory_owner_ && memory_) {
    clReleaseMemObject(memory_);
    memory_ = nullptr;
  }
}

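// Exposes this tensor's memory objects and shape constants to a GPU task. The
// descriptor's dynamic type selects the binding: a plain buffer, a bare 2D
// texture, or the full tensor binding with per-axis size arguments.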
absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                                     GPUResourcesWithValue* resources) const {
  const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(obj_ptr);
  if (buffer_desc) {
    if (descriptor_.storage_type != TensorStorageType::BUFFER) {
      return absl::InvalidArgumentError(
          "Tensor can be used with BufferDescriptor only with "
          "TensorStorageType::BUFFER.");
    }
    resources->buffers.push_back({"buffer", memory_});
    return absl::OkStatus();
  }
  const auto* texture2d_desc =
      dynamic_cast<const Texture2DDescriptor*>(obj_ptr);
  if (texture2d_desc) {
    if (descriptor_.storage_type != TensorStorageType::TEXTURE_2D) {
      return absl::InvalidArgumentError(
          "Tensor can be used with Texture2DDescriptor only with "
          "TensorStorageType::TEXTURE_2D.");
    }
    cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
    resources->images2d.push_back({"tex2d", mem});
    return absl::OkStatus();
  }
  const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(obj_ptr);
  if (!tensor_desc) {
    return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
  }
  resources->ints.push_back(
      {"slice_stride", tensor_desc->GetSliceStrideSize(shape_)});
  if (descriptor_.HasAxis(Axis::WIDTH)) {
    resources->ints.push_back({"width", tensor_desc->GetWidthSize(shape_)});
  }
  if (descriptor_.HasAxis(Axis::HEIGHT)) {
    resources->ints.push_back({"height", Height()});
  }
  if (descriptor_.HasAxis(Axis::CHANNELS)) {
    resources->ints.push_back({"slices", Slices()});
    resources->ints.push_back({"channels", Channels()});
  }
  if (descriptor_.HasAxis(Axis::BATCH)) {
    resources->ints.push_back({"batch", Batch()});
  }
  if (descriptor_.HasAxis(Axis::DEPTH)) {
    resources->ints.push_back({"depth", Depth()});
  }

  if (descriptor_.storage_type == TensorStorageType::BUFFER) {
    resources->buffers.push_back({"buffer", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D ||
             descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) {
    cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
    resources->images2d.push_back({"image2d", mem});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) {
    resources->image2d_arrays.push_back({"image2d_array", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D) {
    resources->images3d.push_back({"image3d", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) {
    if (obj_ptr->GetAccess() == AccessType::READ) {
      resources->image_buffers.push_back(
          {"image_buffer", image_buffer_memory_});
    } else {
      resources->buffers.push_back({"buffer", memory_});
    }
  }

  return absl::OkStatus();
}

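// Returns the addressable (x, y, z) extent of the tensor for its storage
// type; UNKNOWN storage yields {-1, -1, -1}.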
int3 Tensor::GetFullTensorRegion() const {
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::IMAGE_BUFFER:
      return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()};
    case TensorStorageType::TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1};
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h, 1};
    case TensorStorageType::UNKNOWN:
      return {-1, -1, -1};
  }
}

absl::Status Tensor::IsValid(const BHWC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

absl::Status Tensor::IsValid(const BHWDC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.d != shape_.d) {
    return absl::InvalidArgumentError(
        "Shape depth does not match tensor depth");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

int Tensor::GetAlignedChannels() const {
  return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
             ? shape_.c
             : AlignByN(shape_.c, 4);
}

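// Most storage types round channels up to full 4-element slices, so their
// footprint is 4 * SizeOf(data_type) bytes per (b, w, h, d, slice) element;
// SINGLE_TEXTURE_2D stores exactly shape_.c channels per texel.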
uint64_t Tensor::GetMemorySizeInBytes() const {
  const int flt_size = SizeOf(descriptor_.data_type);
  const int flt4_size = 4 * flt_size;
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
      return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d;
    default:
      return 0;
  }
}

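// For reading: IMAGE_BUFFER tensors and buffer-based 2D textures are accessed
// through the image view; all other storage types use the underlying memory
// object directly. Writes to IMAGE_BUFFER go through the raw buffer instead.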
cl_mem Tensor::GetMemoryPtr() const {
  if (buffer_based_) {
    return image_buffer_memory_;
  } else {
    return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER
               ? image_buffer_memory_
               : memory_;
  }
}

cl_mem Tensor::GetMemoryPtrForWriting() const {
  if (buffer_based_) {
    return image_buffer_memory_;
  } else {
    return memory_;
  }
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

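// (Re)initializes this tensor from a serialized descriptor, allocating device
// memory and, when the descriptor carries constant data, uploading it in the
// same allocation call.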
absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
                                          CLContext* context) {
  shape_ = desc.shape;
  descriptor_.data_type = desc.data_type;
  descriptor_.storage_type = desc.storage_type;
  descriptor_.layout = desc.layout;
  memory_owner_ = true;
  CLMemory memory;
  uint8_t* data_ptr = desc.data.empty()
                          ? nullptr
                          : const_cast<unsigned char*>(desc.data.data());
  RETURN_IF_ERROR(
      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
  memory_ = memory.Release();
  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        *context, memory_, desc.data_type,
        shape_.b * shape_.w * shape_.h * shape_.d * DivideRoundUp(shape_.c, 4),
        &image_buffer_memory_));
  }
  return absl::OkStatus();
}

absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensor(context, shape5D, descriptor, nullptr, result);
}

absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  return CreateTensor(context, shape, descriptor, nullptr, result);
}

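// A minimal usage sketch (assumes an initialized CLContext `context` and that
// the descriptor's Layout enum provides BHWC; error handling and queue setup
// are omitted):
//
//   TensorDescriptor desc;
//   desc.data_type = DataType::FLOAT32;
//   desc.storage_type = TensorStorageType::TEXTURE_2D;
//   desc.layout = Layout::BHWC;
//   Tensor tensor;
//   absl::Status status =
//       CreateTensor(context, BHWC(1, 32, 32, 16), desc, &tensor);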
absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensorShared(context, shape5D, descriptor, memory, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  return CreateTensorShared(context, shape, descriptor, memory, result);
}

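// Wraps an external buffer in a 2D image view and exposes it as a tensor
// without taking ownership of the buffer. For SINGLE_TEXTURE_2D each texel
// carries all shape.c channels; otherwise channels are packed into RGBA
// slices stacked along the image height.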
absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
                                             cl_mem memory, const BHWC& shape,
                                             const TensorDescriptor& descriptor,
                                             int row_bytes_alignment,
                                             Tensor* result) {
  const int width = shape.b * shape.w;
  const int height =
      descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
          ? shape.h
          : shape.h * DivideRoundUp(shape.c, 4);
  const int channels =
      descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c
                                                                      : 4;
  cl_mem image_memory;
  RETURN_IF_ERROR(CreateImage2DFromBuffer(context, memory, descriptor.data_type,
                                          width, height, channels,
                                          row_bytes_alignment, &image_memory));
  *result = Tensor(memory, false, image_memory, shape, descriptor);
  return absl::OkStatus();
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite