/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/tensor.h"

#include <cstring>
#include <memory>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_image_format.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
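// Allocates device memory matching descriptor.storage_type (buffer, image
// buffer, 2D/3D texture, texture array, or single 2D texture). When data_ptr
// is non-null, CL_MEM_COPY_HOST_PTR is added so the host data is copied into
// the newly created memory object.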
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  const void* data_ptr, CLMemory* result) {
  const int slices = DivideRoundUp(shape.c, 4);
  cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
  if (data_ptr) {
    mem_flags |= CL_MEM_COPY_HOST_PTR;
  }
  switch (descriptor.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER: {
      const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
                               4 * SizeOf(descriptor.data_type);
      cl_int error_code;
      cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
                                     const_cast<void*>(data_ptr), &error_code);
      if (!memory) {
        return absl::UnknownError(
            absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
                         CLErrorCodeToString(error_code)));
      }
      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_2D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h * slices;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_3D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE3D;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 3D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_ARRAY: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_array_size = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          clCreateImage(context.context(), mem_flags, &format, &desc,
                        const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    case TensorStorageType::SINGLE_TEXTURE_2D: {
      if (slices != 1) {
        return absl::InvalidArgumentError(absl::StrCat(
            "SINGLE_TEXTURE_2D supports only channels in range [1-4], but ",
            shape.c, " was provided"));
      }
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) {
        format.image_channel_order = ToChannelOrder(shape.c);
        format.image_channel_data_type =
            ToImageChannelType(descriptor.data_type);
      } else {
        return absl::InvalidArgumentError(absl::StrCat(
            "This device doesn't support ", shape.c, "-channel textures."));
      }

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    default:
      return absl::InternalError("Unsupported tensor storage type");
  }
}
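
// Wraps an existing buffer in a CL_MEM_OBJECT_IMAGE1D_BUFFER image so the
// same data can also be accessed through the image read path; `width` is the
// number of RGBA pixels, i.e. groups of four channels.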
absl::Status CreateImageBufferFromBuffer(const CLContext& context,
                                         cl_mem memory, DataType data_type,
                                         int width, cl_mem* result) {
  cl_image_format format;
  cl_image_desc desc;
  std::memset(&desc, 0, sizeof(desc));
  desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
  desc.image_width = width;
  desc.mem_object = memory;

  format.image_channel_data_type = ToImageChannelType(data_type);
  format.image_channel_order = CL_RGBA;

  cl_int error_code;
  *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc,
                          nullptr, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}
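
// Builds a Tensor around `memory`, allocating new device memory when `memory`
// is null. For IMAGE_BUFFER storage an additional image view of the buffer is
// created and owned by the resulting tensor.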
absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, cl_mem memory,
                          Tensor* result) {
  const bool memory_owner = memory == nullptr;
  if (memory_owner) {
    CLMemory mem;
    RETURN_IF_ERROR(
        AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
    memory = mem.Release();
  }
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}
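
// Same as CreateTensor above, but the provided `memory` remains owned by the
// caller; only the IMAGE_BUFFER image view (if any) is owned by the tensor.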
absl::Status CreateTensorShared(const CLContext& context, const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                cl_mem memory, Tensor* result) {
  const bool memory_owner = false;
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

}  // namespace

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWDC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {}

Tensor::Tensor(Tensor&& tensor)
    : memory_(tensor.memory_),
      image_buffer_memory_(tensor.image_buffer_memory_),
      memory_owner_(tensor.memory_owner_),
      shape_(tensor.shape_),
      descriptor_(tensor.descriptor_) {
  tensor.memory_ = nullptr;
  tensor.image_buffer_memory_ = nullptr;
}

Tensor& Tensor::operator=(Tensor&& tensor) {
  if (this != &tensor) {
    Release();
    std::swap(memory_, tensor.memory_);
    std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
    std::swap(memory_owner_, tensor.memory_owner_);
    std::swap(shape_, tensor.shape_);
    std::swap(descriptor_, tensor.descriptor_);
  }
  return *this;
}

void Tensor::Release() {
  // image_buffer_memory_ is always owned by this object.
  if (image_buffer_memory_) {
    clReleaseMemObject(image_buffer_memory_);
    image_buffer_memory_ = nullptr;
  }
  if (memory_owner_ && memory_) {
    clReleaseMemObject(memory_);
    memory_ = nullptr;
  }
}
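
// Binds this tensor's memory objects and size constants to the resource names
// expected by the given GPU object descriptor (buffer, 2D texture, or full
// tensor descriptor).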
absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                                     GPUResourcesWithValue* resources) const {
  const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(obj_ptr);
  if (buffer_desc) {
    if (descriptor_.storage_type != TensorStorageType::BUFFER) {
      return absl::InvalidArgumentError(
          "Tensor can be used with BufferDescriptor only with "
          "TensorStorageType::BUFFER.");
    }
    resources->buffers.push_back({"buffer", memory_});
    return absl::OkStatus();
  }
  const auto* texture2d_desc =
      dynamic_cast<const Texture2DDescriptor*>(obj_ptr);
  if (texture2d_desc) {
    if (descriptor_.storage_type != TensorStorageType::TEXTURE_2D) {
      return absl::InvalidArgumentError(
          "Tensor can be used with Texture2DDescriptor only with "
          "TensorStorageType::TEXTURE_2D.");
    }
    resources->images2d.push_back({"tex2d", memory_});
    return absl::OkStatus();
  }
  const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(obj_ptr);
  if (!tensor_desc) {
    return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
  }
  resources->ints.push_back(
      {"slice_stride", tensor_desc->GetSliceStrideSize(shape_)});
  if (descriptor_.HasAxis(Axis::WIDTH)) {
    resources->ints.push_back({"width", Width()});
    resources->ints.push_back({"width_div2", Width() / 2});
    resources->ints.push_back({"width_div4", Width() / 4});
    resources->ints.push_back({"width_batched", Width() * Batch()});
    resources->ints.push_back({"width_batched_div2", Width() * Batch() / 2});
    resources->ints.push_back({"width_batched_div4", Width() * Batch() / 4});
  }
  if (descriptor_.HasAxis(Axis::HEIGHT)) {
    resources->ints.push_back({"height", Height()});
  }
  if (descriptor_.HasAxis(Axis::CHANNELS)) {
    resources->ints.push_back({"slices", Slices()});
    resources->ints.push_back({"channels", Channels()});
  }
  if (descriptor_.HasAxis(Axis::BATCH)) {
    resources->ints.push_back({"batch", Batch()});
  }
  if (descriptor_.HasAxis(Axis::DEPTH)) {
    resources->ints.push_back({"depth", Depth()});
  }

  if (descriptor_.storage_type == TensorStorageType::BUFFER) {
    resources->buffers.push_back({"buffer", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D ||
             descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) {
    resources->images2d.push_back({"image2d", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) {
    resources->image2d_arrays.push_back({"image2d_array", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D) {
    resources->images3d.push_back({"image3d", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) {
    if (obj_ptr->GetAccess() == AccessType::READ) {
      resources->image_buffers.push_back(
          {"image_buffer", image_buffer_memory_});
    } else {
      resources->buffers.push_back({"buffer", memory_});
    }
  }

  return absl::OkStatus();
}
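
// Returns the 3D region this tensor occupies for image read/write operations;
// how width, height, depth, and slices map onto the region depends on the
// storage type.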
int3 Tensor::GetFullTensorRegion() const {
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::IMAGE_BUFFER:
      return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()};
    case TensorStorageType::TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1};
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h, 1};
    case TensorStorageType::UNKNOWN:
      return {-1, -1, -1};
  }
}

absl::Status Tensor::IsValid(const BHWC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

absl::Status Tensor::IsValid(const BHWDC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.d != shape_.d) {
    return absl::InvalidArgumentError(
        "Shape depth does not match tensor depth");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

int Tensor::GetAlignedChannels() const {
  return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
             ? shape_.c
             : AlignByN(shape_.c, 4);
}

uint64_t Tensor::GetMemorySizeInBytes() const {
  const int flt_size = SizeOf(descriptor_.data_type);
  const int flt4_size = 4 * flt_size;
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
      return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d;
    default:
      return 0;
  }
}
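
// For IMAGE_BUFFER storage, reads go through the image view while writes use
// the underlying buffer (see GetMemoryPtrForWriting below).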
cl_mem Tensor::GetMemoryPtr() const {
  return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER
             ? image_buffer_memory_
             : memory_;
}

cl_mem Tensor::GetMemoryPtrForWriting() const { return memory_; }
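
// Converts host BHWDC float data into the tensor's device layout (float32 or
// float16, channel-aligned) and uploads it to the buffer or image.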
absl::Status Tensor::WriteDataBHWDC(const float* in, CLCommandQueue* queue) {
  void* data_ptr = nullptr;
  const int aligned_channels = GetAlignedChannels();
  const int elements_count =
      shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;

  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
  std::unique_ptr<float[]> data_f;
  std::unique_ptr<half[]> data_h;
  if (descriptor_.data_type == DataType::FLOAT32) {
    data_f.reset(new float[elements_count]);
    data_ptr = data_f.get();
    DataFromBHWDC(in, shape_, descriptor_, data_f.get());
  } else {
    data_h.reset(new half[elements_count]);
    data_ptr = data_h.get();
    DataFromBHWDC(in, shape_, descriptor_, data_h.get());
  }

  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
      RETURN_IF_ERROR(queue->EnqueueWriteBuffer(memory_, data_size, data_ptr));
      break;
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::SINGLE_TEXTURE_2D:
      RETURN_IF_ERROR(
          queue->EnqueueWriteImage(memory_, GetFullTensorRegion(), data_ptr));
      break;
    default:
      return absl::InternalError("Unsupported tensor storage type");
  }

  return absl::OkStatus();
}

absl::Status Tensor::WriteData(CLCommandQueue* queue,
                               const TensorFloat32& src) {
  RETURN_IF_ERROR(IsValid(src.shape));
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(CLCommandQueue* queue,
                               const Tensor5DFloat32& src) {
  RETURN_IF_ERROR(IsValid(src.shape));
  return WriteDataBHWDC(src.data.data(), queue);
}
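
// Downloads the tensor contents and converts them back into BHWDC float
// layout on the host.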
absl::Status Tensor::ReadDataBHWDC(float* out, CLCommandQueue* queue) const {
  void* data_ptr = nullptr;
  const int aligned_channels = GetAlignedChannels();
  const int elements_count =
      shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
  std::unique_ptr<float[]> data_f;
  std::unique_ptr<half[]> data_h;
  if (descriptor_.data_type == DataType::FLOAT32) {
    data_f.reset(new float[elements_count]);
    data_ptr = data_f.get();
  } else {
    data_h.reset(new half[elements_count]);
    data_ptr = data_h.get();
  }

  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
      RETURN_IF_ERROR(queue->EnqueueReadBuffer(memory_, data_size, data_ptr));
      break;
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::SINGLE_TEXTURE_2D:
      RETURN_IF_ERROR(
          queue->EnqueueReadImage(memory_, GetFullTensorRegion(), data_ptr));
      break;
    default:
      return absl::InternalError("Unsupported tensor storage type");
  }

  if (descriptor_.data_type == DataType::FLOAT32) {
    DataToBHWDC(data_f.get(), shape_, descriptor_, out);
  } else {
    DataToBHWDC(data_h.get(), shape_, descriptor_, out);
  }

  return absl::OkStatus();
}

absl::Status Tensor::ReadData(CLCommandQueue* queue, TensorFloat32* dst) const {
  RETURN_IF_ERROR(IsValid(dst->shape));
  return ReadDataBHWDC(dst->data.data(), queue);
}

absl::Status Tensor::ReadData(CLCommandQueue* queue,
                              Tensor5DFloat32* dst) const {
  RETURN_IF_ERROR(IsValid(dst->shape));
  return ReadDataBHWDC(dst->data.data(), queue);
}
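
// Initializes this tensor from a serialized descriptor; if the descriptor
// carries data, it is uploaded to the device during allocation.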
absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
                                          CLContext* context) {
  shape_ = desc.shape;
  descriptor_.data_type = desc.data_type;
  descriptor_.storage_type = desc.storage_type;
  descriptor_.layout = desc.layout;
  memory_owner_ = true;
  CLMemory memory;
  uint8_t* data_ptr = desc.data.empty()
                          ? nullptr
                          : const_cast<unsigned char*>(desc.data.data());
  RETURN_IF_ERROR(
      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
  memory_ = memory.Release();
  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        *context, memory_, desc.data_type,
        shape_.b * shape_.w * shape_.h * shape_.d * DivideRoundUp(shape_.c, 4),
        &image_buffer_memory_));
  }
  return absl::OkStatus();
}
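
// Public factory and allocation entry points; BHWC shapes are expanded to
// BHWDC with depth == 1.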
absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensor(context, shape5D, descriptor, nullptr, result);
}

absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  return CreateTensor(context, shape, descriptor, nullptr, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensorShared(context, shape5D, descriptor, memory, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  return CreateTensorShared(context, shape, descriptor, memory, result);
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite