/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_

#include <cstdint>
#include <memory>

#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_memory.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"

namespace tflite {
namespace gpu {
namespace cl {

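// GPU tensor for the OpenCL backend. Owns (or wraps externally owned) cl_mem
// storage and provides typed upload/download between host BHWC/BHWDC data and
// the tensor's GPU storage layout.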
class Tensor : public GPUObject, public GpuSpatialTensor {
 public:
  Tensor()
      : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
  Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
         const TensorDescriptor& descriptor);
  Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
         const TensorDescriptor& descriptor);
  Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
         const BHWC& shape, const TensorDescriptor& descriptor);
  Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
         const BHWDC& shape, const TensorDescriptor& descriptor);

  // Move only
  Tensor(Tensor&& tensor);
  Tensor& operator=(Tensor&& tensor);
  Tensor(const Tensor&) = delete;
  Tensor& operator=(const Tensor&) = delete;

  ~Tensor() override { Release(); }

  absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                               GPUResourcesWithValue* resources) const override;

  int Width() const override { return shape_.w; }
  int Height() const override { return shape_.h; }
  int Depth() const override { return shape_.d; }
  int Channels() const override { return shape_.c; }
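  // Channels are packed into groups of 4 (one texel/vec4 per group), so the
  // slice count is ceil(C / 4).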
  int Slices() const override { return DivideRoundUp(shape_.c, 4); }
  int Batch() const override { return shape_.b; }

  TensorDescriptor GetDescriptor() const { return descriptor_; }
  DataType GetDataType() const { return descriptor_.data_type; }
  TensorStorageType GetStorageType() const { return descriptor_.storage_type; }

  // For profiling and memory statistics.
  uint64_t GetMemorySizeInBytes() const;

  cl_mem GetMemoryPtr() const;

  // For IMAGE_BUFFER storage, returns the underlying buffer memory pointer
  // instead of the image memory pointer.
  cl_mem GetMemoryPtrForWriting() const;

  absl::Status WriteData(
      CLCommandQueue* queue,
      const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
  absl::Status WriteData(
      CLCommandQueue* queue,
      const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
  template <DataType T>
  absl::Status WriteData(CLCommandQueue* queue,
                         const tflite::gpu::Tensor<BHWC, T>& src);
  template <DataType T>
  absl::Status WriteData(CLCommandQueue* queue,
                         const tflite::gpu::Tensor<BHWDC, T>& src);
  template <DataType T>
  absl::Status ReadData(CLCommandQueue* queue,
                        tflite::gpu::Tensor<BHWC, T>* dst) const;
  template <DataType T>
  absl::Status ReadData(CLCommandQueue* queue,
                        tflite::gpu::Tensor<BHWDC, T>* dst) const;

  absl::Status CreateFromDescriptor(const TensorDescriptor& desc,
                                    CLContext* context);

 private:
  absl::Status IsValid(const BHWC& shape) const;
  absl::Status IsValid(const BHWDC& shape) const;

  int GetChannelsAlignment() const;
  int GetAlignedChannels() const;

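  // Repacks host BHWDC data into the tensor's storage layout before the
  // OpenCL transfer (WriteDataBHWDC) and back after it (ReadDataBHWDC),
  // converting between float32 and float16 when the tensor stores FLOAT16.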
  template <typename T>
  absl::Status WriteDataBHWDC(const T* in, CLCommandQueue* queue);
  template <typename T>
  absl::Status ReadDataBHWDC(T* out, CLCommandQueue* queue) const;

  int3 GetFullTensorRegion() const;
  void Release();

  cl_mem memory_;
  cl_mem image_buffer_memory_;  // for IMAGE_BUFFER/TEXTURE_2D/SINGLE_TEXTURE_2D
  bool memory_owner_;
  bool buffer_based_ = false;
  BHWDC shape_;
  TensorDescriptor descriptor_;
};

using TensorPtr = std::shared_ptr<Tensor>;

absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result);

absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result);

absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result);

absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, Tensor* result);

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result);

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result);

absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
                                             cl_mem memory, const BHWC& shape,
                                             const TensorDescriptor& descriptor,
                                             int row_bytes_alignment,
                                             Tensor* result);

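// Typical allocate-write-read flow (a minimal sketch; `context`, `queue`,
// `src_cpu`, and `dst_cpu` are assumed to be set up by the caller, and the
// TensorDescriptor constructor is assumed to take data type, storage type,
// and layout):
//
//   TensorDescriptor desc(DataType::FLOAT32, TensorStorageType::BUFFER,
//                         Layout::BHWC);
//   Tensor tensor;
//   RETURN_IF_ERROR(CreateTensor(context, BHWC(1, 32, 32, 16), desc, &tensor));
//   RETURN_IF_ERROR(tensor.WriteData(queue, src_cpu));
//   // ... run kernels that consume `tensor` ...
//   RETURN_IF_ERROR(tensor.ReadData(queue, &dst_cpu));
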
template <DataType T>
absl::Status Tensor::WriteData(CLCommandQueue* queue,
                               const tflite::gpu::Tensor<BHWC, T>& src) {
  RETURN_IF_ERROR(IsValid(src.shape));
  return WriteDataBHWDC(src.data.data(), queue);
}

template <DataType T>
absl::Status Tensor::WriteData(CLCommandQueue* queue,
                               const tflite::gpu::Tensor<BHWDC, T>& src) {
  RETURN_IF_ERROR(IsValid(src.shape));
  return WriteDataBHWDC(src.data.data(), queue);
}

template <DataType T>
absl::Status Tensor::ReadData(CLCommandQueue* queue,
                              tflite::gpu::Tensor<BHWC, T>* dst) const {
  RETURN_IF_ERROR(IsValid(dst->shape));
  return ReadDataBHWDC(dst->data.data(), queue);
}

template <DataType T>
absl::Status Tensor::ReadData(CLCommandQueue* queue,
                              tflite::gpu::Tensor<BHWDC, T>* dst) const {
  RETURN_IF_ERROR(IsValid(dst->shape));
  return ReadDataBHWDC(dst->data.data(), queue);
}

template <typename T>
absl::Status Tensor::WriteDataBHWDC(const T* in, CLCommandQueue* queue) {
  const int aligned_channels = GetAlignedChannels();
  const int elements_count =
      shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;

  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
  std::unique_ptr<uint8_t[]> data_copy(new uint8_t[data_size]);
  if (descriptor_.data_type == DataType::FLOAT16) {
    // rearrangement and conversion from float32 to float16
    DataFromBHWDC(reinterpret_cast<const float*>(in), shape_, descriptor_,
                  reinterpret_cast<half*>(data_copy.get()));
  } else {
    // rearrangement only
    DataFromBHWDC(in, shape_, descriptor_,
                  reinterpret_cast<T*>(data_copy.get()));
  }

  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
      RETURN_IF_ERROR(
          queue->EnqueueWriteBuffer(memory_, data_size, data_copy.get()));
      break;
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::SINGLE_TEXTURE_2D: {
      cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
      RETURN_IF_ERROR(queue->EnqueueWriteImage(mem, GetFullTensorRegion(),
                                               data_copy.get()));
      break;
    }
    default:
      return absl::InternalError("Unsupported tensor storage type");
  }

  return absl::OkStatus();
}

template <typename T>
absl::Status Tensor::ReadDataBHWDC(T* out, CLCommandQueue* queue) const {
  const int aligned_channels = GetAlignedChannels();
  const int elements_count =
      shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
  std::unique_ptr<uint8_t[]> data_copy(new uint8_t[data_size]);

  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
      RETURN_IF_ERROR(
          queue->EnqueueReadBuffer(memory_, data_size, data_copy.get()));
      break;
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::SINGLE_TEXTURE_2D: {
      cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
      RETURN_IF_ERROR(
          queue->EnqueueReadImage(mem, GetFullTensorRegion(), data_copy.get()));
      break;
    }
    default:
      return absl::InternalError("Unsupported tensor storage type");
  }

  if (descriptor_.data_type == DataType::FLOAT16) {
    // rearrangement and conversion from float16 to float32
    DataToBHWDC(reinterpret_cast<half*>(data_copy.get()), shape_, descriptor_,
                reinterpret_cast<float*>(out));
  } else {
    // rearrangement only
    DataToBHWDC(reinterpret_cast<T*>(data_copy.get()), shape_, descriptor_,
                out);
  }

  return absl::OkStatus();
}

270 
271 }  // namespace cl
272 }  // namespace gpu
273 }  // namespace tflite
274 
275 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_TENSOR_H_
276