/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TENSOR_DESC_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TENSOR_DESC_H_

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_object_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"

namespace tflite {
namespace gpu {

enum class TensorStorageType {
  UNKNOWN,
  BUFFER,
  IMAGE_BUFFER,
  TEXTURE_2D,
  TEXTURE_3D,
  TEXTURE_ARRAY,
  SINGLE_TEXTURE_2D
};

class TensorDescriptor : public GPUObjectDescriptor {
 public:
  TensorDescriptor() = default;
  TensorDescriptor(DataType data_type, TensorStorageType storage_type,
                   Layout layout)
      : data_type_(data_type), storage_type_(storage_type), layout_(layout) {}

  TensorDescriptor(const TensorDescriptor&) = default;
  TensorDescriptor& operator=(const TensorDescriptor&) = default;
  TensorDescriptor(TensorDescriptor&& desc);
  TensorDescriptor& operator=(TensorDescriptor&& desc);

  void CopyWithoutData(TensorDescriptor* desc) const;

  bool operator==(const TensorDescriptor& d) const {
    return data_type_ == d.data_type_ && storage_type_ == d.storage_type_ &&
           layout_ == d.layout_;
  }

  bool operator!=(const TensorDescriptor& d) const { return !(*this == d); }

  void GetGpuResources(const BHWDC& tensor_shape,
                       GenericGPUResourcesWithValue* resources) const;

  absl::Status PerformConstExpr(const GpuInfo& gpu_info,
                                const std::string& const_expr,
                                std::string* result) const override;

  absl::Status PerformSelector(const GpuInfo& gpu_info,
                               const std::string& selector,
                               const std::vector<std::string>& args,
                               const std::vector<std::string>& template_args,
                               std::string* result) const override;

  GPUResources GetGPUResources(const GpuInfo& gpu_info) const override;

  void Release() override { data_.clear(); }
  uint64_t GetSizeInBytes() const override { return data_.size(); }
  size_t GetSizeInBytesForShape(const BHWDC& shape5d) const;

  bool HasAxis(Axis axis) const;

  absl::Status GetLinkingContextFromWriteSelector(
      const std::vector<std::string>& args, std::string* value_name,
      std::string* x_coord, std::string* y_coord, std::string* z_coord,
      std::string* s_coord, std::string* b_coord) const;

  template <DataType T>
  void UploadData(const tflite::gpu::Tensor<BHWC, T>& src);
  template <DataType T>
  void DownloadData(tflite::gpu::Tensor<BHWC, T>* dst);
  template <DataType T>
  void UploadData(const tflite::gpu::Tensor<BHWDC, T>& src);
  template <DataType T>
  void DownloadData(tflite::gpu::Tensor<BHWDC, T>* dst);

  void UploadData(const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src);
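  // Usage sketch for the UploadData/DownloadData methods above (illustrative,
  // hypothetical caller code): a CPU-side tensor can be packed into this
  // descriptor's internal data before a GPU object is created from it, e.g.
  //   TensorDescriptor desc(DataType::FLOAT32, TensorStorageType::BUFFER,
  //                         Layout::BHWC);
  //   desc.UploadData(cpu_tensor);  // tflite::gpu::Tensor<BHWC, FLOAT32>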

  int GetLinearIndex(const BHWDC& shape5d, int b, int x, int y, int d, int s,
                     int sub_c) const;

  bool SupportsZeroClamp(const Axis& axis, const GpuInfo& gpu_info) const;
  bool CanReadOutOfBorder(const Axis& axis) const;
  bool IsLinear() const;

  DataType GetDataType() const { return data_type_; }
  TensorStorageType GetStorageType() const { return storage_type_; }

  // Applicable only to storage types for which IsLinear() returns true.
  // For such types the address is a single 1D component - addr (int).
  // Returns true if a read at addr == -1 yields a zero value for this linear
  // storage type, and false otherwise.
  bool ReturnsZeroForNegOneRead(const GpuInfo& gpu_info) const;
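  // Illustrative sketch (hypothetical caller code): when this returns true, a
  // kernel generator may rely on reads at addr == -1 producing zero instead
  // of emitting an explicit bounds check, e.g.
  //   if (desc.IsLinear() && desc.ReturnsZeroForNegOneRead(gpu_info)) {
  //     // emit unclamped reads; out-of-range taps use addr = -1.
  //   }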

  absl::Status CanCreateTensorWithShape(const GpuInfo& gpu_info,
                                        const BHWDC& shape) const;

  absl::Status CanCreateTensorWithShape(const GpuInfo& gpu_info,
                                        const BHWC& shape) const;

  // May update the storage type if, with the current storage type, a tensor
  // with this shape cannot be allocated on the specified device (gpu_info).
  // The usual scenario is creating a new tensor_desc based on another one and
  // possibly updating the storage type for the new tensor_desc's shape,
  // because that shape may be unsupported with the old storage type.
  absl::Status UpdateToSupportedStorageType(const GpuInfo& gpu_info,
                                            const BHWC& shape);
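  // Minimal usage sketch (illustrative names; assumes the status-macro style
  // used elsewhere in this codebase):
  //   TensorDescriptor new_desc = src_desc;
  //   RETURN_IF_ERROR(
  //       new_desc.UpdateToSupportedStorageType(gpu_info, new_shape));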

  // shape must be initialized when using this function
  std::vector<uint64_t> GetStorageDims() const;
  // shape must be initialized when using this function
  int3 GetFullTensorRegion() const;
  // shape must be initialized when using this function
  uint64_t GetMemorySizeInBytes() const;
  // shape must be initialized when using this function
  int GetElementSize() const;

  void SetUseBufferForWriteOnlyTexture2d(bool value) {
    use_buffer_for_write_only_2d_texture_ = value;
  }
  bool GetUseBufferForWriteOnlyTexture2d() const {
    return use_buffer_for_write_only_2d_texture_;
  }

  void SetUseBufferForWriteOnlyImageBuffer(bool value) {
    use_buffer_for_write_only_image_buffer_ = value;
  }
  bool GetUseBufferForWriteOnlyImageBuffer() const {
    return use_buffer_for_write_only_image_buffer_;
  }

  void SetBHWCShape(const BHWC& new_shape) {
    shape_ = BHWDC(new_shape.b, new_shape.h, new_shape.w, 1, new_shape.c);
  }
  void SetBHWDCShape(const BHWDC& new_shape) { shape_ = new_shape; }
  BHWC GetBHWCShape() const {
    return BHWC(shape_.b, shape_.h, shape_.w, shape_.c);
  }
  BHWDC GetBHWDCShape() const { return shape_; }
  void SetData(std::vector<uint8_t>&& new_data) { data_ = new_data; }
  const std::vector<uint8_t>& GetData() const { return data_; }

 private:
  friend flatbuffers::Offset<data::TensorDescriptor> Encode(
      const TensorDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
  friend void Decode(const data::TensorDescriptor* fb_desc,
                     TensorDescriptor* desc);

  friend TensorDescriptor CreateConstantLinearTensorDescriptor(
      DataType data_type, TensorStorageType storage_type,
      const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);

  friend TensorDescriptor CreateConstantHWVec4TensorDescriptor(
      DataType data_type, TensorStorageType storage_type, int width,
      int height, const uint8_t* data);

  absl::Status PerformReadSelector(
      const GpuInfo& gpu_info, const std::vector<std::string>& args,
      const std::vector<std::string>& template_args,
      std::string* result) const;
  absl::Status PerformReadNearestSelector(const GpuInfo& gpu_info,
                                          const std::vector<std::string>& args,
                                          std::string* result) const;
  absl::Status PerformReadBilinearSelector(
      const GpuInfo& gpu_info, const std::vector<std::string>& args,
      std::string* result) const;
  absl::Status PerformReadPerChannelSelector(
      const GpuInfo& gpu_info, const std::vector<std::string>& args,
      const std::vector<std::string>& template_args,
      std::string* result) const;

  absl::Status PerformGetAddressSelector(const std::vector<std::string>& args,
                                         std::string* result) const;

  absl::Status PerformGetHandleSelector(const std::vector<std::string>& args,
                                        std::string* result) const;

  std::string StorageTypeToAddressType() const;

  absl::Status PerformWriteSelector(
      const GpuInfo& gpu_info, const std::vector<std::string>& args,
      const std::vector<std::string>& template_args,
      std::string* result) const;

  absl::Status PerformWriteLinearSelector(
      const GpuInfo& gpu_info, const std::vector<std::string>& args,
      const std::vector<std::string>& template_args,
      std::string* result) const;

  absl::Status PerformWrite2DSelector(
      const GpuInfo& gpu_info, const std::vector<std::string>& args,
      const std::vector<std::string>& template_args,
      std::string* result) const;

  std::string Read(const GpuInfo& gpu_info, DataType read_as_type,
                   const std::vector<std::string>& coords) const;
  std::string Write(const GpuInfo& gpu_info, DataType write_type,
                    const std::string& var_name,
                    const std::vector<std::string>& coords) const;

  absl::Status MaybeGetDataTypeFromTemplateArgs(
      const std::vector<std::string>& template_args, DataType* result) const;

  std::string GetGlobalAddressNoDeclaration(const std::string& xc,
                                            const std::string& yc,
                                            const std::string& zc,
                                            const std::string& sc,
                                            const std::string& bc) const;

  std::vector<std::string> GetPhysicalCoordsWHS(const std::string& x,
                                                const std::string& y,
                                                const std::string& s) const;
  std::vector<std::string> GetPhysicalCoordsWHSB(const std::string& x,
                                                 const std::string& y,
                                                 const std::string& s,
                                                 const std::string& b) const;
  std::vector<std::string> GetPhysicalCoordsWHDS(const std::string& x,
                                                 const std::string& y,
                                                 const std::string& z,
                                                 const std::string& s) const;
  std::vector<std::string> GetPhysicalCoordsWHDSB(const std::string& x,
                                                  const std::string& y,
                                                  const std::string& z,
                                                  const std::string& s,
                                                  const std::string& b) const;
  std::vector<std::string> GetPhysicalCoords(const std::string& xc,
                                             const std::string& yc,
                                             const std::string& zc,
                                             const std::string& sc,
                                             const std::string& bc) const;
  std::vector<std::string> GetPhysicalCoordsLinear(const std::string& x) const;
  std::vector<std::string> GetPhysicalCoordsHW(const std::string& x,
                                               const std::string& y) const;

  bool ParseCoordsFromArgs(const std::vector<std::string>& args, int offset,
                           std::string* xc, std::string* yc, std::string* zc,
                           std::string* sc, std::string* bc) const;

  template <typename T>
  void UploadData(const T* src);
  template <typename T>
  void DownloadData(T* dst);

  DataType data_type_ = DataType::UNKNOWN;
  TensorStorageType storage_type_ = TensorStorageType::UNKNOWN;

  // This field describes the logical layout; the actual (physical) GPU layout
  // can be totally different.
  // Supported layouts are HWC, BHWC, HWDC, BHWDC,
  // and HW and LINEAR (for constant objects only).
  Layout layout_ = Layout::UNKNOWN;

  // Applicable only to TEXTURE_2D.
  // When a 2D texture is created from a buffer, it can be used either as a
  // texture or as a buffer.
  // This option allows using the 2D texture as a buffer when it is used as a
  // dst tensor (write only).
  // Currently supported only for Metal/OpenCL.
  // False by default.
  bool use_buffer_for_write_only_2d_texture_ = false;

  // Applicable only to IMAGE_BUFFER.
  // An image buffer can be used either as an image or as a buffer.
  // This option allows using the image buffer as a buffer when it is used as
  // a dst tensor (write only).
  // Currently supported only for Metal/OpenCL.
  // True by default.
  bool use_buffer_for_write_only_image_buffer_ = true;

  // optional
  BHWDC shape_;
  std::vector<uint8_t> data_;
};

TensorDescriptor CreateBhwcTensorDescriptor(DataType data_type,
                                            TensorStorageType storage_type,
                                            const BHWC& shape);
TensorDescriptor CreateHwcTensorDescriptor(DataType data_type,
                                           TensorStorageType storage_type,
                                           const HWC& shape);

TensorStorageType GetStorageTypeForLinearTensor(const GpuInfo& gpu_info,
                                                DataType data_type,
                                                const Linear& shape);
TensorDescriptor CreateConstantLinearTensorDescriptor(
    DataType data_type, TensorStorageType storage_type,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);
TensorDescriptor CreateConstantLinearTensorDescriptor(
    const GpuInfo& gpu_info, DataType data_type,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src);

TensorDescriptor CreateConstantHWVec4TensorDescriptor(
    DataType data_type, TensorStorageType storage_type, int width, int height,
    const uint8_t* data);

template <DataType T>
void TensorDescriptor::UploadData(const tflite::gpu::Tensor<BHWC, T>& src) {
  shape_ = BHWDC(src.shape.b, src.shape.h, src.shape.w, 1, src.shape.c);
  UploadData(src.data.data());
}

template <DataType T>
void TensorDescriptor::DownloadData(tflite::gpu::Tensor<BHWC, T>* dst) {
  dst->shape = BHWC(shape_.b, shape_.h, shape_.w, shape_.c);
  dst->data.resize(dst->shape.DimensionsProduct(), 0.0f);
  DownloadData(dst->data.data());
}

template <DataType T>
void TensorDescriptor::UploadData(const tflite::gpu::Tensor<BHWDC, T>& src) {
  shape_ = src.shape;
  UploadData(src.data.data());
}

template <DataType T>
void TensorDescriptor::DownloadData(tflite::gpu::Tensor<BHWDC, T>* dst) {
  dst->shape = shape_;
  dst->data.resize(dst->shape.DimensionsProduct(), 0.0f);
  DownloadData(dst->data.data());
}

template <typename T>
void TensorDescriptor::UploadData(const T* src) {
  data_.resize(GetSizeInBytesForShape(shape_));
  if (layout_ == Layout::LINEAR) {
    if (data_type_ == DataType::FLOAT16) {
      half* gpu_data = reinterpret_cast<half*>(data_.data());
      DataFromLinear(src, *this, gpu_data);
    } else {
      T* gpu_data = reinterpret_cast<T*>(data_.data());
      DataFromLinear(src, *this, gpu_data);
    }
  } else {  // HWC/BHWC/HWDC/BHWDC
    if (data_type_ == DataType::FLOAT16) {
      half* gpu_data = reinterpret_cast<half*>(data_.data());
      DataFromBHWDC(src, shape_, *this, gpu_data);
    } else {
      T* gpu_data = reinterpret_cast<T*>(data_.data());
      DataFromBHWDC(src, shape_, *this, gpu_data);
    }
  }
}

template <typename T>
void TensorDescriptor::DownloadData(T* dst) {
  data_.resize(GetSizeInBytesForShape(shape_));
  if (data_type_ == DataType::FLOAT16) {
    half* gpu_data = reinterpret_cast<half*>(data_.data());
    DataToBHWDC(gpu_data, shape_, *this, dst);
  } else {
    T* gpu_data = reinterpret_cast<T*>(data_.data());
    DataToBHWDC(gpu_data, shape_, *this, dst);
  }
}

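// Packs a 1D (Linear) CPU tensor into the element-aligned GPU layout:
// channels are grouped into slices of GetElementSize() values and the tail of
// the last slice is zero-padded.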
template <typename FromType, typename ToType>
void DataFromLinear(const FromType* src, const TensorDescriptor& desc,
                    ToType* dst) {
  const int element_size = desc.GetElementSize();
  const Linear shape = Linear(desc.GetBHWCShape().c);
  const int slices = DivideRoundUp(shape.v, element_size);
  for (int s = 0; s < slices; ++s) {
    for (int c = 0; c < element_size; ++c) {
      FromType value;
      if (s * element_size + c < shape.v) {
        const int cpu_index = shape.LinearIndex({s * element_size + c});
        value = src[cpu_index];
      } else {
        value = 0;
      }
      int gpu_index = s * element_size + c;
      dst[gpu_index] = value;
    }
  }
}

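// Converts a dense BHWDC CPU layout into the slice-aligned GPU layout
// described by desc: channels are split into slices of 4 (or shape.c for
// SINGLE_TEXTURE_2D), the tail of the last slice is zero-padded, and each
// value is written at the position returned by desc.GetLinearIndex(...).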
template <typename FromType, typename ToType>
void DataFromBHWDC(const FromType* src, const BHWDC& shape,
                   const TensorDescriptor& desc, ToType* dst) {
  const int channels_alignment =
      desc.GetStorageType() == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c
                                                                    : 4;
  const int slices = DivideRoundUp(shape.c, 4);
  for (int b = 0; b < shape.b; ++b) {
    for (int s = 0; s < slices; ++s) {
      for (int y = 0; y < shape.h; ++y) {
        for (int x = 0; x < shape.w; ++x) {
          for (int d = 0; d < shape.d; ++d) {
            for (int c = 0; c < channels_alignment; ++c) {
              FromType value;
              if (s * 4 + c < shape.c) {
                const int cpu_index =
                    shape.LinearIndex({b, y, x, d, s * 4 + c});
                value = src[cpu_index];
              } else {
                value = 0;
              }
              int gpu_index = desc.GetLinearIndex(shape, b, x, y, d, s, c);
              dst[gpu_index] = value;
            }
          }
        }
      }
    }
  }
}

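// Inverse of DataFromBHWDC: unpacks the slice-aligned GPU layout back into a
// dense BHWDC CPU layout, skipping the zero padding in the last slice.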
template <typename FromType, typename ToType>
void DataToBHWDC(const FromType* src, const BHWDC& shape,
                 const TensorDescriptor& desc, ToType* dst) {
  const int channels_alignment =
      desc.GetStorageType() == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c
                                                                    : 4;
  const int slices = DivideRoundUp(shape.c, 4);
  for (int b = 0; b < shape.b; ++b) {
    for (int s = 0; s < slices; ++s) {
      for (int y = 0; y < shape.h; ++y) {
        for (int x = 0; x < shape.w; ++x) {
          for (int d = 0; d < shape.d; ++d) {
            for (int c = 0; c < channels_alignment; ++c) {
              if (s * 4 + c >= shape.c) {
                continue;
              }
              int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c});
              int gpu_index = desc.GetLinearIndex(shape, b, x, y, d, s, c);
              dst[cpu_index] = src[gpu_index];
            }
          }
        }
      }
    }
  }
}

std::string ToString(TensorStorageType type);

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TENSOR_DESC_H_