1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
18
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
35
36 namespace tflite {
37 namespace gpu {
38
39 class ConvolutionTransposed : public GPUOperation {
40 public:
41 ConvolutionTransposed() = default;
42 void GetPossibleKernelWorkGroups(
43 TuningType tuning_type, const GpuInfo& gpu_info,
44 const KernelInfo& kernel_info,
45 std::vector<int3>* work_groups) const override;
46 absl::Status BindArguments(ArgumentsBinder* args) override;
47 int3 GetGridSize() const override;
48
49 // Move only
50 ConvolutionTransposed(ConvolutionTransposed&& operation) = default;
51 ConvolutionTransposed& operator=(ConvolutionTransposed&& operation) = default;
52 ConvolutionTransposed(const ConvolutionTransposed&) = delete;
53 ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
54
GetWeightsDescription()55 WeightsDescription GetWeightsDescription() const {
56 WeightsDescription desc;
57 desc.layout = weights_layout_;
58 desc.output_group_size = block_size_.w;
59 return desc;
60 }
61
62 private:
63 friend ConvolutionTransposed CreateConvolutionTransposed(
64 const GpuInfo& gpu_info, const OperationDef& definition,
65 const ConvolutionTransposedAttributes& attr);
66 friend ConvolutionTransposed CreateConvolutionTransposed3D(
67 const GpuInfo& gpu_info, const OperationDef& definition,
68 const ConvolutionTransposed3DAttributes& attr);
69 friend ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
70 const GpuInfo& gpu_info, const OperationDef& definition,
71 const ConvolutionTransposedAttributes& attr);
72
73 ConvolutionTransposed(const OperationDef& definition,
74 const ConvolutionTransposedAttributes& attr,
75 const GpuInfo& gpu_info, bool weights_are_buffer);
76 ConvolutionTransposed(const OperationDef& definition,
77 const ConvolutionTransposed3DAttributes& attr,
78 const GpuInfo& gpu_info, bool weights_are_buffer);
79
80 template <DataType T>
81 void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
82 bool weights_are_buffer);
83
84 template <DataType T>
85 void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
86 bool weights_are_buffer);
87
88 std::string GenerateConvolutionTransposedCode(const OperationDef& op_def,
89 const GpuInfo& gpu_info,
90 bool weights_are_buffer,
91 const int4& block_size);
92 int4 stride_;
93 int4 block_size_ = int4(1, 1, 1, 1); // WHDS
94 WeightsLayout weights_layout_;
95 };
96
97 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWI,T> & weights,bool weights_are_buffer)98 void ConvolutionTransposed::UploadWeights(
99 const tflite::gpu::Tensor<OHWI, T>& weights, bool weights_are_buffer) {
100 const int flt_count =
101 GetTotalElementsCountForLayout(GetWeightsDescription(), weights.shape);
102 DataType weights_type = definition_.precision == CalculationsPrecision::F32
103 ? DataType::FLOAT32
104 : DataType::FLOAT16;
105
106 std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_type));
107 RearrangeWeights(weights, GetWeightsDescription(), weights_type,
108 absl::MakeSpan(weights_data));
109
110 if (weights_are_buffer) {
111 BufferDescriptor desc;
112 desc.element_type = weights_type;
113 desc.element_size = 16;
114 desc.size = weights_data.size();
115 desc.data = std::move(weights_data);
116 args_.AddObject("weights",
117 absl::make_unique<BufferDescriptor>(std::move(desc)));
118 } else {
119 const int dst_depth =
120 AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
121 const int src_depth = DivideRoundUp(weights.shape.i, 4);
122 const int kernel_x = weights.shape.w;
123 const int kernel_y = weights.shape.h;
124 int texture_width = dst_depth;
125 int texture_height = src_depth * kernel_x * kernel_y;
126 int sub_size = SizeOf(weights_type) * 4 * texture_width * texture_height;
127 for (int i = 0; i < 4; ++i) {
128 Texture2DDescriptor desc;
129 desc.element_type = weights_type;
130 desc.size = int2(texture_width, texture_height);
131 desc.data.resize(sub_size);
132 memcpy(desc.data.data(), weights_data.data() + sub_size * i, sub_size);
133 const std::string name = "weights" + std::to_string(i);
134 args_.AddObject(name,
135 absl::make_unique<Texture2DDescriptor>(std::move(desc)));
136 }
137 }
138 }
139
140 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWDI,T> & weights,bool weights_are_buffer)141 void ConvolutionTransposed::UploadWeights(
142 const tflite::gpu::Tensor<OHWDI, T>& weights, bool weights_are_buffer) {
143 const int flt_count =
144 GetTotalElementsCountForLayout(GetWeightsDescription(), weights.shape);
145 DataType weights_type = definition_.precision == CalculationsPrecision::F32
146 ? DataType::FLOAT32
147 : DataType::FLOAT16;
148
149 std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_type));
150 RearrangeWeights(weights, GetWeightsDescription(), weights_type,
151 absl::MakeSpan(weights_data));
152
153 if (weights_are_buffer) {
154 BufferDescriptor desc;
155 desc.element_type = weights_type;
156 desc.element_size = 16;
157 desc.size = weights_data.size();
158 desc.data = std::move(weights_data);
159 args_.AddObject("weights",
160 absl::make_unique<BufferDescriptor>(std::move(desc)));
161 } else {
162 const int dst_depth =
163 AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
164 const int src_depth = DivideRoundUp(weights.shape.i, 4);
165 const int kernel_x = weights.shape.w;
166 const int kernel_y = weights.shape.h;
167 const int kernel_z = weights.shape.d;
168 int texture_width = dst_depth;
169 int texture_height = src_depth * kernel_x * kernel_y * kernel_z;
170 int sub_size = SizeOf(weights_type) * 4 * texture_width * texture_height;
171 for (int i = 0; i < 4; ++i) {
172 Texture2DDescriptor desc;
173 desc.element_type = weights_type;
174 desc.size = int2(texture_width, texture_height);
175 desc.data.resize(sub_size);
176 memcpy(desc.data.data(), weights_data.data() + sub_size * i, sub_size);
177 const std::string name = "weights" + std::to_string(i);
178 args_.AddObject(name,
179 absl::make_unique<Texture2DDescriptor>(std::move(desc)));
180 }
181 }
182 }
183
// Factory functions for ConvolutionTransposed. Each one is declared a friend
// of the class, whose attribute-taking constructors are private.

// Builds the operation from 2D transposed-convolution attributes.
ConvolutionTransposed CreateConvolutionTransposed(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr);

// Builds the operation from 3D transposed-convolution attributes.
ConvolutionTransposed CreateConvolutionTransposed3D(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposed3DAttributes& attr);

// NOTE(review): presumably the variant whose weights are supplied at run time
// rather than uploaded from `attr` -- confirm against the definition.
ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr);
195
196 } // namespace gpu
197 } // namespace tflite
198
199 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
200