1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
18
19 #include <cstdint>
20 #include <vector>
21
22 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
23 #include "tensorflow/lite/delegates/gpu/common/operations.h"
24 #include "tensorflow/lite/delegates/gpu/common/shape.h"
25 #include "tensorflow/lite/delegates/gpu/common/status.h"
26 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
28 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
30 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
33 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
34 #include "tensorflow/lite/delegates/gpu/common/types.h"
35
36 namespace tflite {
37 namespace gpu {
38
39 class ConvolutionTransposed : public GPUOperation {
40 public:
41 ConvolutionTransposed() = default;
42 void GetPossibleKernelWorkGroups(
43 TuningType tuning_type, const GpuInfo& gpu_info,
44 const KernelInfo& kernel_info,
45 std::vector<int3>* work_groups) const override;
46 absl::Status BindArguments(ArgumentsBinder* args) override;
47 int3 GetGridSize() const override;
48
49 // Move only
50 ConvolutionTransposed(ConvolutionTransposed&& operation) = default;
51 ConvolutionTransposed& operator=(ConvolutionTransposed&& operation) = default;
52 ConvolutionTransposed(const ConvolutionTransposed&) = delete;
53 ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
54
GetWeightsDescription()55 WeightsDescription GetWeightsDescription() const {
56 WeightsDescription desc;
57 desc.layout = weights_layout_;
58 desc.output_group_size = block_size_.w;
59 return desc;
60 }
61
62 private:
63 friend ConvolutionTransposed CreateConvolutionTransposed(
64 const GpuInfo& gpu_info, const OperationDef& definition,
65 const ConvolutionTransposedAttributes& attr);
66 friend ConvolutionTransposed CreateConvolutionTransposed3D(
67 const GpuInfo& gpu_info, const OperationDef& definition,
68 const ConvolutionTransposed3DAttributes& attr);
69 friend ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
70 const GpuInfo& gpu_info, const OperationDef& definition,
71 const ConvolutionTransposedAttributes& attr);
72
73 ConvolutionTransposed(const OperationDef& definition,
74 const ConvolutionTransposedAttributes& attr,
75 const GpuInfo& gpu_info, bool weights_are_buffer);
76 ConvolutionTransposed(const OperationDef& definition,
77 const ConvolutionTransposed3DAttributes& attr,
78 const GpuInfo& gpu_info, bool weights_are_buffer);
79
80 template <DataType T>
81 void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
82 bool weights_are_buffer);
83
84 template <DataType T>
85 void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
86 bool weights_are_buffer);
87
88 std::string GenerateConvolutionTransposedCode(const OperationDef& op_def,
89 const GpuInfo& gpu_info,
90 bool weights_are_buffer,
91 const int4& block_size);
92 int4 stride_;
93 int4 block_size_ = int4(1, 1, 1, 1); // WHDS
94 WeightsLayout weights_layout_;
95 };
96
97 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWI,T> & weights,bool weights_are_buffer)98 void ConvolutionTransposed::UploadWeights(
99 const tflite::gpu::Tensor<OHWI, T>& weights, bool weights_are_buffer) {
100 const int flt_count =
101 GetTotalElementsCountForLayout(GetWeightsDescription(), weights.shape);
102 DataType weights_type = definition_.precision == CalculationsPrecision::F32
103 ? DataType::FLOAT32
104 : DataType::FLOAT16;
105
106 std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_type));
107 RearrangeWeights(weights, GetWeightsDescription(), weights_type,
108 absl::MakeSpan(weights_data));
109
110 if (weights_are_buffer) {
111 BufferDescriptor desc;
112 desc.element_type = weights_type;
113 desc.element_size = 16;
114 desc.size = weights_data.size();
115 desc.data = std::move(weights_data);
116 args_.AddObject("weights",
117 absl::make_unique<BufferDescriptor>(std::move(desc)));
118 } else {
119 const int dst_depth =
120 AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
121 const int src_depth = DivideRoundUp(weights.shape.i, 4);
122 const int kernel_x = weights.shape.w;
123 const int kernel_y = weights.shape.h;
124 int texture_width = dst_depth;
125 int texture_height = src_depth * kernel_x * kernel_y;
126 int sub_size = SizeOf(weights_type) * 4 * texture_width * texture_height;
127 for (int i = 0; i < 4; ++i) {
128 Texture2DDescriptor desc;
129 desc.element_type = weights_type;
130 desc.size = int2(texture_width, texture_height);
131 desc.data.resize(sub_size);
132 memcpy(desc.data.data(), weights_data.data() + sub_size * i, sub_size);
133 const std::string name = "weights" + std::to_string(i);
134 args_.AddObject(name,
135 absl::make_unique<Texture2DDescriptor>(std::move(desc)));
136 }
137 }
138 }
139
140 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWDI,T> & weights,bool weights_are_buffer)141 void ConvolutionTransposed::UploadWeights(
142 const tflite::gpu::Tensor<OHWDI, T>& weights, bool weights_are_buffer) {
143 const int dst_depth =
144 AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
145 const int src_depth = DivideRoundUp(weights.shape.i, 4);
146 const int kernel_x = weights.shape.w;
147 const int kernel_y = weights.shape.h;
148 const int kernel_z = weights.shape.d;
149
150 const int elements_count =
151 kernel_x * kernel_y * kernel_z * src_depth * dst_depth * 4;
152 const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
153
154 const int float4_size = f32_weights ? 16 : 8;
155 std::vector<uint8_t> data(float4_size * elements_count);
156
157 if (f32_weights) {
158 float4* ptr = reinterpret_cast<float4*>(data.data());
159 if (weights_are_buffer) {
160 RearrangeWeightsToODHWIOGroupI4O4(weights, block_size_.w,
161 absl::MakeSpan(ptr, elements_count));
162 } else {
163 RearrangeWeightsToI4DHWIOOGroupO4(weights, block_size_.w,
164 absl::MakeSpan(ptr, elements_count));
165 }
166 } else {
167 half4* ptr = reinterpret_cast<half4*>(data.data());
168 if (weights_are_buffer) {
169 RearrangeWeightsToODHWIOGroupI4O4(weights, block_size_.w,
170 absl::MakeSpan(ptr, elements_count));
171 } else {
172 RearrangeWeightsToI4DHWIOOGroupO4(weights, block_size_.w,
173 absl::MakeSpan(ptr, elements_count));
174 }
175 }
176
177 if (weights_are_buffer) {
178 BufferDescriptor desc;
179 desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
180 desc.element_size = 16;
181 desc.size = float4_size * elements_count;
182 desc.data = std::move(data);
183 args_.AddObject("weights",
184 absl::make_unique<BufferDescriptor>(std::move(desc)));
185 } else {
186 int texture_width = dst_depth;
187 int texture_height = src_depth * kernel_x * kernel_y * kernel_z;
188 int sub_size = float4_size * texture_width * texture_height;
189 for (int i = 0; i < 4; ++i) {
190 Texture2DDescriptor desc;
191 desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
192 desc.size = int2(texture_width, texture_height);
193 desc.data.resize(sub_size);
194 memcpy(desc.data.data(), data.data() + sub_size * i, sub_size);
195 const std::string name = "weights" + std::to_string(i);
196 args_.AddObject(name,
197 absl::make_unique<Texture2DDescriptor>(std::move(desc)));
198 }
199 }
200 }
201
202 ConvolutionTransposed CreateConvolutionTransposed(
203 const GpuInfo& gpu_info, const OperationDef& definition,
204 const ConvolutionTransposedAttributes& attr);
205
206 ConvolutionTransposed CreateConvolutionTransposed3D(
207 const GpuInfo& gpu_info, const OperationDef& definition,
208 const ConvolutionTransposed3DAttributes& attr);
209
210 ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
211 const GpuInfo& gpu_info, const OperationDef& definition,
212 const ConvolutionTransposedAttributes& attr);
213
214 } // namespace gpu
215 } // namespace tflite
216
217 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
218