• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
18 
19 #include <cstdint>
20 #include <vector>
21 
22 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
23 #include "tensorflow/lite/delegates/gpu/common/operations.h"
24 #include "tensorflow/lite/delegates/gpu/common/shape.h"
25 #include "tensorflow/lite/delegates/gpu/common/status.h"
26 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
28 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
30 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
33 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
34 #include "tensorflow/lite/delegates/gpu/common/types.h"
35 
36 namespace tflite {
37 namespace gpu {
38 
39 class ConvolutionTransposed : public GPUOperation {
40  public:
41   ConvolutionTransposed() = default;
42   void GetPossibleKernelWorkGroups(
43       TuningType tuning_type, const GpuInfo& gpu_info,
44       const KernelInfo& kernel_info,
45       std::vector<int3>* work_groups) const override;
46   absl::Status BindArguments(ArgumentsBinder* args) override;
47   int3 GetGridSize() const override;
48 
49   // Move only
50   ConvolutionTransposed(ConvolutionTransposed&& operation) = default;
51   ConvolutionTransposed& operator=(ConvolutionTransposed&& operation) = default;
52   ConvolutionTransposed(const ConvolutionTransposed&) = delete;
53   ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
54 
GetWeightsDescription()55   WeightsDescription GetWeightsDescription() const {
56     WeightsDescription desc;
57     desc.layout = weights_layout_;
58     desc.output_group_size = block_size_.w;
59     return desc;
60   }
61 
62  private:
63   friend ConvolutionTransposed CreateConvolutionTransposed(
64       const GpuInfo& gpu_info, const OperationDef& definition,
65       const ConvolutionTransposedAttributes& attr);
66   friend ConvolutionTransposed CreateConvolutionTransposed3D(
67       const GpuInfo& gpu_info, const OperationDef& definition,
68       const ConvolutionTransposed3DAttributes& attr);
69   friend ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
70       const GpuInfo& gpu_info, const OperationDef& definition,
71       const ConvolutionTransposedAttributes& attr);
72 
73   ConvolutionTransposed(const OperationDef& definition,
74                         const ConvolutionTransposedAttributes& attr,
75                         const GpuInfo& gpu_info, bool weights_are_buffer);
76   ConvolutionTransposed(const OperationDef& definition,
77                         const ConvolutionTransposed3DAttributes& attr,
78                         const GpuInfo& gpu_info, bool weights_are_buffer);
79 
80   template <DataType T>
81   void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
82                      bool weights_are_buffer);
83 
84   template <DataType T>
85   void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
86                      bool weights_are_buffer);
87 
88   std::string GenerateConvolutionTransposedCode(const OperationDef& op_def,
89                                                 const GpuInfo& gpu_info,
90                                                 bool weights_are_buffer,
91                                                 const int4& block_size);
92   int4 stride_;
93   int4 block_size_ = int4(1, 1, 1, 1);  // WHDS
94   WeightsLayout weights_layout_;
95 };
96 
97 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWI,T> & weights,bool weights_are_buffer)98 void ConvolutionTransposed::UploadWeights(
99     const tflite::gpu::Tensor<OHWI, T>& weights, bool weights_are_buffer) {
100   const int flt_count =
101       GetTotalElementsCountForLayout(GetWeightsDescription(), weights.shape);
102   DataType weights_type = definition_.precision == CalculationsPrecision::F32
103                               ? DataType::FLOAT32
104                               : DataType::FLOAT16;
105 
106   std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_type));
107   RearrangeWeights(weights, GetWeightsDescription(), weights_type,
108                    absl::MakeSpan(weights_data));
109 
110   if (weights_are_buffer) {
111     BufferDescriptor desc;
112     desc.element_type = weights_type;
113     desc.element_size = 16;
114     desc.size = weights_data.size();
115     desc.data = std::move(weights_data);
116     args_.AddObject("weights",
117                     absl::make_unique<BufferDescriptor>(std::move(desc)));
118   } else {
119     const int dst_depth =
120         AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
121     const int src_depth = DivideRoundUp(weights.shape.i, 4);
122     const int kernel_x = weights.shape.w;
123     const int kernel_y = weights.shape.h;
124     int texture_width = dst_depth;
125     int texture_height = src_depth * kernel_x * kernel_y;
126     int sub_size = SizeOf(weights_type) * 4 * texture_width * texture_height;
127     for (int i = 0; i < 4; ++i) {
128       Texture2DDescriptor desc;
129       desc.element_type = weights_type;
130       desc.size = int2(texture_width, texture_height);
131       desc.data.resize(sub_size);
132       memcpy(desc.data.data(), weights_data.data() + sub_size * i, sub_size);
133       const std::string name = "weights" + std::to_string(i);
134       args_.AddObject(name,
135                       absl::make_unique<Texture2DDescriptor>(std::move(desc)));
136     }
137   }
138 }
139 
140 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWDI,T> & weights,bool weights_are_buffer)141 void ConvolutionTransposed::UploadWeights(
142     const tflite::gpu::Tensor<OHWDI, T>& weights, bool weights_are_buffer) {
143   const int flt_count =
144       GetTotalElementsCountForLayout(GetWeightsDescription(), weights.shape);
145   DataType weights_type = definition_.precision == CalculationsPrecision::F32
146                               ? DataType::FLOAT32
147                               : DataType::FLOAT16;
148 
149   std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_type));
150   RearrangeWeights(weights, GetWeightsDescription(), weights_type,
151                    absl::MakeSpan(weights_data));
152 
153   if (weights_are_buffer) {
154     BufferDescriptor desc;
155     desc.element_type = weights_type;
156     desc.element_size = 16;
157     desc.size = weights_data.size();
158     desc.data = std::move(weights_data);
159     args_.AddObject("weights",
160                     absl::make_unique<BufferDescriptor>(std::move(desc)));
161   } else {
162     const int dst_depth =
163         AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
164     const int src_depth = DivideRoundUp(weights.shape.i, 4);
165     const int kernel_x = weights.shape.w;
166     const int kernel_y = weights.shape.h;
167     const int kernel_z = weights.shape.d;
168     int texture_width = dst_depth;
169     int texture_height = src_depth * kernel_x * kernel_y * kernel_z;
170     int sub_size = SizeOf(weights_type) * 4 * texture_width * texture_height;
171     for (int i = 0; i < 4; ++i) {
172       Texture2DDescriptor desc;
173       desc.element_type = weights_type;
174       desc.size = int2(texture_width, texture_height);
175       desc.data.resize(sub_size);
176       memcpy(desc.data.data(), weights_data.data() + sub_size * i, sub_size);
177       const std::string name = "weights" + std::to_string(i);
178       args_.AddObject(name,
179                       absl::make_unique<Texture2DDescriptor>(std::move(desc)));
180     }
181   }
182 }
183 
184 ConvolutionTransposed CreateConvolutionTransposed(
185     const GpuInfo& gpu_info, const OperationDef& definition,
186     const ConvolutionTransposedAttributes& attr);
187 
188 ConvolutionTransposed CreateConvolutionTransposed3D(
189     const GpuInfo& gpu_info, const OperationDef& definition,
190     const ConvolutionTransposed3DAttributes& attr);
191 
192 ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
193     const GpuInfo& gpu_info, const OperationDef& definition,
194     const ConvolutionTransposedAttributes& attr);
195 
196 }  // namespace gpu
197 }  // namespace tflite
198 
199 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
200