• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONV_BUFFER_1X1_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONV_BUFFER_1X1_H_
18 
19 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
20 #include "tensorflow/lite/delegates/gpu/common/operations.h"
21 #include "tensorflow/lite/delegates/gpu/common/shape.h"
22 #include "tensorflow/lite/delegates/gpu/common/status.h"
23 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
24 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
25 #include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
26 #include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
28 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/types.h"
30 #include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
31 
32 namespace tflite {
33 namespace gpu {
34 
35 class ConvBuffer1x1 : public GPUOperation {
36  public:
37   ConvBuffer1x1() = default;
38 
39   // Move only
40   ConvBuffer1x1(ConvBuffer1x1&& operation);
41   ConvBuffer1x1& operator=(ConvBuffer1x1&& operation);
42   ConvBuffer1x1(const ConvBuffer1x1&) = delete;
43   ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete;
44 
45   void GetPossibleKernelWorkGroups(
46       TuningType tuning_type, const GpuInfo& gpu_info,
47       const KernelInfo& kernel_info,
48       std::vector<int3>* work_groups) const override;
49   int3 GetGridSize() const override;
50 
GetWeightsDescription()51   WeightsDescription GetWeightsDescription() const {
52     WeightsDescription desc;
53     desc.layout = WeightsLayout::kOSpatialIOGroupI4O4;
54     desc.output_group_size = conv_params_.block_size.z;
55     return desc;
56   }
57 
58   struct ConvParams {
59     int3 block_size = int3(1, 1, 1);
60     int element_size = 4;  // can be 4, 8 or 16
61 
62     // By default in 2d convolution we have the same weights for WH dims, but in
63     // some cases we need separate weights for H dimension and convolution
64     // kernel requires very small modifications to support it.
65     bool different_weights_for_height = false;
66   };
67 
68  private:
69   ConvBuffer1x1(const OperationDef& definition, const ConvParams& conv_params,
70                 const GpuInfo& gpu_info);
71   friend ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info,
72                                            const OperationDef& definition,
73                                            const Convolution2DAttributes& attr,
74                                            const BHWC* shape);
75   friend ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info,
76                                            const OperationDef& definition,
77                                            const FullyConnectedAttributes& attr,
78                                            const BHWC* shape);
79   friend ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(
80       const GpuInfo& gpu_info, const OperationDef& definition,
81       const Convolution2DAttributes& attr, const BHWC* shape);
82   friend ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(
83       const GpuInfo& gpu_info, const OperationDef& definition,
84       const Convolution2DAttributes& attr, const BHWC& weights_shape,
85       const BHWC* dst_shape);
86 
87   template <DataType T>
88   void UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
89                   const tflite::gpu::Tensor<Linear, T>& biases);
90   template <DataType T>
91   void UploadDataForWinograd4x4To6x6(
92       const tflite::gpu::Tensor<OHWI, T>& weights);
93 
94   template <DataType T>
95   void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights);
96 
97   template <DataType T>
98   void UploadBiases(const tflite::gpu::Tensor<Linear, T>& biases);
99 
100   std::string GenerateConvBuffer1x1(
101       const OperationDef& op_def, const ConvBuffer1x1::ConvParams& conv_params,
102       const GpuInfo& gpu_info, Arguments* args);
103 
104   ConvParams conv_params_;
105 };
106 
107 template <DataType T>
UploadData(const tflite::gpu::Tensor<OHWI,T> & weights,const tflite::gpu::Tensor<Linear,T> & biases)108 void ConvBuffer1x1::UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
109                                const tflite::gpu::Tensor<Linear, T>& biases) {
110   UploadWeights(weights);
111   UploadBiases(biases);
112 }
113 
114 template <DataType T>
UploadDataForWinograd4x4To6x6(const tflite::gpu::Tensor<OHWI,T> & weights)115 void ConvBuffer1x1::UploadDataForWinograd4x4To6x6(
116     const tflite::gpu::Tensor<OHWI, T>& weights) {
117   tflite::gpu::Tensor<OHWI, T> wino_weights;
118   RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights);
119   UploadWeights(wino_weights);
120   tflite::gpu::Tensor<Linear, DataType::FLOAT32> bias;
121   bias.shape = Linear(weights.shape.o);
122   bias.data.resize(weights.shape.o, 0.0f);
123   UploadBiases(bias);
124 }
125 
126 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWI,T> & weights)127 void ConvBuffer1x1::UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights) {
128   const int dst_depth = DivideRoundUp(weights.shape.o, 4);
129   const int src_depth = DivideRoundUp(weights.shape.i, 4);
130 
131   const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
132   const int float4_size = f32_weights ? sizeof(float4) : sizeof(half4);
133 
134   const int dst_depth_aligned = AlignByN(dst_depth, conv_params_.block_size.z);
135   const int elements_count =
136       weights.shape.h * weights.shape.w * src_depth * dst_depth_aligned * 4;
137 
138   BufferDescriptor desc;
139   desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
140   desc.element_size = 16;
141   desc.memory_type = MemoryType::GLOBAL;
142   desc.size = float4_size * elements_count;
143   desc.data.resize(desc.size);
144 
145   if (f32_weights) {
146     float4* ptr = reinterpret_cast<float4*>(desc.data.data());
147     RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z,
148                                      absl::MakeSpan(ptr, elements_count));
149   } else {
150     half4* ptr = reinterpret_cast<half4*>(desc.data.data());
151     RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z,
152                                      absl::MakeSpan(ptr, elements_count));
153   }
154 
155   args_.AddObject("weights",
156                   absl::make_unique<BufferDescriptor>(std::move(desc)));
157 }
158 
159 template <DataType T>
UploadBiases(const tflite::gpu::Tensor<Linear,T> & biases)160 void ConvBuffer1x1::UploadBiases(const tflite::gpu::Tensor<Linear, T>& biases) {
161   TensorLinearDescriptor desc;
162   desc.storage_type = LinearStorageType::BUFFER;
163   desc.element_type = definition_.GetDataType();
164   int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4;
165   desc.UploadLinearData(biases, depth);
166   args_.AddObject("biases",
167                   absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
168 }
169 
170 bool IsConvBuffer1x1Supported(const OperationDef& definition,
171                               const Convolution2DAttributes& attr);
172 
173 bool IsConvBuffer1x1Supported(const OperationDef& definition,
174                               const BHWC& weights_shape,
175                               const Convolution2DAttributes& attr);
176 
177 ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info,
178                                   const OperationDef& definition,
179                                   const Convolution2DAttributes& attr,
180                                   const BHWC* shape = nullptr);
181 
182 ConvBuffer1x1 CreateConvBuffer1x1(const GpuInfo& gpu_info,
183                                   const OperationDef& definition,
184                                   const FullyConnectedAttributes& attr,
185                                   const BHWC* shape = nullptr);
186 
187 ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(
188     const GpuInfo& gpu_info, const OperationDef& definition,
189     const Convolution2DAttributes& attr, const BHWC& weights_shape,
190     const BHWC* dst_shape = nullptr);
191 
192 ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(
193     const GpuInfo& gpu_info, const OperationDef& definition,
194     const Convolution2DAttributes& attr, const BHWC* shape = nullptr);
195 
196 }  // namespace gpu
197 }  // namespace tflite
198 
199 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONV_BUFFER_1X1_H_
200