1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_DEPTHWISE_CONV_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_DEPTHWISE_CONV_H_
18
19 #include <vector>
20
21 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
22 #include "tensorflow/lite/delegates/gpu/common/operations.h"
23 #include "tensorflow/lite/delegates/gpu/common/shape.h"
24 #include "tensorflow/lite/delegates/gpu/common/status.h"
25 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
26 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
28 #include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
30 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
31 #include "tensorflow/lite/delegates/gpu/common/types.h"
32
33 namespace tflite {
34 namespace gpu {
35
36 template <DataType S, typename T>
RearrangeWeightsForDWConv2D(const tflite::gpu::Tensor<OHWI,S> & weights,absl::Span<T> dst)37 void RearrangeWeightsForDWConv2D(const tflite::gpu::Tensor<OHWI, S>& weights,
38 absl::Span<T> dst) {
39 const int dst_channels = weights.shape.i * weights.shape.o;
40 const int dst_depth = DivideRoundUp(dst_channels, 4);
41 const int kernel_x = weights.shape.w;
42 const int kernel_y = weights.shape.h;
43
44 int counter = 0;
45 for (int d = 0; d < dst_depth; ++d) {
46 for (int y = 0; y < kernel_y; ++y) {
47 for (int x = 0; x < kernel_x; ++x) {
48 T filter_val;
49 for (int i = 0; i < 4; ++i) {
50 const int d_ch = d * 4 + i;
51 if (d_ch < dst_channels) {
52 const int f_index = weights.shape.LinearIndex(
53 {d_ch % weights.shape.o, y, x, d_ch / weights.shape.o});
54 filter_val[i] = weights.data[f_index];
55 } else {
56 filter_val[i] = 0.0f;
57 }
58 }
59 dst[counter++] = filter_val;
60 }
61 }
62 }
63 }
64
65 template <DataType T>
UploadWeightsForDWConv2D(const tflite::gpu::Tensor<OHWI,T> & weights,bool weights_are_buffer,CalculationsPrecision precision,GPUOperation * op)66 void UploadWeightsForDWConv2D(const tflite::gpu::Tensor<OHWI, T>& weights,
67 bool weights_are_buffer,
68 CalculationsPrecision precision,
69 GPUOperation* op) {
70 const int dst_channels = weights.shape.i * weights.shape.o;
71 const int dst_slices = DivideRoundUp(dst_channels, 4);
72 const int kernel_x = weights.shape.w;
73 const int kernel_y = weights.shape.h;
74
75 const int elements_count = kernel_x * kernel_y * dst_slices;
76
77 const bool fp32_weights = precision == CalculationsPrecision::F32;
78 const int float4_size = fp32_weights ? 16 : 8;
79
80 std::vector<uint8_t> data(float4_size * elements_count);
81
82 if (fp32_weights) {
83 float4* ptr = reinterpret_cast<float4*>(data.data());
84 RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
85 } else {
86 half4* ptr = reinterpret_cast<half4*>(data.data());
87 RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
88 }
89
90 if (weights_are_buffer) {
91 BufferDescriptor desc;
92 desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
93 desc.element_size = 4;
94 desc.size = float4_size * elements_count;
95 desc.data = std::move(data);
96 op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc));
97 } else {
98 Texture2DDescriptor desc;
99 desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
100 desc.size = int2(kernel_x * kernel_y, dst_slices);
101 desc.data = std::move(data);
102 op->args_.AddObject("weights",
103 absl::make_unique<Texture2DDescriptor>(desc));
104 }
105 }
106
107 template <DataType S, typename T>
RearrangeWeightsForDWConv3D(const tflite::gpu::Tensor<OHWDI,S> & weights,absl::Span<T> dst)108 void RearrangeWeightsForDWConv3D(const tflite::gpu::Tensor<OHWDI, S>& weights,
109 absl::Span<T> dst) {
110 const int dst_channels = weights.shape.i * weights.shape.o;
111 const int dst_slices = DivideRoundUp(dst_channels, 4);
112 const int kernel_x = weights.shape.w;
113 const int kernel_y = weights.shape.h;
114 const int kernel_z = weights.shape.d;
115
116 int counter = 0;
117 for (int d = 0; d < dst_slices; ++d) {
118 for (int z = 0; z < kernel_z; ++z) {
119 for (int y = 0; y < kernel_y; ++y) {
120 for (int x = 0; x < kernel_x; ++x) {
121 T filter_val;
122 for (int i = 0; i < 4; ++i) {
123 const int d_ch = d * 4 + i;
124 if (d_ch < dst_channels) {
125 const int f_index = weights.shape.LinearIndex(
126 {d_ch % weights.shape.o, y, x, z, d_ch / weights.shape.o});
127 filter_val[i] = weights.data[f_index];
128 } else {
129 filter_val[i] = 0.0f;
130 }
131 }
132 dst[counter++] = filter_val;
133 }
134 }
135 }
136 }
137 }
138
139 template <DataType T>
UploadWeightsForDWConv3D(const tflite::gpu::Tensor<OHWDI,T> & weights,bool weights_are_buffer,CalculationsPrecision precision,GPUOperation * op)140 void UploadWeightsForDWConv3D(const tflite::gpu::Tensor<OHWDI, T>& weights,
141 bool weights_are_buffer,
142 CalculationsPrecision precision,
143 GPUOperation* op) {
144 const int dst_channels = weights.shape.i * weights.shape.o;
145 const int dst_slices = DivideRoundUp(dst_channels, 4);
146 const int kernel_x = weights.shape.w;
147 const int kernel_y = weights.shape.h;
148 const int kernel_z = weights.shape.d;
149
150 const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices;
151
152 const bool fp32_weights = precision == CalculationsPrecision::F32;
153 const int float4_size = fp32_weights ? 16 : 8;
154
155 std::vector<uint8_t> data(float4_size * elements_count);
156
157 if (fp32_weights) {
158 float4* ptr = reinterpret_cast<float4*>(data.data());
159 RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
160 } else {
161 half4* ptr = reinterpret_cast<half4*>(data.data());
162 RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
163 }
164
165 if (weights_are_buffer) {
166 BufferDescriptor desc;
167 desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
168 desc.element_size = 4;
169 desc.size = float4_size * elements_count;
170 desc.data = std::move(data);
171 op->args_.AddObject("weights",
172 absl::make_unique<BufferDescriptor>(std::move(desc)));
173 } else {
174 Texture2DDescriptor desc;
175 desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
176 desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices);
177 desc.data = std::move(data);
178 op->args_.AddObject(
179 "weights", absl::make_unique<Texture2DDescriptor>(std::move(desc)));
180 }
181 }
182
// Declaration only; the definition is not part of this header.
// Takes the GPU description, the operation definition and the 2D depthwise
// convolution attributes by const reference and returns a GPUOperation by
// value.
// NOTE(review): presumably builds the operation with the weights taken from
// `attr` -- confirm against the implementation.
GPUOperation CreateDepthwiseConvolution2D(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const DepthwiseConvolution2DAttributes& attr);
186
// Declaration only; the definition is not part of this header.
// Takes the GPU description, the operation definition and the 2D depthwise
// convolution attributes by const reference and returns a GPUOperation by
// value.
// NOTE(review): the name suggests the weights are supplied at run time rather
// than read from `attr` -- confirm against the implementation.
GPUOperation CreateDepthwiseConvolution2DDynamicWeights(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const DepthwiseConvolution2DAttributes& attr);
190
// Declaration only; the definition is not part of this header.
// Takes the GPU description, the operation definition and the 3D depthwise
// convolution attributes by const reference and returns a GPUOperation by
// value.
// NOTE(review): presumably builds the operation with the weights taken from
// `attr` -- confirm against the implementation.
GPUOperation CreateDepthwiseConvolution3D(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const DepthwiseConvolution3DAttributes& attr);
194
195 } // namespace gpu
196 } // namespace tflite
197
198 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_DEPTHWISE_CONV_H_
199