1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/common/tasks/convolution_transposed_thin.h"
17
18 #include <string>
19 #include <utility>
20 #include <vector>
21
22 #include "tensorflow/lite/delegates/gpu/common/task/work_group_picking.h"
23
24 namespace tflite {
25 namespace gpu {
26
ConvolutionTransposedThin(const OperationDef & definition,const ConvolutionTransposedAttributes & attr,const GpuInfo & gpu_info)27 ConvolutionTransposedThin::ConvolutionTransposedThin(
28 const OperationDef& definition, const ConvolutionTransposedAttributes& attr,
29 const GpuInfo& gpu_info)
30 : GPUOperation(definition) {
31 code_ = GenerateConvolutionTransposedCode(
32 definition_, DivideRoundUp(attr.weights.shape.i, 4), attr.weights.shape.o,
33 int2(attr.weights.shape.w, attr.weights.shape.h));
34 if (definition_.precision == CalculationsPrecision::F16 &&
35 gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx()) {
36 compiler_options_.push_back(CompilerOptions::kAdrenoFullSimd);
37 }
38 }
39
// Move constructor: transfers all operation state (generated code, compiler
// options, bound tensors) by delegating to the GPUOperation move constructor.
ConvolutionTransposedThin::ConvolutionTransposedThin(
    ConvolutionTransposedThin&& operation)
    : GPUOperation(std::move(operation)) {}
43
operator =(ConvolutionTransposedThin && operation)44 ConvolutionTransposedThin& ConvolutionTransposedThin::operator=(
45 ConvolutionTransposedThin&& operation) {
46 if (this != &operation) {
47 GPUOperation::operator=(std::move(operation));
48 }
49 return *this;
50 }
51
// Emits the shader source for a "thin" transposed convolution where
// stride == kernel size and there is no padding, so each source pixel
// independently produces a kernel_size.x * kernel_size.y tile of output.
//
// Args:
//   op_def:       operation definition (precision, tensor descriptors).
//   src_depth:    number of 4-channel slices of the input.
//   dst_channels: output channel count; must be <= 4 so one accumulator
//                 vector per output pixel suffices.
//   kernel_size:  spatial kernel size (== stride for this operation).
// Returns: the complete shader source as a string.
std::string ConvolutionTransposedThin::GenerateConvolutionTransposedCode(
    const OperationDef& op_def, int src_depth, int dst_channels,
    const int2& kernel_size) {
  AddSrcTensor("src_tensor", op_def.src_tensors[0]);
  AddDstTensor("dst_tensor", op_def.dst_tensors[0]);

  // With a single output channel the accumulator is scalar, so the first
  // component is accessed with no suffix; otherwise ".x".
  const std::string channel_x = dst_channels == 1 ? "" : ".x";
  const std::vector<std::string> postfix = {channel_x, ".y", ".z", ".w"};
  const std::vector<std::string> channel = {".x", ".y", ".z", ".w"};

  // "float" / "half" for 1 channel, "floatN" / "halfN" for N channels.
  const std::string type_postfix =
      dst_channels == 1 ? "" : std::to_string(dst_channels);

  std::string accum_type;

  switch (op_def.precision) {
    case CalculationsPrecision::F32:
    case CalculationsPrecision::F32_F16:
      accum_type = "float" + type_postfix;
      break;
    case CalculationsPrecision::F16:
      accum_type = "half" + type_postfix;
      break;
  }

  std::string c;
  c += "MAIN_FUNCTION($0) {\n";
  if (op_def.IsBatchSupported()) {
    // Batch is folded into global id 0; unpack it and bind the batch ref.
    c += "  int linear_id = GLOBAL_ID_0;\n";
    c += "  int X = linear_id / args.dst_tensor.Batch();\n";
    c += "  int B = linear_id % args.dst_tensor.Batch();\n";
    c += "  args.dst_tensor.SetBatchRef(B);\n";
    c += "  args.src_tensor.SetBatchRef(B);\n";
  } else {
    c += "  int X = GLOBAL_ID_0;\n";
  }
  c += "  int Y = GLOBAL_ID_1;\n";
  // One work-item per *source* pixel; bounds are checked against the source.
  c += "  if (X >= args.src_tensor.Width() || Y >= args.src_tensor.Height()) "
       "return;\n";
  // Per-work-item accumulator tile: one vector per output position covered
  // by this source pixel.
  c += "  " + accum_type + " r[" + std::to_string(kernel_size.y) + "][" +
       std::to_string(kernel_size.x) + "];\n";
  // First input slice initializes the accumulators (plain assignment).
  c += "  {\n";
  c += "  FLT4 src = args.src_tensor.Read(X, Y, 0);\n";
  // `index` walks the flattened weights buffer across all loops below and
  // finally points at the bias values; the upload order must match.
  int index = 0;
  for (int y = 0; y < kernel_size.y; ++y) {
    for (int x = 0; x < kernel_size.x; ++x) {
      std::string r_s =
          "  r[" + std::to_string(y) + "][" + std::to_string(x) + "]";
      for (int d = 0; d < dst_channels; ++d) {
        c += r_s + postfix[d] + " = dot(src, args.weights.Read(" +
             std::to_string(index) + "));\n";
        index++;
      }
    }
  }
  c += "  }\n";
  // Remaining input slices accumulate (+=) into the same tile.
  for (int i = 1; i < src_depth; ++i) {
    // The condition is always true (X >= 0 > -i); it only scopes `src` so
    // the compiler can reuse registers between slices.
    c += "  if (X > " + std::to_string(-i) +
         ") {  // always true, to reduce registers usage\n";
    c +=
        "  FLT4 src = args.src_tensor.Read(X, Y, " + std::to_string(i) + ");\n";
    for (int y = 0; y < kernel_size.y; ++y) {
      for (int x = 0; x < kernel_size.x; ++x) {
        std::string r_s =
            "  r[" + std::to_string(y) + "][" + std::to_string(x) + "]";
        for (int d = 0; d < dst_channels; ++d) {
          c += r_s + postfix[d] + " += dot(src, args.weights.Read(" +
               std::to_string(index) + "));\n";
          index++;
        }
      }
    }
    c += "  }\n";
  }
  // Map the source pixel to the top-left corner of its output tile
  // (stride == kernel size, no padding).
  c += "  X *= " + std::to_string(kernel_size.x) + ";\n";
  c += "  Y *= " + std::to_string(kernel_size.y) + ";\n";
  for (int y = 0; y < kernel_size.y; ++y) {
    for (int x = 0; x < kernel_size.x; ++x) {
      const std::string x_coord = "X + " + std::to_string(x);
      const std::string y_coord = "Y + " + std::to_string(y);
      c += "  if (" + x_coord + " < args.dst_tensor.Width() && " + y_coord +
           " < args.dst_tensor.Height()) {\n";
      // `index` now points past the weights; this read fetches the bias,
      // which seeds `result` before the accumulators are added.
      c += "    FLT4 result = args.weights.Read(" + std::to_string(index) +
           ");\n";
      for (int d = 0; d < dst_channels; ++d) {
        c += "    result" + channel[d] + " += r[" + std::to_string(y) + "][" +
             std::to_string(x) + "]" + postfix[d] + ";\n";
      }
      c += "    args.dst_tensor.Write(result, " + x_coord + ", " + y_coord +
           ", 0);\n";
      c += "  }\n";
    }
  }
  c += "}\n";

  return c;
}
149
GetGridSize() const150 int3 ConvolutionTransposedThin::GetGridSize() const {
151 const int grid_x = src_[0]->Width() * dst_[0]->Batch();
152 const int grid_y = src_[0]->Height();
153 const int grid_z = 1;
154 return int3(grid_x, grid_y, grid_z);
155 }
156
IsConvolutionTransposedThinSupported(const ConvolutionTransposedAttributes & attr)157 bool IsConvolutionTransposedThinSupported(
158 const ConvolutionTransposedAttributes& attr) {
159 return attr.weights.shape.o <= 4 && attr.weights.shape.w == attr.stride.w &&
160 attr.weights.shape.h == attr.stride.h &&
161 attr.padding.prepended.w == 0 && attr.padding.prepended.h == 0 &&
162 attr.padding.appended.w == 0 && attr.padding.appended.h == 0;
163 }
164
CreateConvolutionTransposedThin(const GpuInfo & gpu_info,const OperationDef & definition,const ConvolutionTransposedAttributes & attr)165 ConvolutionTransposedThin CreateConvolutionTransposedThin(
166 const GpuInfo& gpu_info, const OperationDef& definition,
167 const ConvolutionTransposedAttributes& attr) {
168 ConvolutionTransposedThin result(definition, attr, gpu_info);
169 result.UploadData(attr.weights, attr.bias);
170 return result;
171 }
172
173 } // namespace gpu
174 } // namespace tflite
175