• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h"
17 
18 #include <map>
19 #include <vector>
20 
21 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
22 #include "tensorflow/lite/delegates/gpu/common/operations.h"
23 #include "tensorflow/lite/delegates/gpu/common/shape.h"
24 #include "tensorflow/lite/delegates/gpu/common/types.h"
25 #include "tensorflow/lite/delegates/gpu/gl/workgroups/calculator.h"
26 
27 namespace tflite {
28 namespace gpu {
29 namespace gl {
30 namespace {
31 
// This code employs the results of the workgroup performance research
// (b/117291356).
34 
35 // Describes the ideal convolution for the specific operation case
36 // Case here means specific "kernel + strides" combination for specific
37 // operations type, not sizes of input and output tensors, they can be any.
38 struct IdealByCase {
ParamsAcceptedtflite::gpu::gl::__anon054730a20111::IdealByCase39   bool ParamsAccepted(OperationType in_op_type, HW in_kernel,
40                       HW in_strides) const {
41     return operation_type == in_op_type && kernel == in_kernel &&
42            strides == in_strides;
43   }
44   OperationType operation_type;
45   HW kernel;
46   HW strides;
47   uint3 ideal_workgroup;
48 };
49 
50 // Describes the ideal convolution for the type of operations. It means that
51 // any configuration of operation of this type will be working with top 10%
52 // performance with the particular GPU.
53 struct IdealByType {
ParamsAcceptedtflite::gpu::gl::__anon054730a20111::IdealByType54   bool ParamsAccepted(OperationType in_op_type) const {
55     return operation_type == in_op_type;
56   }
57   OperationType operation_type;
58   uint3 ideal_workgroup;
59 };
60 
61 // Describes ideal workgroups for the particular GPU model.
62 struct IdealWorkgroups {
63   std::vector<IdealByType> by_type;
64   std::vector<IdealByCase> by_case;
65 };
66 
// Lists of ideal workgroups obtained from the research mentioned above.
69 
70 // Ideal workgroups for Adreno 630.
71 std::vector<IdealByType>* kIdealByTypeAdreno630Ptr =
72     new std::vector<IdealByType>{
73         {OperationType::CONVOLUTION_2D, uint3(4, 8, 4)},
74         {OperationType::DEPTHWISE_CONVOLUTION, uint3(4, 4, 8)},
75     };
76 
77 std::vector<IdealByCase>* kIdealByCaseAdreno630Ptr =
78     new std::vector<IdealByCase>{
79         {OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 8, 4)},
80         {OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 4, 4)},
81         {OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
82          uint3(8, 4, 4)},
83         {OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
84          uint3(4, 4, 4)},
85     };
86 
87 // Ideal workgroups for Adreno 540.
88 std::vector<IdealByType>* kIdealByTypeAdreno540Ptr =
89     new std::vector<IdealByType>{
90         {OperationType::CONVOLUTION_2D, uint3(8, 2, 2)},
91         {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 8, 2)},
92     };
93 
94 std::vector<IdealByCase>* kIdealByCaseAdreno540Ptr =
95     new std::vector<IdealByCase>{
96         {OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 2, 8)},
97         {OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 2, 8)},
98         {OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
99          uint3(8, 4, 8)},
100         {OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
101          uint3(4, 4, 8)},
102     };
103 
104 // Ideal workgroups for Adreno 510.
105 std::vector<IdealByType>* kIdealByTypeAdreno510Ptr =
106     new std::vector<IdealByType>{
107         {OperationType::CONVOLUTION_2D, uint3(8, 4, 4)},
108         {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 4, 4)},
109     };
110 
111 std::vector<IdealByCase>* kIdealByCaseAdreno510Ptr =
112     new std::vector<IdealByCase>{
113         {OperationType::CONVOLUTION_2D, HW(1, 1), HW(1, 1), uint3(4, 2, 8)},
114         {OperationType::CONVOLUTION_2D, HW(3, 3), HW(2, 2), uint3(8, 2, 8)},
115         {OperationType::DEPTHWISE_CONVOLUTION, HW(1, 1), HW(1, 1),
116          uint3(8, 4, 8)},
117         {OperationType::DEPTHWISE_CONVOLUTION, HW(3, 3), HW(2, 2),
118          uint3(4, 4, 8)},
119     };
120 
121 // Ideal workgroups for Adreno 509.
122 std::vector<IdealByType>* kIdealByTypeAdreno509Ptr =
123     new std::vector<IdealByType>{
124         {OperationType::CONVOLUTION_2D, uint3(8, 4, 8)},
125         {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 8, 2)},
126     };
127 
128 // Ideal workgroups for Adreno 508, 506, 505, 418, 405
129 std::vector<IdealByType>* kIdealByTypeAdreno508Ptr =
130     new std::vector<IdealByType>{
131         {OperationType::CONVOLUTION_2D, uint3(8, 4, 8)},
132         {OperationType::DEPTHWISE_CONVOLUTION, uint3(8, 4, 8)},
133     };
134 std::vector<IdealByType>* kIdealByTypeAdreno506Ptr = kIdealByTypeAdreno508Ptr;
135 std::vector<IdealByType>* kIdealByTypeAdreno505Ptr = kIdealByTypeAdreno508Ptr;
136 std::vector<IdealByType>* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr;
137 std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr;
138 
139 // Put all ideal workgroups from the list together.
140 const std::map<AdrenoGpu, IdealWorkgroups>* kIdealAdrenoWorkgroupsInfoPtr =
141     new std::map<AdrenoGpu, IdealWorkgroups>{
142         {AdrenoGpu::kAdreno630,
143          {*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}},
144         {AdrenoGpu::kAdreno540, {*kIdealByTypeAdreno540Ptr, {}}},
145         {AdrenoGpu::kAdreno510,
146          {*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}},
147         {AdrenoGpu::kAdreno509, {*kIdealByTypeAdreno509Ptr, {}}},
148         {AdrenoGpu::kAdreno508, {*kIdealByTypeAdreno508Ptr, {}}},
149         {AdrenoGpu::kAdreno506, {*kIdealByTypeAdreno506Ptr, {}}},
150         {AdrenoGpu::kAdreno505, {*kIdealByTypeAdreno505Ptr, {}}},
151         {AdrenoGpu::kAdreno418, {*kIdealByTypeAdreno418Ptr, {}}},
152         {AdrenoGpu::kAdreno405, {*kIdealByTypeAdreno405Ptr, {}}},
153     };
154 
155 }  // namespace
156 
GetIdealWorkgroupIfPossible(const GpuInfo & gpu_info,OperationType op_type,HW kernel,HW strides,uint3 default_wg,OHWI workload)157 uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
158                                   OperationType op_type, HW kernel, HW strides,
159                                   uint3 default_wg, OHWI workload) {
160   // Research showed that ideal workgroup approach doesn't work well with
161   // convolutions, which have small amount of output channels or output
162   // height/width dimensions
163   if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg;
164 
165   if (!gpu_info.IsAdreno()) {
166     return default_wg;
167   }
168   auto adreno_gpu_version = gpu_info.adreno_info.adreno_gpu;
169 
170   // If GPU was investigated
171   if (!kIdealAdrenoWorkgroupsInfoPtr->count(adreno_gpu_version)) {
172     return default_wg;
173   }
174 
175   // Try to find the ideal workgroup by the specific operation case, cause they
176   // are expected to be better tuned than default "by type" cases
177   for (const auto& specific_case :
178        kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_case) {
179     if (specific_case.ParamsAccepted(op_type, kernel, strides)) {
180       return specific_case.ideal_workgroup;
181     }
182   }
183 
184   // Try to find the ideal workgroup by the operation type
185   for (const auto& default_case :
186        kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_type) {
187     if (default_case.ParamsAccepted(op_type)) {
188       return default_case.ideal_workgroup;
189     }
190   }
191 
192   // If no ideal workgroup is found, use the default workgroup suggested by each
193   // operation.
194   return default_wg;
195 }
196 
GetIdealWorkgroupIfPossible(const GpuInfo & gpu_info,OperationType op_type,HW kernel,HW strides,OHWI workload)197 uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
198                                   OperationType op_type, HW kernel, HW strides,
199                                   OHWI workload) {
200   return GetIdealWorkgroupIfPossible(gpu_info, op_type, kernel, strides,
201                                      kEmptyWorkgroupSize, workload);
202 }
203 
204 }  // namespace gl
205 }  // namespace gpu
206 }  // namespace tflite
207