/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"

#include <cmath>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

namespace tflite {
namespace gpu {
namespace {
// Matrices for Winograd transformations were computed with the method
// described here: https://openreview.net/pdf?id=H1ZaRZVKg
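//
// Builds the transposed (height x width) Winograd transform matrix. Column j
// holds the powers p_j^0 .. p_j^(height-1) of the j-th interpolation point;
// the points are 0, +/-sqrt(2)/2, +/-sqrt(2), ..., plus the point at infinity
// encoded homogeneously in the last column (px = 1, py = 0).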
std::vector<float> GetTransposedMatrixForWinograd(int width, int height) {
  const float kDelta = std::sqrt(2.0f) / 2.0f;
  std::vector<float> px(width);

  px[0] = 0.0f;
  const int points_count = (width - 1) / 2;
  for (int i = 0; i < points_count; ++i) {
    px[i * 2 + 1] = kDelta * (i + 1.0f);
    px[i * 2 + 2] = -kDelta * (i + 1.0f);
  }
  px[width - 1] = 1.0f;

  std::vector<float> py(width, 1.0f);
  py[width - 1] = 0.0f;

  std::vector<float> result(height * width);
  for (int y = 0; y < width; ++y) {
    for (int x = 0; x < height; ++x) {
      result[x * width + y] =
          std::pow(px[y], 1.0f * x) * std::pow(py[y], (height - 1.0f) - x);
    }
  }
  return result;
}

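// Inverts the square (rank x rank) transform matrix with Gauss-Jordan
// elimination. The first and last columns of this matrix are already unit
// vectors with unit pivots, so only the interior pivots are processed.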
std::vector<float> GetInversedMatrixForWinograd(int rank) {
  auto matrix = GetTransposedMatrixForWinograd(rank, rank);
  std::vector<float> inverted(rank * rank, 0.0f);
  for (int i = 0; i < rank; ++i) {
    inverted[i * rank + i] = 1.0f;
  }

  for (int i = 1; i < rank - 1; ++i) {
    float inv_t = 1.0f / matrix[i * rank + i];
    for (int x = i; x < rank; ++x) {
      matrix[i * rank + x] *= inv_t;
    }
    for (int x = 0; x < rank; ++x) {
      inverted[i * rank + x] *= inv_t;
    }

    for (int y = 0; y < rank; ++y) {
      if (y == i) continue;
      float t = matrix[y * rank + i];
      for (int x = i; x < rank; ++x) {
        matrix[y * rank + x] -= t * matrix[i * rank + x];
      }
      for (int x = 0; x < rank; ++x) {
        inverted[y * rank + x] -= t * inverted[i * rank + x];
      }
    }
  }

  return inverted;
}

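// Plain row-major matrix product: a_mat is m x n, b_mat is n x k, and the
// returned matrix is m x k.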
std::vector<float> Multiply(const std::vector<float>& a_mat,
                            const std::vector<float>& b_mat, int m, int n,
                            int k) {
  std::vector<float> result(m * k);
  for (int y = 0; y < m; ++y) {
    for (int x = 0; x < k; ++x) {
      float sum = 0.0f;
      for (int i = 0; i < n; ++i) {
        sum += a_mat[y * n + i] * b_mat[i * k + x];
      }
      result[y * k + x] = sum;
    }
  }
  return result;
}
}  // namespace

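// A^T for the F(4x4, 3x3) Winograd transform: a 4x6 matrix that maps
// transformed 6x6 tiles back to 4x4 output tiles.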
std::vector<float> AtMatrixForWinograd4x4To6x6() {
  return GetTransposedMatrixForWinograd(6, 4);
}

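// B^T for the F(4x4, 3x3) Winograd transform: the 6x6 input-tile transform,
// obtained as the inverse of the full 6x6 point matrix.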
std::vector<float> BtMatrixForWinograd4x4To6x6() {
  return GetInversedMatrixForWinograd(6);
}

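// Converts each 3x3 spatial filter of src_weights into its 6x6
// Winograd-domain equivalent G * w * G^T, independently for every
// (output channel, input channel) pair.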
void RearrangeWeightsToWinograd4x4To6x6Weights(
    const Tensor<OHWI, DataType::FLOAT32>& src_weights,
    Tensor<OHWI, DataType::FLOAT32>* dst_weights) {
  OHWI dst_shape;
  dst_shape.o = src_weights.shape.o;
  dst_shape.h = 6;
  dst_shape.w = 6;
  dst_shape.i = src_weights.shape.i;
  dst_weights->shape = dst_shape;
  dst_weights->data.resize(dst_shape.DimensionsProduct());

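  // gt_mat is G^T (3x6); g_mat is its transpose G (6x3).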
  auto gt_mat = GetTransposedMatrixForWinograd(6, 3);
  std::vector<float> g_mat(gt_mat.size());
  for (int y = 0; y < 3; ++y) {
    for (int x = 0; x < 6; ++x) {
      g_mat[x * 3 + y] = gt_mat[y * 6 + x];
    }
  }

  for (int d = 0; d < src_weights.shape.o; ++d) {
    for (int s = 0; s < src_weights.shape.i; ++s) {
      std::vector<float> in_vals(9);
      for (int y = 0; y < 3; ++y) {
        for (int x = 0; x < 3; ++x) {
          const int f_index = src_weights.shape.LinearIndex({d, y, x, s});
          in_vals[y * 3 + x] = src_weights.data[f_index];
        }
      }

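      // 6x6 transformed filter tile: (G * w) * G^T.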
      auto temp_vals = Multiply(g_mat, in_vals, 6, 3, 3);
      auto out_vals = Multiply(temp_vals, gt_mat, 6, 3, 6);
      for (int y = 0; y < 6; ++y) {
        for (int x = 0; x < 6; ++x) {
          const int f_index = dst_shape.LinearIndex({d, y, x, s});
          dst_weights->data[f_index] = out_vals[y * 6 + x];
        }
      }
    }
  }
}

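// Winograd F(4x4, 3x3) applies only to 3x3 kernels with unit stride and unit
// dilation.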
bool IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes& attr) {
  return attr.weights.shape.w == 3 && attr.weights.shape.h == 3 &&
         attr.dilations == HW(1, 1) && attr.strides == HW(1, 1);
}

}  // namespace gpu
}  // namespace tflite