1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_KERNELS_WINOGRAD_TRANSFORM_H_
17 #define TENSORFLOW_CORE_KERNELS_WINOGRAD_TRANSFORM_H_
18
19 #include "tensorflow/core/kernels/deep_conv2d.h"
20
21 namespace tensorflow {
22
23 // Winograd DeepConv2DTransform implementation for 3x3 filters.
24 // Details:
25 // *) Arithmetic complexity of computations: Shmuel Winograd
26 // *) Fast Algorithms for Convolutional Neural Networks: Lavin, Gray
27
28 template <typename T>
29 class WinogradTransform : public DeepConv2DTransform<T> {
30 public:
31 typedef typename DeepConv2DTransform<T>::Shape Shape;
32
WinogradTransform()33 WinogradTransform()
34 : filter_shape_(3, 3), input_shape_(4, 4), output_shape_(2, 2) {}
35
36 virtual void GetFilterTransformMatrix(const int64 rows, const int64 cols,
37 T* transform_matrix) const;
38
39 virtual void GetInputTransformMatrix(const int64 rows, const int64 cols,
40 T* transform_matrix) const;
41
42 virtual void GetOutputTransformMatrix(const int64 rows, const int64 cols,
43 T* transform_matrix) const;
44
filter_shape()45 virtual const Shape& filter_shape() const { return filter_shape_; }
input_shape()46 virtual const Shape& input_shape() const { return input_shape_; }
output_shape()47 virtual const Shape& output_shape() const { return output_shape_; }
48
49 private:
50 const Shape filter_shape_;
51 const Shape input_shape_;
52 const Shape output_shape_;
53 };
54
55 // The filter transform matrix is the kronecker product 'M * M' of the
56 // following matrix 'M':
57 //
58 // [ 1 0 0 ]
59 // [ 1/2 1/2 1/2 ]
60 // [ 1/2 -1/2 1/2 ]
61 // [ 0 0 1 ]
62 //
63 // The data layout of 'transform_matrix':
64 // [input_tile_spatial_size, filter_spatial_size]
65 //
66 template <typename T>
GetFilterTransformMatrix(const int64 rows,const int64 cols,T * transform_matrix)67 void WinogradTransform<T>::GetFilterTransformMatrix(const int64 rows,
68 const int64 cols,
69 T* transform_matrix) const {
70 CHECK_GT(rows, 0);
71 CHECK_GT(cols, 0);
72 memset(transform_matrix, 0, sizeof(T) * rows * cols);
73
74 // Sub matrix [0,0]
75 transform_matrix[0 * cols + 0] = T(1.0);
76
77 transform_matrix[1 * cols + 0] = T(0.5);
78 transform_matrix[1 * cols + 1] = T(0.5);
79 transform_matrix[1 * cols + 2] = T(0.5);
80
81 transform_matrix[2 * cols + 0] = T(0.5);
82 transform_matrix[2 * cols + 1] = T(-0.5);
83 transform_matrix[2 * cols + 2] = T(0.5);
84
85 transform_matrix[3 * cols + 2] = T(1.0);
86
87 // Sub matrix [1,0]
88 transform_matrix[4 * cols + 0] = T(0.5);
89
90 transform_matrix[5 * cols + 0] = T(0.25);
91 transform_matrix[5 * cols + 1] = T(0.25);
92 transform_matrix[5 * cols + 2] = T(0.25);
93
94 transform_matrix[6 * cols + 0] = T(0.25);
95 transform_matrix[6 * cols + 1] = T(-0.25);
96 transform_matrix[6 * cols + 2] = T(0.25);
97
98 transform_matrix[7 * cols + 2] = T(0.5);
99
100 // Sub matrix [1,1]
101 transform_matrix[4 * cols + 3] = T(0.5);
102
103 transform_matrix[5 * cols + 3] = T(0.25);
104 transform_matrix[5 * cols + 4] = T(0.25);
105 transform_matrix[5 * cols + 5] = T(0.25);
106
107 transform_matrix[6 * cols + 3] = T(0.25);
108 transform_matrix[6 * cols + 4] = T(-0.25);
109 transform_matrix[6 * cols + 5] = T(0.25);
110
111 transform_matrix[7 * cols + 5] = T(0.5);
112
113 // Sub matrix [1,2]
114 transform_matrix[4 * cols + 6] = T(0.5);
115
116 transform_matrix[5 * cols + 6] = T(0.25);
117 transform_matrix[5 * cols + 7] = T(0.25);
118 transform_matrix[5 * cols + 8] = T(0.25);
119
120 transform_matrix[6 * cols + 6] = T(0.25);
121 transform_matrix[6 * cols + 7] = T(-0.25);
122 transform_matrix[6 * cols + 8] = T(0.25);
123
124 transform_matrix[7 * cols + 8] = T(0.5);
125
126 // Sub matrix [2,0]
127 transform_matrix[8 * cols + 0] = T(0.5);
128
129 transform_matrix[9 * cols + 0] = T(0.25);
130 transform_matrix[9 * cols + 1] = T(0.25);
131 transform_matrix[9 * cols + 2] = T(0.25);
132
133 transform_matrix[10 * cols + 0] = T(0.25);
134 transform_matrix[10 * cols + 1] = T(-0.25);
135 transform_matrix[10 * cols + 2] = T(0.25);
136
137 transform_matrix[11 * cols + 2] = T(0.5);
138
139 // Sub matrix [2,1]
140 transform_matrix[8 * cols + 3] = T(-0.5);
141
142 transform_matrix[9 * cols + 3] = T(-0.25);
143 transform_matrix[9 * cols + 4] = T(-0.25);
144 transform_matrix[9 * cols + 5] = T(-0.25);
145
146 transform_matrix[10 * cols + 3] = T(-0.25);
147 transform_matrix[10 * cols + 4] = T(0.25);
148 transform_matrix[10 * cols + 5] = T(-0.25);
149
150 transform_matrix[11 * cols + 5] = T(-0.5);
151
152 // Sub matrix [2,2]
153 transform_matrix[8 * cols + 6] = T(0.5);
154
155 transform_matrix[9 * cols + 6] = T(0.25);
156 transform_matrix[9 * cols + 7] = T(0.25);
157 transform_matrix[9 * cols + 8] = T(0.25);
158
159 transform_matrix[10 * cols + 6] = T(0.25);
160 transform_matrix[10 * cols + 7] = T(-0.25);
161 transform_matrix[10 * cols + 8] = T(0.25);
162
163 transform_matrix[11 * cols + 8] = T(0.5);
164
165 // Sub matrix [3,2]
166 transform_matrix[12 * cols + 6] = T(1.0);
167
168 transform_matrix[13 * cols + 6] = T(0.5);
169 transform_matrix[13 * cols + 7] = T(0.5);
170 transform_matrix[13 * cols + 8] = T(0.5);
171
172 transform_matrix[14 * cols + 6] = T(0.5);
173 transform_matrix[14 * cols + 7] = T(-0.5);
174 transform_matrix[14 * cols + 8] = T(0.5);
175
176 transform_matrix[15 * cols + 8] = T(1.0);
177 }
178
179 // The input transform matrix is the kronecker product 'M * M' of the
180 // following matrix 'M':
181 //
182 // [1 0 -1 0]
183 // [0 1 1 0]
184 // [0 -1 1 0]
185 // [0 1 0 -1]
186 //
187 // Data layout of 'transform_matrix':
188 // [tile_spatial_size, tile_spatial_size]
189 //
190 template <typename T>
GetInputTransformMatrix(const int64 rows,const int64 cols,T * transform_matrix)191 void WinogradTransform<T>::GetInputTransformMatrix(const int64 rows,
192 const int64 cols,
193 T* transform_matrix) const {
194 CHECK_GT(rows, 0);
195 CHECK_GT(cols, 0);
196 memset(transform_matrix, 0, sizeof(T) * rows * cols);
197
198 // Sub matrix [0,0]
199 transform_matrix[0 * cols + 0] = T(1.0);
200 transform_matrix[0 * cols + 2] = T(-1.0);
201
202 transform_matrix[1 * cols + 1] = T(1.0);
203 transform_matrix[1 * cols + 2] = T(1.0);
204
205 transform_matrix[2 * cols + 1] = T(-1.0);
206 transform_matrix[2 * cols + 2] = T(1.0);
207
208 transform_matrix[3 * cols + 1] = T(1.0);
209 transform_matrix[3 * cols + 3] = T(-1.0);
210
211 // Sub matrix [0,2]
212 transform_matrix[0 * cols + 8] = T(-1.0);
213 transform_matrix[0 * cols + 10] = T(1.0);
214
215 transform_matrix[1 * cols + 9] = T(-1.0);
216 transform_matrix[1 * cols + 10] = T(-1.0);
217
218 transform_matrix[2 * cols + 9] = T(1.0);
219 transform_matrix[2 * cols + 10] = T(-1.0);
220
221 transform_matrix[3 * cols + 9] = T(-1.0);
222 transform_matrix[3 * cols + 11] = T(1.0);
223
224 // Sub matrix [1,1]
225 transform_matrix[4 * cols + 4] = T(1.0);
226 transform_matrix[4 * cols + 6] = T(-1.0);
227
228 transform_matrix[5 * cols + 5] = T(1.0);
229 transform_matrix[5 * cols + 6] = T(1.0);
230
231 transform_matrix[6 * cols + 5] = T(-1.0);
232 transform_matrix[6 * cols + 6] = T(1.0);
233
234 transform_matrix[7 * cols + 5] = T(1.0);
235 transform_matrix[7 * cols + 7] = T(-1.0);
236
237 // Sub matrix [1,2]
238 transform_matrix[4 * cols + 8] = T(1.0);
239 transform_matrix[4 * cols + 10] = T(-1.0);
240
241 transform_matrix[5 * cols + 9] = T(1.0);
242 transform_matrix[5 * cols + 10] = T(1.0);
243
244 transform_matrix[6 * cols + 9] = T(-1.0);
245 transform_matrix[6 * cols + 10] = T(1.0);
246
247 transform_matrix[7 * cols + 9] = T(1.0);
248 transform_matrix[7 * cols + 11] = T(-1.0);
249
250 // Sub matrix [2,1]
251 transform_matrix[8 * cols + 4] = T(-1.0);
252 transform_matrix[8 * cols + 6] = T(1.0);
253
254 transform_matrix[9 * cols + 5] = T(-1.0);
255 transform_matrix[9 * cols + 6] = T(-1.0);
256
257 transform_matrix[10 * cols + 5] = T(1.0);
258 transform_matrix[10 * cols + 6] = T(-1.0);
259
260 transform_matrix[11 * cols + 5] = T(-1.0);
261 transform_matrix[11 * cols + 7] = T(1.0);
262
263 // Sub matrix [2,2]
264 transform_matrix[8 * cols + 8] = T(1.0);
265 transform_matrix[8 * cols + 10] = T(-1.0);
266
267 transform_matrix[9 * cols + 9] = T(1.0);
268 transform_matrix[9 * cols + 10] = T(1.0);
269
270 transform_matrix[10 * cols + 9] = T(-1.0);
271 transform_matrix[10 * cols + 10] = T(1.0);
272
273 transform_matrix[11 * cols + 9] = T(1.0);
274 transform_matrix[11 * cols + 11] = T(-1.0);
275
276 // Sub matrix [3,1]
277 transform_matrix[12 * cols + 4] = T(1.0);
278 transform_matrix[12 * cols + 6] = T(-1.0);
279
280 transform_matrix[13 * cols + 5] = T(1.0);
281 transform_matrix[13 * cols + 6] = T(1.0);
282
283 transform_matrix[14 * cols + 5] = T(-1.0);
284 transform_matrix[14 * cols + 6] = T(1.0);
285
286 transform_matrix[15 * cols + 5] = T(1.0);
287 transform_matrix[15 * cols + 7] = T(-1.0);
288
289 // Sub matrix [3,3]
290 transform_matrix[12 * cols + 12] = T(-1.0);
291 transform_matrix[12 * cols + 14] = T(1.0);
292
293 transform_matrix[13 * cols + 13] = T(-1.0);
294 transform_matrix[13 * cols + 14] = T(-1.0);
295
296 transform_matrix[14 * cols + 13] = T(1.0);
297 transform_matrix[14 * cols + 14] = T(-1.0);
298
299 transform_matrix[15 * cols + 13] = T(-1.0);
300 transform_matrix[15 * cols + 15] = T(1.0);
301 };
302
303 // The output transform matrix is the kronecker product 'M * M' of the
304 // following matrix 'M':
305 //
306 // [1 1 1 0]
307 // [0 1 -1 -1]
308 //
309 // Data layout of 'transform_matrix':
310 // [out_tile_spatial_size, tile_spatial_size]
311 //
312 template <typename T>
GetOutputTransformMatrix(const int64 rows,const int64 cols,T * transform_matrix)313 void WinogradTransform<T>::GetOutputTransformMatrix(const int64 rows,
314 const int64 cols,
315 T* transform_matrix) const {
316 CHECK_GT(rows, 0);
317 CHECK_GT(cols, 0);
318 memset(transform_matrix, 0, sizeof(T) * rows * cols);
319
320 // Sub matrix [0,0]
321 transform_matrix[0 * cols + 0] = T(1.0);
322 transform_matrix[0 * cols + 1] = T(1.0);
323 transform_matrix[0 * cols + 2] = T(1.0);
324
325 transform_matrix[1 * cols + 1] = T(1.0);
326 transform_matrix[1 * cols + 2] = T(-1.0);
327 transform_matrix[1 * cols + 3] = T(-1.0);
328
329 // Sub matrix [0,1]
330 transform_matrix[0 * cols + 4] = T(1.0);
331 transform_matrix[0 * cols + 5] = T(1.0);
332 transform_matrix[0 * cols + 6] = T(1.0);
333
334 transform_matrix[1 * cols + 5] = T(1.0);
335 transform_matrix[1 * cols + 6] = T(-1.0);
336 transform_matrix[1 * cols + 7] = T(-1.0);
337
338 // Sub matrix [0,2]
339 transform_matrix[0 * cols + 8] = T(1.0);
340 transform_matrix[0 * cols + 9] = T(1.0);
341 transform_matrix[0 * cols + 10] = T(1.0);
342
343 transform_matrix[1 * cols + 9] = T(1.0);
344 transform_matrix[1 * cols + 10] = T(-1.0);
345 transform_matrix[1 * cols + 11] = T(-1.0);
346
347 // Sub matrix [1,1]
348 transform_matrix[2 * cols + 4] = T(1.0);
349 transform_matrix[2 * cols + 5] = T(1.0);
350 transform_matrix[2 * cols + 6] = T(1.0);
351
352 transform_matrix[3 * cols + 5] = T(1.0);
353 transform_matrix[3 * cols + 6] = T(-1.0);
354 transform_matrix[3 * cols + 7] = T(-1.0);
355
356 // Sub matrix [1,2]
357 transform_matrix[2 * cols + 8] = T(-1.0);
358 transform_matrix[2 * cols + 9] = T(-1.0);
359 transform_matrix[2 * cols + 10] = T(-1.0);
360
361 transform_matrix[3 * cols + 9] = T(-1.0);
362 transform_matrix[3 * cols + 10] = T(1.0);
363 transform_matrix[3 * cols + 11] = T(1.0);
364
365 // Sub matrix [1,3]
366 transform_matrix[2 * cols + 12] = T(-1.0);
367 transform_matrix[2 * cols + 13] = T(-1.0);
368 transform_matrix[2 * cols + 14] = T(-1.0);
369
370 transform_matrix[3 * cols + 13] = T(-1.0);
371 transform_matrix[3 * cols + 14] = T(1.0);
372 transform_matrix[3 * cols + 15] = T(1.0);
373 };
374
375 } // namespace tensorflow
376
377 #endif // TENSORFLOW_CORE_KERNELS_WINOGRAD_TRANSFORM_H_
378