/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/kernels/eigen_spatial_convolutions.h"

#include "absl/strings/str_cat.h"
#include "tensorflow/core/kernels/eigen_cuboid_convolution.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"

namespace Eigen {

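// Asserts that two floats agree to within a relative tolerance of 1e-3.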
#define EigenApprox(a, b) \
  { ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); }
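// Integer division that rounds up; used below to compute SAME-padding output
// sizes.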
static int ceil_div(int a, int b) { return (a + b - 1) / b; }

TEST(EigenSpatialConvolutionsTest, Simple) {
  const int input_depth = 7;
  const int input_rows = 4;
  const int input_cols = 5;
  const int output_depth = 10;
  const int patch_rows = 3;
  const int patch_cols = 4;
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  Tensor<float, 3> input(input_depth, input_rows, input_cols);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 3> result(output_depth, output_rows, output_cols);

  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);

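  // Brute-force reference: with the default stride of 1 and SAME padding,
  // each output element sums the kernel window anchored at an offset of -1,
  // skipping taps that fall outside the input.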
  for (int od = 0; od < output_depth; ++od) {
    for (int i = 0; i < output_rows; ++i) {
      for (int j = 0; j < output_cols; ++j) {
        float expected = 0.0f;
        for (int c = 0; c < patch_cols; ++c) {
          for (int r = 0; r < patch_rows; ++r) {
            for (int id = 0; id < input_depth; ++id) {
              if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < output_rows &&
                  c - 1 + j < output_cols) {
                expected +=
                    input(id, r - 1 + i, c - 1 + j) * kernel(od, id, r, c);
              }
            }
          }
        }
        EigenApprox(result(od, i, j), expected);
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, SimpleRowMajor) {
  const int input_depth = 7;
  const int input_rows = 4;
  const int input_cols = 5;
  const int output_depth = 10;
  const int patch_rows = 3;
  const int patch_cols = 4;
  const int output_rows = input_rows;
  const int output_cols = input_cols;

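  // In RowMajor layout the dimension order is reversed relative to the
  // ColMajor test above: (cols, rows, depth) for the input and
  // (cols, rows, input depth, output depth) for the kernel.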
  Tensor<float, 3, RowMajor> input(input_cols, input_rows, input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 3, RowMajor> result(output_cols, output_rows, output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_cols);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_depth);

  for (int od = 0; od < output_depth; ++od) {
    for (int i = 0; i < output_rows; ++i) {
      for (int j = 0; j < output_cols; ++j) {
        float expected = 0.0f;
        for (int c = 0; c < patch_cols; ++c) {
          for (int r = 0; r < patch_rows; ++r) {
            for (int id = 0; id < input_depth; ++id) {
              if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < output_rows &&
                  c - 1 + j < output_cols) {
                expected +=
                    input(c - 1 + j, r - 1 + i, id) * kernel(c, r, id, od);
              }
            }
          }
        }
        EigenApprox(result(j, i, od), expected);
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolution) {
  Tensor<float, 4> input(10, 5, 5, 13);
  Tensor<float, 4> kernel(7, 10, 3, 3);
  Tensor<float, 4> result(7, 5, 5, 13);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), 7);
  EXPECT_EQ(result.dimension(1), 5);
  EXPECT_EQ(result.dimension(2), 5);

  for (int b = 0; b < 13; ++b) {
    for (int od = 0; od < 7; ++od) {
      for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 5; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < 3; ++c) {
            for (int r = 0; r < 3; ++r) {
              for (int id = 0; id < 10; ++id) {
                if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 5 &&
                    c - 1 + j < 5) {
                  expected +=
                      input(id, r - 1 + i, c - 1 + j, b) * kernel(od, id, r, c);
                }
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolutionRowMajor) {
  Tensor<float, 4, RowMajor> input(13, 5, 5, 10);
  Tensor<float, 4, RowMajor> kernel(3, 3, 10, 7);
  Tensor<float, 4, RowMajor> result(13, 5, 5, 7);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(1), 5);
  EXPECT_EQ(result.dimension(2), 5);
  EXPECT_EQ(result.dimension(3), 7);

  for (int b = 0; b < 13; ++b) {
    for (int od = 0; od < 7; ++od) {
      for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 5; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < 3; ++c) {
            for (int r = 0; r < 3; ++r) {
              for (int id = 0; id < 10; ++id) {
                if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 5 &&
                    c - 1 + j < 5) {
                  expected +=
                      input(b, c - 1 + j, r - 1 + i, id) * kernel(c, r, id, od);
                }
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;
  const int output_rows = input_rows - patch_rows + 1;
  const int output_cols = input_cols - patch_cols + 1;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, and a
  // stride of 1.
  const int stride = 1;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(id, r + i, c + j, b) * kernel(od, id, r, c);
              }
            }
          }
          if (result(od, i, j, b) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(od, i, j, b) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionUnequalStrides) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;

  const int row_stride = 1;
  const int col_stride = 2;
  const int output_rows = 2;
  const int output_cols = 1;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, a row
  // stride of 1, and a column stride of 2.
  result =
      SpatialConvolution(input, kernel, row_stride, col_stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);
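  // Note: the brute-force verification below is currently skipped; only the
  // output dimensions are checked above.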
  if (true) return;

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected +=
                    input(id, r + row_stride * i, c + col_stride * j, b) *
                    kernel(od, id, r, c);
              }
            }
          }
          if (result(od, i, j, b) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(od, i, j, b) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;
  const int output_rows = input_rows - patch_rows + 1;
  const int output_cols = input_cols - patch_cols + 1;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);

  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, and a
  // stride of 1.
  const int stride = 1;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_rows; ++c) {
            for (int r = 0; r < patch_cols; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, c + j, r + i, id) * kernel(c, r, id, od);
              }
            }
          }
          if (result(b, j, i, od) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(b, j, i, od) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 2;
  const int output_cols = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, and a
  // stride of 2.
  int stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(id, r + stride * i, c + stride * j, b) *
                            kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, KernelSmallerThanStride) {
  const int input_depth = 2;
  const int input_rows = 3;
  const int input_cols = 3;
  const int num_batches = 5;
  const int output_depth = 6;
  const int patch_rows = 1;
  const int patch_cols = 1;
  const int output_rows = 2;
  const int output_cols = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 1x1 kernel, valid padding, and a
  // stride of 2.
  int stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(id, r + stride * i, c + stride * j, b) *
                            kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 2;
  const int output_cols = 2;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, and a
  // stride of 2.
  int stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, c + stride * j, r + stride * i, id) *
                            kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, AtrousSpatial) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 3;
  const int output_cols = 3;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, an output
  // (standard) stride of 1, and an input (atrous) stride of 2.
  int stride = 1;
  int in_stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID,
                              in_stride, in_stride);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

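  // With an input (atrous) stride, the kernel taps are spaced in_stride
  // elements apart in the input, effectively dilating the kernel.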
  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(id, in_stride * r + stride * i,
                                  in_stride * c + stride * j, b) *
                            kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajor) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 3;
  const int output_cols = 3;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, an output
  // (standard) stride of 1, and an input (atrous) stride of 2.
  int stride = 1;
  int in_stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID,
                              in_stride, in_stride);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, in_stride * c + stride * j,
                                  in_stride * r + stride * i, id) *
                            kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajorUnequalStrides) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 1;
  const int output_cols = 3;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, output
  // (standard) strides of 1 (rows) and 2 (columns), and input (atrous)
  // strides of 3 (rows) and 1 (columns).
  int row_stride = 1;
  int col_stride = 2;
  int row_in_stride = 3;
  int col_in_stride = 1;
  result = SpatialConvolution(input, kernel, row_stride, col_stride,
                              PADDING_VALID, row_in_stride, col_in_stride);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, col_in_stride * c + col_stride * j,
                                  row_in_stride * r + row_stride * i, id) *
                            kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, Cuboid) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

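  // Kernel window offsets used to anchor the (SAME-padded) window around each
  // output element.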
  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                      c - off_c + k >= 0 && p - off_p + i < in_depth &&
                      r - off_r + j < in_rows && c - off_c + k < in_cols) {
                    expected +=
                        input(id, p - off_p + i, r - off_r + j, c - off_c + k) *
                        kernel(od, id, p, r, c);
                  }
                }
              }
            }
          }
          EigenApprox(result(od, i, j, k), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, CuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                      c - off_c + k >= 0 && p - off_p + i < in_depth &&
                      r - off_r + j < in_rows && c - off_c + k < in_cols) {
                    expected +=
                        input(c - off_c + k, r - off_r + j, p - off_p + i, id) *
                        kernel(c, r, p, id, od);
                  }
                }
              }
            }
          }
          EigenApprox(result(k, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, ValidCuboid) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 5;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 3;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  expected +=
                      input(id, p + i, r + j, c + k) * kernel(od, id, p, r, c);
                }
              }
            }
          }
          EigenApprox(result(od, i, j, k), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, ValidCuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 5;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 3;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  expected +=
                      input(c + k, r + j, p + i, id) * kernel(c, r, p, id, od);
                }
              }
            }
          }
          EigenApprox(result(k, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, BatchedCuboid) {
  const int batches = 2;
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 5> input(in_channels, in_depth, in_rows, in_cols, batches);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 5> result(kern_filters, out_depth, out_height, out_width,
                          batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);
  EXPECT_EQ(result.dimension(4), batches);

  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int b = 0; b < batches; b++) {
    for (int od = 0; od < kern_filters; ++od) {
      for (int i = 0; i < out_depth; ++i) {
        for (int j = 0; j < out_height; ++j) {
          for (int k = 0; k < out_width; ++k) {
            float expected = 0.0f;
            for (int c = 0; c < kern_width; ++c) {
              for (int r = 0; r < kern_height; ++r) {
                for (int p = 0; p < kern_depth; ++p) {
                  for (int id = 0; id < in_channels; ++id) {
                    if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                        c - off_c + k >= 0 && p - off_p + i < in_depth &&
                        r - off_r + j < in_rows && c - off_c + k < in_cols) {
                      expected += input(id, p - off_p + i, r - off_r + j,
                                        c - off_c + k, b) *
                                  kernel(od, id, p, r, c);
                    }
                  }
                }
              }
            }
            EigenApprox(result(od, i, j, k, b), expected);
          }
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, BatchedCuboidRowMajor) {
  const int batches = 2;
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 5, RowMajor> input(batches, in_cols, in_rows, in_depth,
                                   in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 5, RowMajor> result(batches, out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(4), kern_filters);
  EXPECT_EQ(result.dimension(3), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(1), out_width);
  EXPECT_EQ(result.dimension(0), batches);

  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int b = 0; b < batches; b++) {
    for (int od = 0; od < kern_filters; ++od) {
      for (int i = 0; i < out_depth; ++i) {
        for (int j = 0; j < out_height; ++j) {
          for (int k = 0; k < out_width; ++k) {
            float expected = 0.0f;
            for (int c = 0; c < kern_width; ++c) {
              for (int r = 0; r < kern_height; ++r) {
                for (int p = 0; p < kern_depth; ++p) {
                  for (int id = 0; id < in_channels; ++id) {
                    if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                        c - off_c + k >= 0 && p - off_p + i < in_depth &&
                        r - off_r + j < in_rows && c - off_c + k < in_cols) {
                      expected += input(b, c - off_c + k, r - off_r + j,
                                        p - off_p + i, id) *
                                  kernel(c, r, p, id, od);
                    }
                  }
                }
              }
            }
            EigenApprox(result(b, k, j, i, od), expected);
          }
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, StridedValidCuboid) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 2;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  const int stride = 2;
  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  expected += input(id, p + stride * i, r + stride * j,
                                    c + stride * k) *
                              kernel(od, id, p, r, c);
                }
              }
            }
          }
          EigenApprox(result(od, i, j, k), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, StridedValidCuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 2;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  const int stride = 2;
  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  expected += input(c + stride * k, r + stride * j,
                                    p + stride * i, id) *
                              kernel(c, r, p, id, od);
                }
              }
            }
          }
          EigenApprox(result(k, j, i, od), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, StridedSameCuboid) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int stride = 2;
  const int out_depth = ceil_div(in_depth, stride);
  const int out_height = ceil_div(in_rows, stride);
  const int out_width = ceil_div(in_cols, stride);

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

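  // Total SAME padding required in each dimension so that the output size
  // equals ceil(input size / stride).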
  const int pad_p = (out_depth - 1) * stride - in_depth + kern_depth;
  const int pad_r = (out_height - 1) * stride - in_rows + kern_height;
  const int pad_c = (out_width - 1) * stride - in_cols + kern_width;

  // Number of pixels the input is extended with at the lower end in every
  // dimension.
  const int dp = pad_p / 2;
  const int dr = pad_r / 2;
  const int dc = pad_c / 2;

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  const int in_p = p - dp + i * stride;
                  const int in_r = r - dr + j * stride;
                  const int in_c = c - dc + k * stride;
                  if (in_p >= 0 && in_r >= 0 && in_c >= 0 && in_p < in_depth &&
                      in_r < in_rows && in_c < in_cols) {
                    expected +=
                        input(id, in_p, in_r, in_c) * kernel(od, id, p, r, c);
                  }
                }
              }
            }
          }
          EigenApprox(result(od, i, j, k), expected);
        }
      }
    }
  }
}

TEST(EigenSpatialConvolutionsTest, StridedSameCuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int stride = 2;
  const int out_depth = ceil_div(in_depth, stride);
  const int out_height = ceil_div(in_rows, stride);
  const int out_width = ceil_div(in_cols, stride);

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

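  // Total SAME padding required in each dimension so that the output size
  // equals ceil(input size / stride).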
  const int pad_p = (out_depth - 1) * stride - in_depth + kern_depth;
  const int pad_r = (out_height - 1) * stride - in_rows + kern_height;
  const int pad_c = (out_width - 1) * stride - in_cols + kern_width;

  // Number of pixels the input is extended with at the lower end in every
  // dimension.
  const int dp = pad_p / 2;
  const int dr = pad_r / 2;
  const int dc = pad_c / 2;

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  const int in_p = p - dp + i * stride;
                  const int in_r = r - dr + j * stride;
                  const int in_c = c - dc + k * stride;
                  if (in_p >= 0 && in_r >= 0 && in_c >= 0 && in_p < in_depth &&
                      in_r < in_rows && in_c < in_cols) {
                    expected +=
                        input(in_c, in_r, in_p, id) * kernel(c, r, p, id, od);
                  }
                }
              }
            }
          }
          EigenApprox(result(k, j, i, od), expected);
        }
      }
    }
  }
}

// A test case discovered while testing backward spatial convolution, where
// the specialized tensor contraction mapper for spatial convolution contained
// a bug.
TEST(EigenSpatialConvolutionsTest, SpatialConvContractionMapper) {
  // We have a 3x4 input image with a 2x2 patch and a stride of 2.
  // The output has size 1x2.
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Tensor<float, 4> out(1, 1, 2, 1);
  Tensor<float, 4> kern(1, 1, 2, 2);
  for (int i = 0; i < kern.size(); ++i) {
    kern.coeffRef(i) = static_cast<float>(i) + 1;
  }
  for (int i = 0; i < out.size(); ++i) {
    out.coeffRef(i) = static_cast<float>(i) + 1;
  }

  DSizes<ptrdiff_t, 4> strides;
  strides[0] = 1;
  strides[1] = 2;
  strides[2] = 2;
  strides[3] = 1;

  array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
  paddings[0] = std::make_pair(0, 0);
  paddings[1] = std::make_pair(1, 2);
  paddings[2] = std::make_pair(1, 1);
  paddings[3] = std::make_pair(0, 0);

  DSizes<ptrdiff_t, 3> out_dim;
  out_dim[0] = 1;
  out_dim[1] = 4;
  out_dim[2] = 12;

  array<bool, 4> kernel_reverse;
  kernel_reverse[0] = false;
  kernel_reverse[1] = false;
  kernel_reverse[2] = true;
  kernel_reverse[3] = true;

  DSizes<ptrdiff_t, 3> k_dims;
  k_dims[0] = 1;
  k_dims[1] = 1;
  k_dims[2] = 4;

  array<DimPair, 2> contract_dims;
  contract_dims[0] = DimPair(0, 0);
  contract_dims[1] = DimPair(2, 1);

  DSizes<ptrdiff_t, 4> in_dim;
  in_dim[0] = 1;
  in_dim[1] = 3;
  in_dim[2] = 4;
  in_dim[3] = 1;

  DSizes<ptrdiff_t, 2> in_dbg_dim;
  in_dbg_dim[0] = 3;
  in_dbg_dim[1] = 4;

  DSizes<ptrdiff_t, 2> out_dbg_dim;
  out_dbg_dim[0] = 4;
  out_dbg_dim[1] = 12;

  // This is the formula for computing the backward propagation for the input
  // of a spatial convolution.
  Tensor<float, 4> direct =
      kern.reverse(kernel_reverse)
          .reshape(k_dims)
          .contract(
              out.extract_image_patches(2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0)
                  .reshape(out_dim),
              contract_dims)
          .reshape(in_dim);

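  // The same computation, expressed by explicitly inflating and padding the
  // output before extracting patches, instead of using the strided patch
  // extraction above.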
  Tensor<float, 4> indirect =
      kern.reverse(kernel_reverse)
          .reshape(k_dims)
          .contract(
              out.inflate(strides)
                  .pad(paddings)
                  .extract_image_patches(2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
                  .reshape(out_dim),
              contract_dims)
          .reshape(in_dim);

  eigen_assert(dimensions_match(direct.dimensions(), indirect.dimensions()));
  for (size_t i = 0; i < direct.dimensions().TotalSize(); ++i) {
    EigenApprox(direct.data()[i], indirect.data()[i]);
  }
  EigenApprox(1.0f, direct(0, 0, 0, 0));
  EigenApprox(3.0f, direct(0, 0, 1, 0));
  EigenApprox(2.0f, direct(0, 0, 2, 0));
  EigenApprox(6.0f, direct(0, 0, 3, 0));

  EigenApprox(2.0f, direct(0, 1, 0, 0));
  EigenApprox(4.0f, direct(0, 1, 1, 0));
  EigenApprox(4.0f, direct(0, 1, 2, 0));
  EigenApprox(8.0f, direct(0, 1, 3, 0));
}

template <typename T>
static void PackRhsHelper(::testing::benchmark::State& state,
                          /* Input dimensions: */
                          int input_batches, int input_cols, int input_rows,
                          int input_depth,
                          /* Filter (kernel) dimensions: */
                          int filter_count, int filter_cols, int filter_rows,
                          Eigen::PaddingType padding,
                          /* Input strides: */
                          int col_strides, int row_strides,
                          /* Patch inflate strides: */
                          int patch_col_inflate_stride,
                          int patch_row_inflate_stride,
                          /* Block dimensions: */
                          Index block_rows, Index block_cols) {
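  // Benchmarks packing of the RHS (the extracted image patches) for the
  // tensor contraction that implements spatial convolution.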
  // Set random seed for benchmark repeatability.
  srand(12345);

  using Dimensions = Eigen::DSizes<Eigen::Index, 4>;

  // The default Eigen::Tensor layout is column major, so we configure
  // dimensions starting from the innermost (channels, aka depth, in this
  // case).
  Dimensions input_dims(input_depth, input_rows, input_cols, input_batches);

  static const int packet_size = Eigen::internal::packet_traits<T>::size;

  // Reshape dimensions.
  using NewDimension = Eigen::DSizes<Index, 2>;

  // Contraction dimensions.
  using nocontract_t = Eigen::array<Eigen::Index, 1>;
  using contract_t = Eigen::array<Eigen::Index, 1>;

  // Input to the TensorImagePatchOp. It is the TensorFlow TTypes<T>::Tensor
  // with ColMajor layout instead of RowMajor, but that makes no difference:
  // TensorContraction swaps LHS with RHS for row-major inputs, and the
  // contraction mapper always works with column-major data.
  using ArgType = TensorMap<Tensor<T, 4>, Eigen::Aligned>;

  using Evaluator = TensorEvaluator<
      const TensorReshapingOp<
          NewDimension, const TensorImagePatchOp<Dynamic, Dynamic, ArgType>>,
      Eigen::DefaultDevice>;

  using InputMapper = Eigen::internal::TensorContractionInputMapper<
      T, Index, Eigen::internal::Rhs, Evaluator,  //
      nocontract_t, contract_t,                   //
      packet_size,                                //
      /*inner_dim_contiguous*/ true,              //
      /*inner_dim_reordered*/ false,              //
      /*Alignment*/ 0>;

  using SubMapper = Eigen::internal::TensorContractionSubMapper<
      T, Index, Eigen::internal::Rhs, Evaluator,  //
      nocontract_t, contract_t,                   //
      packet_size,                                //
      /*inner_dim_contiguous*/ true,              //
      /*inner_dim_reordered*/ false,              //
      /*Alignment*/ 0>;

#if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
  using PackRhsImpl =
      Eigen::internal::gemm_pack_colmajor_block<T, Eigen::Index, SubMapper,
                                                ColMajor>;
#else
  using Traits = typename Eigen::internal::gebp_traits<T, T>;
  using PackRhsImpl =
      Eigen::internal::gemm_pack_rhs<T, Eigen::Index, SubMapper,  //
                                     Traits::nr,                  //
                                     ColMajor,                    //
                                     /*Conjugate*/ false,         //
                                     /*PanelMode*/ false>;
#endif

  Eigen::DefaultDevice device;

  // The actual contraction dimensions are not important.
  const Eigen::Index not_important = -1234;
  nocontract_t nocontract_dim = {not_important};
  contract_t contract_dim = {not_important};

  // We use a tensor of the same dimensions to store the packed data.
  Tensor<T, 4> packed(input_dims);

  // We generate multiple input tensors, around 512MB in total, to measure a
  // realistic workload in which the input data is not in the L1-L3 caches.
  size_t input_bytes = input_dims.TotalSize() * sizeof(T);
  size_t mem_size_bytes = 1024 * 1024 * 512;
  size_t num_inputs =
      std::max(static_cast<size_t>(1), mem_size_bytes / input_bytes);

  std::vector<Tensor<T, 4>> inputs;
  std::vector<Evaluator> evaluators;
  std::vector<InputMapper> input_mappers;

  inputs.reserve(num_inputs);
  evaluators.reserve(num_inputs);
  input_mappers.reserve(num_inputs);

  for (int i = 0; i < num_inputs; ++i) {
    inputs.emplace_back(input_dims);
    inputs[i].setRandom();

    ArgType tensor_map(inputs[i].data(), input_dims);

    // 1. Extract image patches from the input tensor, using the given output
    // strides and patch inflate strides (the in-row/in-column strides are 1).
    const auto image_patch_op = TensorImagePatchOp<Dynamic, Dynamic, ArgType>(
        tensor_map,                                          //
        filter_rows, filter_cols,                            //
        row_strides, col_strides,                            //
        /*in_row_strides=*/1, /*in_col_strides=*/1,          //
        patch_row_inflate_stride, patch_col_inflate_stride,  //
        padding, /*padding_value=*/0.0);

    // 2. Reshape the extracted patches into a "virtual" 2D tensor.
    Index input_rows_eff = (input_rows - 1) * patch_row_inflate_stride + 1;
    Index input_cols_eff = (input_cols - 1) * patch_col_inflate_stride + 1;

    Index output_rows = 0;
    Index output_cols = 0;

    if (padding == Eigen::PADDING_SAME) {
      output_rows = input_rows_eff / row_strides;
      output_cols = input_cols_eff / col_strides;
    } else if (padding == Eigen::PADDING_VALID) {
      output_rows =
          numext::ceil((input_rows_eff - filter_rows + 1.f) / row_strides);
      output_cols =
          numext::ceil((input_cols_eff - filter_cols + 1.f) / col_strides);
    } else {
      eigen_assert(false && "not supported");
    }

    NewDimension reshape_dims;
    reshape_dims[0] = input_depth * filter_rows * filter_cols;    // patch size
    reshape_dims[1] = output_rows * output_cols * input_batches;  // num_patches

    const auto reshape_op =
        TensorReshapingOp<NewDimension, decltype(image_patch_op)>(
            image_patch_op, reshape_dims);

    evaluators.emplace_back(reshape_op, device);

    input_mappers.emplace_back(evaluators[i], nocontract_dim, nocontract_dim,
                               contract_dim, contract_dim);
  }

  // We read properties of the extracted image patches directly from the
  // evaluator.
  const Index patch_depth = evaluators[0].impl().dimensions()[0];
  const Index patch_rows = evaluators[0].impl().dimensions()[1];
  const Index patch_cols = evaluators[0].impl().dimensions()[2];

  // The number of patches is the same as the maximum column available through
  // the InputMapper (SubMapper).
  const Index num_patches = evaluators[0].impl().dimensions()[3];

  // The size of a single patch; it is the same as the maximum depth available
  // through the InputMapper (SubMapper).
  const Index patch_size = patch_depth * patch_rows * patch_cols;

  PackRhsImpl pack_rhs;

  const Index packed_total_size = input_dims.TotalSize();

  // Round row/col/memory offsets down to a multiple of the packet size.
  const auto round_up = [](const Index idx) {
    return (idx / packet_size) * packet_size;
  };

  for (auto s : state) {
    int input_idx =
        num_inputs == 1 ? 0 : internal::random<int>(0, num_inputs - 1);

    // The depth offset must be a multiple of the packet size.
    Index depth_offset =
        (patch_size > block_rows)
            ? round_up(internal::random<Index>(0, patch_size - 10))
            : 0;
    Index col_offset = internal::random<Index>(0, num_patches - 10);

    Index depth = std::min(block_rows, patch_size - depth_offset);
    Index cols = std::min(block_cols, num_patches - col_offset);

    // Write packed data to a random memory location to emulate cold caches.
    Index packed_size = depth * cols;
    Index packed_offset =
        internal::random<Index>(0, packed_total_size - packed_size - 1);

    SubMapper sub_mapper =
        input_mappers[input_idx].getSubMapper(depth_offset, col_offset);
    pack_rhs(packed.data() + packed_offset, sub_mapper, depth, cols);
  }

  state.SetLabel(
      absl::StrCat("patch: ", patch_rows, "x", patch_cols, " D", patch_depth,
                   "; num_patches=", num_patches, " patch_size=", patch_size,
                   " num_inputs=", num_inputs, " padding=", padding));
}

template <typename T>
static void PackLhsHelper(::testing::benchmark::State& state,
                          /* Input dimensions: */
                          int input_depth,
                          /* Filter (kernel) dimensions: */
                          int filter_count, int filter_cols, int filter_rows,
                          /* Block dimensions: */
                          Index block_rows, Index block_cols) {
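  // Benchmarks packing of the LHS (the reshaped filter) for the tensor
  // contraction that implements spatial convolution.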
  // Set random seed for benchmark repeatability.
  srand(12345);

  eigen_assert(block_rows <= filter_count);
  eigen_assert(block_cols <= input_depth * filter_rows * filter_cols);

  using Dimensions = Eigen::DSizes<Eigen::Index, 4>;

  // The default Eigen::Tensor layout is column major, so we configure
  // dimensions starting from the innermost (`filter count`, aka `kernel
  // filters`).
  Dimensions filter_dims(filter_count, filter_rows, filter_cols, input_depth);

  static const int packet_size = Eigen::internal::packet_traits<T>::size;

  // We are going to reshape the filter into a 2D tensor.
  using NewDimension = Eigen::DSizes<Index, 2>;

  // Contraction dimensions.
  using nocontract_t = Eigen::array<Eigen::Index, 1>;
  using contract_t = Eigen::array<Eigen::Index, 1>;

  // Input to the ReshapeOp. It is the TensorFlow TTypes<T>::Tensor with
  // ColMajor layout instead of RowMajor, but that makes no difference:
  // TensorContraction swaps LHS with RHS for row-major inputs, and the
  // contraction mapper always works with column-major data.
  using ArgType = TensorMap<Tensor<T, 4>, Eigen::Aligned>;

  using Evaluator =
      TensorEvaluator<const TensorReshapingOp<NewDimension, ArgType>,
                      Eigen::DefaultDevice>;

  using InputMapper = Eigen::internal::TensorContractionInputMapper<
      T, Index, Eigen::internal::Lhs, Evaluator,  //
      nocontract_t, contract_t,                   //
      packet_size,                                //
      /*inner_dim_contiguous*/ true,              //
      /*inner_dim_reordered*/ false,              //
      /*Alignment*/ 0>;

  using SubMapper = Eigen::internal::TensorContractionSubMapper<
      T, Index, Eigen::internal::Lhs, Evaluator,  //
      nocontract_t, contract_t,                   //
      packet_size,                                //
      /*inner_dim_contiguous*/ true,              //
      /*inner_dim_reordered*/ false,              //
      /*Alignment*/ 0>;

#if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
  using PackLhsImpl =
      Eigen::internal::gemm_pack_colmajor_block<T, Eigen::Index, SubMapper,
                                                ColMajor>;
#else
  using Traits = typename Eigen::internal::gebp_traits<T, T>;
  using PackLhsImpl =
      Eigen::internal::gemm_pack_lhs<T, Eigen::Index, SubMapper,         //
                                     Traits::mr,                         //
                                     Traits::LhsProgress,                //
                                     typename Traits::LhsPacket4Packing, //
                                     ColMajor>;
#endif

  Eigen::DefaultDevice device;

  // We will reshape the kernel into a 2D tensor.
  NewDimension reshape_dims;
  reshape_dims[0] = filter_count;
  reshape_dims[1] = input_depth * filter_rows * filter_cols;

  // We are going to contract along the `input_depth * filter_rows *
  // filter_cols` dimension.
  nocontract_t nocontract_dim = {0};
  contract_t contract_dim = {1};

  // These values are computed using the algorithm in TensorContraction.h, with
  // the 'nocontract_dim' and 'contract_dim' values specified above.
  nocontract_t nocontract_strides = {1};
  contract_t contract_strides = {filter_count};
  nocontract_t i_strides = {1};
  contract_t k_strides = {1};
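  // In other words: in the reshaped column-major [filter_count, K] tensor,
  // where K = input_depth * filter_rows * filter_cols, element (i, k) lives at
  // offset i + k * filter_count, so stepping along the contraction dimension
  // moves by `filter_count` elements while the non-contracting stride is 1.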

  // We use a tensor of the same dimensions to store packed data.
  Tensor<T, 4> packed(filter_dims);

  // We generate multiple filter tensors, around 512MB in total, to measure a
  // realistic workload when the input data is not in the L1-L3 caches.
  size_t input_bytes = filter_dims.TotalSize() * sizeof(T);
  size_t mem_size_bytes = 1024 * 1024 * 512;
  size_t num_filters =
      std::max(static_cast<size_t>(1), mem_size_bytes / input_bytes);
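  // For instance, the float [1024, 3, 3, 128] filter benchmarked below takes
  // ~4.5 MiB per tensor, so roughly 113 filter copies are generated.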

  std::vector<Tensor<T, 4>> filters;
  std::vector<Evaluator> evaluators;
  std::vector<InputMapper> input_mappers;

  filters.reserve(num_filters);
  evaluators.reserve(num_filters);
  input_mappers.reserve(num_filters);

  for (size_t i = 0; i < num_filters; ++i) {
    filters.emplace_back(filter_dims);
    filters[i].setRandom();

    ArgType tensor_map(filters[i].data(), filter_dims);

    const auto reshape_op =
        TensorReshapingOp<NewDimension, ArgType>(tensor_map, reshape_dims);

    evaluators.emplace_back(reshape_op, device);

    input_mappers.emplace_back(evaluators[i], nocontract_strides, i_strides,
                               contract_strides, k_strides);
  }

  PackLhsImpl pack_lhs;

  const Index packed_total_size = filter_dims.TotalSize();

  // Round down row/col/memory offsets to make them a multiple of the packet
  // size.
  const auto round_down = [](const Index idx) {
    return (idx / packet_size) * packet_size;
  };

  // Block rows are in the [0, filter_count) range.
  // Block cols are in the [0, filter_rows * filter_cols * input_depth) range.

  const Index max_row = filter_count;
  const Index max_col = filter_rows * filter_cols * input_depth;
  for (auto s : state) {
    int filter_idx =
        num_filters == 1 ? 0 : internal::random<int>(0, num_filters - 1);

    Index row_offset = round_down(internal::random<Index>(0, max_row - 10));
    Index col_offset = round_down(internal::random<Index>(0, max_col - 10));

    Index rows = std::min(block_rows, max_row - row_offset);
    Index cols = std::min(block_cols, max_col - col_offset);

    // Write packed data to a random memory location to emulate cold caches.
    Index packed_offset = round_down(
        internal::random<Index>(0, packed_total_size - rows * cols - 1));

    SubMapper sub_mapper =
        input_mappers[filter_idx].getSubMapper(row_offset, col_offset);

    // NOTE: Eigen gemm_pack_lhs accepts the contraction depth (k-th dimension)
    // as the first argument (aka block cols). The MKL-DNN pack is generic for
    // lhs and rhs and accepts block rows and cols in the same order for both.
#if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
    pack_lhs(packed.data() + packed_offset, sub_mapper, rows, cols);
#else
    pack_lhs(packed.data() + packed_offset, sub_mapper, cols, rows);
#endif
  }
  state.SetLabel(absl::StrCat(
      "filter: count=", filter_count, " dims=", filter_rows, "x", filter_cols,
      "; input: depth=", input_depth, "; num_filters=", num_filters));
}

// -------------------------------------------------------------------------- //
// Pack RHS
//
// Macro argument names:
// N: batch size
// H: height
// W: width
// C: input channels
// FC: filter channels
// FH: filter height
// FW: filter width
// PAD: padding (SAME or VALID)
// SH: stride in the height dimension
// SW: stride in the width dimension
// ISH: patch inflate stride in the height dimension
// ISW: patch inflate stride in the width dimension
// BR: block rows
// BC: block cols

#define BM_CONCAT(a, b) a##b

#define BM_RHS_NAME(prefix, T, N, H, W, C, FC, FH, FW, PAD, SH, SW, ISH, ISW, \
                    BR, BC)                                                   \
  BM_CONCAT(                                                                  \
      BM_##prefix##_##T##_##N##_##H##x##W##_IC##C##_FC##FC##_##FH##x##FW,     \
      _##PAD##_s##SH##x##SW##_is##ISH##x##ISW##_B##BR##x##BC)

#define BM_PackRhs(T, N, H, W, C, FC, FH, FW, PAD, SH, SW, ISH, ISW, BR, BC) \
  static void BM_RHS_NAME(PackRhs, T, N, H, W, C, FC, FH, FW, PAD, SH, SW,   \
                          ISH, ISW, BR,                                      \
                          BC)(::testing::benchmark::State & state) {         \
    PackRhsHelper<T>(state, N, H, W, C, FC, FH, FW, PADDING_##PAD, SH, SW,   \
                     ISH, ISW, BR, BC);                                      \
  }                                                                          \
  BENCHMARK(BM_RHS_NAME(PackRhs, T, N, H, W, C, FC, FH, FW, PAD, SH, SW,     \
                        ISH, ISW, BR, BC))                                   \
      ->UseRealTime()
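
// For example, the first instantiation below defines and registers a benchmark
// named:
//   BM_PackRhs_float_32_64x64_IC32_FC64_5x5_VALID_s1x1_is1x1_B256x56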

// The number of input channels (input depth) is equal to the number of patch
// channels (patch depth).

// Fast path: the input channel dimension is a multiple of the packet size.
BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 32,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ VALID,              //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 32,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ SAME,               //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 32,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ VALID,              //
           /*stride*/ 2, 2,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 32,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ SAME,               //
           /*stride*/ 2, 2,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

// Slow path: the input channel dimension is not a multiple of the packet size.
BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 30,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ SAME,               //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 30,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ VALID,              //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 30,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ SAME,               //
           /*stride*/ 2, 2,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 30,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ VALID,              //
           /*stride*/ 2, 2,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

// Slow path: the input channel dimension is smaller than the packet size.
BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 256, 256,             //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 8, 8,                //
           /*padding*/ SAME,               //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 256, 256,             //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 8, 8,                //
           /*padding*/ VALID,              //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 256, 256,             //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 8, 8,                //
           /*padding*/ SAME,               //
           /*stride*/ 2, 4,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 256, 256,             //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 8, 8,                //
           /*padding*/ VALID,              //
           /*stride*/ 2, 4,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);

// Short and wide block with a small input channel dimension.
BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 3, 3,                //
           /*padding*/ SAME,               //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 36, 432);

// Short and wide block with a small input channel dimension.
BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 3, 3,                //
           /*padding*/ VALID,              //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 36, 432);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 3, 3,                //
           /*padding*/ SAME,               //
           /*stride*/ 2, 2,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 36, 432);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 4,                 //
           /*num_filters*/ 16,             //
           /*filter*/ 3, 3,                //
           /*padding*/ VALID,              //
           /*stride*/ 2, 2,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 36, 432);

// Non-standard patches with inflated strides.
BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 32, 32,               //
           /*channels*/ 96,                //
           /*num_filters*/ 96,             //
           /*filter*/ 5, 5,                //
           /*padding*/ SAME,               //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 2, 2,  //
           /*block*/ 272, 240);

BM_PackRhs(/*type*/ float,                 //
           /*batch*/ 32,                   //
           /*image*/ 32, 32,               //
           /*channels*/ 96,                //
           /*num_filters*/ 96,             //
           /*filter*/ 5, 5,                //
           /*padding*/ VALID,              //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 2, 2,  //
           /*block*/ 272, 240);

#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
using qint8 = Eigen::QInt8;
BM_PackRhs(/*type*/ qint8,                 //
           /*batch*/ 32,                   //
           /*image*/ 64, 64,               //
           /*channels*/ 32,                //
           /*num_filters*/ 64,             //
           /*filter*/ 5, 5,                //
           /*padding*/ SAME,               //
           /*stride*/ 1, 1,                //
           /*patch inflate stride*/ 1, 1,  //
           /*block*/ 256, 56);
#endif  // defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)

// -------------------------------------------------------------------------- //
// Pack LHS
//
// Macro argument names:
// C: input channels
// FC: filter channels
// FH: filter height
// FW: filter width
// BR: block rows
// BC: block cols

#define BM_LHS_NAME(prefix, T, C, FC, FH, FW, BR, BC) \
  BM_CONCAT(BM_##prefix##_##T##_##C##_FC##FC##_##FH##x##FW, _B##BR##x##BC)

#define BM_PackLhs(T, C, FC, FH, FW, BR, BC)                         \
  static void BM_LHS_NAME(PackLhs, T, C, FC, FH, FW, BR,             \
                          BC)(::testing::benchmark::State & state) { \
    PackLhsHelper<T>(state, C, FC, FH, FW, BR, BC);                  \
  }                                                                  \
  BENCHMARK(BM_LHS_NAME(PackLhs, T, C, FC, FH, FW, BR, BC))->UseRealTime()

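// For example, the first instantiation below defines and registers a benchmark
// named BM_PackLhs_float_128_FC1024_3x3_B256x56.
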
// The number of input channels (input depth) is equal to the number of patch
// channels (patch depth).

BM_PackLhs(/*type*/ float,            //
           /*input channels*/ 128,    //
           /*filter channels*/ 1024,  //
           /*filter dims*/ 3, 3,      //
           /*block*/ 256, 56);

BM_PackLhs(/*type*/ float,            //
           /*input channels*/ 128,    //
           /*filter channels*/ 1024,  //
           /*filter dims*/ 3, 3,      //
           /*block*/ 56, 256);

BM_PackLhs(/*type*/ float,            //
           /*input channels*/ 30,     //
           /*filter channels*/ 64,    //
           /*filter dims*/ 3, 3,      //
           /*block*/ 256, 56);

BM_PackLhs(/*type*/ float,            //
           /*input channels*/ 50,     //
           /*filter channels*/ 64,    //
           /*filter dims*/ 3, 3,      //
           /*block*/ 56, 256);
}  // namespace Eigen