1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/kernels/eigen_spatial_convolutions.h"
17
18 #include "absl/strings/str_cat.h"
19 #include "tensorflow/core/kernels/eigen_cuboid_convolution.h"
20 #include "tensorflow/core/platform/test.h"
21 #include "tensorflow/core/platform/test_benchmark.h"
22
23 namespace Eigen {
24
// Asserts that |a - b| <= min(|a|, |b|) * 1e-3, i.e. the two values agree to
// a relative tolerance of 1e-3.  Arguments are parenthesized so that
// expression arguments (e.g. `x + y`) expand correctly inside the macro.
#define EigenApprox(a, b)                                  \
  {                                                        \
    ASSERT_TRUE(std::abs((a) - (b)) <=                     \
                std::min(std::abs((a)), std::abs((b))) * 1e-3); \
  }
// Integer ceiling division: smallest integer >= a / b (for positive b).
static int ceil_div(int a, int b) { return (a + b - 1) / b; }
28
// Default-padding convolution on a single col-major (depth, rows, cols)
// image: the two-argument SpatialConvolution overload keeps the spatial
// size.  The result is checked against a naive triple loop with the kernel
// anchored at a fixed offset of 1 in both spatial dimensions.
TEST(EigenSpatialConvolutionsTest, Simple) {
  const int input_depth = 7;
  const int input_rows = 4;
  const int input_cols = 5;
  const int output_depth = 10;
  const int patch_rows = 3;
  const int patch_cols = 4;
  // Spatial dimensions are preserved by this overload.
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  Tensor<float, 3> input(input_depth, input_rows, input_cols);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 3> result(output_depth, output_rows, output_cols);

  // Offset values away from zero so the relative-error comparison in
  // EigenApprox stays well conditioned.
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);

  for (int od = 0; od < output_depth; ++od) {
    for (int i = 0; i < output_rows; ++i) {
      for (int j = 0; j < output_cols; ++j) {
        float expected = 0.0f;
        for (int c = 0; c < patch_cols; ++c) {
          for (int r = 0; r < patch_rows; ++r) {
            for (int id = 0; id < input_depth; ++id) {
              // Skip kernel taps that fall outside the (implicitly
              // zero-padded) input.  NOTE(review): the upper bounds use
              // output_rows/output_cols; correct here only because they
              // equal input_rows/input_cols.
              if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < output_rows &&
                  c - 1 + j < output_cols) {
                expected +=
                    input(id, r - 1 + i, c - 1 + j) * kernel(od, id, r, c);
              }
            }
          }
        }
        EigenApprox(result(od, i, j), expected);
      }
    }
  }
}
73
// Row-major variant of Simple: every tensor lists its dimensions in the
// reverse order (cols, rows, depth), and the result is indexed accordingly.
TEST(EigenSpatialConvolutionsTest, SimpleRowMajor) {
  const int input_depth = 7;
  const int input_rows = 4;
  const int input_cols = 5;
  const int output_depth = 10;
  const int patch_rows = 3;
  const int patch_cols = 4;
  // Spatial dimensions are preserved by this overload.
  const int output_rows = input_rows;
  const int output_cols = input_cols;

  Tensor<float, 3, RowMajor> input(input_cols, input_rows, input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 3, RowMajor> result(output_cols, output_rows, output_depth);
  // Offset values away from zero so the relative-error comparison stays
  // well conditioned.
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), output_cols);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_depth);

  for (int od = 0; od < output_depth; ++od) {
    for (int i = 0; i < output_rows; ++i) {
      for (int j = 0; j < output_cols; ++j) {
        float expected = 0.0f;
        for (int c = 0; c < patch_cols; ++c) {
          for (int r = 0; r < patch_rows; ++r) {
            for (int id = 0; id < input_depth; ++id) {
              // Skip kernel taps outside the (implicitly zero-padded) input.
              if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < output_rows &&
                  c - 1 + j < output_cols) {
                expected +=
                    input(c - 1 + j, r - 1 + i, id) * kernel(c, r, id, od);
              }
            }
          }
        }
        EigenApprox(result(j, i, od), expected);
      }
    }
  }
}
118
// Batched col-major convolution with default padding: layout is
// (filters, rows, cols, batch) and each batch entry is verified
// independently against the naive reference.
TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolution) {
  Tensor<float, 4> input(10, 5, 5, 13);
  Tensor<float, 4> kernel(7, 10, 3, 3);
  Tensor<float, 4> result(7, 5, 5, 13);
  // Offset values away from zero so the relative-error comparison stays
  // well conditioned.
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), 7);
  EXPECT_EQ(result.dimension(1), 5);
  EXPECT_EQ(result.dimension(2), 5);
  // Batch dimension was previously left unchecked.
  EXPECT_EQ(result.dimension(3), 13);

  for (int b = 0; b < 13; ++b) {
    for (int od = 0; od < 7; ++od) {
      for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 5; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < 3; ++c) {
            for (int r = 0; r < 3; ++r) {
              for (int id = 0; id < 10; ++id) {
                // Skip kernel taps outside the zero-padded input.
                if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 5 &&
                    c - 1 + j < 5) {
                  expected +=
                      input(id, r - 1 + i, c - 1 + j, b) * kernel(od, id, r, c);
                }
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
155
// Row-major variant of BatchedSpatialConvolution: layout is
// (batch, cols, rows, filters).
TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolutionRowMajor) {
  Tensor<float, 4, RowMajor> input(13, 5, 5, 10);
  Tensor<float, 4, RowMajor> kernel(3, 3, 10, 7);
  Tensor<float, 4, RowMajor> result(13, 5, 5, 7);
  // Offset values away from zero so the relative-error comparison stays
  // well conditioned.
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = SpatialConvolution(input, kernel);

  // Batch dimension was previously left unchecked.
  EXPECT_EQ(result.dimension(0), 13);
  EXPECT_EQ(result.dimension(1), 5);
  EXPECT_EQ(result.dimension(2), 5);
  EXPECT_EQ(result.dimension(3), 7);

  for (int b = 0; b < 13; ++b) {
    for (int od = 0; od < 7; ++od) {
      for (int i = 0; i < 5; ++i) {
        for (int j = 0; j < 5; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < 3; ++c) {
            for (int r = 0; r < 3; ++r) {
              for (int id = 0; id < 10; ++id) {
                // Skip kernel taps outside the zero-padded input.
                if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 5 &&
                    c - 1 + j < 5) {
                  expected +=
                      input(b, c - 1 + j, r - 1 + i, id) * kernel(c, r, id, od);
                }
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
192
// VALID padding with stride 1: the output shrinks to input - patch + 1, so
// every kernel tap lands inside the input and the reference computation
// needs no bounds check.
TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;
  const int output_rows = input_rows - patch_rows + 1;
  const int output_cols = input_cols - patch_cols + 1;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, and a stride
  // of 1.
  const int stride = 1;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(id, r + i, c + j, b) * kernel(od, id, r, c);
              }
            }
          }
          // Debug output to make a mismatch easier to localize.
          if (result(od, i, j, b) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(od, i, j, b) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
244
// VALID padding with different row/column strides.  Only the output
// dimensions are actually verified: the element-wise check below is behind
// an unconditional early return.
TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionUnequalStrides) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;

  const int row_stride = 1;
  const int col_stride = 2;
  const int output_rows = 2;
  const int output_cols = 1;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, a row
  // stride of 1, and a column stride of 2.
  result =
      SpatialConvolution(input, kernel, row_stride, col_stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);
  // NOTE(review): everything below this return is dead code, so the numeric
  // results are never verified.  Presumably a debugging leftover or a known
  // failure -- confirm, then either remove the return or delete the loop.
  if (true) return;

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected +=
                    input(id, r + row_stride * i, c + col_stride * j, b) *
                    kernel(od, id, r, c);
              }
            }
          }
          // Debug output to make a mismatch easier to localize.
          if (result(od, i, j, b) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(od, i, j, b) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
302
// Row-major variant of ValidSpatialConvolution.  Fixed: the reference loops
// previously iterated `c` over patch_rows and `r` over patch_cols (swapped);
// harmless while the patch is square, but wrong for rectangular kernels and
// inconsistent with the other row-major tests.  `c` indexes the kernel's
// first (column) dimension and `r` its second (row) dimension.
TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 4;
  const int patch_cols = 4;
  const int output_rows = input_rows - patch_rows + 1;
  const int output_cols = input_cols - patch_cols + 1;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);

  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 4x4 kernel, valid padding, and a stride
  // of 1.
  const int stride = 1;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, c + j, r + i, id) * kernel(c, r, id, od);
              }
            }
          }
          // Debug output to make a mismatch easier to localize.
          if (result(b, j, i, od) != expected) {
            std::cout << "at od=" << od << " b=" << b << " i=" << i
                      << " j=" << j << " " << result(b, j, i, od) << " vs "
                      << expected << std::endl;
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
358
// VALID padding with an output stride of 2 in both spatial dimensions:
// 5x5 input, 3x3 kernel -> 2x2 output.
TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolution) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 2;
  const int output_cols = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, and a stride
  // of 2.
  int stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                // The stride scales the output coordinate when mapping back
                // into the input.
                expected += input(id, r + stride * i, c + stride * j, b) *
                            kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
406
// 1x1 kernel combined with stride 2 (kernel smaller than the stride): the
// convolution subsamples every other input position.
TEST(EigenSpatialConvolutionsTest, KernelSmallerThanStride) {
  const int input_depth = 2;
  const int input_rows = 3;
  const int input_cols = 3;
  const int num_batches = 5;
  const int output_depth = 6;
  const int patch_rows = 1;
  const int patch_cols = 1;
  const int output_rows = 2;
  const int output_cols = 2;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 1x1 kernel, valid padding, and a stride
  // of 2.
  int stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(id, r + stride * i, c + stride * j, b) *
                            kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
454
// Row-major variant of StridedSpatialConvolution: dimensions listed in
// reverse (batch, cols, rows, depth) order.
TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolutionRowMajor) {
  const int input_depth = 10;
  const int input_rows = 5;
  const int input_cols = 5;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 2;
  const int output_cols = 2;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, and a stride
  // of 2.
  int stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, c + stride * j, r + stride * i, id) *
                            kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
505
// Atrous (dilated) convolution, col-major: the in_stride dilates the kernel
// taps, so a 3x3 kernel with in_stride 2 covers a 5x5 receptive field.
// 7x7 input, VALID padding -> 3x3 output.
TEST(EigenSpatialConvolutionsTest, AtrousSpatial) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 3;
  const int output_cols = 3;

  Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches);
  Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols);
  Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding
  // output (standard) stride 1, and input (atrous) stride of 2.
  int stride = 1;
  int in_stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID,
                              in_stride, in_stride);

  EXPECT_EQ(result.dimension(0), output_depth);
  EXPECT_EQ(result.dimension(1), output_rows);
  EXPECT_EQ(result.dimension(2), output_cols);
  EXPECT_EQ(result.dimension(3), num_batches);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                // in_stride dilates the kernel tap; stride scales the
                // output coordinate.
                expected += input(id, in_stride * r + stride * i,
                                  in_stride * c + stride * j, b) *
                            kernel(od, id, r, c);
              }
            }
          }
          EigenApprox(result(od, i, j, b), expected);
        }
      }
    }
  }
}
556
// Row-major variant of AtrousSpatial.
TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajor) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 3;
  const int output_cols = 3;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding
  // output (standard) stride 1, and input (atrous) stride of 2.
  int stride = 1;
  int in_stride = 2;
  result = SpatialConvolution(input, kernel, stride, stride, PADDING_VALID,
                              in_stride, in_stride);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                // in_stride dilates the kernel tap; stride scales the
                // output coordinate.
                expected += input(b, in_stride * c + stride * j,
                                  in_stride * r + stride * i, id) *
                            kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
610
// Atrous convolution, row-major, with different strides and dilations per
// spatial dimension.
TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajorUnequalStrides) {
  const int input_depth = 10;
  const int input_rows = 7;
  const int input_cols = 7;
  const int num_batches = 13;
  const int output_depth = 7;
  const int patch_rows = 3;
  const int patch_cols = 3;
  const int output_rows = 1;
  const int output_cols = 3;

  Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows,
                                   input_depth);
  Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth,
                                    output_depth);
  Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows,
                                    output_depth);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  // Apply a spatial convolution using a 3x3 kernel, valid padding, output
  // strides of 1 (rows) and 2 (cols), and input (atrous) strides of
  // 3 (rows) and 1 (cols).
  int row_stride = 1;
  int col_stride = 2;
  int row_in_stride = 3;
  int col_in_stride = 1;
  result = SpatialConvolution(input, kernel, row_stride, col_stride,
                              PADDING_VALID, row_in_stride, col_in_stride);

  EXPECT_EQ(result.dimension(0), num_batches);
  EXPECT_EQ(result.dimension(1), output_cols);
  EXPECT_EQ(result.dimension(2), output_rows);
  EXPECT_EQ(result.dimension(3), output_depth);

  for (int b = 0; b < num_batches; ++b) {
    for (int od = 0; od < output_depth; ++od) {
      for (int i = 0; i < output_rows; ++i) {
        for (int j = 0; j < output_cols; ++j) {
          float expected = 0.0f;
          for (int c = 0; c < patch_cols; ++c) {
            for (int r = 0; r < patch_rows; ++r) {
              for (int id = 0; id < input_depth; ++id) {
                expected += input(b, col_in_stride * c + col_stride * j,
                                  row_in_stride * r + row_stride * i, id) *
                            kernel(c, r, id, od);
              }
            }
          }
          EigenApprox(result(b, j, i, od), expected);
        }
      }
    }
  }
}
666
// 3-D (cuboid) convolution, col-major, default padding: spatial dimensions
// are preserved.  The reference centers the kernel using the (k - 1) / 2
// offsets and skips taps outside the zero-padded input.
TEST(EigenSpatialConvolutionsTest, Cuboid) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  // Centering offsets for the (plane, row, col) kernel dimensions.
  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                      c - off_c + k >= 0 && p - off_p + i < in_depth &&
                      r - off_r + j < in_rows && c - off_c + k < in_cols) {
                    expected +=
                        input(id, p - off_p + i, r - off_r + j, c - off_c + k) *
                        kernel(od, id, p, r, c);
                  }
                }
              }
            }
          }
          EigenApprox(result(od, i, j, k), expected);
        }
      }
    }
  }
}
727
// Row-major variant of Cuboid: all tensor dimensions listed in reverse
// order.
TEST(EigenSpatialConvolutionsTest, CuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  // Centering offsets for the (plane, row, col) kernel dimensions.
  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                      c - off_c + k >= 0 && p - off_p + i < in_depth &&
                      r - off_r + j < in_rows && c - off_c + k < in_cols) {
                    expected +=
                        input(c - off_c + k, r - off_r + j, p - off_p + i, id) *
                        kernel(c, r, p, id, od);
                  }
                }
              }
            }
          }
          EigenApprox(result(k, j, i, od), expected);
        }
      }
    }
  }
}
789
// 3-D convolution with VALID padding and stride 1: output shrinks to
// input - kernel + 1 in each spatial dimension, so no bounds check is
// needed in the reference computation.
TEST(EigenSpatialConvolutionsTest, ValidCuboid) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 5;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 3;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  expected +=
                      input(id, p + i, r + j, c + k) * kernel(od, id, p, r, c);
                }
              }
            }
          }
          EigenApprox(result(od, i, j, k), expected);
        }
      }
    }
  }
}
841
// Row-major variant of ValidCuboid.
TEST(EigenSpatialConvolutionsTest, ValidCuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 5;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 3;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID);

  EXPECT_EQ(result.dimension(3), kern_filters);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(0), out_width);

  for (int od = 0; od < kern_filters; ++od) {
    for (int i = 0; i < out_depth; ++i) {
      for (int j = 0; j < out_height; ++j) {
        for (int k = 0; k < out_width; ++k) {
          float expected = 0.0f;
          for (int c = 0; c < kern_width; ++c) {
            for (int r = 0; r < kern_height; ++r) {
              for (int p = 0; p < kern_depth; ++p) {
                for (int id = 0; id < in_channels; ++id) {
                  expected +=
                      input(c + k, r + j, p + i, id) * kernel(c, r, p, id, od);
                }
              }
            }
          }
          EigenApprox(result(k, j, i, od), expected);
        }
      }
    }
  }
}
894
// Batched 3-D convolution, col-major, default padding: each batch entry is
// verified independently against the centered reference computation.
TEST(EigenSpatialConvolutionsTest, BatchedCuboid) {
  const int batches = 2;
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 5> input(in_channels, in_depth, in_rows, in_cols, batches);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 5> result(kern_filters, out_depth, out_height, out_width,
                          batches);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);
  EXPECT_EQ(result.dimension(4), batches);

  // Centering offsets for the (plane, row, col) kernel dimensions.
  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  for (int b = 0; b < batches; b++) {
    for (int od = 0; od < kern_filters; ++od) {
      for (int i = 0; i < out_depth; ++i) {
        for (int j = 0; j < out_height; ++j) {
          for (int k = 0; k < out_width; ++k) {
            float expected = 0.0f;
            for (int c = 0; c < kern_width; ++c) {
              for (int r = 0; r < kern_height; ++r) {
                for (int p = 0; p < kern_depth; ++p) {
                  for (int id = 0; id < in_channels; ++id) {
                    if (p - off_p + i >= 0 && r - off_r + j >= 0 &&
                        c - off_c + k >= 0 && p - off_p + i < in_depth &&
                        r - off_r + j < in_rows && c - off_c + k < in_cols) {
                      expected += input(id, p - off_p + i, r - off_r + j,
                                        c - off_c + k, b) *
                                  kernel(od, id, p, r, c);
                    }
                  }
                }
              }
            }
            EigenApprox(result(od, i, j, k, b), expected);
          }
        }
      }
    }
  }
}
960
// Same as BatchedCuboid, but with row-major tensors: all dimension orders are
// reversed relative to the column-major variant.
TEST(EigenSpatialConvolutionsTest, BatchedCuboidRowMajor) {
  const int batches = 2;
  const int in_channels = 10;
  const int in_depth = 5;
  const int in_rows = 8;
  const int in_cols = 7;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 4;
  const int kern_height = 4;

  const int out_depth = in_depth;
  const int out_height = in_rows;
  const int out_width = in_cols;

  Tensor<float, 5, RowMajor> input(batches, in_cols, in_rows, in_depth,
                                   in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 5, RowMajor> result(batches, out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result = CuboidConvolution(input, kernel);

  EXPECT_EQ(result.dimension(0), batches);
  EXPECT_EQ(result.dimension(1), out_width);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_depth);
  EXPECT_EQ(result.dimension(4), kern_filters);

  // Amount of implicit zero-padding at the lower end of each spatial
  // dimension (SAME padding).
  const int off_p = (kern_depth - 1) / 2;
  const int off_r = (kern_height - 1) / 2;
  const int off_c = (kern_width - 1) / 2;

  // Cross-check every output coefficient against a naive convolution.
  for (int b = 0; b < batches; ++b) {
    for (int f = 0; f < kern_filters; ++f) {
      for (int out_d = 0; out_d < out_depth; ++out_d) {
        for (int out_r = 0; out_r < out_height; ++out_r) {
          for (int out_c = 0; out_c < out_width; ++out_c) {
            float expected = 0.0f;
            for (int kc = 0; kc < kern_width; ++kc) {
              for (int kr = 0; kr < kern_height; ++kr) {
                for (int kp = 0; kp < kern_depth; ++kp) {
                  const int in_p = kp - off_p + out_d;
                  const int in_r = kr - off_r + out_r;
                  const int in_c = kc - off_c + out_c;
                  if (in_p < 0 || in_p >= in_depth || in_r < 0 ||
                      in_r >= in_rows || in_c < 0 || in_c >= in_cols) {
                    continue;  // Tap falls into the zero-padded border.
                  }
                  for (int ch = 0; ch < in_channels; ++ch) {
                    expected += input(b, in_c, in_r, in_p, ch) *
                                kernel(kc, kr, kp, ch, f);
                  }
                }
              }
            }
            EigenApprox(result(b, out_c, out_r, out_d, f), expected);
          }
        }
      }
    }
  }
}
1027
// Verifies strided cuboid convolution with VALID padding (no zero-padding, so
// every kernel tap reads a real input coefficient).
TEST(EigenSpatialConvolutionsTest, StridedValidCuboid) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  // Expected output extents for VALID padding with stride 2.
  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 2;

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  const int stride = 2;
  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  // Cross-check every output coefficient against a naive convolution.
  for (int f = 0; f < kern_filters; ++f) {
    for (int out_d = 0; out_d < out_depth; ++out_d) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            for (int kr = 0; kr < kern_height; ++kr) {
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp + stride * out_d;
                const int in_r = kr + stride * out_r;
                const int in_c = kc + stride * out_c;
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected += input(ch, in_p, in_r, in_c) *
                              kernel(f, ch, kp, kr, kc);
                }
              }
            }
          }
          EigenApprox(result(f, out_d, out_r, out_c), expected);
        }
      }
    }
  }
}
1082
// Same as StridedValidCuboid, but with row-major tensors: all dimension
// orders are reversed relative to the column-major variant.
TEST(EigenSpatialConvolutionsTest, StridedValidCuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  // Expected output extents for VALID padding with stride 2.
  const int out_depth = 3;
  const int out_height = 3;
  const int out_width = 2;

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  const int stride = 2;
  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID);

  EXPECT_EQ(result.dimension(0), out_width);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(3), kern_filters);

  // Cross-check every output coefficient against a naive convolution.
  for (int f = 0; f < kern_filters; ++f) {
    for (int out_d = 0; out_d < out_depth; ++out_d) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            for (int kr = 0; kr < kern_height; ++kr) {
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp + stride * out_d;
                const int in_r = kr + stride * out_r;
                const int in_c = kc + stride * out_c;
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected += input(in_c, in_r, in_p, ch) *
                              kernel(kc, kr, kp, ch, f);
                }
              }
            }
          }
          EigenApprox(result(out_c, out_r, out_d, f), expected);
        }
      }
    }
  }
}
1138
// Verifies strided cuboid convolution with SAME padding: the output extent is
// ceil(in / stride) and out-of-range kernel taps read implicit zeros.
TEST(EigenSpatialConvolutionsTest, StridedSameCuboid) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int stride = 2;
  const int out_depth = ceil_div(in_depth, stride);
  const int out_height = ceil_div(in_rows, stride);
  const int out_width = ceil_div(in_cols, stride);

  Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols);
  Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height,
                          kern_width);
  Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME);

  EXPECT_EQ(result.dimension(0), kern_filters);
  EXPECT_EQ(result.dimension(1), out_depth);
  EXPECT_EQ(result.dimension(2), out_height);
  EXPECT_EQ(result.dimension(3), out_width);

  // Total amount of zero-padding required in every spatial dimension.
  const int pad_p = (out_depth - 1) * stride - in_depth + kern_depth;
  const int pad_r = (out_height - 1) * stride - in_rows + kern_height;
  const int pad_c = (out_width - 1) * stride - in_cols + kern_width;

  // Number of pixels the input is extended with at the lower end in every
  // dimension.
  const int dp = pad_p / 2;
  const int dr = pad_r / 2;
  const int dc = pad_c / 2;

  // Cross-check every output coefficient against a naive convolution.
  for (int f = 0; f < kern_filters; ++f) {
    for (int out_d = 0; out_d < out_depth; ++out_d) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            for (int kr = 0; kr < kern_height; ++kr) {
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp - dp + out_d * stride;
                const int in_r = kr - dr + out_r * stride;
                const int in_c = kc - dc + out_c * stride;
                if (in_p < 0 || in_p >= in_depth || in_r < 0 ||
                    in_r >= in_rows || in_c < 0 || in_c >= in_cols) {
                  continue;  // Tap falls into the zero-padded border.
                }
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected +=
                      input(ch, in_p, in_r, in_c) * kernel(f, ch, kp, kr, kc);
                }
              }
            }
          }
          EigenApprox(result(f, out_d, out_r, out_c), expected);
        }
      }
    }
  }
}
1208
// Same as StridedSameCuboid, but with row-major tensors: all dimension orders
// are reversed relative to the column-major variant.
TEST(EigenSpatialConvolutionsTest, StridedSameCuboidRowMajor) {
  const int in_channels = 10;
  const int in_depth = 8;
  const int in_rows = 7;
  const int in_cols = 5;

  const int kern_filters = 7;
  const int kern_depth = 3;
  const int kern_width = 3;
  const int kern_height = 3;

  const int stride = 2;
  const int out_depth = ceil_div(in_depth, stride);
  const int out_height = ceil_div(in_rows, stride);
  const int out_width = ceil_div(in_cols, stride);

  Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels);
  Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth,
                                    in_channels, kern_filters);
  Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth,
                                    kern_filters);
  input = input.constant(11.0f) + input.random();
  kernel = kernel.constant(2.0f) + kernel.random();
  result.setRandom();

  result =
      CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME);

  EXPECT_EQ(result.dimension(0), out_width);
  EXPECT_EQ(result.dimension(1), out_height);
  EXPECT_EQ(result.dimension(2), out_depth);
  EXPECT_EQ(result.dimension(3), kern_filters);

  // Total amount of zero-padding required in every spatial dimension.
  const int pad_p = (out_depth - 1) * stride - in_depth + kern_depth;
  const int pad_r = (out_height - 1) * stride - in_rows + kern_height;
  const int pad_c = (out_width - 1) * stride - in_cols + kern_width;

  // Number of pixels the input is extended with at the lower end in every
  // dimension.
  const int dp = pad_p / 2;
  const int dr = pad_r / 2;
  const int dc = pad_c / 2;

  // Cross-check every output coefficient against a naive convolution.
  for (int f = 0; f < kern_filters; ++f) {
    for (int out_d = 0; out_d < out_depth; ++out_d) {
      for (int out_r = 0; out_r < out_height; ++out_r) {
        for (int out_c = 0; out_c < out_width; ++out_c) {
          float expected = 0.0f;
          for (int kc = 0; kc < kern_width; ++kc) {
            for (int kr = 0; kr < kern_height; ++kr) {
              for (int kp = 0; kp < kern_depth; ++kp) {
                const int in_p = kp - dp + out_d * stride;
                const int in_r = kr - dr + out_r * stride;
                const int in_c = kc - dc + out_c * stride;
                if (in_p < 0 || in_p >= in_depth || in_r < 0 ||
                    in_r >= in_rows || in_c < 0 || in_c >= in_cols) {
                  continue;  // Tap falls into the zero-padded border.
                }
                for (int ch = 0; ch < in_channels; ++ch) {
                  expected +=
                      input(in_c, in_r, in_p, ch) * kernel(kc, kr, kp, ch, f);
                }
              }
            }
          }
          EigenApprox(result(out_c, out_r, out_d, f), expected);
        }
      }
    }
  }
}
1279
// A test case discovered when testing backward spatial convolution where the
// special tensor contraction mapper for spatial convolution contains a bug.
TEST(EigenSpatialConvolutionsTest, SpatialConvContractionMapper) {
  // We have a 3x4 input image with 2x2 patch and stride of 2.
  // The output has size 1x2.
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Tensor<float, 4> out(1, 1, 2, 1);
  Tensor<float, 4> kern(1, 1, 2, 2);
  // Fill the kernel and the output gradient with 1, 2, 3, ... so that the
  // hand-derived expected values asserted at the end are easy to follow.
  for (int i = 0; i < kern.size(); ++i) {
    kern.coeffRef(i) = static_cast<float>(i) + 1;
  }
  for (int i = 0; i < out.size(); ++i) {
    out.coeffRef(i) = static_cast<float>(i) + 1;
  }

  // Inflation strides for the "indirect" formulation below: inflating `out`
  // by the forward stride (2 in both spatial dims) and then padding it should
  // be equivalent to the strided/padded patch extraction done directly.
  DSizes<ptrdiff_t, 4> strides;
  strides[0] = 1;
  strides[1] = 2;
  strides[2] = 2;
  strides[3] = 1;

  // Explicit zero-padding applied after inflation (spatial dims only).
  array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
  paddings[0] = std::make_pair(0, 0);
  paddings[1] = std::make_pair(1, 2);
  paddings[2] = std::make_pair(1, 1);
  paddings[3] = std::make_pair(0, 0);

  // Shape of the extracted patches viewed as a rank-3 tensor for contraction.
  DSizes<ptrdiff_t, 3> out_dim;
  out_dim[0] = 1;
  out_dim[1] = 4;
  out_dim[2] = 12;

  // Backprop-to-input convolves with the spatially flipped kernel, so the two
  // spatial dimensions of `kern` are reversed.
  array<bool, 4> kernel_reverse;
  kernel_reverse[0] = false;
  kernel_reverse[1] = false;
  kernel_reverse[2] = true;
  kernel_reverse[3] = true;

  // Reversed kernel reshaped to 2D (plus a leading unit dim) for contraction.
  DSizes<ptrdiff_t, 3> k_dims;
  k_dims[0] = 1;
  k_dims[1] = 1;
  k_dims[2] = 4;

  // Contract kernel dim 0 with patch dim 0, and kernel dim 2 with patch dim 1
  // (the flattened patch-size dimension).
  array<DimPair, 2> contract_dims;
  contract_dims[0] = DimPair(0, 0);
  contract_dims[1] = DimPair(2, 1);

  // Shape of the reconstructed input gradient.
  DSizes<ptrdiff_t, 4> in_dim;
  in_dim[0] = 1;
  in_dim[1] = 3;
  in_dim[2] = 4;
  in_dim[3] = 1;

  DSizes<ptrdiff_t, 2> in_dbg_dim;
  in_dbg_dim[0] = 3;
  in_dbg_dim[1] = 4;

  DSizes<ptrdiff_t, 2> out_dbg_dim;
  out_dbg_dim[0] = 4;
  out_dbg_dim[1] = 12;

  // This is the formula for computing the backward prop for input with a
  // spatial convolution. It exercises the specialized contraction mapper
  // through extract_image_patches with non-unit strides and padding.
  Tensor<float, 4> direct =
      kern.reverse(kernel_reverse)
          .reshape(k_dims)
          .contract(
              out.extract_image_patches(2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0)
                  .reshape(out_dim),
              contract_dims)
          .reshape(in_dim);

  // Equivalent computation that avoids the specialized mapper by explicitly
  // inflating and padding the output gradient before patch extraction. Both
  // paths must agree coefficient-for-coefficient.
  Tensor<float, 4> indirect =
      kern.reverse(kernel_reverse)
          .reshape(k_dims)
          .contract(
              out.inflate(strides)
                  .pad(paddings)
                  .extract_image_patches(2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
                  .reshape(out_dim),
              contract_dims)
          .reshape(in_dim);

  eigen_assert(dimensions_match(direct.dimensions(), indirect.dimensions()));
  for (size_t i = 0; i < direct.dimensions().TotalSize(); ++i) {
    EigenApprox(direct.data()[i], indirect.data()[i]);
  }
  // Hand-computed expected input gradients for the 1,2,3,... fill above.
  EigenApprox(1.0f, direct(0, 0, 0, 0));
  EigenApprox(3.0f, direct(0, 0, 1, 0));
  EigenApprox(2.0f, direct(0, 0, 2, 0));
  EigenApprox(6.0f, direct(0, 0, 3, 0));

  EigenApprox(2.0f, direct(0, 1, 0, 0));
  EigenApprox(4.0f, direct(0, 1, 1, 0));
  EigenApprox(4.0f, direct(0, 1, 2, 0));
  EigenApprox(8.0f, direct(0, 1, 3, 0));
}
1377
PackRhsHelper(int iters,int input_batches,int input_cols,int input_rows,int input_depth,int filter_count,int filter_cols,int filter_rows,int col_strides,int row_strides,Index block_rows,Index block_cols)1378 static void PackRhsHelper(int iters,
1379 /* Input dimensions: */
1380 int input_batches, int input_cols, int input_rows,
1381 int input_depth,
1382 /* Filter (kernel) dimensions: */
1383 int filter_count, int filter_cols, int filter_rows,
1384 /* Input strides: */
1385 int col_strides, int row_strides,
1386 /* Block dimensions: */
1387 Index block_rows, Index block_cols) {
1388 // Set random seed for benchmark repeatability.
1389 srand(12345);
1390
1391 tensorflow::testing::UseRealTime();
1392 tensorflow::testing::StopTiming();
1393
1394 using Dimensions = Eigen::DSizes<Eigen::Index, 4>;
1395
1396 // Default Eigen::Tensor layout is column major, so we configure dimensions
1397 // starting from the inner most (channels aka depth in this case).
1398 Dimensions input_dims(input_depth, input_rows, input_cols, input_batches);
1399
1400 using Traits = typename Eigen::internal::gebp_traits<float, float>;
1401 static const int packet_size = Eigen::internal::packet_traits<float>::size;
1402
1403 // Reshape dimensions.
1404 using NewDimension = Eigen::DSizes<Index, 2>;
1405
1406 // Contraction dimensions.
1407 using nocontract_t = Eigen::array<Eigen::Index, 1>;
1408 using contract_t = Eigen::array<Eigen::Index, 1>;
1409
1410 // Input to the TensorImagePatchOp. It is the tensorflow TTypes<float>::Tensor
1411 // with ColMajor layout, instead of RowMajor. But that doesn't make any
1412 // difference, because TensorContraction swaps LHS with RHS for row major
1413 // inputs, and contraction mapper always works with column major data.
1414 using ArgType = TensorMap<Tensor<float, 4>, Eigen::Aligned>;
1415
1416 using Evaluator = TensorEvaluator<
1417 const TensorReshapingOp<
1418 NewDimension, const TensorImagePatchOp<Dynamic, Dynamic, ArgType>>,
1419 Eigen::DefaultDevice>;
1420
1421 using InputMapper = Eigen::internal::TensorContractionInputMapper<
1422 float, Index, Eigen::internal::Rhs, Evaluator, //
1423 nocontract_t, contract_t, //
1424 packet_size, //
1425 /*inner_dim_contiguous*/ true, //
1426 /*inner_dim_reordered*/ false, //
1427 /*Alignment*/ 0>;
1428
1429 using SubMapper = Eigen::internal::TensorContractionSubMapper<
1430 float, Index, Eigen::internal::Rhs, Evaluator, //
1431 nocontract_t, contract_t, //
1432 packet_size, //
1433 /*inner_dim_contiguous*/ true, //
1434 /*inner_dim_reordered*/ false, //
1435 /*Alignment*/ 0>;
1436
1437 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
1438 using PackRhsImpl =
1439 Eigen::internal::gemm_pack_colmajor_block<float, Eigen::Index, SubMapper,
1440 ColMajor>;
1441 #else
1442 using PackRhsImpl =
1443 Eigen::internal::gemm_pack_rhs<float, Eigen::Index, SubMapper, //
1444 Traits::nr, //
1445 ColMajor, //
1446 /*Conjugate*/ false, //
1447 /*PanelMode*/ false>;
1448 #endif
1449
1450 Eigen::DefaultDevice device;
1451
1452 // Actual contract dimensions are not important.
1453 const Eigen::Index not_important = -1234;
1454 nocontract_t nocontract_dim = {not_important};
1455 contract_t contract_dim = {not_important};
1456
1457 // We use tensor of the same dimensions to store packed data.
1458 Tensor<float, 4> packed(input_dims);
1459
1460 // We generate multiple input tensors, around 512mb in total size to measure
1461 // realistic workload when input data in not in L1-L3 cache.
1462 size_t input_bytes = input_dims.TotalSize() * sizeof(float);
1463 size_t mem_size_bytes = 1024 * 1024 * 512;
1464 size_t num_inputs =
1465 std::max(static_cast<size_t>(1), mem_size_bytes / input_bytes);
1466
1467 std::vector<Tensor<float, 4>> inputs;
1468 std::vector<Evaluator> evaluators;
1469 std::vector<InputMapper> input_mappers;
1470
1471 for (int i = 0; i < num_inputs; ++i) {
1472 inputs.emplace_back(input_dims);
1473 inputs[i].setRandom();
1474
1475 ArgType tensor_map(inputs[i].data(), input_dims);
1476
1477 // 1. Extract image patches from input tensor. All strides are `1`.
1478 const auto image_patch_op = TensorImagePatchOp<Dynamic, Dynamic, ArgType>(
1479 tensor_map, //
1480 filter_rows, filter_cols, //
1481 row_strides, col_strides, //
1482 /*in_row_strides=*/1, /*in_col_strides=*/1, //
1483 /*row_inflate_strides=*/1, /*col_inflate_strides=*/1, //
1484 Eigen::PADDING_SAME, /*padding_value=*/0.0);
1485
1486 // 2. Reshape extracted patches into "virtual" 2d tensor.
1487 // NOTE: This is valid for PADDING_SAME only.
1488 Index output_rows = input_rows / row_strides;
1489 Index output_cols = input_cols / col_strides;
1490 NewDimension reshape_dims;
1491 reshape_dims[0] = input_depth * filter_rows * filter_cols; // patch size
1492 reshape_dims[1] = output_rows * output_cols * input_batches; // num_patches
1493
1494 const auto reshape_op =
1495 TensorReshapingOp<NewDimension, decltype(image_patch_op)>(
1496 image_patch_op, reshape_dims);
1497
1498 evaluators.emplace_back(reshape_op, device);
1499
1500 input_mappers.emplace_back(evaluators[i], nocontract_dim, nocontract_dim,
1501 contract_dim, contract_dim);
1502 }
1503
1504 // We read properties of extracted image patches directly from evaluator.
1505 const Index patch_depth = evaluators[0].impl().dimensions()[0];
1506 const Index patch_rows = evaluators[0].impl().dimensions()[1];
1507 const Index patch_cols = evaluators[0].impl().dimensions()[2];
1508
1509 // Number of patches is the same as the maximum column available through the
1510 // InputMapper (SubMapper).
1511 const Index num_patches = evaluators[0].impl().dimensions()[3];
1512
1513 // The size of a single patch, it's the same as the maximum depth available
1514 // through the InputMapper (SubMapper).
1515 const Index patch_size = patch_depth * patch_rows * patch_cols;
1516
1517 PackRhsImpl pack_rhs;
1518
1519 const Index packed_total_size = input_dims.TotalSize();
1520
1521 tensorflow::testing::StartTiming();
1522 for (int i = 0; i < iters; ++i) {
1523 int input_idx =
1524 num_inputs == 1 ? 1 : internal::random<int>(0, num_inputs - 1);
1525
1526 // Depth offset must be a multiple of 8 (float packet size with AVX2).
1527 Index depth_offset =
1528 (patch_size > block_rows)
1529 ? (internal::random<Index>(0, patch_size - 10) / 8) * 8
1530 : 0;
1531 Index col_offset = internal::random<Index>(0, num_patches - 10);
1532
1533 Index depth = std::min(block_rows, patch_size - depth_offset);
1534 Index cols = std::min(block_cols, num_patches - col_offset);
1535
1536 // Write packed data to random memory location to emulate cold caches.
1537 Index packed_size = depth * cols;
1538 Index packed_offset =
1539 internal::random<Index>(0, packed_total_size - packed_size - 1);
1540
1541 SubMapper sub_mapper =
1542 input_mappers[input_idx].getSubMapper(depth_offset, col_offset);
1543 pack_rhs(packed.data() + packed_offset, sub_mapper, depth, cols);
1544 }
1545 tensorflow::testing::StopTiming();
1546 tensorflow::testing::SetLabel(
1547 absl::StrCat("patch: ", patch_rows, "x", patch_cols, " D", patch_depth,
1548 "; num_patches=", num_patches, " patch_size=", patch_size,
1549 " num_inputs=", num_inputs));
1550 }
1551
PackLhsHelper(int iters,int input_depth,int filter_count,int filter_cols,int filter_rows,Index block_rows,Index block_cols)1552 static void PackLhsHelper(int iters,
1553 /* Input dimensions: */
1554 int input_depth,
1555 /* Filter (kernel) dimensions: */
1556 int filter_count, int filter_cols, int filter_rows,
1557 /* Block dimensions: */
1558 Index block_rows, Index block_cols) {
1559 // Set random seed for benchmark repeatability.
1560 srand(12345);
1561
1562 eigen_assert(block_rows <= filter_count);
1563 eigen_assert(block_cols <= input_depth * filter_rows * filter_cols);
1564
1565 tensorflow::testing::UseRealTime();
1566 tensorflow::testing::StopTiming();
1567
1568 using Dimensions = Eigen::DSizes<Eigen::Index, 4>;
1569
1570 // Default Eigen::Tensor layout is column major, so we configure dimensions
1571 // starting from the inner most (`filter count` aka `kernel filers`).
1572 Dimensions filter_dims(filter_count, filter_rows, filter_cols, input_depth);
1573
1574 static const int packet_size = Eigen::internal::packet_traits<float>::size;
1575
1576 // We are going to reshape filter into 2D tensor.
1577 using NewDimension = Eigen::DSizes<Index, 2>;
1578
1579 // Contraction dimensions.
1580 using nocontract_t = Eigen::array<Eigen::Index, 1>;
1581 using contract_t = Eigen::array<Eigen::Index, 1>;
1582
1583 // Input to the ReshapeOp. It is the tensorflow TTypes<float>::Tensor
1584 // with ColMajor layout, instead of RowMajor. But that doesn't make any
1585 // difference, because TensorContraction swaps LHS with RHS for row major
1586 // inputs, and contraction mapper always works with column major data.
1587 using ArgType = TensorMap<Tensor<float, 4>, Eigen::Aligned>;
1588
1589 using Evaluator =
1590 TensorEvaluator<const TensorReshapingOp<NewDimension, ArgType>,
1591 Eigen::DefaultDevice>;
1592
1593 using InputMapper = Eigen::internal::TensorContractionInputMapper<
1594 float, Index, Eigen::internal::Lhs, Evaluator, //
1595 nocontract_t, contract_t, //
1596 packet_size, //
1597 /*inner_dim_contiguous*/ true, //
1598 /*inner_dim_reordered*/ false, //
1599 /*Alignment*/ 0>;
1600
1601 using SubMapper = Eigen::internal::TensorContractionSubMapper<
1602 float, Index, Eigen::internal::Lhs, Evaluator, //
1603 nocontract_t, contract_t, //
1604 packet_size, //
1605 /*inner_dim_contiguous*/ true, //
1606 /*inner_dim_reordered*/ false, //
1607 /*Alignment*/ 0>;
1608
1609 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
1610 using PackLhsImpl =
1611 Eigen::internal::gemm_pack_colmajor_block<float, Eigen::Index, SubMapper,
1612 ColMajor>;
1613 #else
1614 using Traits = typename Eigen::internal::gebp_traits<float, float>;
1615 using PackLhsImpl =
1616 Eigen::internal::gemm_pack_lhs<float, Eigen::Index, SubMapper, //
1617 Traits::mr, //
1618 Traits::LhsProgress, //
1619 typename Traits::LhsPacket4Packing, //
1620 ColMajor>;
1621 #endif
1622
1623 Eigen::DefaultDevice device;
1624
1625 // We will reshape kernel into 2D tensor.
1626 NewDimension reshape_dims;
1627 reshape_dims[0] = filter_count;
1628 reshape_dims[1] = input_depth * filter_rows * filter_cols;
1629
1630 // We are going to contract along the 'in_depth * filter_rows * filter_cols`.
1631 nocontract_t nocontract_dim = {0};
1632 contract_t contract_dim = {1};
1633
1634 // These values computed using the algorithm in TensorContraction.h, with
1635 // 'nocontract_dim' and 'contract_dim' values specified above.
1636 nocontract_t nocontract_strides = {1};
1637 contract_t contract_strides = {filter_count};
1638 nocontract_t i_strides = {1};
1639 contract_t k_strides = {1};
1640
1641 // We use tensor of the same dimensions to store packed data.
1642 Tensor<float, 4> packed(filter_dims);
1643
1644 // We generate multiple filter tensors, around 512mb in total size to measure
1645 // realistic workload when input data in not in L1-L3 cache.
1646 size_t input_bytes = filter_dims.TotalSize() * sizeof(float);
1647 size_t mem_size_bytes = 1024 * 1024 * 512;
1648 size_t num_filters =
1649 std::max(static_cast<size_t>(1), mem_size_bytes / input_bytes);
1650
1651 std::vector<Tensor<float, 4>> filters;
1652 std::vector<Evaluator> evaluators;
1653 std::vector<InputMapper> input_mappers;
1654
1655 for (int i = 0; i < num_filters; ++i) {
1656 filters.emplace_back(filter_dims);
1657 filters[i].setRandom();
1658
1659 ArgType tensor_map(filters[i].data(), filter_dims);
1660
1661 const auto reshape_op =
1662 TensorReshapingOp<NewDimension, ArgType>(tensor_map, reshape_dims);
1663
1664 evaluators.emplace_back(reshape_op, device);
1665
1666 input_mappers.emplace_back(evaluators[i], nocontract_strides, i_strides,
1667 contract_strides, k_strides);
1668 }
1669
1670 PackLhsImpl pack_lhs;
1671
1672 const Index packed_total_size = filter_dims.TotalSize();
1673
1674 // Round up row/col/memory offsets to make them multiple of packet size.
1675 const auto round_up = [](const Index idx) {
1676 return (idx / packet_size) * packet_size;
1677 };
1678
1679 // Block rows is in the [0, filter_count) range.
1680 // Block cols is in the [0, filter_rows * filter_cols * input_depth) range.
1681
1682 const Index max_row = filter_count;
1683 const Index max_col = filter_rows * filter_cols * input_depth;
1684
1685 tensorflow::testing::StartTiming();
1686 for (int i = 0; i < iters; ++i) {
1687 int filter_idx =
1688 num_filters == 1 ? 1 : internal::random<int>(0, num_filters - 1);
1689
1690 Index row_offset = round_up(internal::random<Index>(0, max_row - 10));
1691 Index col_offset = round_up(internal::random<Index>(0, max_col - 10));
1692
1693 Index rows = std::min(block_rows, max_row - row_offset);
1694 Index cols = std::min(block_cols, max_col - col_offset);
1695
1696 // Write packed data to random memory location to emulate cold caches.
1697 Index packed_offset = round_up(
1698 internal::random<Index>(0, packed_total_size - rows * cols - 1));
1699
1700 SubMapper sub_mapper =
1701 input_mappers[filter_idx].getSubMapper(row_offset, col_offset);
1702
1703 // NOTE: Eigen gemm_pack_lhs accepts contraction depth (k-th dimension) as a
1704 // first argument (aka block cols). MKL-DNN pack is generic for lhs and rhs
1705 // and accepts block rows and cols in the same order for lhs and rhs.
1706 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
1707 pack_lhs(packed.data() + packed_offset, sub_mapper, rows, cols);
1708 #else
1709 pack_lhs(packed.data() + packed_offset, sub_mapper, cols, rows);
1710 #endif
1711 }
1712 tensorflow::testing::StopTiming();
1713 tensorflow::testing::SetLabel(absl::StrCat(
1714 "filter: count=", filter_count, " dims=", filter_rows, "x", filter_cols,
1715 "; input: depth=", input_depth, "; num_filers=", num_filters));
1716 }
1717
1718 // -------------------------------------------------------------------------- //
1719 // Pack RHS
1720 //
1721 // Macro argument names:
1722 // N: batch size
1723 // H: height
1724 // W: width
1725 // C: input channels
1726 // FC: filter channels
1727 // FH: filter height
1728 // FW: filter width
1729 // SH: stride in height dimensions
1730 // SW: stride in width dimensions
1731 // BR: block rows
1732 // BC: block cols
1733
1734 #define BM_CONCAT(a, b) a##b
1735
1736 #define BM_RHS_NAME(prefix, N, H, W, C, FC, FH, FW, SH, SW, BR, BC) \
1737 BM_CONCAT(BM_##prefix##_##N##_##H##x##W##_IC##C##_FC##FC##_##FH##x##FW, \
1738 _s##SH##x##SW##_B##BR##x##BC)
1739
1740 #define BM_PackRhs(N, H, W, C, FC, FH, FW, SH, SW, BR, BC) \
1741 static void BM_RHS_NAME(PackRhs, N, H, W, C, FC, FH, FW, SH, SW, BR, \
1742 BC)(int iters) { \
1743 PackRhsHelper(iters, N, H, W, C, FC, FH, FW, SH, SW, BR, BC); \
1744 } \
1745 BENCHMARK(BM_RHS_NAME(PackRhs, N, H, W, C, FC, FH, FW, SH, SW, BR, BC))
1746
1747 // Number of input channel (input depth) it equal to the number of patch
1748 // channels (patch depth).
1749
1750 // NOTE: This is the most common case in Tensorflow models.
1751 // Fast path: input channel dimension is the multiple of the packet size.
1752 BM_PackRhs(/*batch*/ 32, //
1753 /*image*/ 64, 64, //
1754 /*channels*/ 32, //
1755 /*num_filters*/ 64, //
1756 /*filter*/ 5, 5, //
1757 /*stride*/ 1, 1, //
1758 /*block*/ 256, 56);
1759
1760 BM_PackRhs(/*batch*/ 32, //
1761 /*image*/ 64, 64, //
1762 /*channels*/ 32, //
1763 /*num_filters*/ 64, //
1764 /*filter*/ 5, 5, //
1765 /*stride*/ 2, 2, //
1766 /*block*/ 256, 56);
1767
1768 // Slow path: input channel dimension is not the multiple of the packet size.
1769 BM_PackRhs(/*batch*/ 32, //
1770 /*image*/ 64, 64, //
1771 /*channels*/ 30, //
1772 /*num_filters*/ 64, //
1773 /*filter*/ 5, 5, //
1774 /*stride*/ 1, 1, //
1775 /*block*/ 256, 56);
1776
1777 BM_PackRhs(/*batch*/ 32, //
1778 /*image*/ 64, 64, //
1779 /*channels*/ 30, //
1780 /*num_filters*/ 64, //
1781 /*filter*/ 5, 5, //
1782 /*stride*/ 2, 2, //
1783 /*block*/ 256, 56);
1784
1785 // Slow path with input channel dimension smaller than the packet size.
1786 BM_PackRhs(/*batch*/ 32, //
1787 /*image*/ 256, 256, //
1788 /*channels*/ 4, //
1789 /*num_filters*/ 16, //
1790 /*filter*/ 8, 8, //
1791 /*stride*/ 1, 1, //
1792 /*block*/ 256, 56);
1793
1794 BM_PackRhs(/*batch*/ 32, //
1795 /*image*/ 256, 256, //
1796 /*channels*/ 4, //
1797 /*num_filters*/ 16, //
1798 /*filter*/ 8, 8, //
1799 /*stride*/ 2, 4, //
1800 /*block*/ 256, 56);
1801
1802 // Short and wide block with small input channel dimension.
1803 BM_PackRhs(/*batch*/ 32, //
1804 /*image*/ 64, 64, //
1805 /*channels*/ 4, //
1806 /*num_filters*/ 16, //
1807 /*filter*/ 3, 3, //
1808 /*stride*/ 1, 1, //
1809 /*block*/ 36, 432);
1810
1811 BM_PackRhs(/*batch*/ 32, //
1812 /*image*/ 64, 64, //
1813 /*channels*/ 4, //
1814 /*num_filters*/ 16, //
1815 /*filter*/ 3, 3, //
1816 /*stride*/ 2, 2, //
1817 /*block*/ 36, 432);
1818
1819 // -------------------------------------------------------------------------- //
1820 // Pack LHS
1821 //
1822 // Macro argument names:
1823 // C: input channels
1824 // FC: filter channels
1825 // FH: filter height
1826 // FW: filter width
1827 // BR: block rows
1828 // BC: block cols
1829
1830 #define BM_LHS_NAME(prefix, C, FC, FH, FW, BR, BC) \
1831 BM_CONCAT(BM_##prefix##_##C##_FC##FC##_##FH##x##FW, _B##BR##x##BC)
1832
1833 #define BM_PackLhs(C, FC, FH, FW, BR, BC) \
1834 static void BM_LHS_NAME(PackLhs, C, FC, FH, FW, BR, BC)(int iters) { \
1835 PackLhsHelper(iters, C, FC, FH, FW, BR, BC); \
1836 } \
1837 BENCHMARK(BM_LHS_NAME(PackLhs, C, FC, FH, FW, BR, BC))
1838
1839 // Number of input channel (input depth) it equal to the number of patch
1840 // channels (patch depth).
1841
1842 BM_PackLhs(/*input channels*/ 128, //
1843 /*filter channels*/ 1024, //
1844 /*filter dims*/ 3, 3, //
1845 /*block*/ 256, 56);
1846
1847 BM_PackLhs(/*input channels*/ 128, //
1848 /*filter channels*/ 1024, //
1849 /*filter dims*/ 3, 3, //
1850 /*block*/ 56, 256);
1851 } // namespace Eigen
1852