/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string>
#include <vector>

#include "absl/algorithm/container.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/nn_ops.h"
#include "tensorflow/cc/ops/nn_ops_internal.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/conv_ops_gpu.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/tensor_float_32_utils.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/protobuf/rewriter_config.pb.h"
#include "tensorflow/core/public/session.h"

namespace tensorflow {

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

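// Test peer that exposes ConvParameters::ShouldIncludeWinogradNonfusedAlgoPreCudnn7
// to the test below.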
struct ConvParametersPeer {
  template <typename T>
  bool ShouldIncludeWinogradNonfusedAlgoPreCudnn7() {
    return params.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<T>();
  }

  ConvParameters params;
};

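// Checks the pre-cuDNN7 size heuristic: the Winograd non-fused algorithm is
// included for a small convolution but excluded for a large one (presumably
// because its scratch allocation would be too large).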
TEST(ConvParameters, WinogradNonfusedAlgoSize) {
  ConvParametersPeer conv_params_small = {{
      1,            // batch
      32,           // in_depths
      {{300,        // in_rows
        300}},      // in_cols
      FORMAT_NCHW,  // compute_data_format
      128,          // out_depths
      {{3,          // filter_rows
        3}},        // filter_cols
      {{1,          // dilation_rows
        1}},        // dilation_cols
      {{1,          // stride_rows
        1}},        // stride_cols
      {{0,          // padding_rows
        0}},        // padding_cols
      DT_FLOAT,     // tensor datatype
      0,            // device_id
  }};
  EXPECT_TRUE(
      conv_params_small.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<float>());

  ConvParametersPeer conv_params_large = {{
      1,            // batch
      128,          // in_depths
      {{300,        // in_rows
        300}},      // in_cols
      FORMAT_NCHW,  // compute_data_format
      768,          // out_depths
      {{3,          // filter_rows
        3}},        // filter_cols
      {{1,          // dilation_rows
        1}},        // dilation_cols
      {{1,          // stride_rows
        1}},        // stride_cols
      {{0,          // padding_rows
        0}},        // padding_cols
      DT_FLOAT,     // tensor datatype
      0,            // device_id
  }};
  EXPECT_FALSE(
      conv_params_large.ShouldIncludeWinogradNonfusedAlgoPreCudnn7<float>());
}

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM


class FusedResizePadConvOpTest : public OpsTestBase {
 protected:
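  // Builds a FusedResizeAndPadConv2D op with an identity resize and zero
  // padding, runs it on a hand-constructed image and filter, and compares the
  // result against hand-computed expected values.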
  template <typename T>
  void HandwrittenConv(DataType dtype) {
    const int stride = 1;
    TF_EXPECT_OK(NodeDefBuilder("fused_resize_op", "FusedResizeAndPadConv2D")
                     .Input(FakeInput(dtype))
                     .Input(FakeInput(DT_INT32))
                     .Input(FakeInput(DT_INT32))
                     .Input(FakeInput(dtype))
                     .Attr("T", dtype)
                     .Attr("resize_align_corners", false)
                     .Attr("mode", "REFLECT")
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", "SAME")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 4;
    const int image_height = 3;
    const int image_batch_count = 1;
    // The image matrix is:
    // |  1 |  2 |  3 |  4 |
    // |  5 |  6 |  7 |  8 |
    // |  9 | 10 | 11 | 12 |
    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    test::FillValues<T>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});

    // The filter matrix is:
    // | 1 | 4 | 7 |
    // | 2 | 5 | 8 |
    // | 3 | 6 | 9 |
    const int filter_size = 3;
    const int filter_count = 1;
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    test::FillValues<T>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});

    const int resized_width = image_width;
    const int resized_height = image_height;

    const int top_padding = 0;
    const int bottom_padding = 0;
    const int left_padding = 0;
    const int right_padding = 0;

    AddInputFromArray<T>(image.shape(), image.flat<T>());
    AddInputFromArray<int32>(TensorShape({2}), {resized_height, resized_width});
    AddInputFromArray<int32>(
        TensorShape({4, 2}),
        {0, 0, top_padding, bottom_padding, left_padding, right_padding, 0, 0});
    AddInputFromArray<T>(filter.shape(), filter.flat<T>());
    TF_ASSERT_OK(RunOpKernel());

    // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
    // the input set to zero because we're using the 'SAME' padding mode.
    // The calculations behind the expected output are:
    // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
    // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
    // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
    // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
    // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
    // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
    // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
    // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
    // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
    // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
    // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
    // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
    // This means we should end up with this matrix:
    // | 105 | 150 | 183 |  95 |
    // | 235 | 312 | 357 | 178 |
    // | 187 | 234 | 261 | 121 |
    const int expected_width = image_width;
    const int expected_height = image_height * filter_count;
    Tensor expected(dtype, TensorShape({image_batch_count, expected_height,
                                        expected_width, filter_count}));
    test::FillValues<T>(
        &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<T>(expected, output, 1e-5);
  }

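  // Builds a reference ResizeBilinear->MirrorPad->Conv2D graph and an
  // equivalent single-op FusedResizeAndPadConv2D graph, runs both, and checks
  // that they produce close results.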
  template <typename T>
  void CompareFusedAndSeparate(int input_width, int input_height,
                               int input_depth, int resize_width,
                               int resize_height, int y_padding, int x_padding,
                               int filter_size, int filter_count,
                               bool resize_align_corners,
                               const string& pad_mode, int stride,
                               const string& padding, DataType dtype) {
    Scope root = tensorflow::Scope::NewRootScope();
    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

    Tensor input_data(DT_FLOAT,
                      TensorShape({1, input_height, input_width, input_depth}));
    test::FillIota<float>(&input_data, 1.0f);
    Output input =
        Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output casted_input = Cast(root.WithOpName("casted_input"), input, dtype);

    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
                                              input_depth, filter_count}));
    test::FillIota<float>(&filter_data, 1.0f);
    Output filter =
        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
    Output casted_filter =
        Cast(root.WithOpName("casted_filter"), filter, dtype);

    Output resize_size =
        Const(root.WithOpName("resize_size"), {resize_height, resize_width});
    Output resize =
        ResizeBilinear(root.WithOpName("resize"), input, resize_size,
                       ResizeBilinear::AlignCorners(resize_align_corners));
    // Bilinear resize only outputs float, so cast it to dtype to match the
    // input.
    Output casted_resize = Cast(root.WithOpName("cast"), resize, dtype);
    Output paddings =
        Const(root.WithOpName("paddings"),
              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
    Output mirror_pad = MirrorPad(root.WithOpName("mirror_pad"), casted_resize,
                                  paddings, pad_mode);
    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, casted_filter,
                         {1, stride, stride, 1}, padding);

    Output fused_conv = FusedResizeAndPadConv2D(
        root.WithOpName("fused_conv"), casted_input, resize_size, paddings,
        casted_filter, pad_mode, {1, stride, stride, 1}, padding,
        FusedResizeAndPadConv2D::ResizeAlignCorners(resize_align_corners));

    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));

    std::vector<Tensor> fused_tensors;
    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));

    test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
  }

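  // Same comparison as CompareFusedAndSeparate, but with no resize step:
  // MirrorPad->Conv2D against a single FusedPadConv2D op.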
  template <typename T>
  void CompareFusedPadOnlyAndSeparate(int input_width, int input_height,
                                      int input_depth, int y_padding,
                                      int x_padding, int filter_size,
                                      int filter_count, const string& pad_mode,
                                      int stride, const string& padding,
                                      DataType dtype) {
    Scope root = tensorflow::Scope::NewRootScope();
    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

    Tensor input_data(DT_FLOAT,
                      TensorShape({1, input_height, input_width, input_depth}));
    test::FillIota<float>(&input_data, 1.0f);
    Output input =
        Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output casted_input = Cast(root.WithOpName("casted_input"), input, dtype);

    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
                                              input_depth, filter_count}));
    test::FillIota<float>(&filter_data, 1.0f);
    Output filter =
        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
    Output casted_filter =
        Cast(root.WithOpName("casted_filter"), filter, dtype);

    Output paddings =
        Const(root.WithOpName("paddings"),
              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
    Output mirror_pad = MirrorPad(root.WithOpName("mirror_pad"), casted_input,
                                  paddings, pad_mode);
    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, casted_filter,
                         {1, stride, stride, 1}, padding);

    Output fused_conv = FusedPadConv2D(
        root.WithOpName("fused_conv"), casted_input, paddings, casted_filter,
        pad_mode, {1, stride, stride, 1}, padding);

    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));

    std::vector<Tensor> fused_tensors;
    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));

    test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
  }
};

TEST_F(FusedResizePadConvOpTest, HandwrittenConvHalf) {
  HandwrittenConv<Eigen::half>(DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, HandwrittenConvFloat) {
  HandwrittenConv<float>(DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, HandwrittenConvDouble) {
  HandwrittenConv<double>(DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeHalf) {
  CompareFusedAndSeparate<Eigen::half>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                       "REFLECT", 1, "SAME", DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeFloat) {
  CompareFusedAndSeparate<float>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeDouble) {
  CompareFusedAndSeparate<double>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                  "REFLECT", 1, "SAME", DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, ConvOnlyComparative) {
  CompareFusedAndSeparate<float>(10, 10, 3, 10, 10, 0, 0, 4, 4, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeOnlyComparative) {
  CompareFusedAndSeparate<float>(10, 10, 1, 20, 20, 0, 0, 1, 1, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndConvComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndConvStridedComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 2,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvValidComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
                                 "VALID", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlyComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlyWithChannelsComparative) {
  CompareFusedAndSeparate<float>(4, 4, 3, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 6, 6, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlySymmetricComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "SYMMETRIC",
                                 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparative) {
  CompareFusedAndSeparate<float>(4, 4, 3, 6, 6, 2, 2, 1, 1, false, "SYMMETRIC",
                                 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparativeLarge) {
  CompareFusedAndSeparate<float>(1000, 1000, 3, 1006, 1006, 2, 2, 1, 1, false,
                                 "SYMMETRIC", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeHalf) {
  CompareFusedPadOnlyAndSeparate<Eigen::half>(10, 10, 1, 0, 0, 1, 1, "REFLECT",
                                              1, "SAME", DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeFloat) {
  CompareFusedPadOnlyAndSeparate<float>(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeDouble) {
  CompareFusedPadOnlyAndSeparate<double>(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1,
                                         "SAME", DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, NoResizeConvOnlyComparative) {
  CompareFusedPadOnlyAndSeparate<float>(10, 10, 3, 0, 0, 4, 4, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 1, 2, 2, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyWithChannelsComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 3, 2, 2, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlySymmetricComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 1, 2, 2, 1, 1, "SYMMETRIC", 1,
                                        "SAME", DT_FLOAT);
}


class ConvOpTest : public OpsTestBase {
 protected:
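  // Runs a plain Conv2D over the same hand-constructed image and filter as
  // FusedResizePadConvOpTest::HandwrittenConv and checks the hand-computed
  // expected output.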
  void HandwrittenConv() {
    const int stride = 1;
    TF_EXPECT_OK(NodeDefBuilder("conv_op", "Conv2D")
                     .Input(FakeInput(DT_FLOAT))
                     .Input(FakeInput(DT_FLOAT))
                     .Attr("T", DT_FLOAT)
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", "SAME")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 4;
    const int image_height = 3;
    const int image_batch_count = 1;
    // The image matrix is:
    // |  1 |  2 |  3 |  4 |
    // |  5 |  6 |  7 |  8 |
    // |  9 | 10 | 11 | 12 |
    Tensor image(DT_FLOAT,
                 {image_batch_count, image_height, image_width, depth});
    test::FillValues<float>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});

    // The filter matrix is:
    // | 1 | 4 | 7 |
    // | 2 | 5 | 8 |
    // | 3 | 6 | 9 |
    const int filter_size = 3;
    const int filter_count = 1;
    Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
    test::FillValues<float>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});

    AddInputFromArray<float>(image.shape(), image.flat<float>());
    AddInputFromArray<float>(filter.shape(), filter.flat<float>());
    TF_ASSERT_OK(RunOpKernel());

    // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
    // the input set to zero because we're using the 'SAME' padding mode.
    // The calculations behind the expected output are:
    // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
    // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
    // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
    // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
    // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
    // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
    // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
    // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
    // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
    // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
    // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
    // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
    // This means we should end up with this matrix:
    // | 105 | 150 | 183 |  95 |
    // | 235 | 312 | 357 | 178 |
    // | 187 | 234 | 261 | 121 |
    const int expected_width = image_width;
    const int expected_height = image_height * filter_count;
    Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
                                           expected_width, filter_count}));
    test::FillValues<float>(
        &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<float>(expected, output, 1e-5);
  }

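  // Convolves with stride 1 in height but stride 3 in width under VALID
  // padding; the expected values are computed by hand.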
  void AnisotropicStrides() {
    const int stride_width = 3;
    const int stride_height = 1;
    TF_EXPECT_OK(NodeDefBuilder("conv_op", "Conv2D")
                     .Input(FakeInput(DT_FLOAT))
                     .Input(FakeInput(DT_FLOAT))
                     .Attr("T", DT_FLOAT)
                     .Attr("strides", {1, stride_height, stride_width, 1})
                     .Attr("padding", "VALID")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 6;
    const int image_height = 3;
    const int image_batch_count = 1;
    Tensor image(DT_FLOAT,
                 {image_batch_count, image_height, image_width, depth});
    test::FillValues<float>(&image, {
                                        3, 2, 1, -1, -2, -3,  //
                                        4, 3, 2, -2, -3, -4,  //
                                        5, 4, 3, -3, -4, -5,  //
                                    });
    const int filter_size = 2;
    const int filter_count = 1;
    Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
    test::FillValues<float>(&filter, {
                                         1, 2,  //
                                         3, 4,  //
                                     });

    AddInputFromArray<float>(image.shape(), image.flat<float>());
    AddInputFromArray<float>(filter.shape(), filter.flat<float>());
    TF_ASSERT_OK(RunOpKernel());

    const int expected_width = 2;
    const int expected_height = 2;
    Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
                                           expected_width, filter_count}));
    test::FillValues<float>(&expected, {31, -23, 41, -33});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<float>(expected, output, 1e-5);
  }
};

TEST_F(ConvOpTest, HandwrittenConv) { HandwrittenConv(); }

TEST_F(ConvOpTest, AnisotropicStride) { AnisotropicStrides(); }

template <typename T>
class FusedConv2DOpTest : public OpsTestBase {
 protected:
  static constexpr int kDepth = 3;
  static constexpr int kImageWidth = 32;
  static constexpr int kImageHeight = 32;
  static constexpr int kImageBatchCount = 8;

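  // Callbacks that build and run the reference (separate ops) and fused
  // graphs for the BiasAdd and FusedBatchNorm verification helpers below.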
  using BiasAddGraphRunner =
      std::function<void(const Tensor& input_data, const Tensor& filter_data,
                         const Tensor& bias_data, Tensor* out)>;

  using BatchNormGraphRunner = std::function<void(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data, Tensor* out)>;

  // Runs a TensorFlow graph defined by the root scope, and fetches the result
  // of the 'fetch' node into the output tensor. The optional `fetch_node`
  // parameter allows defining a fetch node directly using a NodeDef, for ops
  // that are not supported by the C++ API.
  void RunAndFetch(const tensorflow::Scope& root, const string& fetch,
                   Tensor* output, bool allow_gpu_device,
                   const NodeDef* fetch_node = nullptr) {
    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    if (fetch_node) {
      *graph.add_node() = *fetch_node;
    }

    // We really want to make sure that the graph executes exactly as we passed
    // it to the session, so we disable various optimizations.
    tensorflow::SessionOptions session_options;

    // Disable common runtime constant folding.
    session_options.config.mutable_graph_options()
        ->mutable_optimizer_options()
        ->set_opt_level(OptimizerOptions::L0);

    // Disable Grappler optimizations for tests.
    tensorflow::RewriterConfig* cfg =
        session_options.config.mutable_graph_options()
            ->mutable_rewrite_options();
    cfg->set_constant_folding(tensorflow::RewriterConfig::OFF);
    cfg->set_layout_optimizer(tensorflow::RewriterConfig::OFF);
    cfg->set_remapping(tensorflow::RewriterConfig::OFF);

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(session_options));

    std::vector<DeviceAttributes> available_devices;
    TF_ASSERT_OK(session->ListDevices(&available_devices))
        << "Failed to get available session devices";

    // Check if the session has an available GPU device.
    const bool has_gpu_device =
        absl::c_any_of(available_devices, [](const DeviceAttributes& device) {
          return device.device_type() == DEVICE_GPU;
        });

    // Some of the `FusedConv2D` fusion types are implemented only for CPU, and
    // in this test we don't want to compare GPU vs CPU numbers, so unless the
    // caller allows GPU placement we place all nodes on CPU.
    const bool place_all_on_gpu = allow_gpu_device && has_gpu_device;

    const string device = place_all_on_gpu ? "/device:GPU:0" : "/device:CPU:0";
    for (NodeDef& mutable_node : *graph.mutable_node()) {
      mutable_node.set_device(device);
    }

    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {fetch}, {}, &unfused_tensors));

    *output = unfused_tensors[0];
  }

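  // Builds and runs the reference Conv2D->BiasAdd graph.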
  void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data,
                         const Tensor& bias_data, const std::string& padding,
                         const std::vector<int>& explicit_paddings,
                         Tensor* output, bool allow_gpu_device = false,
                         int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::BiasAdd with_bias = ops::BiasAdd(
        root.WithOpName("with_bias"), conv,
        ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));

    RunAndFetch(root, "with_bias", output, allow_gpu_device);
  }

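  // Builds and runs the reference Conv2D->BiasAdd->{Activation} graph.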
  void RunConv2DWithBiasAndActivation(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& bias_data, const string& activation_type,
      const std::string& padding, const std::vector<int>& explicit_paddings,
      Tensor* output, bool allow_gpu_device = false, int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::BiasAdd with_bias = ops::BiasAdd(
        root.WithOpName("with_bias"), conv,
        ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));

    if (activation_type == "Relu") {
      ops::Relu(root.WithOpName("with_activation"), with_bias);
    } else if (activation_type == "Relu6") {
      ops::Relu6(root.WithOpName("with_activation"), with_bias);
    } else if (activation_type == "Elu") {
      ops::Elu(root.WithOpName("with_activation"), with_bias);
    } else if (activation_type == "LeakyRelu") {
      ops::internal::LeakyRelu(root.WithOpName("with_activation"), with_bias);
    } else {
      ops::Identity(root.WithOpName("with_activation"), with_bias);
    }

    RunAndFetch(root, "with_activation", output, allow_gpu_device);
  }

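  // Builds and runs the reference Conv2D->FusedBatchNorm graph in inference
  // mode.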
  void RunConv2DWithBatchNorm(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data,
      const std::string& padding, const std::vector<int>& explicit_paddings,
      Tensor* output, bool allow_gpu_device = false, int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::FusedBatchNorm::Attrs attr;
    attr = attr.IsTraining(false);

    ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
        root.WithOpName("with_fused_batch_norm"), conv,
        ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
        ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
        ops::Const(root.WithOpName("mean"), Input::Initializer(mean_data)),
        ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
        attr);

    RunAndFetch(root, "with_fused_batch_norm", output, allow_gpu_device);
  }

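  // Builds and runs the reference Conv2D->FusedBatchNorm->{Activation} graph
  // in inference mode.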
  void RunConv2DWithBatchNormAndActivation(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data,
      const string& activation_type, const std::string& padding,
      const std::vector<int>& explicit_paddings, Tensor* output,
      bool allow_gpu_device = false, int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::FusedBatchNorm::Attrs attr;
    attr = attr.IsTraining(false);

    ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
        root.WithOpName("with_fused_batch_norm"), conv,
        ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
        ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
        ops::Const(root.WithOpName("mean"), Input::Initializer(mean_data)),
        ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
        attr);

    if (activation_type == "Relu") {
      ops::Relu(root.WithOpName("with_activation"), with_fused_batch_norm.y);
    } else if (activation_type == "Relu6") {
      ops::Relu6(root.WithOpName("with_activation"), with_fused_batch_norm.y);
    } else if (activation_type == "Elu") {
      ops::Elu(root.WithOpName("with_activation"), with_fused_batch_norm.y);
    } else if (activation_type == "LeakyRelu") {
      ops::internal::LeakyRelu(root.WithOpName("with_activation"),
                               with_fused_batch_norm.y);
    } else {
      ops::Identity(root.WithOpName("with_activation"),
                    with_fused_batch_norm.y);
    }

    RunAndFetch(root, "with_activation", output, allow_gpu_device);
  }

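  // Builds and runs a single _FusedConv2D op. The node is assembled with
  // NodeDefBuilder and passed to RunAndFetch as `fetch_node`, since
  // _FusedConv2D is not exposed through the C++ ops API.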
  void RunFusedConv2DOp(const Tensor& input_data, const Tensor& filter_data,
                        const std::vector<Tensor>& args_data,
                        const std::vector<string>& fused_ops,
                        const std::string& padding,
                        const std::vector<int>& explicit_paddings,
                        Tensor* output, bool allow_gpu_device = false,
                        int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    DataType dtype = DataTypeToEnum<T>::v();
    int num_args = static_cast<int>(args_data.size());

    Output input =
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output filter =
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data));

    std::vector<NodeDefBuilder::NodeOut> args;
    for (int i = 0; i < num_args; ++i) {
      Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)),
                              Input::Initializer(args_data[i]));
      args.emplace_back(arg.name(), 0, dtype);
    }

    NodeDef fused_conv2d;
    TF_EXPECT_OK(NodeDefBuilder("fused_conv", "_FusedConv2D")
                     .Input({input.name(), 0, dtype})
                     .Input({filter.name(), 0, dtype})
                     .Input(args)
                     .Attr("num_args", num_args)
                     .Attr("T", dtype)
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", padding)
                     .Attr("explicit_paddings", explicit_paddings)
                     .Attr("fused_ops", fused_ops)
                     .Finalize(&fused_conv2d));

    RunAndFetch(root, fused_conv2d.name(), output, allow_gpu_device,
                &fused_conv2d);
  }

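  // Generates random image, filter, and bias data, feeds them to both
  // runners, and checks that the unfused and fused results are close.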
  void VerifyBiasAddTensorsNear(int depth, int image_width, int image_height,
                                int image_batch_count, int filter_size,
                                int filter_count,
                                const BiasAddGraphRunner& run_default,
                                const BiasAddGraphRunner& run_fused) {
    DataType dtype = DataTypeToEnum<T>::v();

    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    image.flat<T>() = image.flat<T>().setRandom();

    // Add some negative values to the filter to properly test Relu.
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    filter.flat<T>() = filter.flat<T>().setRandom();
    filter.flat<T>() -= filter.flat<T>().constant(static_cast<T>(0.5f));

    const int bias_size = filter_count;
    Tensor bias(dtype, {bias_size});
    bias.flat<T>() = bias.flat<T>().setRandom();
    bias.flat<T>() += bias.flat<T>().constant(static_cast<T>(0.5f));

    Tensor conv_2d;
    Tensor fused_conv_2d;

    run_default(image, filter, bias, &conv_2d);
    run_fused(image, filter, bias, &fused_conv_2d);

    ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
    ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());

    // NOTE(intel-tf): When filter_size is equal to the input image size,
    // conv2d is essentially an element-wise multiplication followed by
    // a full sum reduction, which causes a larger numerical error than
    // the usual cases.
    if (image_width == filter_size && image_height == filter_size) {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
    } else {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
    }
  }

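  // Like VerifyBiasAddTensorsNear, but with random scale, offset, mean, and
  // variance inputs for the batch-norm runners.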
  void VerifyFusedBatchNormTensorsNear(int depth, int image_width,
                                       int image_height, int image_batch_count,
                                       int filter_size, int filter_count,
                                       const BatchNormGraphRunner& run_default,
                                       const BatchNormGraphRunner& run_fused) {
    DataType dtype = DataTypeToEnum<T>::v();

    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    image.flat<T>() = image.flat<T>().setRandom();

    // Add some negative values to the filter to properly test Relu.
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    filter.flat<T>() = filter.flat<T>().setRandom();
    filter.flat<T>() -= filter.flat<T>().constant(static_cast<T>(0.5f));

    const int scale_size = filter_count;

    Tensor scale(dtype, {scale_size});
    scale.flat<T>() = scale.flat<T>().setRandom();

    Tensor offset(dtype, {scale_size});
    offset.flat<T>() = offset.flat<T>().setRandom();

    Tensor mean(dtype, {scale_size});
    mean.flat<T>() = mean.flat<T>().setRandom();

    Tensor variance(dtype, {scale_size});
    variance.flat<T>() = variance.flat<T>().setRandom();
    variance.flat<T>() += variance.flat<T>().constant(static_cast<T>(0.5f));

    Tensor conv_2d;
    Tensor fused_conv_2d;

    run_default(image, filter, scale, offset, mean, variance, &conv_2d);
    run_fused(image, filter, scale, offset, mean, variance, &fused_conv_2d);

    ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
    ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());

    // NOTE(intel-tf): When filter_size is equal to the input image size,
    // conv2d is essentially an element-wise multiplication followed by
    // a full sum reduction, which causes a larger numerical error than
    // the usual cases.
    if (image_width == filter_size && image_height == filter_size) {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
    } else {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
    }
  }

  // Verifies that computing Conv2D+BiasAdd in a graph is identical to
  // FusedConv2D.
  void VerifyConv2DWithBias(int filter_size, int filter_count,
                            const std::vector<int>& explicit_paddings = {},
                            int depth = kDepth, int image_width = kImageWidth,
                            int image_height = kImageHeight,
                            int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BiasAddGraphRunner run_default =
        [this, &explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunConv2DWithBias(input_data, filter_data, bias_data, padding,
                            explicit_paddings, out);
        };

    const BiasAddGraphRunner run_fused =
        [this, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"},
                           padding, explicit_paddings, out);
        };

    VerifyBiasAddTensorsNear(depth, image_width, image_height,
                             image_batch_count, filter_size, filter_count,
                             run_default, run_fused);
  }


  // Verifies that computing Conv2D+BiasAdd+{Activation} in a graph is
  // identical to FusedConv2D.
  void VerifyConv2DWithBiasAndActivation(
      const string& activation, int filter_size, int filter_count,
      const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
      int image_width = kImageWidth, int image_height = kImageHeight,
      int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BiasAddGraphRunner run_default =
        [this, &activation, &explicit_paddings, &padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunConv2DWithBiasAndActivation(
              input_data, filter_data, bias_data, activation, padding,
              explicit_paddings, out,
              /*allow_gpu_device=*/activation == "Relu");
        };

    const BiasAddGraphRunner run_fused =
        [this, &activation, &explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data, {bias_data},
                           {"BiasAdd", activation}, padding, explicit_paddings,
                           out, /*allow_gpu_device=*/activation == "Relu");
        };

    VerifyBiasAddTensorsNear(depth, image_width, image_height,
                             image_batch_count, filter_size, filter_count,
                             run_default, run_fused);
  }

  // Verifies that computing Conv2D+FusedBatchNorm in a graph is identical to
  // FusedConv2D.
  void VerifyConv2DWithBatchNorm(int filter_size, int filter_count,
                                 const std::vector<int>& explicit_paddings = {},
                                 int depth = kDepth,
                                 int image_width = kImageWidth,
                                 int image_height = kImageHeight,
                                 int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BatchNormGraphRunner run_default =
        [this, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data,
            Tensor* out) {
          RunConv2DWithBatchNorm(input_data, filter_data, scale_data,
                                 offset_data, mean_data, variance_data,
                                 padding, explicit_paddings, out);
        };

    const BatchNormGraphRunner run_fused =
        [this, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data,
            Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data,
                           {scale_data, offset_data, mean_data, variance_data},
                           {"FusedBatchNorm"}, padding, explicit_paddings, out);
        };

    VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
                                    image_batch_count, filter_size,
                                    filter_count, run_default, run_fused);
  }

  // Verifies that computing Conv2D+FusedBatchNorm+{Activation} in a graph is
  // identical to FusedConv2D.
  void VerifyConv2DWithBatchNormAndActivation(
      const string& activation, int filter_size, int filter_count,
      const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
      int image_width = kImageWidth, int image_height = kImageHeight,
      int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BatchNormGraphRunner run_default =
        [this, &activation, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data,
            Tensor* out) {
          RunConv2DWithBatchNormAndActivation(
              input_data, filter_data, scale_data, offset_data, mean_data,
              variance_data, activation, padding, explicit_paddings, out);
        };

    const BatchNormGraphRunner run_fused =
        [this, &activation, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data,
            Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data,
                           {scale_data, offset_data, mean_data, variance_data},
                           {"FusedBatchNorm", activation}, padding,
                           explicit_paddings, out);
        };

    VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
                                    image_batch_count, filter_size,
                                    filter_count, run_default, run_fused);
  }
};

// Conv2D with BatchNorm can be tested only with `T=float`, because the default
// `FusedBatchNorm` kernel supports only floats for scale, mean, and variance.

template <typename T>
class FusedConv2DWithBiasOpTest : public FusedConv2DOpTest<T> {};
template <typename T>
class FusedConv2DWithBatchNormOpTest : public FusedConv2DOpTest<T> {};

TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest);
TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest);

// ROCm does not yet support the _FusedConv2D op, so disable the tests that
// check _FusedConv2D when building with ROCm.

#ifndef TENSORFLOW_USE_ROCM
// -------------------------------------------------------------------------- //
// Conv2D + BiasAdd + {Activation}                                             //
// -------------------------------------------------------------------------- //

TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolution) {
  const int filter_size = 1;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, ImageSizeConvolution) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count);
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBiasOpTest, ExplicitPaddingConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count,
                             /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
}
#endif

TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolutionAndActivation) {
  // Requires the full-precision Conv2D op, so disable TensorFloat-32.
  tensorflow::enable_tensor_float_32_execution(false);
  const int filter_size = 1;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, ImageSizeConvolutionAndActivation) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBiasOpTest,
             ExplicitPaddingConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(
        activation, filter_size, filter_count,
        /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
  }
}
#endif

// -------------------------------------------------------------------------- //
// Conv2D + FusedBatchNorm + {Activation}                                      //
// -------------------------------------------------------------------------- //

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolution) {
  const int filter_size = 1;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ImageSizeConvolution) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ExplicitPaddingConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(
      filter_size, filter_count,
      /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
}
#endif

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolutionAndActivation) {
  const int filter_size = 1;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(activation, filter_size,
                                                 filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
             ImageSizeConvolutionAndActivation) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(activation, filter_size,
                                                 filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(activation, filter_size,
                                                 filter_count);
  }
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
             ExplicitPaddingConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(
        activation, filter_size, filter_count,
        /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
  }
}
#endif

#ifndef INTEL_MKL
REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            ExplicitPaddingConvolution,         //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation,    //
                            ExplicitPaddingConvolutionAndActivation);

REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            ExplicitPaddingConvolution,         //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation,    //
                            ExplicitPaddingConvolutionAndActivation);
#else
REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation);

REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation);
#endif

using FusedBiasAddDataTypes = ::testing::Types<float, double>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBiasOpTest,
                               FusedBiasAddDataTypes);

using FusedBatchNormDataTypes = ::testing::Types<float>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBatchNormOpTest,
                               FusedBatchNormDataTypes);

#endif  // TENSORFLOW_USE_ROCM
}  // namespace tensorflow