/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h"
#include "tensorflow/core/platform/test.h"

// TODO(b/210165681): The tests in this file are fragile to HLO op names.

namespace xla {
namespace gpu {

namespace {

class SwapConvOperandsTest : public GpuCodegenTest {};

// Here, we swap the operands of a convolution to avoid the performance
// penalty associated with convolutions that have large padding. This test
// checks that the operands are swapped in that case, and that the resulting
// convolution is successfully lowered to a cuDNN custom-call.
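//
// Why swapping helps: a "full" convolution pads each spatial edge by
// kernel_size - 1, and convolution is commutative up to reversing the kernel.
// Treating the 30x30 operand below as the input and the 3x3 operand as the
// (reversed) kernel shrinks the per-edge padding from 29 (= 30 - 1) to
// 2 (= 3 - 1) while producing the same 32x32 output.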
TEST_F(SwapConvOperandsTest, LargePadding) {
  const char* hlo_text = R"(
HloModule swap_conv

ENTRY swap_conv {
  input = f32[512,128,3,3]{3,2,1,0} parameter(0)
  filter = f32[1,30,30,512]{3,2,1,0} parameter(1)
  convolution = f32[1,32,32,128]{3,2,1,0} convolution(input, filter), window={size=30x30 pad=29_29x29_29}, dim_labels=fb01_o01i->f01b
  ROOT tuple = (f32[1,32,32,128]{3,2,1,0}) tuple(convolution)
}
)";

  MatchOptimizedHloWithShapes(hlo_text,
                              R"(
// CHECK-LABEL: ENTRY
// CHECK: [[FILTER:%[^ ]+]] = f32[1,30,30,512]{2,1,3,0}
// CHECK: [[INPUT:%[^ ]+]] = f32[512,128,3,3]{3,2,0,1}
// CHECK: %cudnn-conv = (f32[1,32,32,128]{2,1,3,0}, u8[{{[0-9]*}}]{0}) custom-call(f32[1,30,30,512]{2,1,3,0} [[FILTER]], f32[512,128,3,3]{3,2,0,1} [[INPUT]]), window={size=3x3 pad=2_2x2_2 rhs_reversal=1x1}, dim_labels=b01f_io01->b01f
)");
  EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5}));
}

// If the padding is already small, we leave the operands as-is before
// lowering.
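//
// The convolution here is already in the swapped form produced in the test
// above (3x3 kernel, pad 2 per edge, rhs_reversal=1x1), so the operand order
// should survive optimization unchanged.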
TEST_F(SwapConvOperandsTest, SmallPadding) {
  const char* hlo_text = R"(
HloModule swap_conv

ENTRY swap_conv {
  filter = f32[512,128,3,3]{3,2,1,0} parameter(0)
  input = f32[1,30,30,512]{3,2,1,0} parameter(1)
  convolution = f32[1,32,32,128]{2,1,3,0} convolution(input, filter), window={size=3x3 pad=2_2x2_2 rhs_reversal=1x1}, dim_labels=b01f_io01->b01f
  ROOT tuple = (f32[1,32,32,128]{3,2,1,0}) tuple(convolution)
}
)";

  MatchOptimizedHloWithShapes(hlo_text,
                              R"(
// CHECK-LABEL: ENTRY
// CHECK: [[INPUT:%[^ ]+]] = f32[1,30,30,512]{2,1,3,0}
// CHECK: [[FILTER:%[^ ]+]] = f32[512,128,3,3]{3,2,0,1}
// CHECK: %cudnn-conv = (f32[1,32,32,128]{2,1,3,0}, u8[{{[0-9]*}}]{0}) custom-call(f32[1,30,30,512]{2,1,3,0} [[INPUT]], f32[512,128,3,3]{3,2,0,1} [[FILTER]]), window={size=3x3 pad=2_2x2_2 rhs_reversal=1x1}, dim_labels=b01f_io01->b01f
)");
  EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5}));
}

// If swapping the conv operands would result in a conv that does not lower to
// a valid cuDNN call, we do not transform the op.
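//
// The convolution below is pattern-matched as a backward-input convolution
// (note the lhs_dilate and the reversed kernel); the expected result is a
// __cudnn$convBackwardInput custom-call with the operand order left intact.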
TEST_F(SwapConvOperandsTest, DoesNotLower) {
  const char* hlo_text = R"(
HloModule swap_conv

ENTRY %conv3DBackpropInputV2 (arg0.1: f32[3,3,3,2,3]) -> f32[2,4,3,3,2] {
  %constant.5 = f32[2,2,2,2,3]{4,3,2,1,0} constant({...})
  %arg0.1 = f32[3,3,3,2,3]{4,3,2,1,0} parameter(0), parameter_replication={false}
  %reshape.2 = f32[3,3,3,2,3]{4,3,2,1,0} reshape(f32[3,3,3,2,3]{4,3,2,1,0} %arg0.1)
  %reverse.6 = f32[3,3,3,2,3]{4,3,2,1,0} reverse(f32[3,3,3,2,3]{4,3,2,1,0} %reshape.2), dimensions={0,1,2}
  %convolution.7 = f32[2,4,3,3,2]{4,3,2,1,0} convolution(f32[2,2,2,2,3]{4,3,2,1,0} %constant.5, f32[3,3,3,2,3]{4,3,2,1,0} %reverse.6), window={size=3x3x3 pad=2_1x1_1x1_1 lhs_dilate=2x2x2}, dim_labels=b012f_012oi->b012f, metadata={op_type="Conv3DBackpropInputV2" op_name="gradients_2/Conv3DBackpropFilterV2_1_grad/Conv3DBackpropInputV2"}
  ROOT %reshape.8 = f32[2,4,3,3,2]{4,3,2,1,0} reshape(f32[2,4,3,3,2]{4,3,2,1,0} %convolution.7)
}
)";

  MatchOptimizedHloWithShapes(hlo_text,
                              R"(
// CHECK-LABEL: ENTRY
// CHECK: [[INSTR_0:%[^ ]+]] = f32[2,2,2,2,3]{3,2,1,4,0} constant({...})
// CHECK: [[INSTR_1:%[^ ]+]] = f32[3,3,3,2,3]{4,3,2,1,0} parameter(0), parameter_replication={false}
// CHECK: [[INSTR_2:%[^ ]+]] = f32[3,3,3,2,3]{2,1,0,3,4} copy(f32[3,3,3,2,3]{4,3,2,1,0} %arg0.1)
// CHECK: [[INSTR_3:%[^ ]+]] = (f32[2,5,3,3,2]{3,2,1,4,0}, u8[{{[0-9]*}}]{0}) custom-call(f32[2,2,2,2,3]{3,2,1,4,0} [[INSTR_0]], f32[3,3,3,2,3]{2,1,0,3,4} [[INSTR_2]]), window={size=3x3x3 stride=2x2x2 pad=0_0x1_1x1_1}, dim_labels=b012f_012io->b012f, custom_call_target="__cudnn$convBackwardInput", metadata={op_type="Conv3DBackpropInputV2" op_name="gradients_2/Conv3DBackpropFilterV2_1_grad/Conv3DBackpropInputV2"}
)");
  EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5}));
}

}  // namespace
}  // namespace gpu
}  // namespace xla