• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <memory>
17 #include <utility>
18 
19 #include "tensorflow/compiler/xla/literal.h"
20 #include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h"
21 #include "tensorflow/compiler/xla/service/hlo_computation.h"
22 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
23 #include "tensorflow/compiler/xla/service/hlo_module.h"
24 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
25 #include "tensorflow/compiler/xla/service/hlo_parser.h"
26 #include "tensorflow/compiler/xla/shape_util.h"
27 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
28 #include "tensorflow/compiler/xla/xla.pb.h"
29 #include "tensorflow/compiler/xla/xla_data.pb.h"
30 #include "tensorflow/core/platform/test.h"
31 
32 namespace xla {
33 namespace gpu {
34 
35 // This file tests the index expressions used to reference source tensors. When
36 // the destination tensor and source tensor have compatible shapes, the linear
37 // index is used to access the source tensor. Otherwise, dimensional indices
38 // computed from the linear index are used to access the source tensor.
39 
40 class GpuIndexTest : public GpuCodegenTest {};
41 
// Compares two f32[5,7,2] parameters with >= and expects the optimized IR to
// contain neither udiv nor urem.
TEST_F(GpuIndexTest, CompatibleUseLinearIndex) {
  // FileCheck directives applied to the generated IR.
  constexpr char kExpectedIr[] = R"(
; CHECK-NOT: udiv
; CHECK-NOT: urem
      )";

  const auto operand_shape = ShapeUtil::MakeShape(F32, {5, 7, 2});
  const auto result_shape = ShapeUtil::MakeShape(PRED, {5, 7, 2});

  // Entry computation: ROOT = compare(x, y), direction GE.
  HloComputation::Builder computation(TestName());
  HloInstruction* lhs = computation.AddInstruction(
      HloInstruction::CreateParameter(0, operand_shape, "x"));
  HloInstruction* rhs = computation.AddInstruction(
      HloInstruction::CreateParameter(1, operand_shape, "y"));
  computation.AddInstruction(HloInstruction::CreateCompare(
      result_shape, lhs, rhs, ComparisonDirection::kGe));

  auto module = CreateNewVerifiedModule();
  module->AddEntryComputation(computation.Build());

  // The optimized IR is inspected because the unoptimized IR still contains
  // dead udiv and urem instructions.
  CompileAndVerifyIr(std::move(module), kExpectedIr,
                     /*match_optimized_ir=*/true);
}
65 
// Compares an f32[5,7,2] parameter against an f32[5,14] parameter reshaped to
// f32[5,7,2] and expects the optimized IR to contain neither udiv nor urem.
TEST_F(GpuIndexTest, CompatibleUseLinearIndexWithReshape) {
  // HLO module under test.
  constexpr char kHloText[] = R"(
    HloModule test_module

    ENTRY CompatibleUseLinearIndexWithReshape {
      x = f32[5,7,2]{2,1,0} parameter(0)
      y = f32[5,14]{1,0} parameter(1)
      reshape = f32[5,7,2]{2,1,0} reshape(y)
      ROOT gte = pred[5,7,2]{2,1,0} compare(x, reshape), direction=GE
    })";

  // FileCheck directives applied to the generated IR.
  constexpr char kExpectedIr[] = R"(
; CHECK-NOT: udiv
; CHECK-NOT: urem
      )";

  HloModuleConfig module_config;
  module_config.set_debug_options(HloTestBase::GetDebugOptionsForTest());
  auto parsed_module =
      ParseAndReturnVerifiedModule(kHloText, module_config).ValueOrDie();

  // The optimized IR is inspected because the unoptimized IR still contains
  // dead udiv and urem instructions.
  CompileAndVerifyIr(std::move(parsed_module), kExpectedIr,
                     /*match_optimized_ir=*/true);
}
89 
// Reshapes an f32[2,1,3] parameter to f32[1,2,3], broadcasts it into
// dimensions {0,2,4} of an f32[1,7,2,5,3] shape and compares it with another
// parameter. The unoptimized IR is expected to split the linear index into
// per-dimension indices with udiv/urem and to address the broadcast operand
// with the indices of dimensions 2 and 4.
TEST_F(GpuIndexTest,
       ReuseMultidimIndexWithTrivialReshapeAndNonContiguousBroadcast) {
  // HLO module under test.
  constexpr char kHloText[] = R"(
    HloModule test_module

    ENTRY CompatibleUseLinearIndexWithReshape {
      x = f32[1,7,2,5,3]{4,3,2,1,0} parameter(0)
      y = f32[2,1,3]{2,1,0} parameter(1)
      reshape = f32[1,2,3]{2,1,0} reshape(y)
      broadcast = f32[1,7,2,5,3]{4,3,2,1,0} broadcast(reshape), dimensions={0,2,4}
      ROOT gte = pred[1,7,2,5,3]{4,3,2,1,0} compare(x, broadcast), direction=GE
    })";

  // FileCheck directives: the index computation for each dimension, followed
  // by the getelementptr into the [2 x [1 x [3 x float]]] operand.
  constexpr char kExpectedIr[] = R"(
; CHECK: %[[tmp4:.*]] = udiv i32 %[[linear_index:.*]], 1
; CHECK: %[[dim4:.*]] = urem i32 %[[tmp4]], 3
; CHECK: %[[tmp3:.*]] = udiv i32 %[[linear_index]], 3
; CHECK: %[[dim3:.*]] = urem i32 %[[tmp3]], 5
; CHECK: %[[tmp2:.*]] = udiv i32 %[[linear_index]], 15
; CHECK: %[[dim2:.*]] = urem i32 %[[tmp2]], 2
; CHECK: %[[tmp1:.*]] = udiv i32 %[[linear_index]], 30
; CHECK: %[[dim1:.*]] = urem i32 %[[tmp1]], 7
; CHECK: %[[dim0:.*]] = udiv i32 %[[linear_index]], 210
; CHECK: %{{.*}} = getelementptr inbounds [2 x [1 x [3 x float]]], ptr %{{.*}}, i32 0, i32 %[[dim2]], i32 0, i32 %[[dim4]]
      )";

  HloModuleConfig module_config;
  module_config.set_debug_options(HloTestBase::GetDebugOptionsForTest());
  auto parsed_module =
      ParseAndReturnVerifiedModule(kHloText, module_config).ValueOrDie();

  // These patterns are matched against the unoptimized IR.
  CompileAndVerifyIr(std::move(parsed_module), kExpectedIr,
                     /*match_optimized_ir=*/false);
}
121 
122 #if TENSORFLOW_USE_ROCM
123 #else
TEST_F(GpuIndexTest,CompatibleUseLinearIndexWithReshapeAndBroadcast)124 TEST_F(GpuIndexTest, CompatibleUseLinearIndexWithReshapeAndBroadcast) {
125   HloModuleConfig config;
126   config.set_debug_options(HloTestBase::GetDebugOptionsForTest());
127   auto module = ParseAndReturnVerifiedModule(R"(
128     HloModule test_module
129 
130     ENTRY CompatibleUseLinearIndexWithReshape {
131       x = f32[5,7,2]{2,1,0} parameter(0)
132       y = f32[14]{0} parameter(1)
133       reshape = f32[7,2]{1,0} reshape(y)
134       broadcast = f32[5,7,2]{2,1,0} broadcast(reshape), dimensions={1,2}
135       ROOT gte = pred[5,7,2]{2,1,0} compare(x, broadcast), direction=GE
136     })",
137                                              config)
138                     .ValueOrDie();
139 
140   // Check the optimized IR reuses the linear index by calculating modulo 14.
141 
142   // In the IR generated for AMDGPUs, we do not seem to have the
143   // the addrspace(1) attribute for the lines being checked by the following
144   // patterns.
145   // need to investigate why that is the case, and whether or not it is ok
146   CompileAndVerifyIr(std::move(module),
147                      R"(
148 ; CHECK: %[[urem1:.*]] = urem i{{[0-9]*}} %[[linear_index:.*]], 14
149 ; CHECK: %[[idx1:.*]] = zext i{{[0-9]*}} %[[urem1]] to i64
150 ; CHECK: getelementptr inbounds float, ptr{{( addrspace\(1\))?}} %[[alloc:.*]], i64 %[[idx1]]
151       )",
152                      /*match_optimized_ir=*/true);
153 }
154 #endif
155 
// Converts f32[1,1024,1,256]{3,2,1,0} to f16 with layout {2,3,1,0}. The
// unoptimized IR is expected to use the same linear index for both the load
// from the operand and the store to the result.
TEST_F(GpuIndexTest, CompatibleUseLinearIndexWithSizeOneDimensions) {
  // HLO module under test.
  constexpr char kHloText[] = R"(
    HloModule  test_module

    ENTRY CompatibleUseLinearIndexWithSizeOneDimensions  {
      x = f32[1,1024,1,256]{3,2,1,0} parameter(0)
      ROOT y = f16[1,1024,1,256]{2,3,1,0} convert(x)
    })";

  // FileCheck directives, scoped to the function matched by @fusion.
  constexpr char kExpectedIr[] = R"(
; CHECK-LABEL: @fusion
; CHECK: udiv i32 %[[linear_index:.*]], 262144
; CHECK: %[[ld_addr:.*]] = getelementptr inbounds float, ptr {{.*}}, i32 %[[linear_index]]
; CHECK: load float, ptr %[[ld_addr]]
; CHECK: %[[st_addr:.*]] = getelementptr inbounds half, ptr {{.*}}, i32 %[[linear_index]]
; CHECK: store half {{.*}}, ptr %[[st_addr]]
      )";

  // Start from the default test options and cap the kernel unroll factor at 1.
  // NOTE(review): presumably this keeps one element per thread so the patterns
  // above match a single load/store pair -- confirm.
  auto options = HloTestBase::GetDebugOptionsForTest();
  options.set_xla_gpu_max_kernel_unroll_factor(1);
  HloModuleConfig module_config;
  module_config.set_debug_options(options);

  auto parsed_module =
      ParseAndReturnVerifiedModule(kHloText, module_config).ValueOrDie();

  // Verify that the unoptimized IR reuses the linear index.
  CompileAndVerifyIr(std::move(parsed_module), kExpectedIr,
                     /*match_optimized_ir=*/false);
}
184 
// Transposes f32[2,1024,3,256] with dimensions={1,0,3,2} and compares the
// result with a parameter laid out as {2,3,0,1}. The optimized IR is expected
// to contain neither udiv nor urem.
TEST_F(GpuIndexTest, CompatibleUseLinearIndexWithTranspose) {
  // HLO module under test.
  constexpr char kHloText[] = R"(
    HloModule  test_module

    ENTRY CompatibleUseLinearIndexWithTranspose  {
      x = f32[2,1024,3,256]{3,2,1,0} parameter(0)
      y = f32[1024,2,256,3]{2,3,0,1} parameter(1)
      transpose = f32[1024,2,256,3]{3,2,1,0} transpose(x), dimensions={1,0,3,2}
      ROOT gte = pred[1024,2,256,3]{2,3,0,1} compare(transpose, y), direction=GE
    })";

  // FileCheck directives applied to the generated IR.
  constexpr char kExpectedIr[] = R"(
; CHECK-NOT: udiv
; CHECK-NOT: urem
      )";

  // Start from the default test options and cap the kernel unroll factor at 1.
  auto options = HloTestBase::GetDebugOptionsForTest();
  options.set_xla_gpu_max_kernel_unroll_factor(1);
  HloModuleConfig module_config;
  module_config.set_debug_options(options);

  auto parsed_module =
      ParseAndReturnVerifiedModule(kHloText, module_config).ValueOrDie();

  // Verify that the optimized IR contains no udiv and no urem.
  CompileAndVerifyIr(std::move(parsed_module), kExpectedIr,
                     /*match_optimized_ir=*/true);
}
210 
211 }  // namespace gpu
212 }  // namespace xla
213