• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// RUN: hlo_to_llvm_ir --ptx %s | FileCheck %s
2// RUN: hlo_to_llvm_ir --ptx --sm=50 %s | FileCheck %s --check-prefix=CHECK-SM50
3// RUN: hlo_to_llvm_ir --ptx --sm=60 %s | FileCheck %s --check-prefix=CHECK-SM60
4// RUN: hlo_to_llvm_ir --ptx --sm=70 %s | FileCheck %s --check-prefix=CHECK-SM70
5// RUN: hlo_to_llvm_ir --ptx --sm=86 %s | FileCheck %s --check-prefix=CHECK-SM86
6
7// CHECK-LABEL: .entry reduce_odd_row
8// CHECK-NOT: ld.global.nc.v2.f32
9// CHECK-NOT: ld.global.nc.v4.f32
10// CHECK-NOT: ld.global.nc.u64
11// CHECK-NOT: ld.global.u64
12
// Test module: reduces each row of a 5 x 4071 f32 array; the row length 4071
// is odd. The default-prefix expectations placed before this module require
// that the reduce_odd_row kernel contains none of the listed v2/v4/u64 global
// load forms.
13HloModule ReduceOddRowSize
14
// Scalar reducer used by the reduce below: returns the maximum of its two
// f32 parameters.
15%max_ {
16  %x = f32[] parameter(0)
17  %y = f32[] parameter(1)
18  ROOT %maximum.7 = f32[] maximum(%x, %y)
19}
20
// Entry computation: reduces %param_0 (f32[5,4071]) along dimension 1 with
// %max_, using the constant 0 as the initial value, and yields f32[5].
// NOTE(review): the expected kernel name "reduce_odd_row" presumably derives
// from the ROOT instruction name %reduce.odd_row; confirm before renaming it.
21ENTRY %main {
22  %param_0 = f32[5,4071] parameter(0)
23  %constant.3 = f32[] constant(0)
24  ROOT %reduce.odd_row = f32[5] reduce(f32[5,4071] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
25}
26
27// -----
28
29// CHECK-SM86-LABEL: .entry reduce_small_row
30// CHECK-SM86: .reqntid 96, 1, 1
31
// Test module: reduces each 32-element row of a 700000 x 32 f32 array.
// Only the SM86-prefixed expectations placed before this module refer to it;
// they require ".reqntid 96, 1, 1" in the reduce_small_row kernel.
32HloModule ReduceSmallRow
33
// Scalar reducer used by the reduce below: adds its two f32 parameters.
// NOTE(review): the add operands are written %x/%y while the parameters are
// declared as x/y, and the add carries no explicit result shape. Presumably
// the HLO parser treats the % sigil as optional and infers the shape; confirm.
34addition {
35  x = f32[] parameter(0)
36  y = f32[] parameter(1)
37  ROOT out = add(%x, %y)
38}
39
// Entry computation: sums p (f32[700000,32]) along dimension 1, starting from
// the constant 0, and yields f32[700000].
40ENTRY main {
41  p = f32[700000,32] parameter(0)
42  zero = f32[] constant(0)
43  ROOT reduce_small_row = f32[700000] reduce(p, zero), dimensions={1}, to_apply=%addition
44}
45
46// -----
47
48// CHECK-LABEL: .entry reduce_sine
49// CHECK-COUNT-7: ld.global.nc.v2.f32
50
// Test module: applies an elementwise sine to a 5 x 3584 f32 array and then
// sums each row. The default-prefix expectations placed before this module
// look for seven ld.global.nc.v2.f32 matches in the reduce_sine kernel.
51HloModule DisableSin
52
// Scalar reducer used by the reduce below: adds its two f32 parameters.
53%add_float {
54  %x = f32[] parameter(0)
55  %y = f32[] parameter(1)
56  ROOT %add.17 = f32[] add(f32[] %x, f32[] %y)
57}
58
// Entry computation: %sine is the elementwise sine of %arg0.1; the ROOT then
// reduces %sine along dimension 1 with %add_float, starting from the
// constant 0, and yields f32[5].
59ENTRY %main {
60  %arg0.1 = f32[5,3584] parameter(0)
61  %sine = f32[5,3584] sine(f32[5,3584] %arg0.1)
62  %constant.0 = f32[] constant(0)
63  ROOT %reduce.sine = f32[5] reduce(f32[5,3584] %sine, f32[] %constant.0), dimensions={1}, to_apply=%add_float
64}
65
66// -----
67
68// SM-dependent tests: the expectations below are selected per --sm value.
69
70// CHECK-SM50-LABEL: .entry reduce_exp
71// CHECK-SM50-NOT: ld.global.nc.v2.f32
72// CHECK-SM50-COUNT-8: ld.global.nc.f32
73
74// CHECK-SM60: .entry exp
75// CHECK-SM60-LABEL: .entry reduce_exp
76// CHECK-SM60-COUNT-8: ld.global.nc.v2.f32
77
78// CHECK-SM70: .entry exp
79// CHECK-SM70-LABEL: .entry reduce_exp
80// CHECK-SM70-COUNT-8: ld.global.nc.v2.f32
81
// Test module: applies an elementwise exponential to a 5 x 3584 f32 array and
// then sums each row. The per-SM expectations placed before this module
// require, for the reduce_exp kernel, eight scalar ld.global.nc.f32 loads and
// no v2 loads on SM50, and eight ld.global.nc.v2.f32 loads on SM60 and SM70.
82HloModule Exp
83
// Scalar reducer used by the reduce below: adds its two f32 parameters.
84%add_float {
85  %x = f32[] parameter(0)
86  %y = f32[] parameter(1)
87  ROOT %add.17 = f32[] add(f32[] %x, f32[] %y)
88}
89
// Entry computation: %exp is the elementwise exponential of %arg0.1; the ROOT
// then reduces %exp along dimension 1 with %add_float, starting from the
// constant 0, and yields f32[5].
90ENTRY %main {
91  %arg0.1 = f32[5,3584] parameter(0)
92  %exp = f32[5,3584] exponential(f32[5,3584] %arg0.1)
93  %constant.0 = f32[] constant(0)
94  ROOT %reduce.exp = f32[5] reduce(f32[5,3584] %exp, f32[] %constant.0), dimensions={1}, to_apply=%add_float
95}
96
97// -----
98
// Test module: max-reduces each row of a 5 x 3584 f32 array. The per-SM
// expectations for the reduce_tile_fit kernel follow the module header.
99HloModule ReduceTileFit
100
101// CHECK-SM50-LABEL: .entry reduce_tile_fit
102// CHECK-SM50-NOT: ld.global.nc.v2.f32
103// CHECK-SM50-COUNT-8: ld.global.nc.f32
104
105// CHECK-SM60-LABEL: .entry reduce_tile_fit
106// CHECK-SM60-COUNT-8: ld.global.nc.v2.f32
107
108// CHECK-SM70-LABEL: .entry reduce_tile_fit
109// CHECK-SM70-COUNT-4: ld.global.nc.v2.f32
110
// Scalar reducer used by the reduce below: returns the maximum of its two
// f32 parameters.
111%max_ {
112  %x = f32[] parameter(0)
113  %y = f32[] parameter(1)
114  ROOT %maximum.7 = f32[] maximum(f32[] %x, f32[] %y)
115}
116
// Entry computation: reduces %param_0 (f32[5,3584]) along dimension 1 with
// %max_, using the constant 0 as the initial value, and yields f32[5].
117ENTRY %main {
118  %param_0 = f32[5,3584] parameter(0)
119  %constant.3 = f32[] constant(0)
120  ROOT %reduce.tile_fit = f32[5] reduce(f32[5,3584] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
121}
122
123// -----
124
// Test module: max-reduces each row of a 5 x 4096 f32 array; the row length
// 4096 is a power of two. The per-SM expectations for the reduce_pow_2 kernel
// follow the module header.
125HloModule ReducePower2
126
127// CHECK-SM50-LABEL: .entry reduce_pow_2
128// CHECK-SM50-NOT: ld.global.nc.v2.f32
129// CHECK-SM50-COUNT-8: ld.global.nc.f32
130
131// CHECK-SM60-LABEL: .entry reduce_pow_2
132// CHECK-SM60-COUNT-4: ld.global.nc.v2.f32
133
134// CHECK-SM70-LABEL: .entry reduce_pow_2
135// CHECK-SM70-COUNT-4: ld.global.nc.v2.f32
136
// Scalar reducer used by the reduce below: returns the maximum of its two
// f32 parameters.
137%max_ {
138  %x = f32[] parameter(0)
139  %y = f32[] parameter(1)
140  ROOT %maximum.7 = f32[] maximum(f32[] %x, f32[] %y)
141}
142
// Entry computation: reduces %param_0 (f32[5,4096]) along dimension 1 with
// %max_, using the constant 0 as the initial value, and yields f32[5].
143ENTRY %main {
144  %param_0 = f32[5,4096] parameter(0)
145  %constant.3 = f32[] constant(0)
146  ROOT %reduce.pow_2 = f32[5] reduce(f32[5,4096] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
147}
148
149// -----
150
// Test module: max-reduces each row of a 5 x 4070 f32 array; the row length
// 4070 is even. The per-SM expectations for the reduce_even_col kernel follow
// the module header.
151HloModule ReduceEvenColumns
152
// The negative SM60 pattern below rejects vector (v2) loads, mirroring the
// SM50 expectations of the sibling modules. It must differ from the positive
// count pattern that follows it: a negative pattern equal to the next positive
// pattern can never fail, because the region it scans ends at the first
// occurrence of that same pattern.
// NOTE(review): confirm against real sm_60 output that no v2 loads precede the
// eight scalar loads.
153// CHECK-SM60-LABEL: .entry reduce_even_col
154// CHECK-SM60-NOT: ld.global.nc.v2.f32
155// CHECK-SM60-COUNT-8: ld.global.nc.f32
156
157// CHECK-SM70-LABEL: .entry reduce_even_col
158// CHECK-SM70-COUNT-2: ld.global.nc.v2.f32
159// CHECK-SM70-COUNT-2: ld.global.nc.f32
160
// Scalar reducer used by the reduce below: returns the maximum of its two
// f32 parameters.
161%max_ {
162  %x = f32[] parameter(0)
163  %y = f32[] parameter(1)
164  ROOT %maximum.7 = f32[] maximum(f32[] %x, f32[] %y)
165}
166
// Entry computation: reduces %param_0 (f32[5,4070]) along dimension 1 with
// %max_, using the constant 0 as the initial value, and yields f32[5].
167ENTRY %main {
168  %param_0 = f32[5,4070] parameter(0)
169  %constant.3 = f32[] constant(0)
170  ROOT %reduce.even_col = f32[5] reduce(f32[5,4070] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
171}
172