// RUN: hlo_to_llvm_ir --ptx %s | FileCheck %s
// RUN: hlo_to_llvm_ir --ptx --sm=50 %s | FileCheck %s --check-prefix=CHECK-SM50
// RUN: hlo_to_llvm_ir --ptx --sm=60 %s | FileCheck %s --check-prefix=CHECK-SM60
// RUN: hlo_to_llvm_ir --ptx --sm=70 %s | FileCheck %s --check-prefix=CHECK-SM70
// RUN: hlo_to_llvm_ir --ptx --sm=86 %s | FileCheck %s --check-prefix=CHECK-SM86

// Lit test: every HLO module below is lowered to PTX by hlo_to_llvm_ir, once
// with the default target and once for each --sm value listed above. Each
// invocation is verified with its own directive prefix, so a module is only
// constrained for the targets whose prefix appears next to it.
//
// All modules reduce a two-dimensional f32 input along dimension 1.

// Row length 4071 (odd): none of the load forms listed below may show up in
// the reduce_odd_row kernel.
// CHECK-LABEL: .entry reduce_odd_row
// CHECK-NOT: ld.global.nc.v2.f32
// CHECK-NOT: ld.global.nc.v4.f32
// CHECK-NOT: ld.global.nc.u64
// CHECK-NOT: ld.global.u64

HloModule ReduceOddRowSize

%max_ {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %maximum.7 = f32[] maximum(%x, %y)
}

ENTRY %main {
  %param_0 = f32[5,4071] parameter(0)
  %constant.3 = f32[] constant(0)
  ROOT %reduce.odd_row = f32[5] reduce(f32[5,4071] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
}

// -----

// Verified for the --sm=86 invocation only: f32[700000,32] summed along
// dimension 1; the kernel must carry the `.reqntid 96, 1, 1` directive.
// CHECK-SM86-LABEL: .entry reduce_small_row
// CHECK-SM86: .reqntid 96, 1, 1

HloModule ReduceSmallRow

addition {
  x = f32[] parameter(0)
  y = f32[] parameter(1)
  ROOT out = add(%x, %y)
}

ENTRY main {
  p = f32[700000,32] parameter(0)
  zero = f32[] constant(0)
  ROOT reduce_small_row = f32[700000] reduce(p, zero), dimensions={1}, to_apply=%addition
}

// -----

// A sine feeds the reduction: seven `ld.global.nc.v2.f32` loads are expected
// in the reduce_sine kernel for the default target.
// CHECK-LABEL: .entry reduce_sine
// CHECK-COUNT-7: ld.global.nc.v2.f32

HloModule DisableSin

%add_float {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %add.17 = f32[] add(f32[] %x, f32[] %y)
}

ENTRY %main {
  %arg0.1 = f32[5,3584] parameter(0)
  %sine = f32[5,3584] sine(f32[5,3584] %arg0.1)
  %constant.0 = f32[] constant(0)
  ROOT %reduce.sine = f32[5] reduce(f32[5,3584] %sine, f32[] %constant.0), dimensions={1}, to_apply=%add_float
}

// -----

// SM dependent tests

// An exponential feeds the reduction (row length 3584).
//   sm_50: no v2 load ahead of eight scalar f32 loads.
//   sm_60 / sm_70: eight v2 loads.
// NOTE(review): the `.entry exp` lines below require a kernel literally named
// `exp` to be emitted ahead of reduce_exp on sm_60 and sm_70 - confirm that
// this is still what the tool produces.

// CHECK-SM50-LABEL: .entry reduce_exp
// CHECK-SM50-NOT: ld.global.nc.v2.f32
// CHECK-SM50-COUNT-8: ld.global.nc.f32

// CHECK-SM60: .entry exp
// CHECK-SM60-LABEL: .entry reduce_exp
// CHECK-SM60-COUNT-8: ld.global.nc.v2.f32

// CHECK-SM70: .entry exp
// CHECK-SM70-LABEL: .entry reduce_exp
// CHECK-SM70-COUNT-8: ld.global.nc.v2.f32

HloModule Exp

%add_float {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %add.17 = f32[] add(f32[] %x, f32[] %y)
}

ENTRY %main {
  %arg0.1 = f32[5,3584] parameter(0)
  %exp = f32[5,3584] exponential(f32[5,3584] %arg0.1)
  %constant.0 = f32[] constant(0)
  ROOT %reduce.exp = f32[5] reduce(f32[5,3584] %exp, f32[] %constant.0), dimensions={1}, to_apply=%add_float
}

// -----

HloModule ReduceTileFit

// Row length 3584, maximum reduction.
//   sm_50: no v2 load ahead of eight scalar f32 loads.
//   sm_60: eight v2 loads.
//   sm_70: four v2 loads.

// CHECK-SM50-LABEL: .entry reduce_tile_fit
// CHECK-SM50-NOT: ld.global.nc.v2.f32
// CHECK-SM50-COUNT-8: ld.global.nc.f32

// CHECK-SM60-LABEL: .entry reduce_tile_fit
// CHECK-SM60-COUNT-8: ld.global.nc.v2.f32

// CHECK-SM70-LABEL: .entry reduce_tile_fit
// CHECK-SM70-COUNT-4: ld.global.nc.v2.f32

%max_ {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %maximum.7 = f32[] maximum(f32[] %x, f32[] %y)
}

ENTRY %main {
  %param_0 = f32[5,3584] parameter(0)
  %constant.3 = f32[] constant(0)
  ROOT %reduce.tile_fit = f32[5] reduce(f32[5,3584] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
}

// -----

HloModule ReducePower2

// Row length 4096 (a power of two), maximum reduction.
//   sm_50: no v2 load ahead of eight scalar f32 loads.
//   sm_60 / sm_70: four v2 loads.

// CHECK-SM50-LABEL: .entry reduce_pow_2
// CHECK-SM50-NOT: ld.global.nc.v2.f32
// CHECK-SM50-COUNT-8: ld.global.nc.f32

// CHECK-SM60-LABEL: .entry reduce_pow_2
// CHECK-SM60-COUNT-4: ld.global.nc.v2.f32

// CHECK-SM70-LABEL: .entry reduce_pow_2
// CHECK-SM70-COUNT-4: ld.global.nc.v2.f32

%max_ {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %maximum.7 = f32[] maximum(f32[] %x, f32[] %y)
}

ENTRY %main {
  %param_0 = f32[5,4096] parameter(0)
  %constant.3 = f32[] constant(0)
  ROOT %reduce.pow_2 = f32[5] reduce(f32[5,4096] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
}

// -----

HloModule ReduceEvenColumns

// Row length 4070 (even), maximum reduction.
//   sm_60: eight scalar f32 loads, with no v2 load ahead of them.
//   sm_70: two v2 loads followed by two scalar f32 loads.
// The sm_60 negative pattern must differ from the positive pattern that
// follows it: a negative pattern identical to the next positive one can never
// fire, because the excluded region stops at the first occurrence of that
// very string. It uses the same "no v2 ahead of the scalar loads" shape as
// the sm_50 directives in the other cases.
// NOTE(review): confirm against real sm_60 output that no v2 load precedes
// the scalar loads.

// CHECK-SM60-LABEL: .entry reduce_even_col
// CHECK-SM60-NOT: ld.global.nc.v2.f32
// CHECK-SM60-COUNT-8: ld.global.nc.f32

// CHECK-SM70-LABEL: .entry reduce_even_col
// CHECK-SM70-COUNT-2: ld.global.nc.v2.f32
// CHECK-SM70-COUNT-2: ld.global.nc.f32

%max_ {
  %x = f32[] parameter(0)
  %y = f32[] parameter(1)
  ROOT %maximum.7 = f32[] maximum(f32[] %x, f32[] %y)
}

ENTRY %main {
  %param_0 = f32[5,4070] parameter(0)
  %constant.3 = f32[] constant(0)
  ROOT %reduce.even_col = f32[5] reduce(f32[5,4070] %param_0, f32[] %constant.3), dimensions={1}, to_apply=%max_
}