; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -disable-expand-reductions -stop-after=irtranslator %s -o - | FileCheck %s

declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)

define float @fadd_seq(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
  ; CHECK:   [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[COPY1]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}

define float @fadd_fast(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
  ; CHECK:   [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[COPY1]](<4 x s32>)
  ; CHECK:   [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
  ; CHECK:   $s0 = COPY [[FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}

define double @fmul_seq(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}

define double @fmul_fast(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
  ; CHECK:   $d0 = COPY [[FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call reassoc double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

define float @fmax(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[COPY]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %vec)
  ret float %res
}

define float @fmin(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[COPY]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}

define float @fmin_nnan(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin_nnan
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[COPY]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

define i32 @add(<4 x i32> %vec) {
  ; CHECK-LABEL: name: add
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_ADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)

define i32 @mul(<4 x i32> %vec) {
  ; CHECK-LABEL: name: mul
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_MUL]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)

define i32 @and(<4 x i32> %vec) {
  ; CHECK-LABEL: name: and
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_AND]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)

define i32 @or(<4 x i32> %vec) {
  ; CHECK-LABEL: name: or
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_OR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)

define i32 @xor(<4 x i32> %vec) {
  ; CHECK-LABEL: name: xor
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_XOR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)

define i32 @smax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @smin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @umax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_UMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @umin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_UMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)
  ret i32 %res
}