; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)

declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)

declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)


define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fadd float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fadd float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float undef, [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-LABEL: @fmul_f32_strict_accum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[VEC:%.*]], i32 0
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul float [[ACCUM:%.*]], [[TMP0]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[VEC]], i32 1
; CHECK-NEXT:    [[BIN_RDX1:%.*]] = fmul float [[BIN_RDX]], [[TMP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[VEC]], i32 2
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul float [[BIN_RDX1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = fmul float [[BIN_RDX2]], [[TMP3]]
; CHECK-NEXT:    ret float [[BIN_RDX3]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
  ret float %r
}

define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
  ret double %r
}

define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
  ret double %r
}