; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Tests for InstCombine's handling of a subtraction whose two operands are
; vector add-reductions. The positive tests expect the two reductions to be
; merged into one reduction of the element-wise vector difference; the
; negative tests expect the IR to be left as written.
;
; The pass is requested with the -passes= pipeline syntax; the legacy
; "-instcombine" flag spelling is rejected by current versions of opt.
;
; The fadd declarations below use the ".f32.v4f32"/".f32.v8f32" suffix while
; the autogenerated assertions match the ".v4f32"/".v8f32" spelling, i.e. the
; name as printed in opt's output.

declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
declare void @use_f32(float)

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare void @use_i32(i32)

; positive test - with 'reassoc nsz' on the fsub, the two reductions become a
; single reduction of (%v0 - %v1) started at %a0, and %a1 is subtracted from
; that result afterwards.

define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]])
; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  %r = fsub reassoc nsz float %r0, %r1
  ret float %r
}

; negative test - fsub must allow reassociation
; (the calls are 'fast', but the fsub itself carries only ninf/nnan/nsz)

define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_v4f32_fmf(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  %r = fsub ninf nnan nsz float %r0, %r1
  ret float %r
}

; negative test - extra uses could create extra instructions
; (the first reduction %r0 is also passed to @use_f32)

define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_extra_use1(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    call void @use_f32(float [[R0]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  call void @use_f32(float %r0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  %r = fsub fast float %r0, %r1
  ret float %r
}

; negative test - extra uses could create extra instructions
; (the second reduction %r1 is also passed to @use_f32)

define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_extra_use2(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
; CHECK-NEXT:    call void @use_f32(float [[R1]])
; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  call void @use_f32(float %r1)
  %r = fsub fast float %r0, %r1
  ret float %r
}

; negative test - can't reassociate different vector types
; (<4 x float> on the left, <8 x float> on the right)

define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1)
  %r = fsub fast float %r0, %r1
  ret float %r
}

; positive test - integer version: the two add-reductions and the scalar sub
; become one add-reduction of the vector difference (%v0 - %v1).

define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}

; negative test - extra uses could create extra instructions
; (the first reduction %r0 is also passed to @use_i32)

define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
; CHECK-NEXT:    call void @use_i32(i32 [[R0]])
; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
  call void @use_i32(i32 %r0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}

; negative test - extra uses could create extra instructions
; (the second reduction %r1 is also passed to @use_i32)

define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT:    call void @use_i32(i32 [[R1]])
; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  call void @use_i32(i32 %r1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}

; negative test - can't reassociate different vector types
; (<8 x i32> on the left, <4 x i32> on the right)

define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch2(
; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}