• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -instcombine -S | FileCheck %s
3
; Floating-point add reductions: each takes a float start value plus a vector
; operand and returns float. The expected output in this file names them
; @llvm.vector.reduce.fadd.v4f32 / .v8f32 (without the extra ".f32") -
; presumably the name is remangled when the IR is read; confirm.
4declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
5declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
; External callee; the floating-point extra-use tests pass a reduction result to it.
6declare void @use_f32(float)
7
; Integer add reductions: each takes only a vector operand and returns i32.
8declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
9declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
; External callee; the integer extra-use tests pass a reduction result to it.
10declare void @use_i32(i32)
11
; Positive test: each reduction has a single use and the scalar fsub carries
; 'reassoc nsz'. The expected output subtracts the vectors first (%v0 - %v1),
; performs one reduction of that difference with start value %a0, and then
; subtracts %a1 from the reduced value. All three new instructions carry
; 'reassoc nsz'.
12define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
13; CHECK-LABEL: @diff_of_sums_v4f32(
14; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]
15; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]])
16; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]]
17; CHECK-NEXT:    ret float [[R]]
18;
19  %r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
20  %r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
21  %r = fsub reassoc nsz float %r0, %r1
22  ret float %r
23}
24
25; negative test - fsub must allow reassociation
; The scalar fsub here has only ninf/nnan/nsz (no 'reassoc'). Even though both
; reduction calls are 'fast', the expected output keeps the two calls and the
; scalar fsub; the only textual difference is that the fsub flags are printed
; as 'nnan ninf nsz'.
26
27define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
28; CHECK-LABEL: @diff_of_sums_v4f32_fmf(
29; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
30; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
31; CHECK-NEXT:    [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]]
32; CHECK-NEXT:    ret float [[R]]
33;
34  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
35  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
36  %r = fsub ninf nnan nsz float %r0, %r1
37  ret float %r
38}
39
40; negative test - extra uses could create extra instructions
; The first reduction (%r0) is also passed to @use_f32, so it has a use besides
; the fsub. The expected output leaves both reductions, the call to @use_f32,
; and the 'fast' fsub as written.
41
42define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
43; CHECK-LABEL: @diff_of_sums_extra_use1(
44; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
45; CHECK-NEXT:    call void @use_f32(float [[R0]])
46; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
47; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
48; CHECK-NEXT:    ret float [[R]]
49;
50  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
51  call void @use_f32(float %r0)
52  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
53  %r = fsub fast float %r0, %r1
54  ret float %r
55}
56
57; negative test - extra uses could create extra instructions
; Same shape as the previous extra-use case, but here the second reduction
; (%r1) is the one also passed to @use_f32. The expected output again leaves
; every instruction as written.
58
59define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
60; CHECK-LABEL: @diff_of_sums_extra_use2(
61; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
62; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
63; CHECK-NEXT:    call void @use_f32(float [[R1]])
64; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
65; CHECK-NEXT:    ret float [[R]]
66;
67  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
68  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
69  call void @use_f32(float %r1)
70  %r = fsub fast float %r0, %r1
71  ret float %r
72}
73
74; negative test - can't reassociate different vector types
; %v0 is <4 x float> while %v1 is <8 x float>, so the two reductions use
; different intrinsic overloads (v4f32 and v8f32). The expected output keeps
; both calls and the 'fast' scalar fsub unchanged.
75
76define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) {
77; CHECK-LABEL: @diff_of_sums_type_mismatch(
78; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
79; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]])
80; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
81; CHECK-NEXT:    ret float [[R]]
82;
83  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
84  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1)
85  %r = fsub fast float %r0, %r1
86  ret float %r
87}
88
; Positive test, integer version: the difference of two single-use add
; reductions over <4 x i32> is expected to become a vector sub (%v0 - %v1)
; followed by one reduction of that difference. The integer reductions have no
; start operand, and no scalar sub remains in the expected output.
89define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
90; CHECK-LABEL: @diff_of_sums_v4i32(
91; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
92; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
93; CHECK-NEXT:    ret i32 [[TMP2]]
94;
95  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
96  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
97  %r = sub i32 %r0, %r1
98  ret i32 %r
99}
100
101; negative test - extra uses could create extra instructions
; The first reduction (%r0) is also passed to @use_i32, so it has a use besides
; the sub. The expected output leaves both reductions, the call to @use_i32,
; and the scalar sub as written.
102
103define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
104; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
105; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
106; CHECK-NEXT:    call void @use_i32(i32 [[R0]])
107; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
108; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
109; CHECK-NEXT:    ret i32 [[R]]
110;
111  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
112  call void @use_i32(i32 %r0)
113  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
114  %r = sub i32 %r0, %r1
115  ret i32 %r
116}
117
118; negative test - extra uses could create extra instructions
; Same shape as the previous integer extra-use case, but here the second
; reduction (%r1) is the one also passed to @use_i32. The expected output
; again leaves every instruction as written.
119
120define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
121; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
122; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
123; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
124; CHECK-NEXT:    call void @use_i32(i32 [[R1]])
125; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
126; CHECK-NEXT:    ret i32 [[R]]
127;
128  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
129  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
130  call void @use_i32(i32 %r1)
131  %r = sub i32 %r0, %r1
132  ret i32 %r
133}
134
135; negative test - can't reassociate different vector types
; %v0 is <8 x i32> while %v1 is <4 x i32>, so the two reductions use different
; intrinsic overloads (v8i32 and v4i32). The expected output keeps both calls
; and the scalar sub unchanged.
136
137define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
138; CHECK-LABEL: @diff_of_sums_type_mismatch2(
139; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])
140; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
141; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
142; CHECK-NEXT:    ret i32 [[R]]
143;
144  %r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0)
145  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
146  %r = sub i32 %r0, %r1
147  ret i32 %r
148}
149