; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S  -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"

; Test if the budget for the scheduling region size works.
; We test with a reduced budget of 16 which should prevent vectorizing the loads.

; Opaque external call used only to enlarge the scheduling region between the
; load and store groups; the vectorizer cannot reorder across it.
declare void @unknown()
define void @test(float * %a, float * %b, float * %c, float * %d) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[A:%.*]], align 4
; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[A1]], align 4
; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
; CHECK-NEXT:    [[L2:%.*]] = load float, float* [[A2]], align 4
; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
; CHECK-NEXT:    [[L3:%.*]] = load float, float* [[A3]], align 4
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    call void @unknown()
; CHECK-NEXT:    store float [[L0]], float* [[B:%.*]], align 4
; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds float, float* [[B]], i64 1
; CHECK-NEXT:    store float [[L1]], float* [[B1]], align 4
; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT:    store float [[L2]], float* [[B2]], align 4
; CHECK-NEXT:    [[B3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT:    store float [[L3]], float* [[B3]], align 4
; CHECK-NEXT:    [[C1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 1
; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds float, float* [[C]], i64 2
; CHECK-NEXT:    [[C3:%.*]] = getelementptr inbounds float, float* [[C]], i64 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds float, float* [[D:%.*]], i64 1
; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds float, float* [[D]], i64 2
; CHECK-NEXT:    [[D3:%.*]] = getelementptr inbounds float, float* [[D]], i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[D]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Don't vectorize these loads.
  %l0 = load float, float* %a
  %a1 = getelementptr inbounds float, float* %a, i64 1
  %l1 = load float, float* %a1
  %a2 = getelementptr inbounds float, float* %a, i64 2
  %l2 = load float, float* %a2
  %a3 = getelementptr inbounds float, float* %a, i64 3
  %l3 = load float, float* %a3

  ; some unrelated instructions inbetween to enlarge the scheduling region
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()
  call void @unknown()

  ; Don't vectorize these stores because their operands are too far away.
  store float %l0, float* %b
  %b1 = getelementptr inbounds float, float* %b, i64 1
  store float %l1, float* %b1
  %b2 = getelementptr inbounds float, float* %b, i64 2
  store float %l2, float* %b2
  %b3 = getelementptr inbounds float, float* %b, i64 3
  store float %l3, float* %b3

  ; But still vectorize the following instructions, because even if the budget
  ; is exceeded there is a minimum region size.
  %l4 = load float, float* %c
  %c1 = getelementptr inbounds float, float* %c, i64 1
  %l5 = load float, float* %c1
  %c2 = getelementptr inbounds float, float* %c, i64 2
  %l6 = load float, float* %c2
  %c3 = getelementptr inbounds float, float* %c, i64 3
  %l7 = load float, float* %c3

  store float %l4, float* %d
  %d1 = getelementptr inbounds float, float* %d, i64 1
  store float %l5, float* %d1
  %d2 = getelementptr inbounds float, float* %d, i64 2
  store float %l6, float* %d2
  %d3 = getelementptr inbounds float, float* %d, i64 3
  store float %l7, float* %d3

  ret void
}