• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -march=hexagon -O0 < %s | FileCheck --check-prefix=CHECKO0 %s
2; KP: Removed the -O2 check. The code has become more aggressively optimized
3; (some loads were found to be redundant and have been removed completely),
4; and verifying correct code generation has become more difficult than
5; it's worth. Only the CHECKO0 lines are matched by the remaining RUN line.
6
7; CHECK: v{{[0-9]*}} = vsplat(r{{[0-9]*}})
8; CHECK: v{{[0-9]*}} = vsplat(r{{[0-9]*}})
9
10; CHECKO0: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
11; CHECKO0: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
12; CHECKO0: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
13
14; Allow .cur loads.
15; CHECKO2: v{{[0-9].*}} = vmem(r{{[0-9]*}}+#0)
16; CHECKO2: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
17; CHECKO2: v{{[0-9].*}} = vmem(r{{[0-9]*}}+#0)
18
19; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vcombine(v{{[0-9]*}},v{{[0-9]*}})
20; CHECK: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
21; CHECK: vmem(r{{[0-9]*}}+#32) = v{{[0-9]*}}
22; CHECK: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
23; CHECK: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#32)
24; CHECK: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
25; CHECK: vmem(r{{[0-9]*}}+#32) = v{{[0-9]*}}
26
27target triple = "hexagon"
28
; Module-level data used by @f1 and @f2 below.
; @g0: ten <32 x i32> vectors; filled from @g4 by @f2 and read element-wise by @f1.
29@g0 = common global [10 x <32 x i32>] zeroinitializer, align 64
; @g1: format string passed as the first argument of every @f0 call in @f1.
30@g1 = private unnamed_addr constant [11 x i8] c"c[%d]= %x\0A\00", align 8
; @g2 / @g3: ten <16 x i32> vectors each; @f2 stores the two lvsplatw results here.
31@g2 = common global [10 x <16 x i32>] zeroinitializer, align 64
32@g3 = common global [10 x <16 x i32>] zeroinitializer, align 64
; @g4: ten <32 x i32> vectors; @f2 stores the vcombine results here.
33@g4 = common global [10 x <32 x i32>] zeroinitializer, align 64
34
; Variadic external function taking a format pointer first
; (presumably printf-like, given the @g1 format string -- not defined in this file).
35declare i32 @f0(i8*, ...)
36
37; Function Attrs: nounwind
; @f1(%a0): treats @g0 as a flat i32 array, starts at element index %a0, and
; for 16 iterations loads one i32, advances the pointer by one element, and
; calls @f0 with the @g1 format string, the iteration counter and the loaded
; value. Returns nothing. The IR is unoptimized: every local lives in an
; alloca and is reloaded before each use.
38define void @f1(i32 %a0) #0 {
39b0:
; %v0 = copy of %a0, %v1 = running i32* cursor into @g0, %v2 = loop counter.
40  %v0 = alloca i32, align 4
41  %v1 = alloca i32*, align 4
42  %v2 = alloca i32, align 4
43  store i32 %a0, i32* %v0, align 4
; Cursor starts at the first i32 of @g0[0] ...
44  store i32* getelementptr inbounds ([10 x <32 x i32>], [10 x <32 x i32>]* @g0, i32 0, i32 0, i32 0), i32** %v1, align 4
45  %v3 = load i32, i32* %v0, align 4
46  %v4 = load i32*, i32** %v1, align 4
; ... and is then advanced by %a0 i32 elements.
47  %v5 = getelementptr inbounds i32, i32* %v4, i32 %v3
48  store i32* %v5, i32** %v1, align 4
49  store i32 0, i32* %v2, align 4
50  br label %b1
51
; Loop header: continue while counter < 16.
52b1:                                               ; preds = %b3, %b0
53  %v6 = load i32, i32* %v2, align 4
54  %v7 = icmp slt i32 %v6, 16
55  br i1 %v7, label %b2, label %b4
56
; Loop body: post-increment the cursor, load through its old value, call @f0.
57b2:                                               ; preds = %b1
58  %v8 = load i32, i32* %v2, align 4
59  %v9 = load i32*, i32** %v1, align 4
60  %v10 = getelementptr inbounds i32, i32* %v9, i32 1
61  store i32* %v10, i32** %v1, align 4
62  %v11 = load i32, i32* %v9, align 4
63  %v12 = call i32 (i8*, ...) @f0(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @g1, i32 0, i32 0), i32 %v8, i32 %v11)
64  br label %b3
65
; Loop latch: counter += 1.
66b3:                                               ; preds = %b2
67  %v13 = load i32, i32* %v2, align 4
68  %v14 = add nsw i32 %v13, 1
69  store i32 %v14, i32* %v2, align 4
70  br label %b1
71
72b4:                                               ; preds = %b1
73  ret void
74}
75
76; Function Attrs: nounwind
; @f2(): test driver made of three consecutive counted loops (each runs for
; counter values 0, 1, 2, reusing the same stack slot %v1 as the counter):
;   1. b1-b3:  @g2[i] = lvsplatw(i + 1); @g3[i] = lvsplatw(i * 10 + 1);
;              @g4[i] = vcombine(@g2[i], @g3[i]) with both operands reloaded
;              from memory.
;   2. b5-b7:  @g0[i] = @g4[i].
;   3. b9-b11: call @f1(i * 16).
; Always returns 0. The IR is unoptimized: the counter is reloaded from its
; alloca before every use.
77define i32 @f2() #0 {
78b0:
; %v0 is zero-initialized and not read again in this function; %v1 is the loop counter.
79  %v0 = alloca i32, align 4
80  %v1 = alloca i32, align 4
81  store i32 0, i32* %v0
82  store i32 0, i32* %v1, align 4
83  br label %b1
84
; Loop 1 header: continue while counter < 3.
85b1:                                               ; preds = %b3, %b0
86  %v2 = load i32, i32* %v1, align 4
87  %v3 = icmp slt i32 %v2, 3
88  br i1 %v3, label %b2, label %b4
89
90b2:                                               ; preds = %b1
; @g2[i] = lvsplatw(i + 1)
91  %v4 = load i32, i32* %v1, align 4
92  %v5 = add nsw i32 %v4, 1
93  %v6 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v5)
94  %v7 = load i32, i32* %v1, align 4
95  %v8 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g2, i32 0, i32 %v7
96  store <16 x i32> %v6, <16 x i32>* %v8, align 64
; @g3[i] = lvsplatw(i * 10 + 1)
97  %v9 = load i32, i32* %v1, align 4
98  %v10 = mul nsw i32 %v9, 10
99  %v11 = add nsw i32 %v10, 1
100  %v12 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v11)
101  %v13 = load i32, i32* %v1, align 4
102  %v14 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g3, i32 0, i32 %v13
103  store <16 x i32> %v12, <16 x i32>* %v14, align 64
; Reload @g2[i] and @g3[i] from memory rather than reusing %v6 / %v12.
104  %v15 = load i32, i32* %v1, align 4
105  %v16 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g2, i32 0, i32 %v15
106  %v17 = load <16 x i32>, <16 x i32>* %v16, align 64
107  %v18 = load i32, i32* %v1, align 4
108  %v19 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g3, i32 0, i32 %v18
109  %v20 = load <16 x i32>, <16 x i32>* %v19, align 64
; @g4[i] = vcombine(@g2[i], @g3[i]) -- a <32 x i32> result built from two <16 x i32> inputs.
110  %v21 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v17, <16 x i32> %v20)
111  %v22 = load i32, i32* %v1, align 4
112  %v23 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g4, i32 0, i32 %v22
113  store <32 x i32> %v21, <32 x i32>* %v23, align 64
114  br label %b3
115
; Loop 1 latch: counter += 1.
116b3:                                               ; preds = %b2
117  %v24 = load i32, i32* %v1, align 4
118  %v25 = add nsw i32 %v24, 1
119  store i32 %v25, i32* %v1, align 4
120  br label %b1
121
; Reset the counter for loop 2.
122b4:                                               ; preds = %b1
123  store i32 0, i32* %v1, align 4
124  br label %b5
125
; Loop 2 header: continue while counter < 3.
126b5:                                               ; preds = %b7, %b4
127  %v26 = load i32, i32* %v1, align 4
128  %v27 = icmp slt i32 %v26, 3
129  br i1 %v27, label %b6, label %b8
130
; Loop 2 body: @g0[i] = @g4[i] (whole <32 x i32> load followed by store).
131b6:                                               ; preds = %b5
132  %v28 = load i32, i32* %v1, align 4
133  %v29 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g4, i32 0, i32 %v28
134  %v30 = load <32 x i32>, <32 x i32>* %v29, align 64
135  %v31 = load i32, i32* %v1, align 4
136  %v32 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g0, i32 0, i32 %v31
137  store <32 x i32> %v30, <32 x i32>* %v32, align 64
138  br label %b7
139
; Loop 2 latch: counter += 1.
140b7:                                               ; preds = %b6
141  %v33 = load i32, i32* %v1, align 4
142  %v34 = add nsw i32 %v33, 1
143  store i32 %v34, i32* %v1, align 4
144  br label %b5
145
; Reset the counter for loop 3.
146b8:                                               ; preds = %b5
147  store i32 0, i32* %v1, align 4
148  br label %b9
149
; Loop 3 header: continue while counter < 3.
150b9:                                               ; preds = %b11, %b8
151  %v35 = load i32, i32* %v1, align 4
152  %v36 = icmp slt i32 %v35, 3
153  br i1 %v36, label %b10, label %b12
154
; Loop 3 body: call @f1 with a starting element index of i * 16.
155b10:                                              ; preds = %b9
156  %v37 = load i32, i32* %v1, align 4
157  %v38 = mul nsw i32 %v37, 16
158  call void @f1(i32 %v38)
159  br label %b11
160
; Loop 3 latch: counter += 1.
161b11:                                              ; preds = %b10
162  %v39 = load i32, i32* %v1, align 4
163  %v40 = add nsw i32 %v39, 1
164  store i32 %v40, i32* %v1, align 4
165  br label %b9
166
167b12:                                              ; preds = %b9
168  ret i32 0
169}
170
171; Function Attrs: nounwind readnone
; Hexagon V6 intrinsic: takes one i32 and returns a <16 x i32>
; (presumably a word splat, per the vsplat CHECK lines at the top of the file).
172declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
173
174; Function Attrs: nounwind readnone
; Hexagon V6 intrinsic: takes two <16 x i32> operands and returns one <32 x i32>.
175declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
176
; #0 (used by @f1 and @f2): nounwind, CPU hexagonv60, features +hvxv60 and +hvx-length64b.
177attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
; #1 (used by the two intrinsic declarations): nounwind readnone.
178attributes #1 = { nounwind readnone }
179