• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -march=hexagon -O3 < %s | FileCheck %s
2; REQUIRES: asserts
3
4; Check that llc compiles @f2 at -O3 without tripping an assertion and still emits the call to f1.
5; CHECK: call f1
6
7target triple = "hexagon-unknown--elf"
8
9%s.0 = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] }
10
; External functions called by @f2. Only declarations are present in this
; file: @f0 returns a noalias i8*, @f1 takes no arguments and returns void.
11; Function Attrs: nounwind
12declare noalias i8* @f0() local_unnamed_addr #0
13
14; Function Attrs: nounwind
15declare void @f1() local_unnamed_addr #0
16
; Hexagon V6 vector intrinsics in their ".128B" form. All of them are declared
; with attribute group #1 (nounwind readnone) and operate on <32 x i32>
; vectors or <64 x i32> vector pairs.
17; Function Attrs: nounwind readnone
18declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>) #1
19
20; Function Attrs: nounwind readnone
21declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
22
23; Function Attrs: nounwind readnone
24declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) #1
25
26; Function Attrs: nounwind readnone
27declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #1
28
29; Function Attrs: nounwind readnone
30declare <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32>, i32) #1
31
32; Function Attrs: nounwind readnone
33declare <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32>, <32 x i32>) #1
34
35; Function Attrs: nounwind readnone
36declare <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32>, <64 x i32>) #1
37
38; Function Attrs: nounwind readnone
39declare <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32>, i32) #1
40
41; Function Attrs: nounwind readnone
42declare <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32>) #1
43
44; Function Attrs: nounwind readnone
45declare <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32>, <32 x i32>) #1
46
47; Function Attrs: nounwind readnone
48declare <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32>, i32) #1
49
50; Function Attrs: nounwind readnone
51declare <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32>, <32 x i32>, i32) #1
52
53; Function Attrs: nounwind readnone
54declare <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32>, <32 x i32>, i32) #1
55
56; Function Attrs: noreturn nounwind
; @f2 is the function fed to llc by this test. It never returns: control
; either spins forever in %b4 or reaches %b6, which calls @f1 and is followed
; by unreachable. Many operands are undef, so this looks like an automatically
; reduced reproducer (NOTE(review): confirm against the original commit) --
; keep instruction order and value names as they are.
57define void @f2(%s.0* noalias nocapture readonly %a01, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) local_unnamed_addr #2 {
58b0:
  ; Load the i8* member (field 1) of %s.0 through an i16** view; %v2 is the
  ; base pointer for every vector load below.
59  %v0 = getelementptr inbounds %s.0, %s.0* %a01, i32 0, i32 1
60  %v1 = bitcast i8** %v0 to i16**
61  %v2 = load i16*, i16** %v1, align 4
  ; The result of this first @f0 call (%v3) is not used anywhere.
62  %v3 = tail call i8* @f0()
  ; %v5 is 0 when %a1 > 0 and %a1 otherwise; bit 0 is then forced on and the
  ; sign of the result picks %b1 (infinite loop) or %b2 (the real work).
63  %v4 = icmp sgt i32 %a1, 0
64  %v5 = select i1 %v4, i32 0, i32 %a1
65  %v6 = or i32 %v5, 1
66  %v7 = icmp sgt i32 %v6, 0
67  br i1 %v7, label %b1, label %b2, !prof !1
68
69b1:                                               ; preds = %b0
70  br label %b4
71
72b2:                                               ; preds = %b0
  ; Scalars derived from the arguments that stay constant across both loops:
  ; %v11 gates entry to %b9, %v15 decides whether the inner loop %b11 is skipped.
73  %v8 = ashr i32 %a6, 6
74  %v9 = mul i32 %v8, 64
75  %v10 = add nsw i32 %v9, 255
76  %v11 = icmp sgt i32 %a6, -193
77  %v12 = ashr i32 %a5, 6
78  %v13 = ashr i32 %a4, 6
79  %v14 = ashr i32 %a2, 6
80  %v15 = icmp ult i32 %v10, 128
  ; A null result from @f0 goes straight to the @f1 call in %b6.
81  %v16 = tail call i8* @f0()
82  %v17 = icmp eq i8* %v16, null
83  br i1 %v17, label %b6, label %b3, !prof !2
84
85b3:                                               ; preds = %b2
  ; %v13 scaled by 16, 19, 17 and 18; each product is added to %v26 in %b9.
86  %v18 = mul nsw i32 %v13, 16
87  %v19 = mul nsw i32 %v13, 19
88  %v20 = mul nsw i32 %v13, 17
89  %v21 = mul nsw i32 %v13, 18
90  br label %b7
91
92b4:                                               ; preds = %b4, %b1
  ; Self-loop with no exit edge.
93  br label %b4
94
95b5:                                               ; preds = %b8
96  br label %b6
97
98b6:                                               ; preds = %b5, %b2
  ; The only call to @f1 in the function; this is the call the test's check
  ; line looks for in the llc output.
99  tail call void @f1() #3
100  unreachable
101
102b7:                                               ; preds = %b8, %b3
  ; Outer loop header. %v22 carries the most recent non-null @f0 result and
  ; %v23 is a counter starting at 1. %v26 = (%v23 - %a3) * %v12 - %v14.
103  %v22 = phi i8* [ %v16, %b3 ], [ %v28, %b8 ]
104  %v23 = phi i32 [ 1, %b3 ], [ %v27, %b8 ]
105  %v24 = sub i32 %v23, %a3
106  %v25 = mul i32 %v24, %v12
107  %v26 = sub i32 %v25, %v14
108  br i1 %v11, label %b9, label %b8
109
110b8:                                               ; preds = %b13, %b7
  ; Outer loop latch: bump the counter, call @f0 again, and leave through
  ; %b5 -> %b6 once it returns null.
111  %v27 = add nuw nsw i32 %v23, 1
112  %v28 = tail call i8* @f0()
113  %v29 = icmp eq i8* %v28, null
114  br i1 %v29, label %b5, label %b7, !prof !2
115
116b9:                                               ; preds = %b7
  ; Base indices for this outer iteration (%v26 plus the %b3 products) and the
  ; splat vectors reused by %b10, %b11 and %b13. %v34 splats an undef scalar.
117  %v30 = add i32 %v26, %v18
118  %v31 = add i32 %v26, %v19
119  %v32 = add i32 %v26, %v20
120  %v33 = add i32 %v26, %v21
121  %v34 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 undef) #3
122  %v35 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 8) #3
123  %v36 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v35, <32 x i32> %v35)
  ; %v37 is the store base: the current @f0 result viewed as i16*.
124  %v37 = bitcast i8* %v22 to i16*
125  br i1 %v15, label %b13, label %b10
126
127b10:                                              ; preds = %b9
  ; Inner-loop preheader. %v45 is computed once here and stored on every
  ; %b11 iteration by the second store in that block.
128  %v38 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> undef) #3
129  %v39 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> undef, <64 x i32> %v38) #3
130  %v40 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v39, <64 x i32> %v36) #3
131  %v41 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v40)
132  %v42 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v41, i32 4) #3
133  %v43 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> undef, <32 x i32> %v42)
134  %v44 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v43) #3
135  %v45 = tail call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> undef, <32 x i32> %v44) #3
136  br label %b11
137
138b11:                                              ; preds = %b11, %b10
  ; Inner loop. %v49 is the index, advanced by 2 per iteration. %v46..%v48
  ; carry the vectors loaded near the end of the previous iteration (%v120,
  ; %v115, %v110). The exit counter %v50 starts as undef and counts down by 2.
139  %v46 = phi <32 x i32> [ %v120, %b11 ], [ undef, %b10 ]
140  %v47 = phi <32 x i32> [ %v115, %b11 ], [ undef, %b10 ]
141  %v48 = phi <32 x i32> [ %v110, %b11 ], [ undef, %b10 ]
142  %v49 = phi i32 [ %v124, %b11 ], [ 0, %b10 ]
143  %v50 = phi i32 [ %v125, %b11 ], [ undef, %b10 ]
  ; Four 128-byte-aligned vector loads from %v2 at element index
  ; (%v49 + base) << 6, with bases %v33, %v32, %v31 and %v30.
144  %v51 = add i32 %v49, %v33
145  %v52 = shl nsw i32 %v51, 6
146  %v53 = getelementptr inbounds i16, i16* %v2, i32 %v52
147  %v54 = bitcast i16* %v53 to <32 x i32>*
148  %v55 = load <32 x i32>, <32 x i32>* %v54, align 128, !tbaa !3
149  %v56 = add i32 %v49, %v32
150  %v57 = shl nsw i32 %v56, 6
151  %v58 = getelementptr inbounds i16, i16* %v2, i32 %v57
152  %v59 = bitcast i16* %v58 to <32 x i32>*
153  %v60 = load <32 x i32>, <32 x i32>* %v59, align 128, !tbaa !3
154  %v61 = add i32 %v31, %v49
155  %v62 = shl nsw i32 %v61, 6
156  %v63 = getelementptr inbounds i16, i16* %v2, i32 %v62
157  %v64 = bitcast i16* %v63 to <32 x i32>*
158  %v65 = load <32 x i32>, <32 x i32>* %v64, align 128, !tbaa !3
159  %v66 = add i32 %v49, %v30
160  %v67 = shl nsw i32 %v66, 6
161  %v68 = getelementptr inbounds i16, i16* %v2, i32 %v67
162  %v69 = bitcast i16* %v68 to <32 x i32>*
163  %v70 = load <32 x i32>, <32 x i32>* %v69, align 128, !tbaa !3
  ; Each loaded vector goes through the same valignb(…, 92) -> vasrh(…, 1) ->
  ; vaddh(…, %v34) chain before the results are combined.
164  %v71 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v55, <32 x i32> undef, i32 92)
165  %v72 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v71, i32 1) #3
166  %v73 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v72, <32 x i32> %v34) #3
167  %v74 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32> %v73, i32 393222) #3
168  %v75 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v60, <32 x i32> %v48, i32 92)
169  %v76 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v75, i32 1) #3
170  %v77 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v76, <32 x i32> %v34) #3
171  %v78 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v65, <32 x i32> undef, i32 92)
172  %v79 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v78, i32 1) #3
173  %v80 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v79, <32 x i32> %v34) #3
174  %v81 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v77, <32 x i32> %v80) #3
175  %v82 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> %v81) #3
176  %v83 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v74)
177  %v84 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v82)
178  %v85 = tail call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %v83, <32 x i32> %v84, i32 2) #3
179  %v86 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v85, <32 x i32> undef)
180  %v87 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v70, <32 x i32> %v47, i32 92)
181  %v88 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v87, i32 1) #3
182  %v89 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v88, <32 x i32> %v34) #3
183  %v90 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> undef, <32 x i32> %v46, i32 92)
184  %v91 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v90, i32 1) #3
185  %v92 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v91, <32 x i32> %v34) #3
186  %v93 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v89, <32 x i32> %v92) #3
187  %v94 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> %v93) #3
188  %v95 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v86, <64 x i32> %v94) #3
189  %v96 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v95, <64 x i32> %v36) #3
190  %v97 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v96)
191  %v98 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v97, i32 4) #3
192  %v99 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v98, <32 x i32> undef)
193  %v100 = tail call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %v99) #3
194  %v101 = tail call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> undef, <32 x i32> %v100) #3
  ; First store of the iteration: %v101 to %v37 at element index %v49 << 6.
195  %v102 = shl nsw i32 %v49, 6
196  %v103 = getelementptr inbounds i16, i16* %v37, i32 %v102
197  %v104 = bitcast i16* %v103 to <32 x i32>*
198  store <32 x i32> %v101, <32 x i32>* %v104, align 128, !tbaa !6
  ; Loads for index %v49 | 1; these feed the phis %v48, %v47 and %v46 on the
  ; next iteration rather than being used here.
199  %v105 = or i32 %v49, 1
200  %v106 = add i32 %v105, %v32
201  %v107 = shl nsw i32 %v106, 6
202  %v108 = getelementptr inbounds i16, i16* %v2, i32 %v107
203  %v109 = bitcast i16* %v108 to <32 x i32>*
204  %v110 = load <32 x i32>, <32 x i32>* %v109, align 128, !tbaa !3
205  %v111 = add i32 %v105, %v30
206  %v112 = shl nsw i32 %v111, 6
207  %v113 = getelementptr inbounds i16, i16* %v2, i32 %v112
208  %v114 = bitcast i16* %v113 to <32 x i32>*
209  %v115 = load <32 x i32>, <32 x i32>* %v114, align 128, !tbaa !3
210  %v116 = add i32 %v105, %v26
211  %v117 = shl nsw i32 %v116, 6
212  %v118 = getelementptr inbounds i16, i16* %v2, i32 %v117
213  %v119 = bitcast i16* %v118 to <32 x i32>*
214  %v120 = load <32 x i32>, <32 x i32>* %v119, align 128, !tbaa !3
  ; Second store of the iteration: the preheader value %v45 at index
  ; (%v49 | 1) << 6.
215  %v121 = shl nsw i32 %v105, 6
216  %v122 = getelementptr inbounds i16, i16* %v37, i32 %v121
217  %v123 = bitcast i16* %v122 to <32 x i32>*
218  store <32 x i32> %v45, <32 x i32>* %v123, align 128, !tbaa !6
219  %v124 = add nuw nsw i32 %v49, 2
220  %v125 = add i32 %v50, -2
221  %v126 = icmp eq i32 %v125, 0
222  br i1 %v126, label %b12, label %b11
223
224b12:                                              ; preds = %b11
225  br label %b13
226
227b13:                                              ; preds = %b12, %b9
  ; Code after the inner loop. %v127 is 0 when %b11 was skipped, otherwise the
  ; final inner index. Three loads, one more compute chain, and a single store
  ; whose element offset from %v37 is undef; then back to the outer latch.
228  %v127 = phi i32 [ 0, %b9 ], [ %v124, %b12 ]
229  %v128 = add i32 %v127, %v33
230  %v129 = shl nsw i32 %v128, 6
231  %v130 = getelementptr inbounds i16, i16* %v2, i32 %v129
232  %v131 = bitcast i16* %v130 to <32 x i32>*
233  %v132 = load <32 x i32>, <32 x i32>* %v131, align 128, !tbaa !3
234  %v133 = add i32 %v127, %v30
235  %v134 = shl nsw i32 %v133, 6
236  %v135 = getelementptr inbounds i16, i16* %v2, i32 %v134
237  %v136 = bitcast i16* %v135 to <32 x i32>*
238  %v137 = load <32 x i32>, <32 x i32>* %v136, align 128, !tbaa !3
239  %v138 = add i32 %v127, %v26
240  %v139 = shl nsw i32 %v138, 6
241  %v140 = getelementptr inbounds i16, i16* %v2, i32 %v139
242  %v141 = bitcast i16* %v140 to <32 x i32>*
243  %v142 = load <32 x i32>, <32 x i32>* %v141, align 128, !tbaa !3
244  %v143 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v132, <32 x i32> undef, i32 92)
245  %v144 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v143, i32 1) #3
246  %v145 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v144, <32 x i32> %v34) #3
247  %v146 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.128B(<32 x i32> %v145, i32 393222) #3
248  %v147 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v146)
249  %v148 = tail call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %v147, <32 x i32> undef, i32 2) #3
250  %v149 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v148, <32 x i32> undef)
251  %v150 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v137, <32 x i32> undef, i32 92)
252  %v151 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v150, i32 1) #3
253  %v152 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v151, <32 x i32> %v34) #3
254  %v153 = tail call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> %v142, <32 x i32> undef, i32 92)
255  %v154 = tail call <32 x i32> @llvm.hexagon.V6.vasrh.128B(<32 x i32> %v153, i32 1) #3
256  %v155 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v154, <32 x i32> %v34) #3
257  %v156 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.128B(<32 x i32> %v152, <32 x i32> %v155) #3
258  %v157 = tail call <64 x i32> @llvm.hexagon.V6.vzh.128B(<32 x i32> %v156) #3
259  %v158 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v149, <64 x i32> %v157) #3
260  %v159 = tail call <64 x i32> @llvm.hexagon.V6.vaddw.dv.128B(<64 x i32> %v158, <64 x i32> %v36) #3
261  %v160 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v159)
262  %v161 = tail call <32 x i32> @llvm.hexagon.V6.vlsrw.128B(<32 x i32> %v160, i32 4) #3
263  %v162 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %v161, <32 x i32> undef)
264  %v163 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %v162) #3
265  %v164 = tail call <32 x i32> @llvm.hexagon.V6.vshufeh.128B(<32 x i32> %v163, <32 x i32> undef) #3
266  %v165 = getelementptr inbounds i16, i16* %v37, i32 undef
267  %v166 = bitcast i16* %v165 to <32 x i32>*
268  store <32 x i32> %v164, <32 x i32>* %v166, align 128, !tbaa !6
269  br label %b8
270}
271
; Attribute groups, by where this file attaches them:
;   #0 - the external declarations @f0 and @f1
;   #1 - the Hexagon intrinsic declarations
;   #2 - the definition of @f2 (adds noreturn to the #0 set)
;   #3 - individual call sites inside @f2
272attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
273attributes #1 = { nounwind readnone }
274attributes #2 = { noreturn nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
275attributes #3 = { nounwind }
276
277!llvm.module.flags = !{!0}
278
; !0 is the single module flag; it records the same feature string as the
; "target-features" attribute above under the key "halide_mattrs".
279!0 = !{i32 2, !"halide_mattrs", !"+hvxv60,+hvx-length128b"}
; !1 and !2 are the !prof branch weights used on the conditional branches in
; @f2: !1 puts all weight on the first successor, !2 on the second.
280!1 = !{!"branch_weights", i32 1073741824, i32 0}
281!2 = !{!"branch_weights", i32 0, i32 1073741824}
; TBAA: !3 tags the vector loads (type node "input_yuv") and !6 tags the
; vector stores (type node "blurred_ds_y"); both type nodes have the
; "Halide buffer" root !5 as parent.
282!3 = !{!4, !4, i64 0}
283!4 = !{!"input_yuv", !5}
284!5 = !{!"Halide buffer"}
285!6 = !{!7, !7, i64 0}
286!7 = !{!"blurred_ds_y", !5}
287