; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK-NOT: setbit(r{{[0-9]+}},#1)

; Codegen regression test: compile @f6 for Hexagon (HVX v60, 64-byte vectors,
; see attributes #0) and verify, via the negative FileCheck directive above,
; that the generated assembly never contains a "setbit" of bit 1 on a scalar
; register.
;
; NOTE(review): every shufflevector in this file had lost its mask operand,
; which made the module unparseable. The masks below were reconstructed from
; how the results are used:
;   * <16 x i32> extracts come in pairs from the same <32 x i32> source; the
;     first of each pair is stored at the lower offset / passed as the second
;     (presumably "low") operand of vcombine, so it is taken to be lanes
;     0..15 and the second one lanes 16..31.
;   * <32 x i32> results join two <16 x i32> loads from consecutive
;     addresses, so they are taken to be the identity mask 0..31.
; Please confirm against the upstream copy of this test.

target triple = "hexagon-unknown--elf"

; %s.4 is the buffer descriptor passed to @f6. Field 1 is a byte pointer and
; fields 2..4 are three [4 x i32] arrays.
; NOTE(review): the metadata mentions "Halide buffer", so these are presumably
; host pointer / extent / stride / min of a Halide buffer_t -- confirm.
%s.8 = type { i8*, i32, i32, i32, i32, %s.9*, %s.9*, %s.9* }
%s.9 = type { %s.10 }
%s.10 = type { i64 }
%s.4 = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] }

; Constant tables. None of them is referenced by @f6.
@g0 = private constant [6 x i8] c"input\00", align 32
@g1 = private constant [11 x i8] c"gaussian11\00", align 32
@g2 = private constant [2 x %s.8] [%s.8 { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @g0, i32 0, i32 0), i32 1, i32 2, i32 1, i32 8, %s.9* null, %s.9* null, %s.9* null }, %s.8 { i8* getelementptr inbounds ([11 x i8], [11 x i8]* @g1, i32 0, i32 0), i32 2, i32 2, i32 1, i32 8, %s.9* null, %s.9* null, %s.9* null }]
@g3 = private constant [53 x i8] c"hexagon-32-os_unknown-no_asserts-no_bounds_query-hvx\00", align 32

; External helpers. Only @f2 and @f3 are called by @f6.
; NOTE(review): @f2 returns a noalias pointer for a byte count and @f3 takes
; that pointer back, so they look like an allocate/free pair -- confirm.

; Function Attrs: nounwind
declare i8* @f0(i8*, i32) #0

; Function Attrs: nounwind
declare void @f1(i8*, i8*) #0

; Function Attrs: nounwind
declare noalias i8* @f2(i8*, i32) #0

; Function Attrs: nounwind
declare void @f3(i8*, i8*) #0

; Function Attrs: nounwind
declare void @f4() #0

; Function Attrs: nounwind
declare void @f5() #0

; @f6(%a0, %a1): reads bytes through %a0's pointer, writes bytes through
; %a1's pointer, and always returns 0. It runs in three stages:
;   1. b1..b11  : scalar loops copy bytes from %a0 into a temporary obtained
;                 from @f2 (%v48); positions outside the source range are
;                 written as 0 (the selects on %v87/%v114/%v144/%v172/%v209).
;   2. b13..b19 : HVX loops read %v48, widen bytes to words, combine each
;                 vector with the one starting one byte later, and store
;                 words into a second temporary (%v227).
;   3. b21..b30 : HVX loops read two consecutive rows of %v227, combine,
;                 shift right by 20, saturate back to bytes and store
;                 through %a1's pointer.
; The scalar multiplier 168430090 used with vmpyiwb is 0x0A0A0A0A.

; Function Attrs: nounwind
define i32 @f6(%s.4* noalias nocapture readonly %a0, %s.4* noalias nocapture readonly %a1) #0 {
b0:
  ; Load the pointer and selected array elements from the input descriptor.
  %v0 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 1
  %v1 = load i8*, i8** %v0
  %v2 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 2, i32 0
  %v3 = load i32, i32* %v2
  %v4 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 2, i32 1
  %v5 = load i32, i32* %v4
  %v6 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 3, i32 1
  %v7 = load i32, i32* %v6
  %v8 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 4, i32 0
  %v9 = load i32, i32* %v8
  %v10 = getelementptr inbounds %s.4, %s.4* %a0, i32 0, i32 4, i32 1
  %v11 = load i32, i32* %v10
  ; Same fields from the output descriptor.
  %v12 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 1
  %v13 = load i8*, i8** %v12
  %v14 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 2, i32 0
  %v15 = load i32, i32* %v14
  %v16 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 2, i32 1
  %v17 = load i32, i32* %v16
  %v18 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 3, i32 1
  %v19 = load i32, i32* %v18
  %v20 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 4, i32 0
  %v21 = load i32, i32* %v20
  %v22 = getelementptr inbounds %s.4, %s.4* %a1, i32 0, i32 4, i32 1
  %v23 = load i32, i32* %v22
  ; Derive loop bounds and the row width (%v44) of the first temporary.
  %v24 = add nsw i32 %v21, %v15
  %v25 = add nsw i32 %v24, -64
  %v26 = icmp slt i32 %v21, %v25
  %v27 = select i1 %v26, i32 %v21, i32 %v25
  %v28 = add nsw i32 %v15, -1
  %v29 = and i32 %v28, -64
  %v30 = add i32 %v21, 63
  %v31 = add i32 %v30, %v29
  %v32 = add nsw i32 %v24, -1
  %v33 = icmp slt i32 %v31, %v32
  %v34 = select i1 %v33, i32 %v31, i32 %v32
  %v35 = sub nsw i32 %v34, %v27
  %v36 = icmp slt i32 %v24, %v34
  %v37 = select i1 %v36, i32 %v34, i32 %v24
  %v38 = add nsw i32 %v37, -1
  %v39 = icmp slt i32 %v38, %v34
  %v40 = select i1 %v39, i32 %v34, i32 %v38
  %v41 = add nsw i32 %v17, 1
  %v42 = sext i32 %v41 to i64
  %v43 = sub nsw i32 %v40, %v27
  %v44 = add nsw i32 %v43, 2
  %v45 = sext i32 %v44 to i64
  %v46 = mul nsw i64 %v45, %v42
  %v47 = trunc i64 %v46 to i32
  ; First temporary: %v44 * (%v17 + 1) bytes.
  %v48 = tail call i8* @f2(i8* null, i32 %v47)
  %v49 = add nsw i32 %v23, -1
  %v50 = add i32 %v23, %v17
  %v51 = icmp sgt i32 %v23, %v50
  br i1 %v51, label %b12, label %b1, !prof !3

; ---- Stage 1: outer loop over %v52, from %v49 up to (not including) %v50.
b1:                                               ; preds = %b11, %b0
  %v52 = phi i32 [ %v220, %b11 ], [ %v49, %b0 ]
  %v53 = icmp slt i32 %v9, %v24
  %v54 = select i1 %v53, i32 %v9, i32 %v24
  %v55 = add nsw i32 %v21, -1
  %v56 = icmp slt i32 %v54, %v55
  %v57 = select i1 %v56, i32 %v55, i32 %v54
  %v58 = add nsw i32 %v9, %v3
  %v59 = icmp slt i32 %v58, %v24
  %v60 = select i1 %v59, i32 %v58, i32 %v24
  %v61 = icmp slt i32 %v60, %v57
  %v62 = select i1 %v61, i32 %v57, i32 %v60
  %v63 = icmp slt i32 %v57, %v21
  br i1 %v63, label %b7, label %b2, !prof !3

b2:                                               ; preds = %b1
  %v64 = add nsw i32 %v11, %v5
  %v65 = add nsw i32 %v64, -1
  %v66 = icmp slt i32 %v52, %v65
  br i1 %v66, label %b3, label %b4

; Inner loop over [%v55, %v57): source row index is max(%v52, %v11).
b3:                                               ; preds = %b3, %b2
  %v67 = phi i32 [ %v96, %b3 ], [ %v55, %b2 ]
  %v68 = mul nsw i32 %v11, %v7
  %v69 = icmp slt i32 %v52, %v11
  %v70 = select i1 %v69, i32 %v11, i32 %v52
  %v71 = mul nsw i32 %v70, %v7
  %v72 = add nsw i32 %v58, -1
  %v73 = icmp slt i32 %v67, %v72
  %v74 = select i1 %v73, i32 %v67, i32 %v72
  %v75 = icmp slt i32 %v74, %v9
  %v76 = select i1 %v75, i32 %v9, i32 %v74
  %v77 = add i32 %v68, %v9
  %v78 = sub i32 %v71, %v77
  %v79 = add i32 %v78, %v76
  %v80 = getelementptr inbounds i8, i8* %v1, i32 %v79
  %v81 = load i8, i8* %v80, align 1, !tbaa !4
  %v82 = icmp sle i32 %v64, %v52
  %v83 = icmp sle i32 %v58, %v67
  %v84 = icmp slt i32 %v67, %v9
  %v85 = or i1 %v84, %v83
  %v86 = or i1 %v69, %v85
  %v87 = or i1 %v82, %v86
  ; Out-of-range positions are written as 0 instead of the loaded byte.
  %v88 = select i1 %v87, i8 0, i8 %v81
  %v89 = sub i32 1, %v23
  %v90 = add i32 %v89, %v52
  %v91 = mul nsw i32 %v90, %v44
  %v92 = sub i32 1, %v27
  %v93 = add i32 %v92, %v91
  %v94 = add i32 %v93, %v67
  %v95 = getelementptr inbounds i8, i8* %v48, i32 %v94
  store i8 %v88, i8* %v95, align 1, !tbaa !7
  %v96 = add nsw i32 %v67, 1
  %v97 = icmp eq i32 %v96, %v57
  br i1 %v97, label %b7, label %b3

b4:                                               ; preds = %b2
  %v98 = icmp slt i32 %v5, 1
  br i1 %v98, label %b5, label %b6

; Same inner loop as b3, taken when %v5 < 1: no row offset is applied.
b5:                                               ; preds = %b5, %b4
  %v99 = phi i32 [ %v123, %b5 ], [ %v55, %b4 ]
  %v100 = add nsw i32 %v58, -1
  %v101 = icmp slt i32 %v99, %v100
  %v102 = select i1 %v101, i32 %v99, i32 %v100
  %v103 = icmp slt i32 %v102, %v9
  %v104 = select i1 %v103, i32 %v9, i32 %v102
  %v105 = sub i32 %v104, %v9
  %v106 = getelementptr inbounds i8, i8* %v1, i32 %v105
  %v107 = load i8, i8* %v106, align 1, !tbaa !4
  %v108 = icmp sle i32 %v64, %v52
  %v109 = icmp slt i32 %v52, %v11
  %v110 = icmp sle i32 %v58, %v99
  %v111 = icmp slt i32 %v99, %v9
  %v112 = or i1 %v111, %v110
  %v113 = or i1 %v109, %v112
  %v114 = or i1 %v108, %v113
  %v115 = select i1 %v114, i8 0, i8 %v107
  %v116 = sub i32 1, %v23
  %v117 = add i32 %v116, %v52
  %v118 = mul nsw i32 %v117, %v44
  %v119 = sub i32 1, %v27
  %v120 = add i32 %v119, %v118
  %v121 = add i32 %v120, %v99
  %v122 = getelementptr inbounds i8, i8* %v48, i32 %v121
  store i8 %v115, i8* %v122, align 1, !tbaa !7
  %v123 = add nsw i32 %v99, 1
  %v124 = icmp eq i32 %v123, %v57
  br i1 %v124, label %b7, label %b5

; Same inner loop as b3, with the source row fixed at %v65.
b6:                                               ; preds = %b6, %b4
  %v125 = phi i32 [ %v153, %b6 ], [ %v55, %b4 ]
  %v126 = mul nsw i32 %v11, %v7
  %v127 = mul nsw i32 %v65, %v7
  %v128 = add nsw i32 %v58, -1
  %v129 = icmp slt i32 %v125, %v128
  %v130 = select i1 %v129, i32 %v125, i32 %v128
  %v131 = icmp slt i32 %v130, %v9
  %v132 = select i1 %v131, i32 %v9, i32 %v130
  %v133 = add i32 %v126, %v9
  %v134 = sub i32 %v127, %v133
  %v135 = add i32 %v134, %v132
  %v136 = getelementptr inbounds i8, i8* %v1, i32 %v135
  %v137 = load i8, i8* %v136, align 1, !tbaa !4
  %v138 = icmp sle i32 %v64, %v52
  %v139 = icmp slt i32 %v52, %v11
  %v140 = icmp sle i32 %v58, %v125
  %v141 = icmp slt i32 %v125, %v9
  %v142 = or i1 %v141, %v140
  %v143 = or i1 %v139, %v142
  %v144 = or i1 %v138, %v143
  %v145 = select i1 %v144, i8 0, i8 %v137
  %v146 = sub i32 1, %v23
  %v147 = add i32 %v146, %v52
  %v148 = mul nsw i32 %v147, %v44
  %v149 = sub i32 1, %v27
  %v150 = add i32 %v149, %v148
  %v151 = add i32 %v150, %v125
  %v152 = getelementptr inbounds i8, i8* %v48, i32 %v151
  store i8 %v145, i8* %v152, align 1, !tbaa !7
  %v153 = add nsw i32 %v125, 1
  %v154 = icmp eq i32 %v153, %v57
  br i1 %v154, label %b7, label %b6

b7:                                               ; preds = %b6, %b5, %b3, %b1
  %v155 = icmp slt i32 %v57, %v62
  br i1 %v155, label %b8, label %b9, !prof !9

; Inner loop over [%v57, %v62): only the row index is clamped here.
b8:                                               ; preds = %b8, %b7
  %v156 = phi i32 [ %v181, %b8 ], [ %v57, %b7 ]
  %v157 = mul nsw i32 %v11, %v7
  %v158 = add nsw i32 %v11, %v5
  %v159 = add nsw i32 %v158, -1
  %v160 = icmp slt i32 %v52, %v159
  %v161 = select i1 %v160, i32 %v52, i32 %v159
  %v162 = icmp slt i32 %v161, %v11
  %v163 = select i1 %v162, i32 %v11, i32 %v161
  %v164 = mul nsw i32 %v163, %v7
  %v165 = add i32 %v157, %v9
  %v166 = sub i32 %v164, %v165
  %v167 = add i32 %v166, %v156
  %v168 = getelementptr inbounds i8, i8* %v1, i32 %v167
  %v169 = load i8, i8* %v168, align 1, !tbaa !4
  %v170 = icmp sle i32 %v158, %v52
  %v171 = icmp slt i32 %v52, %v11
  %v172 = or i1 %v171, %v170
  %v173 = select i1 %v172, i8 0, i8 %v169
  %v174 = sub i32 1, %v23
  %v175 = add i32 %v174, %v52
  %v176 = mul nsw i32 %v175, %v44
  %v177 = sub i32 1, %v27
  %v178 = add i32 %v177, %v176
  %v179 = add i32 %v178, %v156
  %v180 = getelementptr inbounds i8, i8* %v48, i32 %v179
  store i8 %v173, i8* %v180, align 1, !tbaa !7
  %v181 = add nsw i32 %v156, 1
  %v182 = icmp eq i32 %v181, %v62
  br i1 %v182, label %b9, label %b8

b9:                                               ; preds = %b8, %b7
  %v183 = icmp slt i32 %v62, %v24
  br i1 %v183, label %b10, label %b11, !prof !9

; Inner loop over [%v62, %v24): both row and column indices are clamped.
b10:                                              ; preds = %b10, %b9
  %v184 = phi i32 [ %v218, %b10 ], [ %v62, %b9 ]
  %v185 = mul nsw i32 %v11, %v7
  %v186 = add nsw i32 %v11, %v5
  %v187 = add nsw i32 %v186, -1
  %v188 = icmp slt i32 %v52, %v187
  %v189 = select i1 %v188, i32 %v52, i32 %v187
  %v190 = icmp slt i32 %v189, %v11
  %v191 = select i1 %v190, i32 %v11, i32 %v189
  %v192 = mul nsw i32 %v191, %v7
  %v193 = add nsw i32 %v58, -1
  %v194 = icmp slt i32 %v184, %v193
  %v195 = select i1 %v194, i32 %v184, i32 %v193
  %v196 = icmp slt i32 %v195, %v9
  %v197 = select i1 %v196, i32 %v9, i32 %v195
  %v198 = add i32 %v185, %v9
  %v199 = sub i32 %v192, %v198
  %v200 = add i32 %v199, %v197
  %v201 = getelementptr inbounds i8, i8* %v1, i32 %v200
  %v202 = load i8, i8* %v201, align 1, !tbaa !4
  %v203 = icmp sle i32 %v186, %v52
  %v204 = icmp slt i32 %v52, %v11
  %v205 = icmp sle i32 %v58, %v184
  %v206 = icmp slt i32 %v184, %v9
  %v207 = or i1 %v206, %v205
  %v208 = or i1 %v204, %v207
  %v209 = or i1 %v203, %v208
  %v210 = select i1 %v209, i8 0, i8 %v202
  %v211 = sub i32 1, %v23
  %v212 = add i32 %v211, %v52
  %v213 = mul nsw i32 %v212, %v44
  %v214 = sub i32 1, %v27
  %v215 = add i32 %v214, %v213
  %v216 = add i32 %v215, %v184
  %v217 = getelementptr inbounds i8, i8* %v48, i32 %v216
  store i8 %v210, i8* %v217, align 1, !tbaa !7
  %v218 = add nsw i32 %v184, 1
  %v219 = icmp eq i32 %v218, %v24
  br i1 %v219, label %b11, label %b10

b11:                                              ; preds = %b10, %b9
  %v220 = add nsw i32 %v52, 1
  %v221 = icmp eq i32 %v220, %v50
  br i1 %v221, label %b12, label %b1

; ---- Stage 2 setup: second temporary, (%v35 + 1) * 4 * (%v17 + 1) bytes.
b12:                                              ; preds = %b11, %b0
  %v222 = add nsw i32 %v35, 1
  %v223 = sext i32 %v222 to i64
  %v224 = shl nsw i64 %v42, 2
  %v225 = mul i64 %v224, %v223
  %v226 = trunc i64 %v225 to i32
  %v227 = tail call i8* @f2(i8* null, i32 %v226)
  br i1 %v51, label %b14, label %b13, !prof !3

; Outer loop over %v228, from %v49 up to (not including) %v50.
b13:                                              ; preds = %b19, %b12
  %v228 = phi i32 [ %v351, %b19 ], [ %v49, %b12 ]
  ; %v231 = max(%v15 >> 6, 0): number of full 64-element chunks.
  %v229 = ashr i32 %v15, 6
  %v230 = icmp slt i32 %v229, 0
  %v231 = select i1 %v230, i32 0, i32 %v229
  %v232 = icmp sgt i32 %v231, 0
  br i1 %v232, label %b16, label %b17, !prof !9

; Stage 2 finished: hand the first temporary back via @f3 if it is non-null.
b14:                                              ; preds = %b19, %b12
  %v233 = icmp eq i8* %v48, null
  br i1 %v233, label %b20, label %b15

b15:                                              ; preds = %b14
  tail call void @f3(i8* null, i8* %v48) #2
  br label %b20

; Full-chunk loop: one 64-byte vector at %v241 and one at %v241 + 1.
b16:                                              ; preds = %b16, %b13
  %v234 = phi i32 [ %v289, %b16 ], [ 0, %b13 ]
  %v235 = sub nsw i32 %v228, %v23
  %v236 = add nsw i32 %v235, 1
  %v237 = mul nsw i32 %v236, %v44
  %v238 = shl i32 %v234, 6
  %v239 = sub i32 %v21, %v27
  %v240 = add i32 %v239, %v238
  %v241 = add nsw i32 %v240, %v237
  %v242 = getelementptr inbounds i8, i8* %v48, i32 %v241
  %v243 = bitcast i8* %v242 to <16 x i32>*
  %v244 = load <16 x i32>, <16 x i32>* %v243, align 1, !tbaa !7
  %v245 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v244)
  %v246 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v245)
  %v247 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v245)
  %v248 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v247)
  %v249 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v246)
  %v250 = add nsw i32 %v241, 1
  %v251 = getelementptr inbounds i8, i8* %v48, i32 %v250
  %v252 = bitcast i8* %v251 to <16 x i32>*
  %v253 = load <16 x i32>, <16 x i32>* %v252, align 1, !tbaa !7
  %v254 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v253)
  %v255 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v254)
  %v256 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v255)
  %v257 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v256)
  %v258 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v256)
  %v259 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v257, i32 168430090)
  %v260 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v258, i32 168430090)
  %v261 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v259, <16 x i32> %v260)
  %v262 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v254)
  %v263 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v262)
  %v264 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v263)
  %v265 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v263)
  %v266 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v264, i32 168430090)
  %v267 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v265, i32 168430090)
  %v268 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v266, <16 x i32> %v267)
  %v269 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v248, <32 x i32> %v261)
  %v270 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v249, <32 x i32> %v268)
  ; Store the four 16-word quarters of the result at +0, +16, +32, +48.
  %v271 = shufflevector <32 x i32> %v269, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v272 = mul nsw i32 %v236, %v222
  %v273 = add nsw i32 %v240, %v272
  %v274 = bitcast i8* %v227 to i32*
  %v275 = getelementptr inbounds i32, i32* %v274, i32 %v273
  %v276 = bitcast i32* %v275 to <16 x i32>*
  store <16 x i32> %v271, <16 x i32>* %v276, align 4, !tbaa !10
  %v277 = shufflevector <32 x i32> %v269, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v278 = add nsw i32 %v273, 16
  %v279 = getelementptr inbounds i32, i32* %v274, i32 %v278
  %v280 = bitcast i32* %v279 to <16 x i32>*
  store <16 x i32> %v277, <16 x i32>* %v280, align 4, !tbaa !10
  %v281 = shufflevector <32 x i32> %v270, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v282 = add nsw i32 %v273, 32
  %v283 = getelementptr inbounds i32, i32* %v274, i32 %v282
  %v284 = bitcast i32* %v283 to <16 x i32>*
  store <16 x i32> %v281, <16 x i32>* %v284, align 4, !tbaa !10
  %v285 = shufflevector <32 x i32> %v270, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v286 = add nsw i32 %v273, 48
  %v287 = getelementptr inbounds i32, i32* %v274, i32 %v286
  %v288 = bitcast i32* %v287 to <16 x i32>*
  store <16 x i32> %v285, <16 x i32>* %v288, align 4, !tbaa !10
  %v289 = add nuw nsw i32 %v234, 1
  %v290 = icmp eq i32 %v289, %v231
  br i1 %v290, label %b17, label %b16

b17:                                              ; preds = %b16, %b13
  ; %v292 = (%v15 + 63) >> 6: chunk count rounded up.
  %v291 = add nsw i32 %v15, 63
  %v292 = ashr i32 %v291, 6
  %v293 = icmp slt i32 %v231, %v292
  br i1 %v293, label %b18, label %b19, !prof !9

; Tail loop: same computation as b16, addressed as the last 64 elements
; of the row (offsets end-64 .. end-1) rather than by chunk index.
b18:                                              ; preds = %b18, %b17
  %v294 = phi i32 [ %v349, %b18 ], [ %v231, %b17 ]
  %v295 = sub nsw i32 %v228, %v23
  %v296 = add nsw i32 %v295, 1
  %v297 = mul nsw i32 %v296, %v44
  %v298 = sub nsw i32 %v24, %v27
  %v299 = add nsw i32 %v297, %v298
  %v300 = add nsw i32 %v299, -64
  %v301 = getelementptr inbounds i8, i8* %v48, i32 %v300
  %v302 = bitcast i8* %v301 to <16 x i32>*
  %v303 = load <16 x i32>, <16 x i32>* %v302, align 1, !tbaa !7
  %v304 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v303)
  %v305 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v304)
  %v306 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v304)
  %v307 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v306)
  %v308 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v305)
  %v309 = add nsw i32 %v299, -63
  %v310 = getelementptr inbounds i8, i8* %v48, i32 %v309
  %v311 = bitcast i8* %v310 to <16 x i32>*
  %v312 = load <16 x i32>, <16 x i32>* %v311, align 1, !tbaa !7
  %v313 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %v312)
  %v314 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v313)
  %v315 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v314)
  %v316 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v315)
  %v317 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v315)
  %v318 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v316, i32 168430090)
  %v319 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v317, i32 168430090)
  %v320 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v318, <16 x i32> %v319)
  %v321 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v313)
  %v322 = tail call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %v321)
  %v323 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v322)
  %v324 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v322)
  %v325 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v323, i32 168430090)
  %v326 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v324, i32 168430090)
  %v327 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v325, <16 x i32> %v326)
  %v328 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v307, <32 x i32> %v320)
  %v329 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v308, <32 x i32> %v327)
  %v330 = shufflevector <32 x i32> %v328, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v331 = mul nsw i32 %v296, %v222
  %v332 = add nsw i32 %v331, %v298
  %v333 = add nsw i32 %v332, -64
  %v334 = bitcast i8* %v227 to i32*
  %v335 = getelementptr inbounds i32, i32* %v334, i32 %v333
  %v336 = bitcast i32* %v335 to <16 x i32>*
  store <16 x i32> %v330, <16 x i32>* %v336, align 4, !tbaa !10
  %v337 = shufflevector <32 x i32> %v328, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v338 = add nsw i32 %v332, -48
  %v339 = getelementptr inbounds i32, i32* %v334, i32 %v338
  %v340 = bitcast i32* %v339 to <16 x i32>*
  store <16 x i32> %v337, <16 x i32>* %v340, align 4, !tbaa !10
  %v341 = shufflevector <32 x i32> %v329, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v342 = add nsw i32 %v332, -32
  %v343 = getelementptr inbounds i32, i32* %v334, i32 %v342
  %v344 = bitcast i32* %v343 to <16 x i32>*
  store <16 x i32> %v341, <16 x i32>* %v344, align 4, !tbaa !10
  %v345 = shufflevector <32 x i32> %v329, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v346 = add nsw i32 %v332, -16
  %v347 = getelementptr inbounds i32, i32* %v334, i32 %v346
  %v348 = bitcast i32* %v347 to <16 x i32>*
  store <16 x i32> %v345, <16 x i32>* %v348, align 4, !tbaa !10
  %v349 = add nuw nsw i32 %v294, 1
  %v350 = icmp eq i32 %v349, %v292
  br i1 %v350, label %b19, label %b18

b19:                                              ; preds = %b18, %b17
  %v351 = add nsw i32 %v228, 1
  %v352 = icmp eq i32 %v351, %v50
  br i1 %v352, label %b14, label %b13

; ---- Stage 3: only entered when %v17 > 0.
b20:                                              ; preds = %b15, %b14
  %v353 = icmp sgt i32 %v17, 0
  br i1 %v353, label %b21, label %b31, !prof !9

b21:                                              ; preds = %b20
  %v354 = ashr i32 %v15, 6
  %v355 = icmp slt i32 %v354, 0
  %v356 = select i1 %v355, i32 0, i32 %v354
  %v357 = icmp sgt i32 %v356, 0
  br i1 %v357, label %b25, label %b27

; Full-chunk loop: rows %v359 and %v359 + 1 of the second temporary are
; combined, shifted right by 20 and narrowed to bytes.
b22:                                              ; preds = %b25, %b22
  %v358 = phi i32 [ %v442, %b22 ], [ 0, %b25 ]
  %v359 = sub nsw i32 %v525, %v23
  %v360 = mul nsw i32 %v359, %v222
  %v361 = shl nsw i32 %v358, 6
  %v362 = add nsw i32 %v361, %v21
  %v363 = sub nsw i32 %v362, %v27
  %v364 = add nsw i32 %v363, %v360
  %v365 = bitcast i8* %v227 to i32*
  %v366 = getelementptr inbounds i32, i32* %v365, i32 %v364
  %v367 = bitcast i32* %v366 to <16 x i32>*
  %v368 = load <16 x i32>, <16 x i32>* %v367, align 4, !tbaa !10
  %v369 = add nsw i32 %v364, 16
  %v370 = getelementptr inbounds i32, i32* %v365, i32 %v369
  %v371 = bitcast i32* %v370 to <16 x i32>*
  %v372 = load <16 x i32>, <16 x i32>* %v371, align 4, !tbaa !10
  %v373 = shufflevector <16 x i32> %v368, <16 x i32> %v372, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v374 = add nsw i32 %v359, 1
  %v375 = mul nsw i32 %v374, %v222
  %v376 = add nsw i32 %v363, %v375
  %v377 = getelementptr inbounds i32, i32* %v365, i32 %v376
  %v378 = bitcast i32* %v377 to <16 x i32>*
  %v379 = load <16 x i32>, <16 x i32>* %v378, align 4, !tbaa !10
  %v380 = add nsw i32 %v376, 16
  %v381 = getelementptr inbounds i32, i32* %v365, i32 %v380
  %v382 = bitcast i32* %v381 to <16 x i32>*
  %v383 = load <16 x i32>, <16 x i32>* %v382, align 4, !tbaa !10
  %v384 = shufflevector <16 x i32> %v379, <16 x i32> %v383, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v385 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v384)
  %v386 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v384)
  %v387 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v385, i32 168430090)
  %v388 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v386, i32 168430090)
  %v389 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v387, <16 x i32> %v388)
  %v390 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v373, <32 x i32> %v389)
  %v391 = shufflevector <32 x i32> %v390, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v392 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v391, i32 20)
  %v393 = shufflevector <32 x i32> %v390, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v394 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v393, i32 20)
  %v395 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v394, <16 x i32> %v392)
  %v396 = add nsw i32 %v364, 32
  %v397 = getelementptr inbounds i32, i32* %v365, i32 %v396
  %v398 = bitcast i32* %v397 to <16 x i32>*
  %v399 = load <16 x i32>, <16 x i32>* %v398, align 4, !tbaa !10
  %v400 = add nsw i32 %v364, 48
  %v401 = getelementptr inbounds i32, i32* %v365, i32 %v400
  %v402 = bitcast i32* %v401 to <16 x i32>*
  %v403 = load <16 x i32>, <16 x i32>* %v402, align 4, !tbaa !10
  %v404 = shufflevector <16 x i32> %v399, <16 x i32> %v403, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v405 = add nsw i32 %v376, 32
  %v406 = getelementptr inbounds i32, i32* %v365, i32 %v405
  %v407 = bitcast i32* %v406 to <16 x i32>*
  %v408 = load <16 x i32>, <16 x i32>* %v407, align 4, !tbaa !10
  %v409 = add nsw i32 %v376, 48
  %v410 = getelementptr inbounds i32, i32* %v365, i32 %v409
  %v411 = bitcast i32* %v410 to <16 x i32>*
  %v412 = load <16 x i32>, <16 x i32>* %v411, align 4, !tbaa !10
  %v413 = shufflevector <16 x i32> %v408, <16 x i32> %v412, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v414 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v413)
  %v415 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v413)
  %v416 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v414, i32 168430090)
  %v417 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v415, i32 168430090)
  %v418 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v416, <16 x i32> %v417)
  %v419 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v404, <32 x i32> %v418)
  %v420 = shufflevector <32 x i32> %v419, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v421 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v420, i32 20)
  %v422 = shufflevector <32 x i32> %v419, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v423 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v422, i32 20)
  %v424 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v423, <16 x i32> %v421)
  ; Narrow words -> halfwords -> bytes with the saturating intrinsics.
  %v425 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v395)
  %v426 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v395)
  %v427 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v425, <16 x i32> %v426)
  %v428 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v424)
  %v429 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v424)
  %v430 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v428, <16 x i32> %v429)
  %v431 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v430, <16 x i32> %v427)
  %v432 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v431)
  %v433 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v431)
  %v434 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v432, <16 x i32> %v433)
  %v435 = mul nsw i32 %v23, %v19
  %v436 = mul nsw i32 %v525, %v19
  %v437 = add i32 %v435, %v21
  %v438 = sub i32 %v436, %v437
  %v439 = add i32 %v438, %v362
  %v440 = getelementptr inbounds i8, i8* %v13, i32 %v439
  %v441 = bitcast i8* %v440 to <16 x i32>*
  store <16 x i32> %v434, <16 x i32>* %v441, align 1, !tbaa !12
  %v442 = add nuw nsw i32 %v358, 1
  %v443 = icmp eq i32 %v442, %v356
  br i1 %v443, label %b26, label %b22

; Tail loop for the row started in b22; reuses %v360, %v365, %v375, %v435
; and %v436 computed there and addresses the last 64 elements of the row.
b23:                                              ; preds = %b26, %b23
  %v444 = phi i32 [ %v521, %b23 ], [ %v356, %b26 ]
  %v445 = sub nsw i32 %v24, %v27
  %v446 = add nsw i32 %v360, %v445
  %v447 = add nsw i32 %v446, -64
  %v448 = getelementptr inbounds i32, i32* %v365, i32 %v447
  %v449 = bitcast i32* %v448 to <16 x i32>*
  %v450 = load <16 x i32>, <16 x i32>* %v449, align 4, !tbaa !10
  %v451 = add nsw i32 %v446, -48
  %v452 = getelementptr inbounds i32, i32* %v365, i32 %v451
  %v453 = bitcast i32* %v452 to <16 x i32>*
  %v454 = load <16 x i32>, <16 x i32>* %v453, align 4, !tbaa !10
  %v455 = shufflevector <16 x i32> %v450, <16 x i32> %v454, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v456 = add nsw i32 %v375, %v445
  %v457 = add nsw i32 %v456, -64
  %v458 = getelementptr inbounds i32, i32* %v365, i32 %v457
  %v459 = bitcast i32* %v458 to <16 x i32>*
  %v460 = load <16 x i32>, <16 x i32>* %v459, align 4, !tbaa !10
  %v461 = add nsw i32 %v456, -48
  %v462 = getelementptr inbounds i32, i32* %v365, i32 %v461
  %v463 = bitcast i32* %v462 to <16 x i32>*
  %v464 = load <16 x i32>, <16 x i32>* %v463, align 4, !tbaa !10
  %v465 = shufflevector <16 x i32> %v460, <16 x i32> %v464, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v466 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v465)
  %v467 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v465)
  %v468 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v466, i32 168430090)
  %v469 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v467, i32 168430090)
  %v470 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v468, <16 x i32> %v469)
  %v471 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v455, <32 x i32> %v470)
  %v472 = shufflevector <32 x i32> %v471, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v473 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v472, i32 20)
  %v474 = shufflevector <32 x i32> %v471, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v475 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v474, i32 20)
  %v476 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v475, <16 x i32> %v473)
  %v477 = add nsw i32 %v446, -32
  %v478 = getelementptr inbounds i32, i32* %v365, i32 %v477
  %v479 = bitcast i32* %v478 to <16 x i32>*
  %v480 = load <16 x i32>, <16 x i32>* %v479, align 4, !tbaa !10
  %v481 = add nsw i32 %v446, -16
  %v482 = getelementptr inbounds i32, i32* %v365, i32 %v481
  %v483 = bitcast i32* %v482 to <16 x i32>*
  %v484 = load <16 x i32>, <16 x i32>* %v483, align 4, !tbaa !10
  %v485 = shufflevector <16 x i32> %v480, <16 x i32> %v484, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v486 = add nsw i32 %v456, -32
  %v487 = getelementptr inbounds i32, i32* %v365, i32 %v486
  %v488 = bitcast i32* %v487 to <16 x i32>*
  %v489 = load <16 x i32>, <16 x i32>* %v488, align 4, !tbaa !10
  %v490 = add nsw i32 %v456, -16
  %v491 = getelementptr inbounds i32, i32* %v365, i32 %v490
  %v492 = bitcast i32* %v491 to <16 x i32>*
  %v493 = load <16 x i32>, <16 x i32>* %v492, align 4, !tbaa !10
  %v494 = shufflevector <16 x i32> %v489, <16 x i32> %v493, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v495 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v494)
  %v496 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v494)
  %v497 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v495, i32 168430090)
  %v498 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v496, i32 168430090)
  %v499 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v497, <16 x i32> %v498)
  %v500 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v485, <32 x i32> %v499)
  %v501 = shufflevector <32 x i32> %v500, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v502 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v501, i32 20)
  %v503 = shufflevector <32 x i32> %v500, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v504 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v503, i32 20)
  %v505 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v504, <16 x i32> %v502)
  %v506 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v476)
  %v507 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v476)
  %v508 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v506, <16 x i32> %v507)
  %v509 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v505)
  %v510 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v505)
  %v511 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v509, <16 x i32> %v510)
  %v512 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v511, <16 x i32> %v508)
  %v513 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v512)
  %v514 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v512)
  %v515 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v513, <16 x i32> %v514)
  %v516 = add i32 %v15, -64
  %v517 = sub i32 %v516, %v435
  %v518 = add i32 %v517, %v436
  %v519 = getelementptr inbounds i8, i8* %v13, i32 %v518
  %v520 = bitcast i8* %v519 to <16 x i32>*
  store <16 x i32> %v515, <16 x i32>* %v520, align 1, !tbaa !12
  %v521 = add nuw nsw i32 %v444, 1
  %v522 = icmp eq i32 %v521, %v527
  br i1 %v522, label %b24, label %b23

b24:                                              ; preds = %b26, %b23
  %v523 = add nsw i32 %v525, 1
  %v524 = icmp eq i32 %v523, %v50
  br i1 %v524, label %b32, label %b25

; Row-loop header for the path that has full chunks (%v356 > 0).
b25:                                              ; preds = %b24, %b21
  %v525 = phi i32 [ %v523, %b24 ], [ %v23, %b21 ]
  br label %b22

b26:                                              ; preds = %b22
  %v526 = add nsw i32 %v15, 63
  %v527 = ashr i32 %v526, 6
  %v528 = icmp slt i32 %v356, %v527
  br i1 %v528, label %b23, label %b24, !prof !9

; Path with no full chunks: only the tail loop (b28) can run.
b27:                                              ; preds = %b21
  %v529 = add nsw i32 %v15, 63
  %v530 = ashr i32 %v529, 6
  %v531 = icmp slt i32 %v356, %v530
  br i1 %v531, label %b29, label %b31

; Tail loop, same computation as b23 but self-contained.
b28:                                              ; preds = %b29, %b28
  %v532 = phi i32 [ %v616, %b28 ], [ %v356, %b29 ]
  %v533 = sub nsw i32 %v618, %v23
  %v534 = mul nsw i32 %v533, %v222
  %v535 = sub nsw i32 %v24, %v27
  %v536 = add nsw i32 %v534, %v535
  %v537 = add nsw i32 %v536, -64
  %v538 = bitcast i8* %v227 to i32*
  %v539 = getelementptr inbounds i32, i32* %v538, i32 %v537
  %v540 = bitcast i32* %v539 to <16 x i32>*
  %v541 = load <16 x i32>, <16 x i32>* %v540, align 4, !tbaa !10
  %v542 = add nsw i32 %v536, -48
  %v543 = getelementptr inbounds i32, i32* %v538, i32 %v542
  %v544 = bitcast i32* %v543 to <16 x i32>*
  %v545 = load <16 x i32>, <16 x i32>* %v544, align 4, !tbaa !10
  %v546 = shufflevector <16 x i32> %v541, <16 x i32> %v545, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v547 = add nsw i32 %v533, 1
  %v548 = mul nsw i32 %v547, %v222
  %v549 = add nsw i32 %v548, %v535
  %v550 = add nsw i32 %v549, -64
  %v551 = getelementptr inbounds i32, i32* %v538, i32 %v550
  %v552 = bitcast i32* %v551 to <16 x i32>*
  %v553 = load <16 x i32>, <16 x i32>* %v552, align 4, !tbaa !10
  %v554 = add nsw i32 %v549, -48
  %v555 = getelementptr inbounds i32, i32* %v538, i32 %v554
  %v556 = bitcast i32* %v555 to <16 x i32>*
  %v557 = load <16 x i32>, <16 x i32>* %v556, align 4, !tbaa !10
  %v558 = shufflevector <16 x i32> %v553, <16 x i32> %v557, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v559 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v558)
  %v560 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v558)
  %v561 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v559, i32 168430090)
  %v562 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v560, i32 168430090)
  %v563 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v561, <16 x i32> %v562)
  %v564 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v546, <32 x i32> %v563)
  %v565 = shufflevector <32 x i32> %v564, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v566 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v565, i32 20)
  %v567 = shufflevector <32 x i32> %v564, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v568 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v567, i32 20)
  %v569 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v568, <16 x i32> %v566)
  %v570 = add nsw i32 %v536, -32
  %v571 = getelementptr inbounds i32, i32* %v538, i32 %v570
  %v572 = bitcast i32* %v571 to <16 x i32>*
  %v573 = load <16 x i32>, <16 x i32>* %v572, align 4, !tbaa !10
  %v574 = add nsw i32 %v536, -16
  %v575 = getelementptr inbounds i32, i32* %v538, i32 %v574
  %v576 = bitcast i32* %v575 to <16 x i32>*
  %v577 = load <16 x i32>, <16 x i32>* %v576, align 4, !tbaa !10
  %v578 = shufflevector <16 x i32> %v573, <16 x i32> %v577, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v579 = add nsw i32 %v549, -32
  %v580 = getelementptr inbounds i32, i32* %v538, i32 %v579
  %v581 = bitcast i32* %v580 to <16 x i32>*
  %v582 = load <16 x i32>, <16 x i32>* %v581, align 4, !tbaa !10
  %v583 = add nsw i32 %v549, -16
  %v584 = getelementptr inbounds i32, i32* %v538, i32 %v583
  %v585 = bitcast i32* %v584 to <16 x i32>*
  %v586 = load <16 x i32>, <16 x i32>* %v585, align 4, !tbaa !10
  %v587 = shufflevector <16 x i32> %v582, <16 x i32> %v586, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v588 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v587)
  %v589 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v587)
  %v590 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v588, i32 168430090)
  %v591 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %v589, i32 168430090)
  %v592 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v590, <16 x i32> %v591)
  %v593 = tail call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %v578, <32 x i32> %v592)
  %v594 = shufflevector <32 x i32> %v593, <32 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %v595 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v594, i32 20)
  %v596 = shufflevector <32 x i32> %v593, <32 x i32> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %v597 = tail call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %v596, i32 20)
  %v598 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v597, <16 x i32> %v595)
  %v599 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v569)
  %v600 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v569)
  %v601 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v599, <16 x i32> %v600)
  %v602 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v598)
  %v603 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v598)
  %v604 = tail call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %v602, <16 x i32> %v603)
  %v605 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v604, <16 x i32> %v601)
  %v606 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v605)
  %v607 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v605)
  %v608 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v606, <16 x i32> %v607)
  %v609 = mul nsw i32 %v23, %v19
  %v610 = mul nsw i32 %v618, %v19
  %v611 = add i32 %v15, -64
  %v612 = sub i32 %v611, %v609
  %v613 = add i32 %v612, %v610
  %v614 = getelementptr inbounds i8, i8* %v13, i32 %v613
  %v615 = bitcast i8* %v614 to <16 x i32>*
  store <16 x i32> %v608, <16 x i32>* %v615, align 1, !tbaa !12
  %v616 = add nuw nsw i32 %v532, 1
  %v617 = icmp eq i32 %v616, %v530
  br i1 %v617, label %b30, label %b28

; Row-loop header for the tail-only path.
b29:                                              ; preds = %b30, %b27
  %v618 = phi i32 [ %v619, %b30 ], [ %v23, %b27 ]
  br label %b28

b30:                                              ; preds = %b28
  %v619 = add nsw i32 %v618, 1
  %v620 = icmp eq i32 %v619, %v50
  br i1 %v620, label %b32, label %b29

; Stage 3 was skipped: hand back the second temporary only if non-null.
b31:                                              ; preds = %b27, %b20
  %v621 = icmp eq i8* %v227, null
  br i1 %v621, label %b33, label %b32

b32:                                              ; preds = %b31, %b30, %b24
  tail call void @f3(i8* null, i8* %v227) #2
  br label %b33

b33:                                              ; preds = %b32, %b31
  ret i32 0
}

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32>, i32) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32>, <16 x i32>) #1

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }
attributes #2 = { nobuiltin nounwind }

!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
!1 = !{i32 2, !"halide_mcpu", !"hexagonv60"}
!2 = !{i32 2, !"halide_mattrs", !"+hvx"}
!3 = !{!"branch_weights", i32 0, i32 1073741824}
!4 = !{!5, !5, i64 0}
!5 = !{!"input", !6}
!6 = !{!"Halide buffer"}
!7 = !{!8, !8, i64 0}
!8 = !{!"constant_exterior", !6}
!9 = !{!"branch_weights", i32 1073741824, i32 0}
!10 = !{!11, !11, i64 0}
!11 = !{!"rows", !6}
!12 = !{!13, !13, i64 0}
!13 = !{!"gaussian11", !6}