; RUN: llc -march=hexagon -O0 < %s | FileCheck --check-prefix=CHECKO0 %s
; KP: Removed -O2 check. The code has become more aggressively optimized
; (some loads were found to be redundant and have been removed completely),
; and verifying correct code generation has become more difficult than
; it's worth.
;
; NOTE: the FileCheck invocation above enables only the CHECKO0 prefix, so
; the plain CHECK lines and the CHECKO2 lines below are not matched against
; the llc output; they are kept for reference only.

; CHECK: v{{[0-9]*}} = vsplat(r{{[0-9]*}})
; CHECK: v{{[0-9]*}} = vsplat(r{{[0-9]*}})

; CHECKO0: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
; CHECKO0: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
; CHECKO0: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)

; Allow .cur loads.
; CHECKO2: v{{[0-9].*}} = vmem(r{{[0-9]*}}+#0)
; CHECKO2: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
; CHECKO2: v{{[0-9].*}} = vmem(r{{[0-9]*}}+#0)

; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vcombine(v{{[0-9]*}},v{{[0-9]*}})
; CHECK: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
; CHECK: vmem(r{{[0-9]*}}+#32) = v{{[0-9]*}}
; CHECK: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#0)
; CHECK: v{{[0-9]*}} = vmem(r{{[0-9]*}}+#32)
; CHECK: vmem(r{{[0-9]*}}+#0) = v{{[0-9]*}}
; CHECK: vmem(r{{[0-9]*}}+#32) = v{{[0-9]*}}

target triple = "hexagon"

; @g2 and @g3 hold the two single-vector splat results, @g4 holds the
; combined vector pairs, and @g0 receives a copy of @g4 that @f1 reads back.
@g0 = common global [10 x <32 x i32>] zeroinitializer, align 64
@g1 = private unnamed_addr constant [11 x i8] c"c[%d]= %x\0A\00", align 8
@g2 = common global [10 x <16 x i32>] zeroinitializer, align 64
@g3 = common global [10 x <16 x i32>] zeroinitializer, align 64
@g4 = common global [10 x <32 x i32>] zeroinitializer, align 64

; Variadic callee that takes the @g1 format string (presumably printf-like;
; its definition is not part of this file).
declare i32 @f0(i8*, ...)

; @f1(%a0): treats @g0 as a flat array of i32, starts at word index %a0 and
; passes 16 consecutive words, together with the loop index, to @f0.
; Function Attrs: nounwind
define void @f1(i32 %a0) #0 {
b0:
  %v0 = alloca i32, align 4
  %v1 = alloca i32*, align 4
  %v2 = alloca i32, align 4
  store i32 %a0, i32* %v0, align 4
  store i32* getelementptr inbounds ([10 x <32 x i32>], [10 x <32 x i32>]* @g0, i32 0, i32 0, i32 0), i32** %v1, align 4
  %v3 = load i32, i32* %v0, align 4
  %v4 = load i32*, i32** %v1, align 4
  ; Advance the cursor in %v1 by %a0 words.
  %v5 = getelementptr inbounds i32, i32* %v4, i32 %v3
  store i32* %v5, i32** %v1, align 4
  store i32 0, i32* %v2, align 4
  br label %b1

b1:                                               ; preds = %b3, %b0
  %v6 = load i32, i32* %v2, align 4
  %v7 = icmp slt i32 %v6, 16
  br i1 %v7, label %b2, label %b4

b2:                                               ; preds = %b1
  %v8 = load i32, i32* %v2, align 4
  %v9 = load i32*, i32** %v1, align 4
  ; Post-increment: store the advanced cursor, then read through the old one.
  %v10 = getelementptr inbounds i32, i32* %v9, i32 1
  store i32* %v10, i32** %v1, align 4
  %v11 = load i32, i32* %v9, align 4
  %v12 = call i32 (i8*, ...) @f0(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @g1, i32 0, i32 0), i32 %v8, i32 %v11)
  br label %b3

b3:                                               ; preds = %b2
  %v13 = load i32, i32* %v2, align 4
  %v14 = add nsw i32 %v13, 1
  store i32 %v14, i32* %v2, align 4
  br label %b1

b4:                                               ; preds = %b1
  ret void
}

; @f2: runs three loops of 3 iterations each over the index kept in %v1:
;   1. splat (i + 1) into @g2[i] and (10 * i + 1) into @g3[i], reload both,
;      vcombine them and store the resulting pair into @g4[i];
;   2. copy @g4[i] into @g0[i];
;   3. call @f1(16 * i).
; Always returns 0.
; Function Attrs: nounwind
define i32 @f2() #0 {
b0:
  %v0 = alloca i32, align 4
  %v1 = alloca i32, align 4
  store i32 0, i32* %v0
  store i32 0, i32* %v1, align 4
  br label %b1

b1:                                               ; preds = %b3, %b0
  %v2 = load i32, i32* %v1, align 4
  %v3 = icmp slt i32 %v2, 3
  br i1 %v3, label %b2, label %b4

b2:                                               ; preds = %b1
  ; @g2[i] = splat(i + 1)
  %v4 = load i32, i32* %v1, align 4
  %v5 = add nsw i32 %v4, 1
  %v6 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v5)
  %v7 = load i32, i32* %v1, align 4
  %v8 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g2, i32 0, i32 %v7
  store <16 x i32> %v6, <16 x i32>* %v8, align 64
  ; @g3[i] = splat(10 * i + 1)
  %v9 = load i32, i32* %v1, align 4
  %v10 = mul nsw i32 %v9, 10
  %v11 = add nsw i32 %v10, 1
  %v12 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v11)
  %v13 = load i32, i32* %v1, align 4
  %v14 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g3, i32 0, i32 %v13
  store <16 x i32> %v12, <16 x i32>* %v14, align 64
  ; Reload both vectors from memory and combine them into @g4[i].
  %v15 = load i32, i32* %v1, align 4
  %v16 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g2, i32 0, i32 %v15
  %v17 = load <16 x i32>, <16 x i32>* %v16, align 64
  %v18 = load i32, i32* %v1, align 4
  %v19 = getelementptr inbounds [10 x <16 x i32>], [10 x <16 x i32>]* @g3, i32 0, i32 %v18
  %v20 = load <16 x i32>, <16 x i32>* %v19, align 64
  %v21 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v17, <16 x i32> %v20)
  %v22 = load i32, i32* %v1, align 4
  %v23 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g4, i32 0, i32 %v22
  store <32 x i32> %v21, <32 x i32>* %v23, align 64
  br label %b3

b3:                                               ; preds = %b2
  %v24 = load i32, i32* %v1, align 4
  %v25 = add nsw i32 %v24, 1
  store i32 %v25, i32* %v1, align 4
  br label %b1

b4:                                               ; preds = %b1
  ; Reset the index for the second loop.
  store i32 0, i32* %v1, align 4
  br label %b5

b5:                                               ; preds = %b7, %b4
  %v26 = load i32, i32* %v1, align 4
  %v27 = icmp slt i32 %v26, 3
  br i1 %v27, label %b6, label %b8

b6:                                               ; preds = %b5
  ; @g0[i] = @g4[i]
  %v28 = load i32, i32* %v1, align 4
  %v29 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g4, i32 0, i32 %v28
  %v30 = load <32 x i32>, <32 x i32>* %v29, align 64
  %v31 = load i32, i32* %v1, align 4
  %v32 = getelementptr inbounds [10 x <32 x i32>], [10 x <32 x i32>]* @g0, i32 0, i32 %v31
  store <32 x i32> %v30, <32 x i32>* %v32, align 64
  br label %b7

b7:                                               ; preds = %b6
  %v33 = load i32, i32* %v1, align 4
  %v34 = add nsw i32 %v33, 1
  store i32 %v34, i32* %v1, align 4
  br label %b5

b8:                                               ; preds = %b5
  ; Reset the index for the third loop.
  store i32 0, i32* %v1, align 4
  br label %b9

b9:                                               ; preds = %b11, %b8
  %v35 = load i32, i32* %v1, align 4
  %v36 = icmp slt i32 %v35, 3
  br i1 %v36, label %b10, label %b12

b10:                                              ; preds = %b9
  ; Hand @f1 the word offset 16 * i into @g0.
  %v37 = load i32, i32* %v1, align 4
  %v38 = mul nsw i32 %v37, 16
  call void @f1(i32 %v38)
  br label %b11

b11:                                              ; preds = %b10
  %v39 = load i32, i32* %v1, align 4
  %v40 = add nsw i32 %v39, 1
  store i32 %v40, i32* %v1, align 4
  br label %b9

b12:                                              ; preds = %b9
  ret i32 0
}

; Function Attrs: nounwind readnone
declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1

; Function Attrs: nounwind readnone
declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
attributes #1 = { nounwind readnone }