; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; Purpose: runs the SLP vectorizer over three functions that each contain four
; consecutive i32 load / multiply / store chains. @foo has no scalar value left
; over; @extr_user and @extr_user1 additionally return one of the scalar loads.
; In every function the expected output contains a single <4 x i32> load, a
; single <4 x i32> mul and a single <4 x i32> store. For the two extr_user*
; functions the returned scalar is expected to be recovered from the vector
; load with an extractelement.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; int foo(int * restrict B, int * restrict A, int n, int m) {
;   B[0] = n * A[0] + m * A[0];
;   B[1] = n * A[1] + m * A[1];
;   B[2] = n * A[2] + m * A[2];
;   B[3] = n * A[3] + m * A[3];
;   return 0;
; }
;
; In the IR below the C expression n * A[i] + m * A[i] appears in the factored
; form A[i] * (m + n): %mul238 is an add and the %add* values are muls, despite
; their names. The expected output builds a 4-lane vector from the shared
; scalar %mul238 with four insertelement instructions and multiplies it with
; the vector load of A.
; NOTE(review): @foo references attribute group #0, but no definition of that
; group is visible in this file - confirm that this is intentional.

define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT:    ret i32 0
;
entry:
  ; Element 0: B[0] = A[0] * (m + n). %mul238 is shared by all four elements.
  %0 = load i32, i32* %A, align 4
  %mul238 = add i32 %m, %n
  %add = mul i32 %0, %mul238
  store i32 %add, i32* %B, align 4
  ; Element 1: B[1] = A[1] * (m + n).
  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
  %1 = load i32, i32* %arrayidx4, align 4
  %add8 = mul i32 %1, %mul238
  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
  store i32 %add8, i32* %arrayidx9, align 4
  ; Element 2: B[2] = A[2] * (m + n).
  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
  %2 = load i32, i32* %arrayidx10, align 4
  %add14 = mul i32 %2, %mul238
  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
  store i32 %add14, i32* %arrayidx15, align 4
  ; Element 3: B[3] = A[3] * (m + n).
  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
  %3 = load i32, i32* %arrayidx16, align 4
  %add20 = mul i32 %3, %mul238
  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
  store i32 %add20, i32* %arrayidx21, align 4
  ret i32 0
}


; int extr_user(int * restrict B, int * restrict A, int n, int m) {
;   B[0] = n * A[0] + m * A[0];
;   B[1] = n * A[1] + m * A[1];
;   B[2] = n * A[2] + m * A[2];
;   B[3] = n * A[3] + m * A[3];
;   return A[0];
; }
;
; Same four load / mul / store chains as @foo, but the scalar load of A[0] (%0)
; is also the return value. The expected output returns element 0 extracted
; from the <4 x i32> load.

define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @extr_user(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT:    ret i32 [[TMP8]]
;
entry:
  ; Element 0: %0 feeds both the multiply here and the ret at the end.
  %0 = load i32, i32* %A, align 4
  %mul238 = add i32 %m, %n
  %add = mul i32 %0, %mul238
  store i32 %add, i32* %B, align 4
  ; Element 1.
  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
  %1 = load i32, i32* %arrayidx4, align 4
  %add8 = mul i32 %1, %mul238
  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
  store i32 %add8, i32* %arrayidx9, align 4
  ; Element 2.
  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
  %2 = load i32, i32* %arrayidx10, align 4
  %add14 = mul i32 %2, %mul238
  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
  store i32 %add14, i32* %arrayidx15, align 4
  ; Element 3.
  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
  %3 = load i32, i32* %arrayidx16, align 4
  %add20 = mul i32 %3, %mul238
  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
  store i32 %add20, i32* %arrayidx21, align 4
  ret i32 %0 ;<--------- This value has multiple users
}

; In this example we have an external user that is not the first element in the vector.
; The returned value is %1, the scalar load of A[1], which is also an operand of
; the second multiply. The expected output returns element 1 extracted from the
; <4 x i32> load.
define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @extr_user1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[TMP8]]
;
entry:
  ; Element 0.
  %0 = load i32, i32* %A, align 4
  %mul238 = add i32 %m, %n
  %add = mul i32 %0, %mul238
  store i32 %add, i32* %B, align 4
  ; Element 1: %1 feeds both the multiply here and the ret at the end.
  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
  %1 = load i32, i32* %arrayidx4, align 4
  %add8 = mul i32 %1, %mul238
  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
  store i32 %add8, i32* %arrayidx9, align 4
  ; Element 2.
  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
  %2 = load i32, i32* %arrayidx10, align 4
  %add14 = mul i32 %2, %mul238
  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
  store i32 %add14, i32* %arrayidx15, align 4
  ; Element 3.
  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
  %3 = load i32, i32* %arrayidx16, align 4
  %add20 = mul i32 %3, %mul238
  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
  store i32 %add20, i32* %arrayidx21, align 4
  ret i32 %1 ;<--------- This value has multiple users
}
