; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7-avx | FileCheck %s
; RUN: opt -instsimplify -disable-output < %s

; Codegen tests for getelementptr on vectors of pointers, compiled for 32-bit
; x86 (i686-linux) with -mcpu=corei7-avx. The llc run is verified with
; FileCheck; the opt run discards its output (-disable-output) and is not
; piped to FileCheck, so presumably it only has to complete without error.

; Builds a <4 x i32*> whose four lanes all hold %ptr, then applies two chained
; vector GEPs with constant index vectors. The checks expect the pointer to be
; broadcast from the stack and a single vpaddd from a constant-pool entry.
define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
; CHECK-LABEL: AGEP0:
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0
; CHECK-NEXT: retl
  %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
  %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
  %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
  %A2 = getelementptr i32, <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %A3 = getelementptr i32, <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
  ret <4 x i32*> %A3
}

; Vector GEP with constant indices, of which only lane 3 is extracted and
; loaded. The checks expect the lane to be extracted first and the constant
; offset (index 4 * 4 bytes = 16) to appear as the load displacement.
define i32 @AGEP1(<4 x i32*> %param) nounwind {
; CHECK-LABEL: AGEP1:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractps $3, %xmm0, %eax
; CHECK-NEXT: movl 16(%eax), %eax
; CHECK-NEXT: retl
  %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %k = extractelement <4 x i32*> %A2, i32 3
  %v = load i32, i32* %k
  ret i32 %v
}

; Same as AGEP1 but with a variable index vector. The checks expect the
; indices to be shifted left by 2 (scaled by the 4-byte element size), added
; to the base pointers, and lane 3 then extracted and loaded.
define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
; CHECK-LABEL: AGEP2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpslld $2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpextrd $3, %xmm0, %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
  %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
  %k = extractelement <4 x i32*> %A2, i32 3
  %v = load i32, i32* %k
  ret i32 %v
}

; Variable-index vector GEP whose lane 3 is then replaced by the address of an
; alloca. The checks expect the shift-and-add for the GEP, followed by a
; vpinsrd of the stack pointer value into lane 3.
define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
; CHECK-LABEL: AGEP3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: vpslld $2, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
  %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
  %v = alloca i32
  %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
  ret <4 x i32*> %k
}

; Vector GEP over i16 elements with a variable <4 x i32> index vector.
define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
; Multiply offset by two (add it to itself).
; Add the base to the offset.
; CHECK-LABEL: AGEP4:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %A = getelementptr i16, <4 x i16*> %param, <4 x i32> %off
  ret <4 x i16*> %A
}

; Vector GEP over i8 elements with <4 x i8> indices. The checks expect the
; indices to be sign-extended to 32 bits (vpmovsxbd) before the add.
define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
; CHECK-LABEL: AGEP5:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %A = getelementptr i8, <4 x i8*> %param, <4 x i8> %off
  ret <4 x i8*> %A
}


; The size of each element is 1 byte. No need to multiply by element size.
define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
; CHECK-LABEL: AGEP6:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %A = getelementptr i8, <4 x i8*> %param, <4 x i32> %off
  ret <4 x i8*> %A
}

; Vector of base pointers indexed by a single scalar i32. The checks expect
; the scalar to be broadcast from the stack to all four lanes and then added.
define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind {
; CHECK-LABEL: AGEP7:
; CHECK: # %bb.0:
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %A = getelementptr i8, <4 x i8*> %param, i32 %off
  ret <4 x i8*> %A
}

; Scalar i16 base pointer indexed by a <4 x i32> vector. The checks expect the
; base to be broadcast from the stack after the offsets are doubled.
define <4 x i16*> @AGEP8(i16* %param, <4 x i32> %off) nounwind {
; Multiply offset by two (add it to itself).
; Add the base to the offset.
; CHECK-LABEL: AGEP8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retl
  %A = getelementptr i16, i16* %param, <4 x i32> %off
  ret <4 x i16*> %A
}

; Scalar i16 base pointer indexed by a <64 x i32> vector, producing a
; <64 x i16*> result. Per the checks, the work is done in sixteen 4-lane
; chunks: six come from the two halves of ymm0-ymm2 and ten are loaded from
; 40(%ebp) through 184(%ebp). Each chunk is doubled (vpaddd with itself) and
; added to the base broadcast from 12(%ebp). Nine intermediate results are
; spilled and later reloaded, and every chunk is stored at offsets 0..240 from
; the pointer loaded from 8(%ebp) (presumably a hidden return-slot argument,
; given the trailing 'retl $4').
define <64 x i16*> @AGEP9(i16* %param, <64 x i32> %off) nounwind {
; CHECK-LABEL: AGEP9:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $160, %esp
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm3
; CHECK-NEXT: vbroadcastss 12(%ebp), %xmm5
; CHECK-NEXT: vpaddd %xmm3, %xmm5, %xmm3
; CHECK-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vpaddd %xmm2, %xmm2, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovdqa 40(%ebp), %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovdqa 56(%ebp), %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovdqa 72(%ebp), %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, (%esp) # 16-byte Spill
; CHECK-NEXT: vmovdqa 88(%ebp), %xmm4
; CHECK-NEXT: vpaddd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpaddd %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vmovdqa 104(%ebp), %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm5, %xmm1
; CHECK-NEXT: vmovdqa 120(%ebp), %xmm6
; CHECK-NEXT: vpaddd %xmm6, %xmm6, %xmm6
; CHECK-NEXT: vpaddd %xmm6, %xmm5, %xmm6
; CHECK-NEXT: vmovdqa 136(%ebp), %xmm2
; CHECK-NEXT: vpaddd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpaddd %xmm2, %xmm5, %xmm2
; CHECK-NEXT: vmovdqa 152(%ebp), %xmm7
; CHECK-NEXT: vpaddd %xmm7, %xmm7, %xmm7
; CHECK-NEXT: vpaddd %xmm7, %xmm5, %xmm7
; CHECK-NEXT: vmovdqa 168(%ebp), %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmovdqa 184(%ebp), %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm5, %xmm3
; CHECK-NEXT: movl 8(%ebp), %eax
; CHECK-NEXT: vmovdqa %xmm3, 240(%eax)
; CHECK-NEXT: vmovdqa %xmm0, 224(%eax)
; CHECK-NEXT: vmovdqa %xmm7, 208(%eax)
; CHECK-NEXT: vmovdqa %xmm2, 192(%eax)
; CHECK-NEXT: vmovdqa %xmm6, 176(%eax)
; CHECK-NEXT: vmovdqa %xmm1, 160(%eax)
; CHECK-NEXT: vmovdqa %xmm4, 144(%eax)
; CHECK-NEXT: vmovaps (%esp), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 128(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 112(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 96(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 80(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 64(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 48(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 32(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, 16(%eax)
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vmovaps %xmm0, (%eax)
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl $4
  %A = getelementptr i16, i16* %param, <64 x i32> %off
  ret <64 x i16*> %A
}