; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s

; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
;
; @update runs a counted loop (signed compare of the counter against %n). On
; each iteration it loads one <8 x i8> value through %src_i, adds 1 to every
; lane, ANDs every lane with 4, and stores the result through %dst_i. The
; arguments, the counter and the derived pointers all live in allocas and are
; reloaded from them before each use.
;
; In the expected assembly the arithmetic is done on whole XMM registers while
; only 8 bytes are moved to and from memory (movq). The add of 1 shows up as a
; psubb of the register filled by pcmpeqd, and the mask is a 16-lane constant
; whose upper eight lanes are printed as u.

define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
; CHECK-LABEL: update:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: movl $0, (%esp)
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <4,4,4,4,4,4,4,4,u,u,u,u,u,u,u,u>
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %forcond
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl (%esp), %eax
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: jge .LBB0_3
; CHECK-NEXT: # %bb.2: # %forbody
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl (%esp), %eax
; CHECK-NEXT: leal (,%eax,8), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: addl %ecx, %edx
; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp)
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: psubb %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: movq %xmm2, (%edx,%eax,8)
; CHECK-NEXT: incl (%esp)
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .LBB0_3: # %afterfor
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
entry:
  ; Stack slots for the three incoming arguments, the loop counter %i, and the
  ; two vector-typed element pointers that the loop body computes.
  %dst_i.addr = alloca i64*
  %src_i.addr = alloca i64*
  %n.addr = alloca i32
  %i = alloca i32, align 4
  %dst = alloca <8 x i8>*, align 4
  %src = alloca <8 x i8>*, align 4
  ; Spill the arguments to their slots and start the counter at 0.
  store i64* %dst_i, i64** %dst_i.addr
  store i64* %src_i, i64** %src_i.addr
  store i32 %n, i32* %n.addr
  store i32 0, i32* %i
  br label %forcond

forcond:
  ; Loop test: keep iterating while i < n (signed comparison).
  %tmp = load i32, i32* %i
  %tmp1 = load i32, i32* %n.addr
  %cmp = icmp slt i32 %tmp, %tmp1
  br i1 %cmp, label %forbody, label %afterfor

forbody:
  ; dst = (<8 x i8>*) &dst_i[i]
  %tmp2 = load i32, i32* %i
  %tmp3 = load i64*, i64** %dst_i.addr
  %arrayidx = getelementptr i64, i64* %tmp3, i32 %tmp2
  %conv = bitcast i64* %arrayidx to <8 x i8>*
  store <8 x i8>* %conv, <8 x i8>** %dst
  ; src = (<8 x i8>*) &src_i[i]
  %tmp4 = load i32, i32* %i
  %tmp5 = load i64*, i64** %src_i.addr
  %arrayidx6 = getelementptr i64, i64* %tmp5, i32 %tmp4
  %conv7 = bitcast i64* %arrayidx6 to <8 x i8>*
  store <8 x i8>* %conv7, <8 x i8>** %src
  ; Both saved pointers are indexed by i a second time here, on top of the
  ; i64 indexing already applied above.
  %tmp8 = load i32, i32* %i
  %tmp9 = load <8 x i8>*, <8 x i8>** %dst
  %arrayidx10 = getelementptr <8 x i8>, <8 x i8>* %tmp9, i32 %tmp8
  %tmp11 = load i32, i32* %i
  %tmp12 = load <8 x i8>*, <8 x i8>** %src
  %arrayidx13 = getelementptr <8 x i8>, <8 x i8>* %tmp12, i32 %tmp11
  ; Lane-wise: destination element = (source element + 1) & 4.
  %tmp14 = load <8 x i8>, <8 x i8>* %arrayidx13
  %add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >
  %and = and <8 x i8> %add, < i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4 >
  store <8 x i8> %and, <8 x i8>* %arrayidx10
  br label %forinc

forinc:
  ; i = i + 1, then go back to the loop test.
  %tmp15 = load i32, i32* %i
  %inc = add i32 %tmp15, 1
  store i32 %inc, i32* %i
  br label %forcond

afterfor:
  ret void
}