; RUN: llc < %s -march=x86-64 -mattr=+sse4.2 | FileCheck %s

; Verify when widening a divide/remainder operation, we only generate a
; divide/rem per element since divide/remainder can trap.
;
; Every function below performs exactly one vector sdiv/udiv/srem/urem. The
; expectations count the scalar divide instructions emitted for it and then
; forbid any additional one before the function returns.
;
; Note: FileCheck patterns match substrings, so the unsigned patterns
; (divb / divl / divq) are also satisfied by the signed idiv* mnemonics.

; sdiv on <2 x i32> values reached through addrspace(1) pointers: expects two
; idivq and no idivl. %index is allocated but never stored to, so the element
; index loaded from it is undefined; only the divide instructions are checked.
; CHECK-LABEL: vectorDiv:
define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
; CHECK: idivq
; CHECK: idivq
; CHECK-NOT: idivl
; CHECK: ret
entry:
  %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4
  %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4
  %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4
  %index = alloca i32, align 4
  store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
  store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
  store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
  %tmp = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %qdest.addr
  %tmp1 = load i32, i32* %index
  %arrayidx = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp, i32 %tmp1
  %tmp2 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %nsource.addr
  %tmp3 = load i32, i32* %index
  %arrayidx4 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
  %tmp5 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx4
  %tmp6 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %dsource.addr
  %tmp7 = load i32, i32* %index
  %arrayidx8 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
  %tmp9 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx8
  %tmp10 = sdiv <2 x i32> %tmp5, %tmp9
  store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
  ret void
}

; sdiv <3 x i8>: expects exactly three idivb.
; CHECK-LABEL: test_char_div:
define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: idivb
; CHECK: idivb
; CHECK: idivb
; CHECK-NOT: idivb
; CHECK: ret
  %div.r = sdiv <3 x i8> %num, %div
  ret <3 x i8>  %div.r
}

; udiv <3 x i8>: expects exactly three divb.
; CHECK-LABEL: test_uchar_div:
define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: divb
; CHECK: divb
; CHECK: divb
; CHECK-NOT: divb
; CHECK: ret
  %div.r = udiv <3 x i8> %num, %div
  ret <3 x i8>  %div.r
}

; sdiv <5 x i16>: expects exactly five idivw.
; CHECK-LABEL: test_short_div:
define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK-NOT: idivw
; CHECK: ret
  %div.r = sdiv <5 x i16> %num, %div
  ret <5 x i16>  %div.r
}

; udiv <4 x i16>: expects exactly four divl (32-bit divides, not divw).
; CHECK-LABEL: test_ushort_div:
define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK-NOT: divl
; CHECK: ret
  %div.r = udiv <4 x i16> %num, %div
  ret <4 x i16>  %div.r
}

; udiv <3 x i32>: expects exactly three divl.
; CHECK-LABEL: test_uint_div:
define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
; CHECK: divl
; CHECK: divl
; CHECK: divl
; CHECK-NOT: divl
; CHECK: ret
  %div.r = udiv <3 x i32> %num, %div
  ret <3 x i32>  %div.r
}

; sdiv <3 x i64>: expects exactly three idivq.
; CHECK-LABEL: test_long_div:
define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: idivq
; CHECK: idivq
; CHECK: idivq
; CHECK-NOT: idivq
; CHECK: ret
  %div.r = sdiv <3 x i64> %num, %div
  ret <3 x i64>  %div.r
}

; udiv <3 x i64>: expects exactly three divq.
; CHECK-LABEL: test_ulong_div:
define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK-NOT: divq
; CHECK: ret
  %div.r = udiv <3 x i64> %num, %div
  ret <3 x i64>  %div.r
}

; srem <4 x i8>: expects exactly four idivl (32-bit divides, not idivb).
; CHECK-LABEL: test_char_rem:
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
  %rem.r = srem <4 x i8> %num, %rem
  ret <4 x i8>  %rem.r
}

; srem <5 x i16>: expects exactly five idivw.
; CHECK-LABEL: test_short_rem:
define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK: idivw
; CHECK-NOT: idivw
; CHECK: ret
  %rem.r = srem <5 x i16> %num, %rem
  ret <5 x i16>  %rem.r
}

; srem <4 x i32>: expects exactly four idivl.
; NOTE(review): despite "uint" in the name, this function uses the signed
; srem and expects idivl; confirm whether urem was intended.
; CHECK-LABEL: test_uint_rem:
define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
  %rem.r = srem <4 x i32> %num, %rem
  ret <4 x i32>  %rem.r
}


; urem <5 x i64>: expects exactly five divq.
; CHECK-LABEL: test_ulong_rem:
define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK: divq
; CHECK-NOT: divq
; CHECK: ret
  %rem.r = urem <5 x i64> %num, %rem
  ret <5 x i64>  %rem.r
}

; sdiv <3 x i32> inside a counted loop that divides dest[i] by old[i] in place
; for i in [0, n): expects exactly three idivl in the emitted function.
; CHECK-LABEL: test_int_div:
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp13 = icmp sgt i32 %n, 0
  br i1 %cmp13, label %bb.nph, label %for.end

bb.nph:
  br label %for.body

for.body:
  %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
  %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014
  %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
  %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014
  %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
  %div = sdiv <3 x i32> %tmp4, %tmp8
  store <3 x i32> %div, <3 x i32>* %arrayidx11
  %inc = add nsw i32 %i.014, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}