• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 |  FileCheck %s
3
4; Verify that when widening a divide/remainder operation, we generate only one
5; divide/rem per original element, since divide/remainder can trap.
6
; vectorDiv: signed division of one <2 x i32> vector by another, where both
; operands and the destination are addressed through addrspace(1) pointers.
; The three pointer arguments are first stored to stack slots and reloaded.
; The expected assembly contains exactly two idivl instructions, one per
; vector element.
; NOTE(review): %index is loaded without ever being stored to in this
; function, so the element index is an uninitialized value; presumably this
; does not matter for what the test verifies -- confirm.
7; CHECK: vectorDiv
8define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
9; CHECK-LABEL: vectorDiv:
10; CHECK:       # %bb.0: # %entry
11; CHECK-NEXT:    movq %rdx, %r8
12; CHECK-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
13; CHECK-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
14; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
15; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rcx
16; CHECK-NEXT:    movq (%rdi,%rcx,8), %rdi
17; CHECK-NEXT:    movq (%rsi,%rcx,8), %r10
18; CHECK-NEXT:    movq %rdi, %rax
19; CHECK-NEXT:    shrq $32, %rax
20; CHECK-NEXT:    movq %r10, %rsi
21; CHECK-NEXT:    shrq $32, %rsi
22; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
23; CHECK-NEXT:    cltd
24; CHECK-NEXT:    idivl %esi
25; CHECK-NEXT:    movl %eax, %r9d
26; CHECK-NEXT:    movl %edi, %eax
27; CHECK-NEXT:    cltd
28; CHECK-NEXT:    idivl %r10d
29; CHECK-NEXT:    movd %eax, %xmm0
30; CHECK-NEXT:    pinsrd $1, %r9d, %xmm0
31; CHECK-NEXT:    movq %xmm0, (%r8,%rcx,8)
32; CHECK-NEXT:    retq
33entry:
  ; Stack slots for the three pointer arguments and the element index.
34  %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4
35  %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4
36  %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4
37  %index = alloca i32, align 4
38  store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
39  store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
40  store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
  ; Address of the destination element: qdest[index].
41  %tmp = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %qdest.addr
42  %tmp1 = load i32, i32* %index
43  %arrayidx = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp, i32 %tmp1
  ; Numerator vector: nsource[index].
44  %tmp2 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %nsource.addr
45  %tmp3 = load i32, i32* %index
46  %arrayidx4 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
47  %tmp5 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx4
  ; Divisor vector: dsource[index].
48  %tmp6 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %dsource.addr
49  %tmp7 = load i32, i32* %index
50  %arrayidx8 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
51  %tmp9 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx8
  ; Operation under test: element-wise signed divide of a 2-element vector.
52  %tmp10 = sdiv <2 x i32> %tmp5, %tmp9
53  store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
54  ret void
55}
56
; test_char_div: signed division of <3 x i8> vectors passed and returned by
; value. The expected assembly sign-extends each numerator with movsbl and
; issues exactly three idivb instructions, one per element.
57; CHECK: test_char_div
58define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
59; CHECK-LABEL: test_char_div:
60; CHECK:       # %bb.0:
61; CHECK-NEXT:    movsbl %dil, %eax
62; CHECK-NEXT:    idivb %cl
63; CHECK-NEXT:    movl %eax, %edi
64; CHECK-NEXT:    movsbl %sil, %eax
65; CHECK-NEXT:    idivb %r8b
66; CHECK-NEXT:    movl %eax, %esi
67; CHECK-NEXT:    movsbl %dl, %eax
68; CHECK-NEXT:    idivb %r9b
69; CHECK-NEXT:    movl %eax, %ecx
70; CHECK-NEXT:    movl %edi, %eax
71; CHECK-NEXT:    movl %esi, %edx
72; CHECK-NEXT:    retq
73  %div.r = sdiv <3 x i8> %num, %div
74  ret <3 x i8>  %div.r
75}
76
; test_uchar_div: unsigned division of <3 x i8> vectors passed and returned
; by value. The expected assembly zero-extends each numerator with movzbl and
; issues exactly three divb instructions, one per element.
77; CHECK: test_uchar_div
78define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
79; CHECK-LABEL: test_uchar_div:
80; CHECK:       # %bb.0:
81; CHECK-NEXT:    movzbl %dil, %eax
82; CHECK-NEXT:    divb %cl
83; CHECK-NEXT:    movl %eax, %edi
84; CHECK-NEXT:    movzbl %sil, %eax
85; CHECK-NEXT:    divb %r8b
86; CHECK-NEXT:    movl %eax, %esi
87; CHECK-NEXT:    movzbl %dl, %eax
88; CHECK-NEXT:    divb %r9b
89; CHECK-NEXT:    movl %eax, %ecx
90; CHECK-NEXT:    movl %edi, %eax
91; CHECK-NEXT:    movl %esi, %edx
92; CHECK-NEXT:    retq
93  %div.r = udiv <3 x i8> %num, %div
94  ret <3 x i8>  %div.r
95}
96
; test_short_div: signed division of <5 x i16> vectors. The expected assembly
; extracts each lane from %xmm0/%xmm1, sign-extends the numerator with cwtd,
; and issues exactly five idivw instructions (lanes 4, 3, 2, 0, 1 in that
; order); the quotients are then reassembled into %xmm0 with movd/pinsrw.
97; CHECK: test_short_div
98define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
99; CHECK-LABEL: test_short_div:
100; CHECK:       # %bb.0:
101; CHECK-NEXT:    pextrw $4, %xmm0, %eax
102; CHECK-NEXT:    pextrw $4, %xmm1, %ecx
103; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
104; CHECK-NEXT:    cwtd
105; CHECK-NEXT:    idivw %cx
106; CHECK-NEXT:    movl %eax, %r8d
107; CHECK-NEXT:    pextrw $3, %xmm0, %eax
108; CHECK-NEXT:    pextrw $3, %xmm1, %ecx
109; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
110; CHECK-NEXT:    cwtd
111; CHECK-NEXT:    idivw %cx
112; CHECK-NEXT:    movl %eax, %r9d
113; CHECK-NEXT:    pextrw $2, %xmm0, %eax
114; CHECK-NEXT:    pextrw $2, %xmm1, %ecx
115; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
116; CHECK-NEXT:    cwtd
117; CHECK-NEXT:    idivw %cx
118; CHECK-NEXT:    movl %eax, %edi
119; CHECK-NEXT:    movd %xmm0, %eax
120; CHECK-NEXT:    movd %xmm1, %ecx
121; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
122; CHECK-NEXT:    cwtd
123; CHECK-NEXT:    idivw %cx
124; CHECK-NEXT:    movl %eax, %ecx
125; CHECK-NEXT:    pextrw $1, %xmm0, %eax
126; CHECK-NEXT:    pextrw $1, %xmm1, %esi
127; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
128; CHECK-NEXT:    cwtd
129; CHECK-NEXT:    idivw %si
130; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
131; CHECK-NEXT:    movd %ecx, %xmm0
132; CHECK-NEXT:    pinsrw $1, %eax, %xmm0
133; CHECK-NEXT:    pinsrw $2, %edi, %xmm0
134; CHECK-NEXT:    pinsrw $3, %r9d, %xmm0
135; CHECK-NEXT:    pinsrw $4, %r8d, %xmm0
136; CHECK-NEXT:    retq
137  %div.r = sdiv <5 x i16> %num, %div
138  ret <5 x i16>  %div.r
139}
140
; test_ushort_div: unsigned division of <4 x i16> vectors. The expected
; assembly clears %edx with xorl before each divide and issues exactly four
; divw instructions, one per element; the quotients are collected in %xmm2
; and copied to %xmm0 before returning.
141; CHECK: test_ushort_div
142define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
143; CHECK-LABEL: test_ushort_div:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    pextrw $1, %xmm0, %eax
146; CHECK-NEXT:    pextrw $1, %xmm1, %ecx
147; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
148; CHECK-NEXT:    xorl %edx, %edx
149; CHECK-NEXT:    divw %cx
150; CHECK-NEXT:    movl %eax, %ecx
151; CHECK-NEXT:    movd %xmm0, %eax
152; CHECK-NEXT:    movd %xmm1, %esi
153; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
154; CHECK-NEXT:    xorl %edx, %edx
155; CHECK-NEXT:    divw %si
156; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
157; CHECK-NEXT:    movd %eax, %xmm2
158; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
159; CHECK-NEXT:    pextrw $2, %xmm0, %eax
160; CHECK-NEXT:    pextrw $2, %xmm1, %ecx
161; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
162; CHECK-NEXT:    xorl %edx, %edx
163; CHECK-NEXT:    divw %cx
164; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
165; CHECK-NEXT:    pinsrw $2, %eax, %xmm2
166; CHECK-NEXT:    pextrw $3, %xmm0, %eax
167; CHECK-NEXT:    pextrw $3, %xmm1, %ecx
168; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
169; CHECK-NEXT:    xorl %edx, %edx
170; CHECK-NEXT:    divw %cx
171; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
172; CHECK-NEXT:    pinsrw $3, %eax, %xmm2
173; CHECK-NEXT:    movdqa %xmm2, %xmm0
174; CHECK-NEXT:    retq
175  %div.r = udiv <4 x i16> %num, %div
176  ret <4 x i16>  %div.r
177}
178
; test_uint_div: unsigned division of <3 x i32> vectors. The expected
; assembly clears %edx with xorl before each divide and issues exactly three
; divl instructions (lanes 2, 1, 0 in that order); the quotients are
; reassembled into %xmm0 with movd/pinsrd.
179; CHECK: test_uint_div
180define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
181; CHECK-LABEL: test_uint_div:
182; CHECK:       # %bb.0:
183; CHECK-NEXT:    pextrd $2, %xmm0, %eax
184; CHECK-NEXT:    pextrd $2, %xmm1, %ecx
185; CHECK-NEXT:    xorl %edx, %edx
186; CHECK-NEXT:    divl %ecx
187; CHECK-NEXT:    movl %eax, %ecx
188; CHECK-NEXT:    pextrd $1, %xmm0, %eax
189; CHECK-NEXT:    pextrd $1, %xmm1, %esi
190; CHECK-NEXT:    xorl %edx, %edx
191; CHECK-NEXT:    divl %esi
192; CHECK-NEXT:    movl %eax, %esi
193; CHECK-NEXT:    movd %xmm0, %eax
194; CHECK-NEXT:    movd %xmm1, %edi
195; CHECK-NEXT:    xorl %edx, %edx
196; CHECK-NEXT:    divl %edi
197; CHECK-NEXT:    movd %eax, %xmm0
198; CHECK-NEXT:    pinsrd $1, %esi, %xmm0
199; CHECK-NEXT:    pinsrd $2, %ecx, %xmm0
200; CHECK-NEXT:    retq
201  %div.r = udiv <3 x i32> %num, %div
202  ret <3 x i32>  %div.r
203}
204
; test_long_div: signed division of <3 x i64> vectors. The expected assembly
; saves %rdx in %r10 (it is overwritten by cqto), sign-extends each numerator
; with cqto, and issues exactly three idivq instructions, one per element.
205; CHECK: test_long_div
206define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
207; CHECK-LABEL: test_long_div:
208; CHECK:       # %bb.0:
209; CHECK-NEXT:    movq %rdx, %r10
210; CHECK-NEXT:    movq %rdi, %rax
211; CHECK-NEXT:    cqto
212; CHECK-NEXT:    idivq %rcx
213; CHECK-NEXT:    movq %rax, %rcx
214; CHECK-NEXT:    movq %rsi, %rax
215; CHECK-NEXT:    cqto
216; CHECK-NEXT:    idivq %r8
217; CHECK-NEXT:    movq %rax, %rsi
218; CHECK-NEXT:    movq %r10, %rax
219; CHECK-NEXT:    cqto
220; CHECK-NEXT:    idivq %r9
221; CHECK-NEXT:    movq %rax, %rdi
222; CHECK-NEXT:    movq %rcx, %rax
223; CHECK-NEXT:    movq %rsi, %rdx
224; CHECK-NEXT:    movq %rdi, %rcx
225; CHECK-NEXT:    retq
226  %div.r = sdiv <3 x i64> %num, %div
227  ret <3 x i64>  %div.r
228}
229
; test_ulong_div: unsigned division of <3 x i64> vectors. The expected
; assembly saves %rdx in %r10 (it is overwritten by the xorl), clears %edx
; before each divide, and issues exactly three divq instructions, one per
; element.
230; CHECK: test_ulong_div
231define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
232; CHECK-LABEL: test_ulong_div:
233; CHECK:       # %bb.0:
234; CHECK-NEXT:    movq %rdx, %r10
235; CHECK-NEXT:    movq %rdi, %rax
236; CHECK-NEXT:    xorl %edx, %edx
237; CHECK-NEXT:    divq %rcx
238; CHECK-NEXT:    movq %rax, %rcx
239; CHECK-NEXT:    movq %rsi, %rax
240; CHECK-NEXT:    xorl %edx, %edx
241; CHECK-NEXT:    divq %r8
242; CHECK-NEXT:    movq %rax, %rsi
243; CHECK-NEXT:    movq %r10, %rax
244; CHECK-NEXT:    xorl %edx, %edx
245; CHECK-NEXT:    divq %r9
246; CHECK-NEXT:    movq %rax, %rdi
247; CHECK-NEXT:    movq %rcx, %rax
248; CHECK-NEXT:    movq %rsi, %rdx
249; CHECK-NEXT:    movq %rdi, %rcx
250; CHECK-NEXT:    retq
251  %div.r = udiv <3 x i64> %num, %div
252  ret <3 x i64>  %div.r
253}
254
; test_char_rem: signed remainder of <4 x i8> vectors. The expected assembly
; sign-extends each numerator with cbtw and issues exactly four idivb
; instructions, one per element; each remainder is read back from %ah with
; movsbl and the results are collected in %xmm2 before being copied to %xmm0.
255; CHECK: test_char_rem
256define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
257; CHECK-LABEL: test_char_rem:
258; CHECK:       # %bb.0:
259; CHECK-NEXT:    pextrb $1, %xmm1, %ecx
260; CHECK-NEXT:    pextrb $1, %xmm0, %eax
261; CHECK-NEXT:    cbtw
262; CHECK-NEXT:    idivb %cl
263; CHECK-NEXT:    movsbl %ah, %ecx
264; CHECK-NEXT:    movd %xmm1, %edx
265; CHECK-NEXT:    movd %xmm0, %eax
266; CHECK-NEXT:    cbtw
267; CHECK-NEXT:    idivb %dl
268; CHECK-NEXT:    movsbl %ah, %eax
269; CHECK-NEXT:    movd %eax, %xmm2
270; CHECK-NEXT:    pinsrb $1, %ecx, %xmm2
271; CHECK-NEXT:    pextrb $2, %xmm1, %ecx
272; CHECK-NEXT:    pextrb $2, %xmm0, %eax
273; CHECK-NEXT:    cbtw
274; CHECK-NEXT:    idivb %cl
275; CHECK-NEXT:    movsbl %ah, %eax
276; CHECK-NEXT:    pinsrb $2, %eax, %xmm2
277; CHECK-NEXT:    pextrb $3, %xmm1, %ecx
278; CHECK-NEXT:    pextrb $3, %xmm0, %eax
279; CHECK-NEXT:    cbtw
280; CHECK-NEXT:    idivb %cl
281; CHECK-NEXT:    movsbl %ah, %eax
282; CHECK-NEXT:    pinsrb $3, %eax, %xmm2
283; CHECK-NEXT:    movdqa %xmm2, %xmm0
284; CHECK-NEXT:    retq
285  %rem.r = srem <4 x i8> %num, %rem
286  ret <4 x i8>  %rem.r
287}
288
; test_short_rem: signed remainder of <5 x i16> vectors. The expected
; assembly mirrors test_short_div's shape -- five cwtd/idivw pairs over lanes
; 4, 3, 2, 0, 1 -- but takes each result from %edx (the remainder) rather
; than %eax, then reassembles the vector in %xmm0 with movd/pinsrw.
289; CHECK: test_short_rem
290define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
291; CHECK-LABEL: test_short_rem:
292; CHECK:       # %bb.0:
293; CHECK-NEXT:    pextrw $4, %xmm0, %eax
294; CHECK-NEXT:    pextrw $4, %xmm1, %ecx
295; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
296; CHECK-NEXT:    cwtd
297; CHECK-NEXT:    idivw %cx
298; CHECK-NEXT:    movl %edx, %r8d
299; CHECK-NEXT:    pextrw $3, %xmm0, %eax
300; CHECK-NEXT:    pextrw $3, %xmm1, %ecx
301; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
302; CHECK-NEXT:    cwtd
303; CHECK-NEXT:    idivw %cx
304; CHECK-NEXT:    movl %edx, %r9d
305; CHECK-NEXT:    pextrw $2, %xmm0, %eax
306; CHECK-NEXT:    pextrw $2, %xmm1, %ecx
307; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
308; CHECK-NEXT:    cwtd
309; CHECK-NEXT:    idivw %cx
310; CHECK-NEXT:    movl %edx, %edi
311; CHECK-NEXT:    movd %xmm0, %eax
312; CHECK-NEXT:    movd %xmm1, %ecx
313; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
314; CHECK-NEXT:    cwtd
315; CHECK-NEXT:    idivw %cx
316; CHECK-NEXT:    movl %edx, %ecx
317; CHECK-NEXT:    pextrw $1, %xmm0, %eax
318; CHECK-NEXT:    pextrw $1, %xmm1, %esi
319; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
320; CHECK-NEXT:    cwtd
321; CHECK-NEXT:    idivw %si
322; CHECK-NEXT:    # kill: def $dx killed $dx def $edx
323; CHECK-NEXT:    movd %ecx, %xmm0
324; CHECK-NEXT:    pinsrw $1, %edx, %xmm0
325; CHECK-NEXT:    pinsrw $2, %edi, %xmm0
326; CHECK-NEXT:    pinsrw $3, %r9d, %xmm0
327; CHECK-NEXT:    pinsrw $4, %r8d, %xmm0
328; CHECK-NEXT:    retq
329  %rem.r = srem <5 x i16> %num, %rem
330  ret <5 x i16>  %rem.r
331}
332
; test_uint_rem: remainder of <4 x i32> vectors. The expected assembly issues
; exactly four cltd/idivl pairs, one per element, takes each remainder from
; %edx, collects the results in %xmm2 and copies them to %xmm0.
; NOTE(review): despite the "uint" in the name, the IR below uses srem and
; the expected output is the signed sequence (cltd + idivl), so this function
; actually exercises the signed i32 remainder. If unsigned coverage was
; intended, the instruction would need to be urem and the assertions
; regenerated with utils/update_llc_test_checks.py -- confirm intent.
333; CHECK: test_uint_rem
334define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
335; CHECK-LABEL: test_uint_rem:
336; CHECK:       # %bb.0:
337; CHECK-NEXT:    pextrd $1, %xmm0, %eax
338; CHECK-NEXT:    pextrd $1, %xmm1, %ecx
339; CHECK-NEXT:    cltd
340; CHECK-NEXT:    idivl %ecx
341; CHECK-NEXT:    movl %edx, %ecx
342; CHECK-NEXT:    movd %xmm0, %eax
343; CHECK-NEXT:    movd %xmm1, %esi
344; CHECK-NEXT:    cltd
345; CHECK-NEXT:    idivl %esi
346; CHECK-NEXT:    movd %edx, %xmm2
347; CHECK-NEXT:    pinsrd $1, %ecx, %xmm2
348; CHECK-NEXT:    pextrd $2, %xmm0, %eax
349; CHECK-NEXT:    pextrd $2, %xmm1, %ecx
350; CHECK-NEXT:    cltd
351; CHECK-NEXT:    idivl %ecx
352; CHECK-NEXT:    pinsrd $2, %edx, %xmm2
353; CHECK-NEXT:    pextrd $3, %xmm0, %eax
354; CHECK-NEXT:    pextrd $3, %xmm1, %ecx
355; CHECK-NEXT:    cltd
356; CHECK-NEXT:    idivl %ecx
357; CHECK-NEXT:    pinsrd $3, %edx, %xmm2
358; CHECK-NEXT:    movdqa %xmm2, %xmm0
359; CHECK-NEXT:    retq
360  %rem.r = srem <4 x i32> %num, %rem
361  ret <4 x i32>  %rem.r
362}
363
364
; test_ulong_rem: unsigned remainder of <5 x i64> vectors. The expected
; assembly clears %edx before each divide and issues exactly five divq
; instructions, one per element, each taking its divisor from a stack memory
; operand. The remainders (taken from %rdx) are written to memory at offsets
; 0, 16 and 32 from %rdi, and %rdi is then returned in %rax.
365; CHECK: test_ulong_rem
366define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
367; CHECK-LABEL: test_ulong_rem:
368; CHECK:       # %bb.0:
369; CHECK-NEXT:    movq %rdx, %rax
370; CHECK-NEXT:    xorl %edx, %edx
371; CHECK-NEXT:    divq {{[0-9]+}}(%rsp)
372; CHECK-NEXT:    movq %rdx, %xmm0
373; CHECK-NEXT:    movq %rsi, %rax
374; CHECK-NEXT:    xorl %edx, %edx
375; CHECK-NEXT:    divq {{[0-9]+}}(%rsp)
376; CHECK-NEXT:    movq %rdx, %xmm1
377; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
378; CHECK-NEXT:    movq %r8, %rax
379; CHECK-NEXT:    xorl %edx, %edx
380; CHECK-NEXT:    divq {{[0-9]+}}(%rsp)
381; CHECK-NEXT:    movq %rdx, %xmm0
382; CHECK-NEXT:    movq %rcx, %rax
383; CHECK-NEXT:    xorl %edx, %edx
384; CHECK-NEXT:    divq {{[0-9]+}}(%rsp)
385; CHECK-NEXT:    movq %rdx, %xmm2
386; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
387; CHECK-NEXT:    movq %r9, %rax
388; CHECK-NEXT:    xorl %edx, %edx
389; CHECK-NEXT:    divq {{[0-9]+}}(%rsp)
390; CHECK-NEXT:    movq %rdx, 32(%rdi)
391; CHECK-NEXT:    movdqa %xmm2, 16(%rdi)
392; CHECK-NEXT:    movdqa %xmm1, (%rdi)
393; CHECK-NEXT:    movq %rdi, %rax
394; CHECK-NEXT:    retq
395  %rem.r = urem <5 x i64> %num, %rem
396  ret <5 x i64>  %rem.r
397}
398
; test_int_div: in-place signed division inside a loop. For each i in
; [0, %n), computes dest[i] = dest[i] sdiv old[i] on <3 x i32> elements; the
; loop is skipped entirely when %n is not greater than zero.
; The expected loop body issues exactly three cltd/idivl pairs per iteration,
; one per element, and writes the result back as a 64-bit store of the low
; two lanes plus a 32-bit store of the third at offset 8. The loop advances
; the byte offset by 16 per iteration.
399; CHECK: test_int_div
400define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
401; CHECK-LABEL: test_int_div:
402; CHECK:       # %bb.0: # %entry
403; CHECK-NEXT:    testl %edx, %edx
404; CHECK-NEXT:    jle .LBB12_3
405; CHECK-NEXT:  # %bb.1: # %bb.nph
406; CHECK-NEXT:    movl %edx, %r9d
407; CHECK-NEXT:    xorl %ecx, %ecx
408; CHECK-NEXT:    .p2align 4, 0x90
409; CHECK-NEXT:  .LBB12_2: # %for.body
410; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
411; CHECK-NEXT:    movdqa (%rdi,%rcx), %xmm0
412; CHECK-NEXT:    movdqa (%rsi,%rcx), %xmm1
413; CHECK-NEXT:    pextrd $1, %xmm0, %eax
414; CHECK-NEXT:    pextrd $1, %xmm1, %r8d
415; CHECK-NEXT:    cltd
416; CHECK-NEXT:    idivl %r8d
417; CHECK-NEXT:    movl %eax, %r8d
418; CHECK-NEXT:    movd %xmm0, %eax
419; CHECK-NEXT:    movd %xmm1, %r10d
420; CHECK-NEXT:    cltd
421; CHECK-NEXT:    idivl %r10d
422; CHECK-NEXT:    movd %eax, %xmm2
423; CHECK-NEXT:    pinsrd $1, %r8d, %xmm2
424; CHECK-NEXT:    pextrd $2, %xmm0, %eax
425; CHECK-NEXT:    pextrd $2, %xmm1, %r8d
426; CHECK-NEXT:    cltd
427; CHECK-NEXT:    idivl %r8d
428; CHECK-NEXT:    movl %eax, 8(%rdi,%rcx)
429; CHECK-NEXT:    movq %xmm2, (%rdi,%rcx)
430; CHECK-NEXT:    addq $16, %rcx
431; CHECK-NEXT:    decl %r9d
432; CHECK-NEXT:    jne .LBB12_2
433; CHECK-NEXT:  .LBB12_3: # %for.end
434; CHECK-NEXT:    retq
435entry:
  ; Guard: only enter the loop when %n > 0 (signed compare).
436  %cmp13 = icmp sgt i32 %n, 0
437  br i1 %cmp13, label %bb.nph, label %for.end
438
439bb.nph:
440  br label %for.body
441
442for.body:
  ; %i.014 is the loop counter, starting at 0 and incremented by 1 each trip.
443  %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
444  %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014
445  %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
446  %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014
447  %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
  ; Operation under test: element-wise signed divide, stored back to dest[i].
448  %div = sdiv <3 x i32> %tmp4, %tmp8
449  store <3 x i32> %div, <3 x i32>* %arrayidx11
450  %inc = add nsw i32 %i.014, 1
  ; Exit once the incremented counter equals %n.
451  %exitcond = icmp eq i32 %inc, %n
452  br i1 %exitcond, label %for.end, label %for.body
453
454for.end:                                          ; preds = %for.body, %entry
455  ret void
456}
457