; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X64

; There are no MMX operations in @t1

define void @t1(i32 %a, x86_mmx* %P) nounwind {
; X32-LABEL: t1:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shll $12, %ecx
; X32-NEXT:    movd %ecx, %xmm0
; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; X32-NEXT:    movq %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # BB#0:
; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT:    shll $12, %edi
; X64-NEXT:    movd %rdi, %xmm0
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp23, x86_mmx* %P
  ret void
}

define <4 x float> @t2(<4 x float>* %P) nounwind {
; X32-LABEL: t2:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movaps (%eax), %xmm1
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X32-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # BB#0:
; X64-NEXT:    movaps (%rdi), %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, <4 x float>* %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

define <4 x float> @t3(<4 x float>* %P) nounwind {
; X32-LABEL: t3:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movapd (%eax), %xmm0
; X32-NEXT:    xorpd %xmm1, %xmm1
; X32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # BB#0:
; X64-NEXT:    movapd (%rdi), %xmm0
; X64-NEXT:    xorpd %xmm1, %xmm1
; X64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, <4 x float>* %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

define <4 x float> @t4(<4 x float>* %P) nounwind {
; X32-LABEL: t4:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movaps (%eax), %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # BB#0:
; X64-NEXT:    movaps (%rdi), %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, <4 x float>* %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X32-LABEL: t5:
; X32:       # BB#0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # BB#0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X32-LABEL: t6:
; X32:       # BB#0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # BB#0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X32-LABEL: t7:
; X32:       # BB#0:
; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X32-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # BB#0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X32-LABEL: t8:
; X32:       # BB#0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # BB#0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X32-LABEL: t9:
; X32:       # BB#0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # BB#0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}