; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X64

; There are no MMX operations in @t1

define void @t1(i32 %a, x86_mmx* %P) nounwind {
; X32-LABEL: t1:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shll $12, %ecx
; X32-NEXT:    movd %ecx, %xmm0
; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; X32-NEXT:    movq %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shll $12, %edi
; X64-NEXT:    movq %rdi, %xmm0
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp23, x86_mmx* %P
  ret void
}

; Shuffle with a zero vector: lanes 0-2 take zero (index 4), lane 3 takes
; element 0 of the loaded vector.
define <4 x float> @t2(<4 x float>* %P) nounwind {
; X32-LABEL: t2:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movaps (%eax), %xmm1
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X32-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    movaps (%rdi), %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, <4 x float>* %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

; Low half of the load into lanes 0-1, zeros in the upper lanes: lowers to a
; single movlps from memory into a zeroed register.
define <4 x float> @t3(<4 x float>* %P) nounwind {
; X32-LABEL: t3:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, <4 x float>* %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

; Zero vector is the first shuffle operand here: lane 0 takes element 3 of the
; load (index 7), lanes 1-3 take zero (index 0).
define <4 x float> @t4(<4 x float>* %P) nounwind {
; X32-LABEL: t4:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movaps (%eax), %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    movaps (%rdi), %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, <4 x float>* %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

; Byte shuffle that shifts in a zero at the top (index 17) with everything else
; undef: matched as a 16-bit logical right shift.
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X32-LABEL: t5:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

; Same as @t5 but with an undef second operand instead of zeros.
define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X32-LABEL: t6:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

; Bytes 1-2 of the input land in lanes 14-15, the rest undef: matched as a
; whole-register byte shift left (pslldq).
define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X32-LABEL: t7:
; X32:       # %bb.0:
; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X32-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

; Consecutive byte shift right by one with a zero shifted in at the top:
; matched as psrldq.
define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X32-LABEL: t8:
; X32:       # %bb.0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

; Same psrldq pattern as @t8 but with an undef second operand.
define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X32-LABEL: t9:
; X32:       # %bb.0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}