; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s

; Code generation test for the SSE4.1 zero-extending move intrinsics
; (pmovzxbw / pmovzxwd). Each function below is paired with the exact
; instruction sequence expected in the generated assembly.

; rdar://11897677

; A 16-byte load from %src feeds the pmovzxbw intrinsic and the result is
; written to %dest with the unaligned-store intrinsic. The expected output
; has the load folded into the memory operand of pmovzxbw, followed directly
; by the unaligned store and the return.
; CHECK-LABEL: intrin_pmov:
; CHECK: pmovzxbw (%{{.*}}), %xmm0
; CHECK-NEXT: movdqu
; CHECK-NEXT: ret
define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
  %1 = bitcast i8* %src to <2 x i64>*
  %2 = load <2 x i64>, <2 x i64>* %1, align 16
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
  %5 = bitcast i16* %dest to i8*
  %6 = bitcast <8 x i16> %4 to <16 x i8>
  tail call void @llvm.x86.sse2.storeu.dq(i8* %5, <16 x i8> %6) nounwind
  ret void
}

declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

; rdar://15245794

; The double argument is reinterpreted as <4 x i16> and widened to <8 x i16>
; with undef upper lanes before being passed to the pmovzxwd intrinsic. The
; expected output is a single register-to-register pmovzxwd and the return.
define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
; CHECK-LABEL: foo0:
; CHECK: pmovzxwd %xmm0, %xmm0
; CHECK-NEXT: ret
  %tmp = bitcast double %v.coerce to <4 x i16>
  %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
  ret <4 x i32> %tmp2
}

; Same shape as foo0, but the double is reinterpreted as <8 x i8> and widened
; to <16 x i8> with undef upper lanes before being passed to the pmovzxbw
; intrinsic. The expected output is a single register-to-register pmovzxbw
; and the return.
define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
; CHECK-LABEL: foo1:
; CHECK: pmovzxbw %xmm0, %xmm0
; CHECK-NEXT: ret
  %tmp = bitcast double %v.coerce to <8 x i8>
  %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
  ret <8 x i16> %tmp2
}

declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone