; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    movlpd %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vmovlpd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vmovupd %xmm1, 16(%eax)
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vmovupd %xmm1, 16(%rsi)
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

define <4 x float> @fptrunc_frommem2_zext(<2 x double> * %ld) {
; X32-SSE-LABEL: fptrunc_frommem2_zext:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    cvtpd2ps (%eax), %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2_zext:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg = load <2 x double>, <2 x double> * %ld, align 16
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
; X32-SSE-LABEL: fptrunc_fromreg2_zext:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromreg2_zext:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromreg2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromreg2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

; FIXME: For exact truncations we should be able to fold this.
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy {{\.LCPI.*}}, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0 = insertelement <4 x double> undef, double 1.0, i32 0
  %1 = insertelement <4 x double> %0, double -2.0, i32 1
  %2 = insertelement <4 x double> %1, double +4.0, i32 2
  %3 = insertelement <4 x double> %2, double -0.0, i32 3
  %4 = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}