; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE

; Codegen checks for conversions between 2-element integer vectors (i8, i16,
; i32; signed and unsigned) and <2 x float> on an i386 target with AVX.
; Each function is checked twice: once with the default type legalization and
; once with -x86-experimental-vector-widening-legalization enabled.

; Signed <2 x i8> -> <2 x float> (sitofp).
define <2 x float> @cvt_v2i8_v2f32(<2 x i8> %src) {
; CHECK-LABEL: cvt_v2i8_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllq $56, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2i8_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovsxbd %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = sitofp <2 x i8> %src to <2 x float>
  ret <2 x float> %res
}

; Signed <2 x i16> -> <2 x float> (sitofp).
define <2 x float> @cvt_v2i16_v2f32(<2 x i16> %src) {
; CHECK-LABEL: cvt_v2i16_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllq $48, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2i16_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovsxwd %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = sitofp <2 x i16> %src to <2 x float>
  ret <2 x float> %res
}

; Signed <2 x i32> -> <2 x float> (sitofp).
define <2 x float> @cvt_v2i32_v2f32(<2 x i32> %src) {
; CHECK-LABEL: cvt_v2i32_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2i32_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = sitofp <2 x i32> %src to <2 x float>
  ret <2 x float> %res
}

; Unsigned <2 x i8> -> <2 x float> (uitofp).
define <2 x float> @cvt_v2u8_v2f32(<2 x i8> %src) {
; CHECK-LABEL: cvt_v2u8_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2u8_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = uitofp <2 x i8> %src to <2 x float>
  ret <2 x float> %res
}

; Unsigned <2 x i16> -> <2 x float> (uitofp).
define <2 x float> @cvt_v2u16_v2f32(<2 x i16> %src) {
; CHECK-LABEL: cvt_v2u16_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[8,9],zero,zero,xmm0[8,9],zero,zero,xmm0[10,11],zero,zero
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2u16_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = uitofp <2 x i16> %src to <2 x float>
  ret <2 x float> %res
}

; Unsigned <2 x i32> -> <2 x float> (uitofp).
define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
; CHECK-LABEL: cvt_v2u32_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    vmovaps {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2u32_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-WIDE-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
; CHECK-WIDE-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = uitofp <2 x i32> %src to <2 x float>
  ret <2 x float> %res
}

; <2 x float> -> signed <2 x i8> (fptosi).
define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm0
; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = fptosi <2 x float> %src to <2 x i8>
  ret <2 x i8> %res
}

; <2 x float> -> signed <2 x i16> (fptosi).
define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-WIDE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vzeroupper
; CHECK-WIDE-NEXT:    retl
  %res = fptosi <2 x float> %src to <2 x i16>
  ret <2 x i16> %res
}

; <2 x float> -> signed <2 x i32> (fptosi).
define <2 x i32> @cvt_v2f32_v2i32(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = fptosi <2 x float> %src to <2 x i32>
  ret <2 x i32> %res
}

; <2 x float> -> unsigned <2 x i8> (fptoui).
define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vucomiss %xmm2, %xmm1
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    shll $31, %eax
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    vucomiss %xmm2, %xmm0
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    shll $31, %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm0
; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = fptoui <2 x float> %src to <2 x i8>
  ret <2 x i8> %res
}

; <2 x float> -> unsigned <2 x i16> (fptoui).
define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vucomiss %xmm2, %xmm1
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    shll $31, %eax
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    vucomiss %xmm2, %xmm0
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    shll $31, %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-WIDE-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vzeroupper
; CHECK-WIDE-NEXT:    retl
  %res = fptoui <2 x float> %src to <2 x i16>
  ret <2 x i16> %res
}

; <2 x float> -> unsigned <2 x i32> (fptoui).
define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vucomiss %xmm2, %xmm1
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    shll $31, %eax
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    vucomiss %xmm2, %xmm0
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    shll $31, %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    subl $68, %esp
; CHECK-WIDE-NEXT:    .cfi_def_cfa_offset 72
; CHECK-WIDE-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll (%esp)
; CHECK-WIDE-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-WIDE-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vpinsrd $3, (%esp), %xmm0, %xmm0
; CHECK-WIDE-NEXT:    addl $68, %esp
; CHECK-WIDE-NEXT:    retl
  %res = fptoui <2 x float> %src to <2 x i32>
  ret <2 x i32> %res
}