; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2

;
; Double to Signed Integer
;

define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    # kill
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptosi <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}
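; SSE2 has no packed f64 -> i64 conversion, so the i64 cases below are expected
; to scalarize: each lane goes through cvttsd2si and is repacked with
; punpcklqdq, and AVX additionally splits the 256-bit input with vextractf128.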
define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Double to Unsigned Integer
;
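; Note on the fptoui lowerings below: pre-AVX512 x86 only has signed truncating
; conversions, so unsigned conversions are expanded around cvttsd2si. The
; scalar constant loaded with movsd is presumably 2^63: inputs below it convert
; directly, inputs at or above it are reduced by 2^63 before converting and the
; sign bit (0x8000000000000000) is xor'ed back in, with ucomisd + cmovae
; selecting between the two results per lane.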
define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    subsd %xmm2, %xmm1
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}
define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovbq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptoui <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT:    subsd %xmm3, %xmm0
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    subsd %xmm3, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}
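; For f64 -> u32 every in-range result fits in a signed 64-bit integer, so the
; AVX lowering below can use plain vcvttsd2si into a 64-bit register and keep
; only the low 32 bits via vpinsrd, avoiding the 2^63 fixup entirely.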
define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm1, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Float to Signed Integer
;

define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}
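; f32 -> i32 maps directly onto cvttps2dq; f32 -> i64 has no packed equivalent,
; so the i64 cases below scalarize through cvttss2si, with shufps/movshdup
; extracting the individual lanes.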
define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptosi <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}
define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;
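; As with f64 -> u32, an f32 -> u32 result fits in a signed 64-bit register, so
; these lowerings use cvttss2si to a 64-bit GPR and keep only the low 32 bits.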
define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}
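; f32 -> u64 needs the full unsigned expansion again: subss by what is
; presumably 2^63, convert, xor the sign bit back in, and pick the correct
; result per lane with ucomiss + cmovae.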
define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rdx
; AVX-NEXT:    vucomiss %xmm2, %xmm1
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm2, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rdx, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}
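; For 8 x f32 -> u32, AVX splits the vector with vextractf128 and assembles
; each 128-bit half with cvttss2si + vpinsrd before recombining the halves
; with vinsertf128.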
define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}
define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;
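; The conversions below all have constant operands, so they should constant
; fold to a vector constant load (movaps/vmovaps from the constant pool).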
define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}
define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}