; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
; RUN:    | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \
; RUN:    | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
; RUN:    | FileCheck %s -check-prefixes=CHECK-I686

; Code generation tests for the IR 'half' type on x86. The four llc
; invocations above map onto check prefixes as follows:
;   x86_64, no F16C, -fixup-byte-word-insts=1 : CHECK, CHECK-LIBCALL, BWON, BWON-NOF16C
;   x86_64, no F16C, -fixup-byte-word-insts=0 : CHECK, CHECK-LIBCALL, BWOFF
;   x86_64, F16C,    -fixup-byte-word-insts=1 : CHECK, BWON, BWON-F16C
;   i686 with SSE2,  -fixup-byte-word-insts=0 : CHECK-I686
; The assertion lines inside each function are generated; regenerate them with
; the script named on the first line rather than editing them by hand.

; Copies a half from %in to %out. The expected code is a plain 16-bit integer
; load and store with no floating-point conversion.
define void @test_load_store(half* %in, half* %out) #0 {
; BWON-LABEL: test_load_store:
; BWON:       # %bb.0:
; BWON-NEXT:    movzwl (%rdi), %eax
; BWON-NEXT:    movw %ax, (%rsi)
; BWON-NEXT:    retq
;
; BWOFF-LABEL: test_load_store:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    movw (%rdi), %ax
; BWOFF-NEXT:    movw %ax, (%rsi)
; BWOFF-NEXT:    retq
;
; CHECK-I686-LABEL: test_load_store:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    movw (%ecx), %cx
; CHECK-I686-NEXT:    movw %cx, (%eax)
; CHECK-I686-NEXT:    retl
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Loads a half and bitcasts it to i16. The expected code is only a 16-bit load.
define i16 @test_bitcast_from_half(half* %addr) #0 {
; BWON-LABEL: test_bitcast_from_half:
; BWON:       # %bb.0:
; BWON-NEXT:    movzwl (%rdi), %eax
; BWON-NEXT:    retq
;
; BWOFF-LABEL: test_bitcast_from_half:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    movw (%rdi), %ax
; BWOFF-NEXT:    retq
;
; CHECK-I686-LABEL: test_bitcast_from_half:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movw (%eax), %ax
; CHECK-I686-NEXT:    retl
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Bitcasts an i16 to half and stores it. The expected code is only a 16-bit
; store.
define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw %si, (%rdi)
; CHECK-NEXT:    retq
;
; CHECK-I686-LABEL: test_bitcast_to_half:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT:    movw %ax, (%ecx)
; CHECK-I686-NEXT:    retl
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; Loads a half and extends it to float. Without F16C the conversion is a call
; to __gnu_h2f_ieee; with F16C it is a vcvtph2ps instruction.
define float @test_extend32(half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee # TAILCALL
;
; BWON-F16C-LABEL: test_extend32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; Loads a half and extends it to double. The x86_64 expectations convert half
; to float first and then float to double (cvtss2sd / vcvtss2sd).
define double @test_extend64(half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to double
  ret double %val32
}

; Truncates a float to half and stores it. Without F16C the conversion is a
; call to __gnu_f2h_ieee; with F16C it is a vcvtps2ph instruction.
define void @test_trunc32(float %in, half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_trunc32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movw %ax, (%rdi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $8, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $8, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; Truncates a double to half and stores it. Every configuration, including the
; F16C one, is expected to call __truncdfhf2.
define void @test_trunc64(double %in, half* %addr) #0 {
; CHECK-LABEL: test_trunc64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    callq __truncdfhf2
; CHECK-NEXT:    movw %ax, (%rbx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $8, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movsd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $8, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; Loads a half and converts it to a signed i64. The x86_64 expectations extend
; to float and use cvttss2si; the i686 expectations call __fixsfdi.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptosi_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_fptosi_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_fptosi_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __fixsfdi
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; Converts a signed i64 to half and stores it. The expected code converts the
; integer to float first and then converts that float to half.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_sitofp_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movw %ax, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $24, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $24, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; Loads a half and converts it to an unsigned i64. The x86_64 expectations use
; an inline sequence built around cvttss2si; the i686 expectations call
; __fixunssfdi.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptoui_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    movaps %xmm0, %xmm2
; CHECK-LIBCALL-NEXT:    subss %xmm1, %xmm2
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm2, %rax
; CHECK-LIBCALL-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; CHECK-LIBCALL-NEXT:    xorq %rax, %rcx
; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT:    cmovaeq %rcx, %rax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_fptoui_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; BWON-F16C-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; BWON-F16C-NEXT:    vcvttss2si %xmm2, %rax
; BWON-F16C-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; BWON-F16C-NEXT:    xorq %rax, %rcx
; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT:    cmovaeq %rcx, %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_fptoui_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __fixunssfdi
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; Converts an unsigned i64 to half and stores it. The x86_64 expectations
; branch on the sign bit of the input before converting to float; the i686
; expectations use fildll followed by an fadds from a constant-pool table.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_uitofp_i64:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
; CHECK-LIBCALL-NEXT:    js .LBB10_1
; CHECK-LIBCALL-NEXT:  # %bb.2:
; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
; CHECK-LIBCALL-NEXT:  .LBB10_1:
; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
; CHECK-LIBCALL-NEXT:    shrq %rax
; CHECK-LIBCALL-NEXT:    andl $1, %edi
; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:  .LBB10_3:
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_uitofp_i64:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    testq %rdi, %rdi
; BWON-F16C-NEXT:    js .LBB10_1
; BWON-F16C-NEXT:  # %bb.2:
; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    jmp .LBB10_3
; BWON-F16C-NEXT:  .LBB10_1:
; BWON-F16C-NEXT:    movq %rdi, %rax
; BWON-F16C-NEXT:    shrq %rax
; BWON-F16C-NEXT:    andl $1, %edi
; BWON-F16C-NEXT:    orq %rax, %rdi
; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:  .LBB10_3:
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movw %ax, (%rsi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_uitofp_i64:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $24, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    xorl %eax, %eax
; CHECK-I686-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    setns %al
; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%esi)
; CHECK-I686-NEXT:    addl $24, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; Loads <4 x half> and extends it to <4 x float>. In every configuration the
; expected code converts the four elements one at a time and then reassembles
; the vector.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend32_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $48, %rsp
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 4(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-LIBCALL-NEXT:    addq $48, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend32_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm2
; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm3
; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend32_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $56, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl (%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT:    addl $56, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %b
}

; Loads <4 x half> and extends it to <4 x double>. The expected code converts
; each element separately, going through float on the way to double.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend64_vec4:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $16, %rsp
; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
; CHECK-LIBCALL-NEXT:    movzwl 4(%rdi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl (%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm2
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm1
; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-LIBCALL-NEXT:    addq $16, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_extend64_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm2
; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm3
; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
; BWON-F16C-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
; BWON-F16C-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; BWON-F16C-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_extend64_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $88, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT:    movzwl (%esi), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; CHECK-I686-NEXT:    addl $88, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    retl
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; Truncates <4 x float> to <4 x half> and stores it. The expected code converts
; each element separately and writes four 16-bit stores. The two no-F16C
; x86_64 configurations have separate expectations because they differ in how
; the intermediate 16-bit results are copied between registers (movl vs movw).
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
; BWON-NOF16C-LABEL: test_trunc32_vec4:
; BWON-NOF16C:       # %bb.0:
; BWON-NOF16C-NEXT:    pushq %rbp
; BWON-NOF16C-NEXT:    pushq %r15
; BWON-NOF16C-NEXT:    pushq %r14
; BWON-NOF16C-NEXT:    pushq %rbx
; BWON-NOF16C-NEXT:    subq $24, %rsp
; BWON-NOF16C-NEXT:    movq %rdi, %rbx
; BWON-NOF16C-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movl %eax, %r14d
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movl %eax, %r15d
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movl %eax, %ebp
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
; BWON-NOF16C-NEXT:    movw %ax, (%rbx)
; BWON-NOF16C-NEXT:    movw %bp, 6(%rbx)
; BWON-NOF16C-NEXT:    movw %r15w, 4(%rbx)
; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
; BWON-NOF16C-NEXT:    addq $24, %rsp
; BWON-NOF16C-NEXT:    popq %rbx
; BWON-NOF16C-NEXT:    popq %r14
; BWON-NOF16C-NEXT:    popq %r15
; BWON-NOF16C-NEXT:    popq %rbp
; BWON-NOF16C-NEXT:    retq
;
; BWOFF-LABEL: test_trunc32_vec4:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    pushq %rbp
; BWOFF-NEXT:    pushq %r15
; BWOFF-NEXT:    pushq %r14
; BWOFF-NEXT:    pushq %rbx
; BWOFF-NEXT:    subq $24, %rsp
; BWOFF-NEXT:    movq %rdi, %rbx
; BWOFF-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, %r14w
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, %r15w
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, %bp
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    callq __gnu_f2h_ieee
; BWOFF-NEXT:    movw %ax, (%rbx)
; BWOFF-NEXT:    movw %bp, 6(%rbx)
; BWOFF-NEXT:    movw %r15w, 4(%rbx)
; BWOFF-NEXT:    movw %r14w, 2(%rbx)
; BWOFF-NEXT:    addq $24, %rsp
; BWOFF-NEXT:    popq %rbx
; BWOFF-NEXT:    popq %r14
; BWOFF-NEXT:    popq %r15
; BWOFF-NEXT:    popq %rbp
; BWOFF-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc32_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vmovd %xmm1, %eax
; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vmovd %xmm1, %ecx
; BWON-F16C-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vmovd %xmm1, %edx
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %esi
; BWON-F16C-NEXT:    movw %si, (%rdi)
; BWON-F16C-NEXT:    movw %dx, 6(%rdi)
; BWON-F16C-NEXT:    movw %cx, 4(%rdi)
; BWON-F16C-NEXT:    movw %ax, 2(%rdi)
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc32_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %ebp
; CHECK-I686-NEXT:    pushl %ebx
; CHECK-I686-NEXT:    pushl %edi
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $44, %esp
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; CHECK-I686-NEXT:    movss %xmm1, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %si
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %di
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %bx
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, (%ebp)
; CHECK-I686-NEXT:    movw %bx, 6(%ebp)
; CHECK-I686-NEXT:    movw %di, 4(%ebp)
; CHECK-I686-NEXT:    movw %si, 2(%ebp)
; CHECK-I686-NEXT:    addl $44, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    popl %edi
; CHECK-I686-NEXT:    popl %ebx
; CHECK-I686-NEXT:    popl %ebp
; CHECK-I686-NEXT:    retl
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; Truncates <4 x double> to <4 x half> and stores it. Every configuration,
; including the F16C one, is expected to make four calls to __truncdfhf2.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
; BWON-NOF16C-LABEL: test_trunc64_vec4:
; BWON-NOF16C:       # %bb.0:
; BWON-NOF16C-NEXT:    pushq %rbp
; BWON-NOF16C-NEXT:    pushq %r15
; BWON-NOF16C-NEXT:    pushq %r14
; BWON-NOF16C-NEXT:    pushq %rbx
; BWON-NOF16C-NEXT:    subq $40, %rsp
; BWON-NOF16C-NEXT:    movq %rdi, %rbx
; BWON-NOF16C-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
; BWON-NOF16C-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movl %eax, %r14d
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movl %eax, %r15d
; BWON-NOF16C-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movl %eax, %ebp
; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT:    callq __truncdfhf2
; BWON-NOF16C-NEXT:    movw %ax, 4(%rbx)
; BWON-NOF16C-NEXT:    movw %bp, (%rbx)
; BWON-NOF16C-NEXT:    movw %r15w, 6(%rbx)
; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
; BWON-NOF16C-NEXT:    addq $40, %rsp
; BWON-NOF16C-NEXT:    popq %rbx
; BWON-NOF16C-NEXT:    popq %r14
; BWON-NOF16C-NEXT:    popq %r15
; BWON-NOF16C-NEXT:    popq %rbp
; BWON-NOF16C-NEXT:    retq
;
; BWOFF-LABEL: test_trunc64_vec4:
; BWOFF:       # %bb.0:
; BWOFF-NEXT:    pushq %rbp
; BWOFF-NEXT:    pushq %r15
; BWOFF-NEXT:    pushq %r14
; BWOFF-NEXT:    pushq %rbx
; BWOFF-NEXT:    subq $40, %rsp
; BWOFF-NEXT:    movq %rdi, %rbx
; BWOFF-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
; BWOFF-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, %r14w
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, %r15w
; BWOFF-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, %bp
; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT:    callq __truncdfhf2
; BWOFF-NEXT:    movw %ax, 4(%rbx)
; BWOFF-NEXT:    movw %bp, (%rbx)
; BWOFF-NEXT:    movw %r15w, 6(%rbx)
; BWOFF-NEXT:    movw %r14w, 2(%rbx)
; BWOFF-NEXT:    addq $40, %rsp
; BWOFF-NEXT:    popq %rbx
; BWOFF-NEXT:    popq %r14
; BWOFF-NEXT:    popq %r15
; BWOFF-NEXT:    popq %rbp
; BWOFF-NEXT:    retq
;
; BWON-F16C-LABEL: test_trunc64_vec4:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rbp
; BWON-F16C-NEXT:    pushq %r15
; BWON-F16C-NEXT:    pushq %r14
; BWON-F16C-NEXT:    pushq %rbx
; BWON-F16C-NEXT:    subq $88, %rsp
; BWON-F16C-NEXT:    movq %rdi, %rbx
; BWON-F16C-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT:    vzeroupper
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movl %eax, %r14d
; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
; BWON-F16C-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT:    vzeroupper
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movl %eax, %r15d
; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; BWON-F16C-NEXT:    vzeroupper
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movl %eax, %ebp
; BWON-F16C-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-F16C-NEXT:    callq __truncdfhf2
; BWON-F16C-NEXT:    movw %ax, 4(%rbx)
; BWON-F16C-NEXT:    movw %bp, (%rbx)
; BWON-F16C-NEXT:    movw %r15w, 6(%rbx)
; BWON-F16C-NEXT:    movw %r14w, 2(%rbx)
; BWON-F16C-NEXT:    addq $88, %rsp
; BWON-F16C-NEXT:    popq %rbx
; BWON-F16C-NEXT:    popq %r14
; BWON-F16C-NEXT:    popq %r15
; BWON-F16C-NEXT:    popq %rbp
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_trunc64_vec4:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %ebp
; CHECK-I686-NEXT:    pushl %ebx
; CHECK-I686-NEXT:    pushl %edi
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $60, %esp
; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, %si
; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, %di
; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, %bx
; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncdfhf2
; CHECK-I686-NEXT:    movw %ax, 6(%ebp)
; CHECK-I686-NEXT:    movw %bx, 4(%ebp)
; CHECK-I686-NEXT:    movw %di, 2(%ebp)
; CHECK-I686-NEXT:    movw %si, (%ebp)
; CHECK-I686-NEXT:    addl $60, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    popl %edi
; CHECK-I686-NEXT:    popl %ebx
; CHECK-I686-NEXT:    popl %ebp
; CHECK-I686-NEXT:    retl
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; External function returning float; it is only declared in this file.
declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32.  The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
; Truncates the float returned by @test_floatret to half and returns the half.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    callq test_floatret
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rax
; BWON-F16C-NEXT:    callq test_floatret
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    popq %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    calll test_floatret
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movzwl %ax, %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %fret = call float @test_floatret()
  %hret = fptrunc float %fret to half
  ret half %hret
}

; Loads a half from %b, converts the i32 %a to half, adds the two values in
; half precision and returns the sum extended to float.
define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $16, %rsp
; CHECK-LIBCALL-NEXT:    movl %edi, %ebx
; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    cvtsi2ssl %ebx, %xmm0
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT:    addq $16, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movswl (%rsi), %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtsi2ssl %edi, %xmm1, %xmm1
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_fadd_i32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $28, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movzwl %ax, %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    addl $28, %esp
; CHECK-I686-NEXT:    retl
  %loaded = load half, half* %b
  %converted = sitofp i32 %a to half
  %sum = fadd half %loaded, %converted
  %widened = fpext half %sum to float
  ret float %widened
}

attributes #0 = { nounwind }