; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=1 \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWON
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=0 \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWOFF
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false -fixup-byte-word-insts=1 \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C -check-prefix=BWON
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false -fixup-byte-word-insts=0 \
; RUN:    | FileCheck %s -check-prefix=CHECK-I686

; Code generation tests for the IR 'half' type on x86.
;
; The llc invocations above cover four configurations:
;   * x86-64 without F16C, -fixup-byte-word-insts=1
;   * x86-64 without F16C, -fixup-byte-word-insts=0
;   * x86-64 with F16C,    -fixup-byte-word-insts=1
;   * i686 with SSE2 (only test_f80trunc_nodagcombine has checks for it)

; Plain load/store of a half: a 16-bit integer move, no conversion.
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; BWON:  movzwl (%rdi), %eax
; BWOFF: movw (%rdi), %ax
; CHECK: movw %ax, (%rsi)
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Bitcasting a loaded half to i16 needs nothing beyond the 16-bit load.
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; BWON:  movzwl (%rdi), %eax
; BWOFF: movw (%rdi), %ax
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Bitcasting an i16 argument to half and storing it is a single 16-bit store.
define void @test_bitcast_to_half(half* %addr, i16 %in) {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: movw %si, (%rdi)
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; half -> float: a jump to __gnu_h2f_ieee without F16C, vcvtph2ps with it.
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:

; CHECK-LIBCALL: jmp __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; half -> double: extend to float first, then float -> double.
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtss2sd
  %val16 = load half, half* %addr
  %val64 = fpext half %val16 to double
  ret double %val64
}

; float -> half: call to __gnu_f2h_ieee without F16C, vcvtps2ph with it.
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; double -> half: both configurations are expected to call __truncdfhf2.
define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:

; CHECK-LIBCALL: callq __truncdfhf2
; CHECK-F16C: callq __truncdfhf2
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; half -> signed i64: extend to float, then a truncating float -> i64 convert.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LABEL: test_fptosi_i64:

; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; signed i64 -> half: convert to float, then float -> half, then store.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_sitofp_i64:

; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; half -> unsigned i64.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LABEL: test_fptoui_i64:

; FP_TO_UINT is expanded using FP_TO_SINT
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; The xorq destination must be the register loaded by movabsq, so it is
; matched as a use of REG6 rather than captured again.
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; unsigned i64 -> half: branches on the sign bit of the input.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_uitofp_i64:
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
; CHECK-NEXT: andl $1, [[REG0]]
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]

; simple conversion to float if non-negative
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]

; convert using shift+or if negative
; CHECK-NEXT: [[LABEL1]]:
; CHECK-NEXT: shrq %rdi
; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:[%a-z0-9]+]]

; convert float to half
; CHECK-NEXT: [[LABEL2]]:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq

  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; <4 x half> -> <4 x float>: four scalar half -> float conversions expected.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend32_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %b
}

; <4 x half> -> <4 x double>: four half -> float conversions and four
; float -> double conversions; the DAG checks let them interleave.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend64_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C: vcvtss2sd
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; <4 x float> -> <4 x half>: four scalar conversions, then four 16-bit stores.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc32_vec4:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; <4 x double> -> <4 x half>: four __truncdfhf2 calls, then four 16-bit stores.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc64_vec4:
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32.  The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686-NOT: calll __truncxfhf2
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}

; sitofp i32 -> half followed by a half fadd: the integer is converted to
; float, rounded to half, and extended back to float before the add.
; CHECK-LABEL: test_sitofp_fadd_i32:

; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: subq $16, %rsp
; CHECK-LIBCALL-NEXT: movl %edi, %ebx
; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss %xmm0, 12(%rsp)
; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: addss 12(%rsp), %xmm0
; CHECK-LIBCALL-NEXT: addq $16, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rsi), %eax
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm1
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-F16C-NEXT: retq

define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
  %tmp0 = load half, half* %b
  %tmp1 = sitofp i32 %a to half
  %tmp2 = fadd half %tmp0, %tmp1
  %tmp3 = fpext half %tmp2 to float
  ret float %tmp3
}

attributes #0 = { nounwind }