; Codegen tests for the IR 'half' type on x86.
;
; Three configurations are exercised by the invocations below:
;   * x86-64 without F16C: conversions are expected to go through the
;     __gnu_h2f_ieee / __gnu_f2h_ieee / __truncdfhf2 library calls.
;   * x86-64 with F16C: float<->half conversions are expected to use
;     vcvtph2ps / vcvtps2ph (double->half still calls __truncdfhf2).
;   * i686 with SSE2: only used by the f80 truncation test at the end.
;
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false \
; RUN:    | FileCheck %s -check-prefix=CHECK-I686

; A half load/store pair is expected to be a plain 16-bit move through a
; scratch register.
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]
; CHECK: movw [[TMP]], (%rsi)
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Bitcasting a loaded half to i16 is expected to be a zero-extending load.
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: movzwl (%rdi), %eax
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Bitcasting an i16 to half and storing it is expected to be a 16-bit store.
define void @test_bitcast_to_half(half* %addr, i16 %in) {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: movw %si, (%rdi)
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; half -> float: a jmp to the library routine without F16C, vcvtph2ps with it.
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:

; CHECK-LIBCALL: jmp __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; half -> double: extended to float first, then float -> double.
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtss2sd
  %val16 = load half, half* %addr
  %val64 = fpext half %val16 to double
  ret double %val64
}

; float -> half: library call without F16C, vcvtps2ph with it.
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; double -> half: both x86-64 configurations are expected to call __truncdfhf2.
define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:

; CHECK-LIBCALL: callq __truncdfhf2
; CHECK-F16C: callq __truncdfhf2
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; half -> signed i64: extend to float, then a truncating float -> i64 convert.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LABEL: test_fptosi_i64:

; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; signed i64 -> half: convert to float, then float -> half.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_sitofp_i64:

; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; half -> unsigned i64.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LABEL: test_fptoui_i64:

; FP_TO_UINT is expanded using FP_TO_SINT
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; The xorq destination must be the register that movabsq just loaded, so the
; previously bound REG6 is reused rather than re-bound.
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; unsigned i64 -> half: branch on the sign bit to pick the float conversion
; sequence, then float -> half.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_uitofp_i64:
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
; CHECK-NEXT: andl $1, [[REG0]]
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]

; simple conversion to float if non-negative
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]

; convert using shift+or if negative
; CHECK-NEXT: [[LABEL1]]:
; CHECK-NEXT: shrq %rdi
; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:%[a-z0-9]+]]

; convert float to half
; CHECK-NEXT: [[LABEL2]]:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq

  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; <4 x half> -> <4 x float>: four scalar conversions are expected.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend32_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x float> %b
}

; <4 x half> -> <4 x double>: four half -> float conversions and four
; float -> double conversions are expected; the DAG checks allow interleaving.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend64_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C: vcvtss2sd
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; <4 x float> -> <4 x half>: four scalar truncations followed by four
; 16-bit stores are expected.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc32_vec4:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; <4 x double> -> <4 x half>: four __truncdfhf2 calls followed by four
; 16-bit stores are expected in both x86-64 configurations.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc64_vec4:
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32. The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
; The i686 invocation enables only its own prefix, so it needs its own label
; to anchor the negative check to this function.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686-NOT: calll __truncxfhf2
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}

attributes #0 = { nounwind }