; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C

; Code generation checks for the IR 'half' type on x86-64.
;
; The file is compiled twice: once with the f16c feature disabled and once
; with it enabled.  Checks shared by both configurations use the common
; prefix; the "-LIBCALL" suffixed prefix applies only to the run without
; f16c (conversions are expected to go through __gnu_h2f_ieee /
; __gnu_f2h_ieee), and the "-F16C" suffixed prefix applies only to the run
; with f16c (conversions are expected to use vcvtph2ps / vcvtps2ph).

; A half load followed by a half store is expected to be a plain 16-bit copy.
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]
; CHECK: movw [[TMP]], (%rsi)
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Reinterpreting a loaded half as i16 is expected to be a single
; zero-extending 16-bit load into the return register.
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: movzwl (%rdi), %eax
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Reinterpreting an i16 argument as half and storing it is expected to be a
; single 16-bit store.
define void @test_bitcast_to_half(half* %addr, i16 %in) {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: movw %si, (%rdi)
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; half -> float extension: a jump to the helper without f16c, a hardware
; conversion with f16c.
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:

; CHECK-LIBCALL: jmp __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; half -> double extension: expected to go through float first in both
; configurations, followed by a float -> double conversion.
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtss2sd
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to double
  ret double %val32
}

; float -> half truncation: helper call without f16c, hardware conversion
; with f16c.
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; double -> half truncation: both configurations are expected to call
; __truncdfhf2.
define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:

; CHECK-LIBCALL: callq __truncdfhf2
; CHECK-F16C: callq __truncdfhf2
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; half -> signed i64: extend to float, then a truncating float -> integer
; conversion.  The whole instruction sequence is pinned line by line.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LABEL: test_fptosi_i64:

; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rdx
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}

; signed i64 -> half: integer -> float conversion, then float -> half, then
; a 16-bit store.  The whole instruction sequence is pinned line by line,
; including the epilogue of the configuration without f16c.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_sitofp_i64:

; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-LIBCALL-NEXT: retq

; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; half -> unsigned i64.  The whole instruction sequence is pinned line by
; line for both configurations.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LABEL: test_fptoui_i64:

; FP_TO_UINT is expanded using FP_TO_SINT
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rdx
; CHECK-LIBCALL-NEXT: retq

; The xorq destination must be the register written by the movabsq, so it
; is matched as a use of REG6 rather than a fresh definition.
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; unsigned i64 -> half.  The expected code branches on the sign bit of the
; input, converts to float on each path, and then converts float -> half at
; the join point.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_uitofp_i64:
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
; CHECK-NEXT: andl $1, [[REG0]]
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]

; simple conversion to float if non-negative
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]

; convert using shift+or if negative
; CHECK-NEXT: [[LABEL1]]:
; CHECK-NEXT: shrq %rdi
; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:[%a-z0-9]+]]

; convert float to half
; CHECK-NEXT: [[LABEL2]]:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq

  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; <4 x half> -> <4 x float>: four scalar half -> float conversions are
; expected in either configuration.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend32_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x float> %b
}

; <4 x half> -> <4 x double>: four half -> float conversions and four
; float -> double conversions are expected; the -DAG checks allow them to be
; interleaved in any order between the first and last anchored matches.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend64_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C: vcvtss2sd
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; <4 x float> -> <4 x half>: four scalar float -> half conversions followed
; by four 16-bit stores.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc32_vec4:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; <4 x double> -> <4 x half>: four calls to __truncdfhf2 followed by four
; 16-bit stores, in both configurations.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc64_vec4:
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: callq __truncdfhf2
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

attributes #0 = { nounwind }