; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST
; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
; RUN:     -mtriple=arm64-eabi -aarch64-neon-syntax=apple \
; RUN:     | FileCheck %s --check-prefixes=GISEL,FALLBACK

; Code generation tests for floating-point widening (fpext), narrowing
; (fptrunc), the fcvtxn intrinsic and the scalar fp16 conversion intrinsics.
; The same IR is compiled three ways: the default selector, -O0 with
; fast-isel, and GlobalISel with missed-optimization remarks enabled.

; Widen <2 x float> to <2 x double>; a single fcvtl is expected.
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32)
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32)
define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f64_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_f64_f32:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl v0.2d, v0.2s
; GISEL-NEXT:    ret
  %vcvt1.i = fpext <2 x float> %x to <2 x double>
  ret <2 x double> %vcvt1.i
}

; Widen lanes 2 and 3 of a <4 x float>; the extract is expected to fold into
; fcvtl2.
; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32)
; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32)
define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_f64_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_f64_f32:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
  %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
  ret <2 x double> %vcvt1.i
}

; The following functions extract the upper half of a 128-bit vector using a
; different element type, bitcast that half to <2 x float>, and widen it.
; Each one is still expected to select fcvtl2 on the full register.

; Upper half taken as element 1 of a <2 x double> view of the input.
define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %bc1 = bitcast <4 x float> %x to <2 x double>
  %ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1>
  %bc2 = bitcast <1 x double> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half taken as element 1 of a <2 x i64>.
define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
  %bc2 = bitcast <1 x i64> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half taken as elements 2-3 of a <4 x i32>.
define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %bc2 = bitcast <2 x i32> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half taken as elements 4-7 of an <8 x i16>.
define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %bc2 = bitcast <4 x i16> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Upper half taken as elements 8-15 of a <16 x i8>.
define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.2d, v0.4s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.2d, v0.4s
; GISEL-NEXT:    ret
  %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bc2 = bitcast <8 x i8> %ext to <2 x float>
  %r = fpext <2 x float> %bc2 to <2 x double>
  ret <2 x double> %r
}

; Same upper-half extraction patterns, but the half is bitcast to <4 x half>
; and widened to <4 x float>; fcvtl2 with .4s/.8h arrangement is expected.

; Upper half taken as element 1 of a <2 x i64>.
define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
  %bc2 = bitcast <1 x i64> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Upper half taken as elements 2-3 of a <4 x i32>.
define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %bc2 = bitcast <2 x i32> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Upper half taken as elements 4-7 of an <8 x i16>.
define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %bc2 = bitcast <4 x i16> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Upper half taken as elements 8-15 of a <16 x i8>.
define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtl2 v0.4s, v0.8h
; GISEL-NEXT:    ret
  %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bc2 = bitcast <8 x i8> %ext to <4 x half>
  %r = fpext <4 x half> %bc2 to <4 x float>
  ret <4 x float> %r
}

; Narrow <2 x double> to <2 x float>; a single fcvtn is expected.
; The operation in this function is fptrunc, so the fallback remark patterns
; name G_FPTRUNC / fptrunc rather than the fpext spellings used above.
; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_f32_f64)
; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_f32_f64)
define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f32_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtn v0.2s, v0.2d
; GISEL-NEXT:    ret
  %vcvt1.i = fptrunc <2 x double> %v to <2 x float>
  ret <2 x float> %vcvt1.i
}

; Narrow %v and place the result in the upper half next to %x; fcvtn2 is
; expected. The operation under test is again fptrunc.
; FALLBACK-NOT: remark{{.*}}G_FPTRUNC{{.*}}(in function: test_vcvt_high_f32_f64)
; FALLBACK-NOT: remark{{.*}}fptrunc{{.*}}(in function: test_vcvt_high_f32_f64)
define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_f32_f64:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
; GENERIC-NEXT:    ret
;
; FAST-LABEL: test_vcvt_high_f32_f64:
; FAST:       // %bb.0:
; FAST-NEXT:    mov.16b v2, v0
; FAST-NEXT:    // implicit-def: $q0
; FAST-NEXT:    mov.16b v0, v2
; FAST-NEXT:    fcvtn2 v0.4s, v1.2d
; FAST-NEXT:    ret
;
; GISEL-LABEL: test_vcvt_high_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
; GISEL-NEXT:    ret
  %cvt = fptrunc <2 x double> %v to <2 x float>
  %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %vcvt2.i
}

; Narrowing through the llvm.aarch64.neon.fcvtxn intrinsic; fcvtxn is expected.
define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
; CHECK-LABEL: test_vcvtx_f32_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvtxn v0.2s, v0.2d
; CHECK-NEXT:    ret
;
; GISEL-LABEL: test_vcvtx_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvtxn v0.2s, v0.2d
; GISEL-NEXT:    ret
  %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
  ret <2 x float> %vcvtx1.i
}

; fcvtxn intrinsic result concatenated after %x; fcvtxn2 is expected.
define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvtx_high_f32_f64:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
; GENERIC-NEXT:    ret
;
; FAST-LABEL: test_vcvtx_high_f32_f64:
; FAST:       // %bb.0:
; FAST-NEXT:    mov.16b v2, v0
; FAST-NEXT:    // implicit-def: $q0
; FAST-NEXT:    mov.16b v0, v2
; FAST-NEXT:    fcvtxn2 v0.4s, v1.2d
; FAST-NEXT:    ret
;
; GISEL-LABEL: test_vcvtx_high_f32_f64:
; GISEL:       // %bb.0:
; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
; GISEL-NEXT:    ret
  %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
  %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}


; NOTE(review): the first four declarations below are not called by any
; function visible in this file; they are kept as-is.
declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone

declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone

declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone

; float -> half bits (returned as i16) through llvm.convert.to.fp16.
define i16 @to_half(float %in) {
; GENERIC-LABEL: to_half:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    fcvt h0, s0
; GENERIC-NEXT:    fmov w0, s0
; GENERIC-NEXT:    ret
;
; FAST-LABEL: to_half:
; FAST:       // %bb.0:
; FAST-NEXT:    fcvt h1, s0
; FAST-NEXT:    // implicit-def: $w0
; FAST-NEXT:    fmov s0, w0
; FAST-NEXT:    mov.16b v0, v1
; FAST-NEXT:    fmov w0, s0
; FAST-NEXT:    // kill: def $w1 killed $w0
; FAST-NEXT:    ret
;
; GISEL-LABEL: to_half:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fcvt h0, s0
; GISEL-NEXT:    fmov w0, s0
; GISEL-NEXT:    ret
  %res = call i16 @llvm.convert.to.fp16.f32(float %in)
  ret i16 %res
}

; half bits (passed as i16) -> float through llvm.convert.from.fp16.
define float @from_half(i16 %in) {
; GENERIC-LABEL: from_half:
; GENERIC:       // %bb.0:
; GENERIC-NEXT:    fmov s0, w0
; GENERIC-NEXT:    fcvt s0, h0
; GENERIC-NEXT:    ret
;
; FAST-LABEL: from_half:
; FAST:       // %bb.0:
; FAST-NEXT:    fmov s0, w0
; FAST-NEXT:    // kill: def $h0 killed $h0 killed $s0
; FAST-NEXT:    fcvt s0, h0
; FAST-NEXT:    ret
;
; GISEL-LABEL: from_half:
; GISEL:       // %bb.0:
; GISEL-NEXT:    fmov s0, w0
; GISEL-NEXT:    fcvt s0, h0
; GISEL-NEXT:    ret
  %res = call float @llvm.convert.from.fp16.f32(i16 %in)
  ret float %res
}

; NOTE(review): attribute group #1 is referenced here but its definition is
; not visible in this view of the file - confirm it exists or is intended.
declare float @llvm.convert.from.fp16.f32(i16) #1
declare i16 @llvm.convert.to.fp16.f32(float) #1