; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s

; Code-generation tests for <8 x half> vectors on AArch64 (the triple is the
; one given on the run line above). Each function exercises one IR operation
; and the match lines inside it describe the instructions expected in the
; llc output for that function.

; fadd on <8 x half>. The patterns look for 8 fadd and 24 fcvt instructions.
; The first two fcvt and the last fcvt are order-anchored; everything in
; between may appear in any order.
define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) {
entry:
; CHECK-LABEL: add_h:
; CHECK: fcvt
; CHECK: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fadd
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK: fcvt
  %0 = fadd <8 x half> %a, %b
  ret <8 x half> %0
}


; fsub on <8 x half>. Same shape as add_h: 8 fsub and 24 fcvt are expected.
define <8 x half> @sub_h(<8 x half> %a, <8 x half> %b) {
entry:
; CHECK-LABEL: sub_h:
; CHECK: fcvt
; CHECK: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fsub
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK: fcvt
  %0 = fsub <8 x half> %a, %b
  ret <8 x half> %0
}


; fmul on <8 x half>. Same shape as add_h: 8 fmul and 24 fcvt are expected.
define <8 x half> @mul_h(<8 x half> %a, <8 x half> %b) {
entry:
; CHECK-LABEL: mul_h:
; CHECK: fcvt
; CHECK: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fmul
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK: fcvt
  %0 = fmul <8 x half> %a, %b
  ret <8 x half> %0
}


; fdiv on <8 x half>. Same shape as add_h: 8 fdiv and 24 fcvt are expected.
define <8 x half> @div_h(<8 x half> %a, <8 x half> %b) {
entry:
; CHECK-LABEL: div_h:
; CHECK: fcvt
; CHECK: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fdiv
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK-DAG: fcvt
; CHECK: fcvt
  %0 = fdiv <8 x half> %a, %b
  ret <8 x half> %0
}


; Loading a <8 x half> through the pointer argument is expected to produce
; a q-register load from [x0].
define <8 x half> @load_h(<8 x half>* %a) {
entry:
; CHECK-LABEL: load_h:
; CHECK: ldr q0, [x0]
  %0 = load <8 x half>, <8 x half>* %a, align 4
  ret <8 x half> %0
}


; Storing a <8 x half> through the pointer argument is expected to produce
; a q-register store to [x0].
define void @store_h(<8 x half>* %a, <8 x half> %b) {
entry:
; CHECK-LABEL: store_h:
; CHECK: str q0, [x0]
  store <8 x half> %b, <8 x half>* %a, align 4
  ret void
}

; fptrunc <8 x float> -> <8 x half>: two fcvtn narrowings (from v0 and v1)
; whose results are then combined with an ins into v0.
; The temporary register capture accepts any register number (one or more
; digits), so the test does not depend on which v-register is allocated.
define <8 x half> @s_to_h(<8 x float> %a) {
; CHECK-LABEL: s_to_h:
; CHECK-DAG: fcvtn v0.4h, v0.4s
; CHECK-DAG: fcvtn [[REG:v[0-9]+]].4h, v1.4s
; CHECK: ins v0.d[1], [[REG]].d[0]
  %1 = fptrunc <8 x float> %a to <8 x half>
  ret <8 x half> %1
}

; fptrunc <8 x double> -> <8 x half>: the patterns expect four extracts of
; the upper double lane, eight scalar conversions to h registers and eight
; half-lane inserts, in any order.
define <8 x half> @d_to_h(<8 x double> %a) {
; CHECK-LABEL: d_to_h:
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
; CHECK-DAG: ins v{{[0-9]+}}.h
  %1 = fptrunc <8 x double> %a to <8 x half>
  ret <8 x half> %1
}

; fpext <8 x half> -> <8 x float>: fcvtl2 into v1 followed by fcvtl into v0.
define <8 x float> @h_to_s(<8 x half> %a) {
; CHECK-LABEL: h_to_s:
; CHECK: fcvtl2 v1.4s, v0.8h
; CHECK: fcvtl v0.4s, v0.4h
  %1 = fpext <8 x half> %a to <8 x float>
  ret <8 x float> %1
}

; fpext <8 x half> -> <8 x double>: the patterns expect eight scalar
; conversions to d registers and four ins instructions, in any order.
define <8 x double> @h_to_d(<8 x half> %a) {
; CHECK-LABEL: h_to_d:
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: fcvt d
; CHECK-DAG: ins
; CHECK-DAG: ins
; CHECK-DAG: ins
; CHECK-DAG: ins
  %1 = fpext <8 x half> %a to <8 x double>
  ret <8 x double> %1
}


; Bitcast <8 x i16> -> <8 x half>. The vector is the second argument (after
; an unnamed float), and the expected output is only a move from v1 to v0.
define <8 x half> @bitcast_i_to_h(float, <8 x i16> %a) {
; CHECK-LABEL: bitcast_i_to_h:
; CHECK: mov v0.16b, v1.16b
  %2 = bitcast <8 x i16> %a to <8 x half>
  ret <8 x half> %2
}

; Bitcast <8 x half> -> <8 x i16>; mirror image of bitcast_i_to_h.
define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
; CHECK-LABEL: bitcast_h_to_i:
; CHECK: mov v0.16b, v1.16b
  %2 = bitcast <8 x half> %a to <8 x i16>
  ret <8 x i16> %2
}


; sitofp <8 x i8> -> <8 x half>: widen with sshll to 16-bit lanes, widen
; again to two 4s vectors (sshll2 / sshll), convert each with scvtf, narrow
; with fcvtn and merge the halves with ins.
; Naming note: in the int-to-fp tests the capture called LO is the one
; produced by the "2" (upper-half) instruction and it ends up in v0.d[1].
define <8 x half> @sitofp_i8(<8 x i8> %a) #0 {
; CHECK-LABEL: sitofp_i8:
; CHECK-NEXT: sshll v[[REG1:[0-9]+]].8h, v0.8b, #0
; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
; CHECK-DAG: fcvtn v0.4h, [[HIF]]
; CHECK: ins v0.d[1], v[[LOREG]].d[0]
  %1 = sitofp <8 x i8> %a to <8 x half>
  ret <8 x half> %1
}


; sitofp <8 x i16> -> <8 x half>: as sitofp_i8 but without the first
; widening step.
define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
; CHECK-LABEL: sitofp_i16:
; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0
; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
; CHECK-DAG: fcvtn v0.4h, [[HIF]]
; CHECK: ins v0.d[1], v[[LOREG]].d[0]
  %1 = sitofp <8 x i16> %a to <8 x half>
  ret <8 x half> %1
}


; sitofp <8 x i32> -> <8 x half>: scvtf on v0 and v1, fcvtn on each result,
; then ins to merge.
define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
; CHECK-LABEL: sitofp_i32:
; CHECK-DAG: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
; CHECK-DAG: scvtf [[OP2:v[0-9]+\.4s]], v1.4s
; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
; CHECK-DAG: fcvtn v0.4h, [[OP1]]
; CHECK: ins v0.d[1], v[[REG]].d[0]
  %1 = sitofp <8 x i32> %a to <8 x half>
  ret <8 x half> %1
}


; sitofp <8 x i64> -> <8 x half>. Only the conversion chain that starts from
; the v0 and v1 inputs is matched: scvtf, fcvtn / fcvtn2 to 4s, fcvtn to 4h.
define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
; CHECK-LABEL: sitofp_i64:
; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
; CHECK: fcvtn v0.4h, [[OP3]].4s
  %1 = sitofp <8 x i64> %a to <8 x half>
  ret <8 x half> %1
}

; uitofp <8 x i8> -> <8 x half>: unsigned counterpart of sitofp_i8
; (ushll / ushll2 / ucvtf instead of sshll / sshll2 / scvtf).
define <8 x half> @uitofp_i8(<8 x i8> %a) #0 {
; CHECK-LABEL: uitofp_i8:
; CHECK-NEXT: ushll v[[REG1:[0-9]+]].8h, v0.8b, #0
; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
; CHECK-DAG: fcvtn v0.4h, [[HIF]]
; CHECK: ins v0.d[1], v[[LOREG]].d[0]
  %1 = uitofp <8 x i8> %a to <8 x half>
  ret <8 x half> %1
}


; uitofp <8 x i16> -> <8 x half>: unsigned counterpart of sitofp_i16.
define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
; CHECK-LABEL: uitofp_i16:
; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0
; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
; CHECK-DAG: fcvtn v0.4h, [[HIF]]
; CHECK: ins v0.d[1], v[[LOREG]].d[0]
  %1 = uitofp <8 x i16> %a to <8 x half>
  ret <8 x half> %1
}


; uitofp <8 x i32> -> <8 x half>: unsigned counterpart of sitofp_i32.
define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
; CHECK-LABEL: uitofp_i32:
; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.4s]], v1.4s
; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
; CHECK-DAG: fcvtn v0.4h, [[OP1]]
; CHECK: ins v0.d[1], v[[REG]].d[0]
  %1 = uitofp <8 x i32> %a to <8 x half>
  ret <8 x half> %1
}


; uitofp <8 x i64> -> <8 x half>: unsigned counterpart of sitofp_i64; again
; only the chain starting from v0 and v1 is matched.
define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
; CHECK-LABEL: uitofp_i64:
; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
; CHECK: fcvtn v0.4h, [[OP3]].4s
  %1 = uitofp <8 x i64> %a to <8 x half>
  ret <8 x half> %1
}

; Inserting a half into lane 0 of an undef vector and storing the vector is
; expected to need nothing but the store of q0 and the return.
define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 {
; CHECK-LABEL: test_insert_at_zero:
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %1 = insertelement <8 x half> undef, half %a, i64 0
  store <8 x half> %1, <8 x half>* %b, align 4
  ret void
}

; fptosi <8 x half> -> <8 x i8>: widen to two 4s vectors (fcvtl / fcvtl2),
; convert with fcvtzs, narrow to 8h with xtn / xtn2, then narrow to 8b.
define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
; CHECK-LABEL: fptosi_i8:
; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
; CHECK-NEXT: xtn v0.8b, [[I16]].8h
; CHECK-NEXT: ret
  %1 = fptosi <8 x half> %a to <8 x i8>
  ret <8 x i8> %1
}

; fptosi <8 x half> -> <8 x i16>: as fptosi_i8 without the final narrowing;
; the xtn2 is expected immediately before the return.
define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
; CHECK-LABEL: fptosi_i16:
; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
; CHECK-NEXT: ret
  %1 = fptosi <8 x half> %a to <8 x i16>
  ret <8 x i16> %1
}

; fptoui <8 x half> -> <8 x i8>: unsigned counterpart of fptosi_i8
; (fcvtzu instead of fcvtzs).
define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
; CHECK-LABEL: fptoui_i8:
; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
; CHECK-NEXT: xtn v0.8b, [[I16]].8h
; CHECK-NEXT: ret
  %1 = fptoui <8 x half> %a to <8 x i8>
  ret <8 x i8> %1
}

; fptoui <8 x half> -> <8 x i16>: unsigned counterpart of fptosi_i16.
define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
; CHECK-LABEL: fptoui_i16:
; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
; CHECK-NEXT: ret
  %1 = fptoui <8 x half> %a to <8 x i16>
  ret <8 x i16> %1
}

; The fcmp functions below carry no match lines: they only verify that llc
; compiles each predicate on <8 x half> without failing.

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp une <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 16 csel tests. Skipped.
define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ueq <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ugt <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp uge <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ult <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ule <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp uno <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp one <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp oeq <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ogt <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp oge <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp olt <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ole <8 x half> %a, %b
  ret <8 x i1> %1
}

; FileCheck checks are unwieldy with 16 fcvt and 8 csel tests. Skipped.
define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 {
  %1 = fcmp ord <8 x half> %a, %b
  ret <8 x i1> %1
}

attributes #0 = { nounwind }