; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Codegen checks for NEON lane insert / extract / duplicate / bitcast patterns.
; Each function holds one small IR pattern; the CHECK lines pin the exact
; instruction sequence llc is expected to emit for it.

; ---------------------------------------------------------------------------
; Insert a scalar argument into a constant lane of a vector argument.
; The checks expect a single lane move from w0/x0 into v0.
; ---------------------------------------------------------------------------

define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.b[15], w0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}

define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.h[6], w0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}

define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.s[2], w0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}

define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.d[1], x0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}

define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.b[5], w0
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}

define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.h[3], w0
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}

define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.s[1], w0
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; ---------------------------------------------------------------------------
; Copy one lane of %tmp1 into a lane of %tmp2 (extractelement followed by
; insertelement), 128-bit source and 128-bit destination. The checks expect one
; element-to-element mov into v1 followed by a register copy into v0.
; ---------------------------------------------------------------------------

define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.b[15], v0.b[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.h[7], v0.h[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.s[1], v0.s[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.s[1], v0.s[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Same lane copy, 64-bit source vector into a 128-bit destination vector.
; ---------------------------------------------------------------------------

define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.b[15], v0.b[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.h[7], v0.h[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.s[1], v0.s[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.s[1], v0.s[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; Same IR body as ins1f2 but with the parameters declared in the opposite
; order, so the destination vector is the first argument; the checks expect the
; lane move to target v0 directly with no trailing register copy.
define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
; CHECK-LABEL: ins1f2_args_flipped:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Same lane copy, 128-bit source vector into a 64-bit destination vector.
; ---------------------------------------------------------------------------

define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v1.b[7], v0.b[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v1.h[3], v0.h[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v1.s[1], v0.s[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v1.d[0], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v1.s[1], v0.s[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; The only lane of the <1 x double> result is overwritten, so %tmp2 does not
; contribute to the result; the checks expect a dup of v0.d[1].
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, v0.d[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Same lane copy, 64-bit source vector into a 64-bit destination vector.
; ---------------------------------------------------------------------------

define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.b[4], v0.b[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.h[3], v0.h[2]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.s[1], v0.s[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.d[0], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.s[1], v0.s[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Single-lane source and destination: the checks expect no instructions other
; than ret.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Extract a lane into a general-purpose register, zero-extending when the
; element is narrower than the returned integer.
; ---------------------------------------------------------------------------

define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w0, v0.b[8]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w0, v0.h[2]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, v0.s[2]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, v0.d[1]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w0, v0.b[7]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w0, v0.h[2]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w0, v0.s[1]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

; ---------------------------------------------------------------------------
; Extract a lane into a general-purpose register with sign extension.
; smovw16b, smovw8h, smovw8b and smovw4h additionally add the extended value
; to itself before returning it.
; NOTE: smovx8b and smovx4h return i32 (their checks expect a w-register
; destination) despite the "x" in their names.
; ---------------------------------------------------------------------------

define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v0.b[8]
; CHECK-NEXT:    add w0, w8, w8
; CHECK-NEXT:    ret
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v0.h[2]
; CHECK-NEXT:    add w0, w8, w8
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov x0, v0.b[8]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov x0, v0.h[2]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov x0, v0.s[2]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[4]
; CHECK-NEXT:    add w0, w8, w8
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[2]
; CHECK-NEXT:    add w0, w8, w8
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w0, v0.b[6]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w0, v0.h[2]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov x0, v0.s[1]
; CHECK-NEXT:    ret
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

; ---------------------------------------------------------------------------
; Single-lane copies written as shufflevector: each mask is the identity of one
; operand except for one lane taken from the other operand. In the "swap"
; variants the identity operand is %v2 and the single lane comes from %v1.
; ---------------------------------------------------------------------------

define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.b[5], v1.b[3]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.b[14], v1.b[6]
; CHECK-NEXT:    ret
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v1.b[7], v0.b[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v1.b[0], v0.b[15]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

; ---------------------------------------------------------------------------
; Splat of a scalar argument built as an insertelement chain starting from
; undef. The checks expect one dup from w0/x0 (fmov for the <1 x i64> case).
; ---------------------------------------------------------------------------

define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8b, w0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4h, w0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2s, w0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.16b, w0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, w0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, w0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, x0
; CHECK-NEXT:    ret
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

; ---------------------------------------------------------------------------
; Splat of one lane of a vector argument, written as a shufflevector whose mask
; repeats a single index. "lane" variants take a 64-bit source, "laneq"
; variants take a 128-bit source; "vdup" produces a 64-bit result and "vdupq" a
; 128-bit result. The checks expect one dup-by-element.
; ---------------------------------------------------------------------------

define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8b, v0.b[5]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[2]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.2s, v0.s[1]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.16b, v0.b[5]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[2]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4s, v0.s[1]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8b, v0.b[5]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4h, v0.h[2]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2s, v0.s[1]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.16b, v0.b[5]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[2]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, v0.s[1]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    ret
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

; ---------------------------------------------------------------------------
; Bitcasts between 64-bit vectors and i64, in both directions. The checks
; expect a single fmov between d0 and x0.
; ---------------------------------------------------------------------------

define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = bitcast <8 x i8> %in to i64
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = bitcast <4 x i16> %in to i64
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = bitcast <2 x i32> %in to i64
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = bitcast <2 x float> %in to i64
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = bitcast <1 x i64> %in to i64
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = bitcast <1 x double> %in to i64
  ret i64 %res
}

define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %res = bitcast i64 %in to <8 x i8>
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %res = bitcast i64 %in to <4 x i16>
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %res = bitcast i64 %in to <2 x i32>
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %res = bitcast i64 %in to <2 x float>
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %res = bitcast i64 %in to <1 x i64>
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %res = bitcast i64 %in to <1 x double>
  ret <1 x double> %res
}

define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL:
test_bitcastv8i8tov1f64: 955; CHECK: // %bb.0: 956; CHECK-NEXT: neg v0.8b, v0.8b 957; CHECK-NEXT: fcvtzs x8, d0 958; CHECK-NEXT: fmov d0, x8 959; CHECK-NEXT: ret 960 %sub.i = sub <8 x i8> zeroinitializer, %a 961 %1 = bitcast <8 x i8> %sub.i to <1 x double> 962 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 963 ret <1 x i64> %vcvt.i 964} 965 966define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { 967; CHECK-LABEL: test_bitcastv4i16tov1f64: 968; CHECK: // %bb.0: 969; CHECK-NEXT: neg v0.4h, v0.4h 970; CHECK-NEXT: fcvtzs x8, d0 971; CHECK-NEXT: fmov d0, x8 972; CHECK-NEXT: ret 973 %sub.i = sub <4 x i16> zeroinitializer, %a 974 %1 = bitcast <4 x i16> %sub.i to <1 x double> 975 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 976 ret <1 x i64> %vcvt.i 977} 978 979define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { 980; CHECK-LABEL: test_bitcastv2i32tov1f64: 981; CHECK: // %bb.0: 982; CHECK-NEXT: neg v0.2s, v0.2s 983; CHECK-NEXT: fcvtzs x8, d0 984; CHECK-NEXT: fmov d0, x8 985; CHECK-NEXT: ret 986 %sub.i = sub <2 x i32> zeroinitializer, %a 987 %1 = bitcast <2 x i32> %sub.i to <1 x double> 988 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 989 ret <1 x i64> %vcvt.i 990} 991 992define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { 993; CHECK-LABEL: test_bitcastv1i64tov1f64: 994; CHECK: // %bb.0: 995; CHECK-NEXT: neg d0, d0 996; CHECK-NEXT: fcvtzs x8, d0 997; CHECK-NEXT: fmov d0, x8 998; CHECK-NEXT: ret 999 %sub.i = sub <1 x i64> zeroinitializer, %a 1000 %1 = bitcast <1 x i64> %sub.i to <1 x double> 1001 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1002 ret <1 x i64> %vcvt.i 1003} 1004 1005define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 { 1006; CHECK-LABEL: test_bitcastv2f32tov1f64: 1007; CHECK: // %bb.0: 1008; CHECK-NEXT: fneg v0.2s, v0.2s 1009; CHECK-NEXT: fcvtzs x8, d0 1010; CHECK-NEXT: fmov d0, x8 1011; CHECK-NEXT: ret 1012 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a 1013 %1 = bitcast <2 x float> %sub.i to 
<1 x double> 1014 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1015 ret <1 x i64> %vcvt.i 1016} 1017 1018define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 { 1019; CHECK-LABEL: test_bitcastv1f64tov8i8: 1020; CHECK: // %bb.0: 1021; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1022; CHECK-NEXT: fmov x8, d0 1023; CHECK-NEXT: scvtf d0, x8 1024; CHECK-NEXT: neg v0.8b, v0.8b 1025; CHECK-NEXT: ret 1026 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1027 %1 = bitcast <1 x double> %vcvt.i to <8 x i8> 1028 %sub.i = sub <8 x i8> zeroinitializer, %1 1029 ret <8 x i8> %sub.i 1030} 1031 1032define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 { 1033; CHECK-LABEL: test_bitcastv1f64tov4i16: 1034; CHECK: // %bb.0: 1035; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1036; CHECK-NEXT: fmov x8, d0 1037; CHECK-NEXT: scvtf d0, x8 1038; CHECK-NEXT: neg v0.4h, v0.4h 1039; CHECK-NEXT: ret 1040 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1041 %1 = bitcast <1 x double> %vcvt.i to <4 x i16> 1042 %sub.i = sub <4 x i16> zeroinitializer, %1 1043 ret <4 x i16> %sub.i 1044} 1045 1046define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 { 1047; CHECK-LABEL: test_bitcastv1f64tov2i32: 1048; CHECK: // %bb.0: 1049; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1050; CHECK-NEXT: fmov x8, d0 1051; CHECK-NEXT: scvtf d0, x8 1052; CHECK-NEXT: neg v0.2s, v0.2s 1053; CHECK-NEXT: ret 1054 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1055 %1 = bitcast <1 x double> %vcvt.i to <2 x i32> 1056 %sub.i = sub <2 x i32> zeroinitializer, %1 1057 ret <2 x i32> %sub.i 1058} 1059 1060define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 { 1061; CHECK-LABEL: test_bitcastv1f64tov1i64: 1062; CHECK: // %bb.0: 1063; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1064; CHECK-NEXT: fmov x8, d0 1065; CHECK-NEXT: scvtf d0, x8 1066; CHECK-NEXT: neg d0, d0 1067; CHECK-NEXT: ret 1068 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1069 %1 = bitcast <1 x double> %vcvt.i to <1 x i64> 1070 %sub.i = sub 
<1 x i64> zeroinitializer, %1 1071 ret <1 x i64> %sub.i 1072} 1073 1074define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 { 1075; CHECK-LABEL: test_bitcastv1f64tov2f32: 1076; CHECK: // %bb.0: 1077; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1078; CHECK-NEXT: fmov x8, d0 1079; CHECK-NEXT: scvtf d0, x8 1080; CHECK-NEXT: fneg v0.2s, v0.2s 1081; CHECK-NEXT: ret 1082 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1083 %1 = bitcast <1 x double> %vcvt.i to <2 x float> 1084 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1 1085 ret <2 x float> %sub.i 1086} 1087 1088; Test insert element into an undef vector 1089define <8 x i8> @scalar_to_vector.v8i8(i8 %a) { 1090; CHECK-LABEL: scalar_to_vector.v8i8: 1091; CHECK: // %bb.0: 1092; CHECK-NEXT: fmov s0, w0 1093; CHECK-NEXT: ret 1094 %b = insertelement <8 x i8> undef, i8 %a, i32 0 1095 ret <8 x i8> %b 1096} 1097 1098define <16 x i8> @scalar_to_vector.v16i8(i8 %a) { 1099; CHECK-LABEL: scalar_to_vector.v16i8: 1100; CHECK: // %bb.0: 1101; CHECK-NEXT: fmov s0, w0 1102; CHECK-NEXT: ret 1103 %b = insertelement <16 x i8> undef, i8 %a, i32 0 1104 ret <16 x i8> %b 1105} 1106 1107define <4 x i16> @scalar_to_vector.v4i16(i16 %a) { 1108; CHECK-LABEL: scalar_to_vector.v4i16: 1109; CHECK: // %bb.0: 1110; CHECK-NEXT: fmov s0, w0 1111; CHECK-NEXT: ret 1112 %b = insertelement <4 x i16> undef, i16 %a, i32 0 1113 ret <4 x i16> %b 1114} 1115 1116define <8 x i16> @scalar_to_vector.v8i16(i16 %a) { 1117; CHECK-LABEL: scalar_to_vector.v8i16: 1118; CHECK: // %bb.0: 1119; CHECK-NEXT: fmov s0, w0 1120; CHECK-NEXT: ret 1121 %b = insertelement <8 x i16> undef, i16 %a, i32 0 1122 ret <8 x i16> %b 1123} 1124 1125define <2 x i32> @scalar_to_vector.v2i32(i32 %a) { 1126; CHECK-LABEL: scalar_to_vector.v2i32: 1127; CHECK: // %bb.0: 1128; CHECK-NEXT: fmov s0, w0 1129; CHECK-NEXT: ret 1130 %b = insertelement <2 x i32> undef, i32 %a, i32 0 1131 ret <2 x i32> %b 1132} 1133 1134define <4 x i32> @scalar_to_vector.v4i32(i32 %a) { 
1135; CHECK-LABEL: scalar_to_vector.v4i32: 1136; CHECK: // %bb.0: 1137; CHECK-NEXT: fmov s0, w0 1138; CHECK-NEXT: ret 1139 %b = insertelement <4 x i32> undef, i32 %a, i32 0 1140 ret <4 x i32> %b 1141} 1142 1143define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { 1144; CHECK-LABEL: scalar_to_vector.v2i64: 1145; CHECK: // %bb.0: 1146; CHECK-NEXT: fmov d0, x0 1147; CHECK-NEXT: ret 1148 %b = insertelement <2 x i64> undef, i64 %a, i32 0 1149 ret <2 x i64> %b 1150} 1151 1152define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { 1153; CHECK-LABEL: testDUP.v1i8: 1154; CHECK: // %bb.0: 1155; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1156; CHECK-NEXT: dup v0.8b, v0.b[0] 1157; CHECK-NEXT: ret 1158 %b = extractelement <1 x i8> %a, i32 0 1159 %c = insertelement <8 x i8> undef, i8 %b, i32 0 1160 %d = insertelement <8 x i8> %c, i8 %b, i32 1 1161 %e = insertelement <8 x i8> %d, i8 %b, i32 2 1162 %f = insertelement <8 x i8> %e, i8 %b, i32 3 1163 %g = insertelement <8 x i8> %f, i8 %b, i32 4 1164 %h = insertelement <8 x i8> %g, i8 %b, i32 5 1165 %i = insertelement <8 x i8> %h, i8 %b, i32 6 1166 %j = insertelement <8 x i8> %i, i8 %b, i32 7 1167 ret <8 x i8> %j 1168} 1169 1170define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { 1171; CHECK-LABEL: testDUP.v1i16: 1172; CHECK: // %bb.0: 1173; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1174; CHECK-NEXT: dup v0.8h, v0.h[0] 1175; CHECK-NEXT: ret 1176 %b = extractelement <1 x i16> %a, i32 0 1177 %c = insertelement <8 x i16> undef, i16 %b, i32 0 1178 %d = insertelement <8 x i16> %c, i16 %b, i32 1 1179 %e = insertelement <8 x i16> %d, i16 %b, i32 2 1180 %f = insertelement <8 x i16> %e, i16 %b, i32 3 1181 %g = insertelement <8 x i16> %f, i16 %b, i32 4 1182 %h = insertelement <8 x i16> %g, i16 %b, i32 5 1183 %i = insertelement <8 x i16> %h, i16 %b, i32 6 1184 %j = insertelement <8 x i16> %i, i16 %b, i32 7 1185 ret <8 x i16> %j 1186} 1187 1188define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { 1189; CHECK-LABEL: testDUP.v1i32: 1190; CHECK: // %bb.0: 1191; 
CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1192; CHECK-NEXT: dup v0.4s, v0.s[0] 1193; CHECK-NEXT: ret 1194 %b = extractelement <1 x i32> %a, i32 0 1195 %c = insertelement <4 x i32> undef, i32 %b, i32 0 1196 %d = insertelement <4 x i32> %c, i32 %b, i32 1 1197 %e = insertelement <4 x i32> %d, i32 %b, i32 2 1198 %f = insertelement <4 x i32> %e, i32 %b, i32 3 1199 ret <4 x i32> %f 1200} 1201 1202define <8 x i8> @getl(<16 x i8> %x) #0 { 1203; CHECK-LABEL: getl: 1204; CHECK: // %bb.0: 1205; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1206; CHECK-NEXT: ret 1207 %vecext = extractelement <16 x i8> %x, i32 0 1208 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 1209 %vecext1 = extractelement <16 x i8> %x, i32 1 1210 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1 1211 %vecext3 = extractelement <16 x i8> %x, i32 2 1212 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2 1213 %vecext5 = extractelement <16 x i8> %x, i32 3 1214 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3 1215 %vecext7 = extractelement <16 x i8> %x, i32 4 1216 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4 1217 %vecext9 = extractelement <16 x i8> %x, i32 5 1218 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5 1219 %vecext11 = extractelement <16 x i8> %x, i32 6 1220 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6 1221 %vecext13 = extractelement <16 x i8> %x, i32 7 1222 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7 1223 ret <8 x i8> %vecinit14 1224} 1225 1226define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) { 1227; CHECK-LABEL: test_extracts_inserts_varidx_extract: 1228; CHECK: // %bb.0: 1229; CHECK-NEXT: .cfi_def_cfa_offset 16 1230; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 1231; CHECK-NEXT: str q0, [sp, #-16]! 
1232; CHECK-NEXT: and x8, x0, #0x7 1233; CHECK-NEXT: mov x9, sp 1234; CHECK-NEXT: bfi x9, x8, #1, #3 1235; CHECK-NEXT: ldr h1, [x9] 1236; CHECK-NEXT: mov v1.h[1], v0.h[1] 1237; CHECK-NEXT: mov v1.h[2], v0.h[2] 1238; CHECK-NEXT: mov v1.h[3], v0.h[3] 1239; CHECK-NEXT: mov v0.16b, v1.16b 1240; CHECK-NEXT: add sp, sp, #16 // =16 1241; CHECK-NEXT: ret 1242 %tmp = extractelement <8 x i16> %x, i32 %idx 1243 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0 1244 %tmp3 = extractelement <8 x i16> %x, i32 1 1245 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1 1246 %tmp5 = extractelement <8 x i16> %x, i32 2 1247 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2 1248 %tmp7 = extractelement <8 x i16> %x, i32 3 1249 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3 1250 ret <4 x i16> %tmp8 1251} 1252 1253define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) { 1254; CHECK-LABEL: test_extracts_inserts_varidx_insert: 1255; CHECK: // %bb.0: 1256; CHECK-NEXT: sub sp, sp, #16 // =16 1257; CHECK-NEXT: .cfi_def_cfa_offset 16 1258; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 1259; CHECK-NEXT: and x8, x0, #0x3 1260; CHECK-NEXT: add x9, sp, #8 // =8 1261; CHECK-NEXT: bfi x9, x8, #1, #2 1262; CHECK-NEXT: str h0, [x9] 1263; CHECK-NEXT: ldr d1, [sp, #8] 1264; CHECK-NEXT: mov v1.h[1], v0.h[1] 1265; CHECK-NEXT: mov v1.h[2], v0.h[2] 1266; CHECK-NEXT: mov v1.h[3], v0.h[3] 1267; CHECK-NEXT: mov v0.16b, v1.16b 1268; CHECK-NEXT: add sp, sp, #16 // =16 1269; CHECK-NEXT: ret 1270 %tmp = extractelement <8 x i16> %x, i32 0 1271 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx 1272 %tmp3 = extractelement <8 x i16> %x, i32 1 1273 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1 1274 %tmp5 = extractelement <8 x i16> %x, i32 2 1275 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2 1276 %tmp7 = extractelement <8 x i16> %x, i32 3 1277 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3 1278 ret <4 x i16> %tmp8 1279} 1280 1281define 
<4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) { 1282; CHECK-LABEL: test_dup_v2i32_v4i16: 1283; CHECK: // %bb.0: // %entry 1284; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1285; CHECK-NEXT: dup v0.4h, v0.h[2] 1286; CHECK-NEXT: ret 1287entry: 1288 %x = extractelement <2 x i32> %a, i32 1 1289 %vget_lane = trunc i32 %x to i16 1290 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1291 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1292 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1293 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1294 ret <4 x i16> %vecinit3.i 1295} 1296 1297define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) { 1298; CHECK-LABEL: test_dup_v4i32_v8i16: 1299; CHECK: // %bb.0: // %entry 1300; CHECK-NEXT: dup v0.8h, v0.h[6] 1301; CHECK-NEXT: ret 1302entry: 1303 %x = extractelement <4 x i32> %a, i32 3 1304 %vget_lane = trunc i32 %x to i16 1305 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 1306 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 1307 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1308 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1309 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1310 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1311 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1312 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1313 ret <8 x i16> %vecinit7.i 1314} 1315 1316define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) { 1317; CHECK-LABEL: test_dup_v1i64_v4i16: 1318; CHECK: // %bb.0: // %entry 1319; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1320; CHECK-NEXT: dup v0.4h, v0.h[0] 1321; CHECK-NEXT: ret 1322entry: 1323 %x = extractelement <1 x i64> %a, i32 0 1324 %vget_lane = trunc i64 %x to i16 1325 %vecinit.i = insertelement <4 x 
i16> undef, i16 %vget_lane, i32 0 1326 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1327 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1328 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1329 ret <4 x i16> %vecinit3.i 1330} 1331 1332define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) { 1333; CHECK-LABEL: test_dup_v1i64_v2i32: 1334; CHECK: // %bb.0: // %entry 1335; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1336; CHECK-NEXT: dup v0.2s, v0.s[0] 1337; CHECK-NEXT: ret 1338entry: 1339 %x = extractelement <1 x i64> %a, i32 0 1340 %vget_lane = trunc i64 %x to i32 1341 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1342 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1343 ret <2 x i32> %vecinit1.i 1344} 1345 1346define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { 1347; CHECK-LABEL: test_dup_v2i64_v8i16: 1348; CHECK: // %bb.0: // %entry 1349; CHECK-NEXT: dup v0.8h, v0.h[4] 1350; CHECK-NEXT: ret 1351entry: 1352 %x = extractelement <2 x i64> %a, i32 1 1353 %vget_lane = trunc i64 %x to i16 1354 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 1355 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 1356 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1357 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1358 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1359 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1360 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1361 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1362 ret <8 x i16> %vecinit7.i 1363} 1364 1365define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { 1366; CHECK-LABEL: test_dup_v2i64_v4i32: 1367; CHECK: // %bb.0: // %entry 1368; CHECK-NEXT: dup v0.4s, v0.s[2] 1369; CHECK-NEXT: ret 1370entry: 1371 %x = 
extractelement <2 x i64> %a, i32 1 1372 %vget_lane = trunc i64 %x to i32 1373 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 1374 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 1375 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 1376 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 1377 ret <4 x i32> %vecinit3.i 1378} 1379 1380define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { 1381; CHECK-LABEL: test_dup_v4i32_v4i16: 1382; CHECK: // %bb.0: // %entry 1383; CHECK-NEXT: dup v0.4h, v0.h[2] 1384; CHECK-NEXT: ret 1385entry: 1386 %x = extractelement <4 x i32> %a, i32 1 1387 %vget_lane = trunc i32 %x to i16 1388 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1389 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1390 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1391 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1392 ret <4 x i16> %vecinit3.i 1393} 1394 1395define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { 1396; CHECK-LABEL: test_dup_v2i64_v4i16: 1397; CHECK: // %bb.0: // %entry 1398; CHECK-NEXT: dup v0.4h, v0.h[0] 1399; CHECK-NEXT: ret 1400entry: 1401 %x = extractelement <2 x i64> %a, i32 0 1402 %vget_lane = trunc i64 %x to i16 1403 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1404 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1405 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1406 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1407 ret <4 x i16> %vecinit3.i 1408} 1409 1410define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1411; CHECK-LABEL: test_dup_v2i64_v2i32: 1412; CHECK: // %bb.0: // %entry 1413; CHECK-NEXT: dup v0.2s, v0.s[0] 1414; CHECK-NEXT: ret 1415entry: 1416 %x = extractelement <2 x i64> %a, i32 0 1417 %vget_lane = trunc i64 %x to i32 1418 %vecinit.i = 
insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1419 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1420 ret <2 x i32> %vecinit1.i 1421} 1422 1423 1424define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1425; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1426; CHECK: // %bb.0: // %entry 1427; CHECK-NEXT: fmaxp s0, v0.2s 1428; CHECK-NEXT: ret 1429entry: 1430 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1431 %1 = insertelement <1 x float> undef, float %0, i32 0 1432 %2 = extractelement <1 x float> %1, i32 0 1433 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1434 ret <2 x float> %vecinit1.i 1435} 1436 1437define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1438; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1439; CHECK: // %bb.0: // %entry 1440; CHECK-NEXT: fmaxp s0, v0.2s 1441; CHECK-NEXT: ret 1442entry: 1443 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1444 %1 = insertelement <1 x float> undef, float %0, i32 0 1445 %2 = extractelement <1 x float> %1, i32 0 1446 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1447 ret <4 x float> %vecinit1.i 1448} 1449 1450declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1451 1452define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1453; CHECK-LABEL: test_concat_undef_v1i32: 1454; CHECK: // %bb.0: // %entry 1455; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1456; CHECK-NEXT: dup v0.2s, v0.s[0] 1457; CHECK-NEXT: ret 1458entry: 1459 %0 = extractelement <2 x i32> %a, i32 0 1460 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1461 ret <2 x i32> %vecinit1.i 1462} 1463 1464declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1465 1466define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1467; CHECK-LABEL: test_concat_v1i32_undef: 1468; CHECK: // %bb.0: // %entry 1469; CHECK-NEXT: fmov s0, w0 1470; CHECK-NEXT: sqabs s0, s0 1471; CHECK-NEXT: ret 1472entry: 1473 %b 
= tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1474 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1475 ret <2 x i32> %vecinit.i432 1476} 1477 1478define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1479; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1480; CHECK: // %bb.0: // %entry 1481; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1482; CHECK-NEXT: dup v0.2s, v0.s[0] 1483; CHECK-NEXT: ret 1484entry: 1485 %0 = extractelement <2 x i32> %a, i32 0 1486 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1487 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1488 ret <2 x i32> %vecinit1.i 1489} 1490 1491define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1492; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1493; CHECK: // %bb.0: // %entry 1494; CHECK-NEXT: fmov s1, w1 1495; CHECK-NEXT: fmov s0, w0 1496; CHECK-NEXT: sqabs s1, s1 1497; CHECK-NEXT: sqabs s0, s0 1498; CHECK-NEXT: fmov w8, s1 1499; CHECK-NEXT: mov v0.s[1], w8 1500; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1501; CHECK-NEXT: ret 1502entry: 1503 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1504 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1505 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1506 %f = insertelement <2 x i32> undef, i32 %e, i32 0 1507 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1508 ret <2 x i32> %h 1509} 1510 1511define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1512; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1513; CHECK: // %bb.0: // %entry 1514; CHECK-NEXT: mov v0.d[1], v1.d[0] 1515; CHECK-NEXT: ret 1516entry: 1517 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1518 ret <16 x i8> %vecinit30 1519} 1520 1521define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1522; CHECK-LABEL: 
test_concat_v16i8_v8i8_v16i8: 1523; CHECK: // %bb.0: // %entry 1524; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1525; CHECK-NEXT: mov v0.d[1], v1.d[0] 1526; CHECK-NEXT: ret 1527entry: 1528 %vecext = extractelement <8 x i8> %x, i32 0 1529 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1530 %vecext1 = extractelement <8 x i8> %x, i32 1 1531 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1532 %vecext3 = extractelement <8 x i8> %x, i32 2 1533 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1534 %vecext5 = extractelement <8 x i8> %x, i32 3 1535 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1536 %vecext7 = extractelement <8 x i8> %x, i32 4 1537 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1538 %vecext9 = extractelement <8 x i8> %x, i32 5 1539 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1540 %vecext11 = extractelement <8 x i8> %x, i32 6 1541 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1542 %vecext13 = extractelement <8 x i8> %x, i32 7 1543 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1544 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1545 ret <16 x i8> %vecinit30 1546} 1547 1548define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1549; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1550; CHECK: // %bb.0: // %entry 1551; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1552; CHECK-NEXT: mov v0.d[1], v1.d[0] 1553; CHECK-NEXT: ret 1554entry: 1555 %vecext = extractelement <16 x i8> %x, i32 0 1556 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1557 %vecext1 = extractelement <16 x i8> %x, i32 1 1558 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1559 %vecext3 = extractelement <16 x i8> %x, i32 2 1560 %vecinit4 = 
insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1561 %vecext5 = extractelement <16 x i8> %x, i32 3 1562 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1563 %vecext7 = extractelement <16 x i8> %x, i32 4 1564 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1565 %vecext9 = extractelement <16 x i8> %x, i32 5 1566 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1567 %vecext11 = extractelement <16 x i8> %x, i32 6 1568 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1569 %vecext13 = extractelement <16 x i8> %x, i32 7 1570 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1571 %vecext15 = extractelement <8 x i8> %y, i32 0 1572 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1573 %vecext17 = extractelement <8 x i8> %y, i32 1 1574 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1575 %vecext19 = extractelement <8 x i8> %y, i32 2 1576 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1577 %vecext21 = extractelement <8 x i8> %y, i32 3 1578 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1579 %vecext23 = extractelement <8 x i8> %y, i32 4 1580 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1581 %vecext25 = extractelement <8 x i8> %y, i32 5 1582 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1583 %vecext27 = extractelement <8 x i8> %y, i32 6 1584 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1585 %vecext29 = extractelement <8 x i8> %y, i32 7 1586 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1587 ret <16 x i8> %vecinit30 1588} 1589 1590define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1591; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1592; CHECK: // %bb.0: // %entry 1593; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1594; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1595; 
CHECK-NEXT: mov v0.d[1], v1.d[0] 1596; CHECK-NEXT: ret 1597entry: 1598 %vecext = extractelement <8 x i8> %x, i32 0 1599 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1600 %vecext1 = extractelement <8 x i8> %x, i32 1 1601 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1602 %vecext3 = extractelement <8 x i8> %x, i32 2 1603 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1604 %vecext5 = extractelement <8 x i8> %x, i32 3 1605 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1606 %vecext7 = extractelement <8 x i8> %x, i32 4 1607 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1608 %vecext9 = extractelement <8 x i8> %x, i32 5 1609 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1610 %vecext11 = extractelement <8 x i8> %x, i32 6 1611 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1612 %vecext13 = extractelement <8 x i8> %x, i32 7 1613 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1614 %vecext15 = extractelement <8 x i8> %y, i32 0 1615 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1616 %vecext17 = extractelement <8 x i8> %y, i32 1 1617 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1618 %vecext19 = extractelement <8 x i8> %y, i32 2 1619 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1620 %vecext21 = extractelement <8 x i8> %y, i32 3 1621 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1622 %vecext23 = extractelement <8 x i8> %y, i32 4 1623 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1624 %vecext25 = extractelement <8 x i8> %y, i32 5 1625 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1626 %vecext27 = extractelement <8 x i8> %y, i32 6 1627 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1628 %vecext29 = extractelement <8 x i8> %y, i32 7 1629 %vecinit30 = insertelement 
<16 x i8> %vecinit28, i8 %vecext29, i32 15
  ret <16 x i8> %vecinit30
}

; Concatenate the low four i16 lanes of %x with the low four i16 lanes of %y
; using one shufflevector (mask 0-3 from %x, 8-11 from %y). The expected
; output is a single 64-bit lane move from v1 into the upper half of v0.
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %vecinit14
}

; Same concatenation, but %x is a 64-bit <4 x i16>: its four lanes are copied
; one element at a time into lanes 0-3 of an undef <8 x i16>, then the low
; half of %y is appended with a shufflevector. The expected output is still
; one 64-bit lane move (plus the d0 -> q0 widening annotation).
define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <4 x i16> %x, i32 0
  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
  %vecext1 = extractelement <4 x i16> %x, i32 1
  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
  %vecext3 = extractelement <4 x i16> %x, i32 2
  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
  %vecext5 = extractelement <4 x i16> %x, i32 3
  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %vecinit14
}

; Low half of a 128-bit %x followed by all four lanes of a 64-bit %y, written
; entirely as an extractelement/insertelement chain (lanes 0-3 from %x,
; lanes 4-7 from %y). Expected to fold to one 64-bit lane move.
define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <8 x i16> %x, i32 0
  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
  %vecext1 = extractelement <8 x i16> %x, i32 1
  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
  %vecext3 = extractelement <8 x i16> %x, i32 2
  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
  %vecext5 = extractelement <8 x i16> %x, i32 3
  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
  %vecext7 = extractelement <4 x i16> %y, i32 0
  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
  %vecext9 = extractelement <4 x i16> %y, i32 1
  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
  %vecext11 = extractelement <4 x i16> %y, i32 2
  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
  %vecext13 = extractelement <4 x i16> %y, i32 3
  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
  ret <8 x i16> %vecinit14
}

; Two 64-bit <4 x i16> inputs joined lane by lane into one <8 x i16>
; (lanes 0-3 from %x, lanes 4-7 from %y). Both inputs are widened to q
; registers and the expected output is one 64-bit lane move.
define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <4 x i16> %x, i32 0
  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
  %vecext1 = extractelement <4 x i16> %x, i32 1
  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
  %vecext3 = extractelement <4 x i16> %x, i32 2
  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
  %vecext5 = extractelement <4 x i16> %x, i32 3
  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
  %vecext7 = extractelement <4 x i16> %y, i32 0
  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
  %vecext9 = extractelement <4 x i16> %y, i32 1
  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
  %vecext11 = extractelement <4 x i16> %y, i32 2
  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
  %vecext13 = extractelement <4 x i16> %y, i32 3
  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
  ret <8 x i16> %vecinit14
}

; i32 variant: low two lanes of %x followed by low two lanes of %y, expressed
; as one shufflevector (mask 0, 1, 4, 5). Expected output is one 64-bit lane
; move.
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %vecinit6
}

; 64-bit <2 x i32> %x is copied element-wise into lanes 0-1 of an undef
; <4 x i32>, then the low half of %y is appended with a shufflevector.
define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <2 x i32> %x, i32 0
  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
  %vecext1 = extractelement <2 x i32> %x, i32 1
  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %vecinit6
}

; Low two lanes of a 128-bit %x followed by both lanes of a 64-bit %y, built
; purely from extractelement/insertelement pairs.
define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <4 x i32> %x, i32 0
  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
  %vecext1 = extractelement <4 x i32> %x, i32 1
  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
  %vecext3 = extractelement <2 x i32> %y, i32 0
  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
  %vecext5 = extractelement <2 x i32> %y, i32 1
  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
  ret <4 x i32> %vecinit6
}

; Two 64-bit <2 x i32> inputs joined by a widening shufflevector
; (mask 0, 1, 2, 3). Both inputs are widened to q registers first.
define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %vecinit6
}

; i64 variant: lane 0 of %x followed by lane 0 of %y via shufflevector
; (mask 0, 2). For this form the expected instruction is zip1 on .2d.
define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
entry:
  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %vecinit2
}

; <1 x i64> %x is inserted into lane 0 of an undef <2 x i64>, then lane 0 of
; %y is appended with a shufflevector. Expected instruction is zip1 on .2d.
define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <1 x i64> %x, i32 0
  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %vecinit2
}

; Lane 0 of a 128-bit %x followed by the single lane of a <1 x i64> %y, built
; from extractelement/insertelement pairs. Expected instruction is zip1 on .2d.
define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <2 x i64> %x, i32 0
  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
  %vecext1 = extractelement <1 x i64> %y, i32 0
  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
  ret <2 x i64> %vecinit2
}

; Two <1 x i64> inputs placed into lanes 0 and 1 of a <2 x i64> with
; extractelement/insertelement pairs. Both inputs are widened to q registers
; and the expected output is one 64-bit lane move.
define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %vecext = extractelement <1 x i64> %x, i32 0
  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
  %vecext1 = extractelement <1 x i64> %y, i32 0
  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
  ret <2 x i64> %vecinit2
}

; Splat of a constant-zero <1 x i16> to four lanes (all-zero shuffle mask).
; Expected to materialise as an all-zero movi.
define <4 x i16> @concat_vector_v4i16_const() {
; CHECK-LABEL: concat_vector_v4i16_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %r
}

; Splat of the constant <i16 1> to four lanes. Expected to materialise as
; movi on .4h with immediate 1.
define <4 x i16> @concat_vector_v4i16_const_one() {
; CHECK-LABEL: concat_vector_v4i16_const_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.4h, #1
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %r
}

; Splat of a constant-zero <1 x i32> to four lanes. Expected to materialise
; as an all-zero movi.
define <4 x i32> @concat_vector_v4i32_const() {
; CHECK-LABEL: concat_vector_v4i32_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; Splat of a constant-zero <1 x i8> to eight lanes. Expected to materialise
; as an all-zero movi.
define <8 x i8> @concat_vector_v8i8_const() {
; CHECK-LABEL: concat_vector_v8i8_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %r
}

; Splat of a constant-zero <1 x i16> to eight lanes. Expected to materialise
; as an all-zero movi.
define <8 x i16> @concat_vector_v8i16_const() {
; CHECK-LABEL: concat_vector_v8i16_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r
}

; Splat of the constant <i16 1> to eight lanes. Expected to materialise as
; movi on .8h with immediate 1.
define <8 x i16> @concat_vector_v8i16_const_one() {
; CHECK-LABEL: concat_vector_v8i16_const_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r
}

; Splat of a constant-zero <1 x i8> to sixteen lanes. Expected to materialise
; as an all-zero movi.
define <16 x i8> @concat_vector_v16i8_const() {
; CHECK-LABEL: concat_vector_v16i8_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %r
}

; Splat of a non-constant <1 x i16> argument to four lanes. Expected to
; select a lane-0 dup on .4h.
define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %r
}

; Splat of a non-constant <1 x i32> argument to four lanes. Expected to
; select a lane-0 dup on .4s.
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-LABEL: concat_vector_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; Splat of a non-constant <1 x i8> argument to eight lanes. Expected to
; select a lane-0 dup on .8b.
define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8b, v0.b[0]
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %r
}

; Splat of a non-constant <1 x i16> argument to eight lanes. Expected to
; select a lane-0 dup on .8h.
define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-LABEL: concat_vector_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r
}

; Splat of a non-constant <1 x i8> argument to sixteen lanes. Expected to
; select a lane-0 dup on .16b.
define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
; CHECK-LABEL: concat_vector_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.16b, v0.b[0]
; CHECK-NEXT:    ret
  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %r
}