; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Lane insert / extract tests: each function performs one insertelement or
; extractelement pattern and the check lines pin the instruction that llc is
; expected to print for it.

; --- Insert a scalar argument into a vector lane ---

define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK: mov {{v[0-9]+}}.b[15], {{w[0-9]+}}
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}

define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK: mov {{v[0-9]+}}.h[6], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}

define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK: mov {{v[0-9]+}}.s[2], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}

define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK: mov {{v[0-9]+}}.d[1], {{x[0-9]+}}
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}

define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK: mov {{v[0-9]+}}.b[5], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}

define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK: mov {{v[0-9]+}}.h[3], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}

define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK: mov {{v[0-9]+}}.s[1], {{w[0-9]+}}
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; --- Copy a lane between two 128-bit vectors of the same type ---

define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; --- Copy a lane from a 64-bit vector into a 128-bit vector ---

define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

; Unlike its siblings, this case is expected to print zip1 rather than a
; lane mov.
define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; --- Copy a lane from a 128-bit vector into a 64-bit vector ---

define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; The single-lane destination is fully overwritten; the expected output is a
; dup of the source lane.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; --- Copy a lane between two 64-bit vectors of the same type ---

define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: mov {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Negative test: no vector-lane mov is expected for the <1 x double> case.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: mov {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; --- Extract a lane to a general-purpose register (zero-extended for i8/i16) ---

define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

; Byte and halfword lane extracts are checked with the full umov mnemonic,
; matching umovw16b/umovw8h above; a bare mov pattern would only match as a
; substring of umov (NOTE(review): confirm against llc output).
define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

; --- Extract a lane and sign-extend it to a general-purpose register ---

define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

; The next two return i32 despite the "x" in their names; the pattern accepts
; either a w or an x destination register.
define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

; --- shufflevector masks that differ from one operand in a single lane ---

define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: mov {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: mov {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

; "swap" variants: all lanes but one come from the second operand.
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: mov {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

; --- Splat a scalar argument across every lane ---

define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

; --- Splat one lane of a vector across every lane (shufflevector) ---

define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

; The vdupq_lane checks below now name the dup mnemonic explicitly, like the
; other dup-lane tests in this group (previously only the operands were
; matched).
define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

; The dup mnemonic is named explicitly here too, matching the neighbouring
; laneq tests.
define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

; --- Bitcast a 64-bit vector to i64 ---

define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
  %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
  %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
  %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
  %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
  %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
  %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

; --- Bitcast i64 to a 64-bit vector ---

define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
  %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
  %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
  %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
  %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
  %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
  %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x double> %res
}

; --- Negate, bitcast to <1 x double>, then fptosi: the two expected
; instructions must be adjacent in the output ---

define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

; --- sitofp to <1 x double>, bitcast to another 64-bit vector, then negate ---

define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}

define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}

define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}

define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}

define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}

; Test insert element into an undef vector
define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}

define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}

define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}

define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}

define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}

define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}

define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}

; --- Splat the only element of a single-element vector ---

define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
  %e = insertelement <4 x i32> %d, i32 %b, i32 2
  %f = insertelement <4 x i32> %e, i32 %b, i32 3
  ret <4 x i32> %f
}

; Rebuilds the low eight bytes of %x lane by lane; only the presence of a ret
; is checked.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %vecext = extractelement <16 x i8> %x, i32 0
  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
  %vecext1 = extractelement <16 x i8> %x, i32 1
  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
  %vecext3 = extractelement <16 x i8> %x, i32 2
  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
  %vecext5 = extractelement <16 x i8> %x, i32 3
  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
  %vecext7 = extractelement <16 x i8> %x, i32 4
  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
  %vecext9 = extractelement <16 x i8> %x, i32 5
  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
  %vecext11 = extractelement <16 x i8> %x, i32 6
  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
  %vecext13 = extractelement <16 x i8> %x, i32 7
  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
  ret <8 x i8> %vecinit14
}

; Variable-index extract: the checks expect the vector to be stored (str q0),
; the index masked, and the element loaded back from the computed address.
; CHECK-LABEL: test_extracts_inserts_varidx_extract:
; CHECK: str q0
; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
; CHECK-DAG: ldr h[[R:[0-9]+]], {{\[}}[[PTR]]{{\]}}
; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
  %tmp = extractelement <8 x i16> %x, i32 %idx
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Variable-index insert. The address register is captured in a FileCheck
; variable (as in the extract test above) instead of hard-coding x9, so the
; test does not depend on which scratch register is allocated.
; CHECK-LABEL: test_extracts_inserts_varidx_insert:
; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #2
; CHECK: str h0, {{\[}}[[PTR]]{{\]}}
; CHECK-DAG: ldr d[[R:[0-9]+]]
; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
  %tmp = extractelement <8 x i16> %x, i32 0
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; --- Extract a wide lane, truncate it, and splat the narrow value: the
; expected dup reads the narrow lane directly from the source register ---

define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <2 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %x = extractelement <4 x i32> %a, i32 3
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
  ret <2 x i32> %vecinit1.i
}

define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v8i16:
; CHECK: dup v0.8h, v0.h[4]
entry:
  %x = extractelement <2 x i64> %a, i32 1
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16>
%vecinit1.i, i16 %vget_lane, i32 2 1008 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1009 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1010 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1011 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1012 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1013 ret <8 x i16> %vecinit7.i 1014} 1015 1016define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { 1017; CHECK-LABEL: test_dup_v2i64_v4i32: 1018; CHECK: dup v0.4s, v0.s[2] 1019entry: 1020 %x = extractelement <2 x i64> %a, i32 1 1021 %vget_lane = trunc i64 %x to i32 1022 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 1023 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 1024 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 1025 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 1026 ret <4 x i32> %vecinit3.i 1027} 1028 1029define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { 1030; CHECK-LABEL: test_dup_v4i32_v4i16: 1031; CHECK: dup v0.4h, v0.h[2] 1032entry: 1033 %x = extractelement <4 x i32> %a, i32 1 1034 %vget_lane = trunc i32 %x to i16 1035 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1036 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1037 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1038 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1039 ret <4 x i16> %vecinit3.i 1040} 1041 1042define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { 1043; CHECK-LABEL: test_dup_v2i64_v4i16: 1044; CHECK: dup v0.4h, v0.h[0] 1045entry: 1046 %x = extractelement <2 x i64> %a, i32 0 1047 %vget_lane = trunc i64 %x to i16 1048 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1049 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1050 
%vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1051 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1052 ret <4 x i16> %vecinit3.i 1053} 1054 1055define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1056; CHECK-LABEL: test_dup_v2i64_v2i32: 1057; CHECK: dup v0.2s, v0.s[0] 1058entry: 1059 %x = extractelement <2 x i64> %a, i32 0 1060 %vget_lane = trunc i64 %x to i32 1061 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1062 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1063 ret <2 x i32> %vecinit1.i 1064} 1065 1066 1067define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1068; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1069; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1070; CHECK-NEXT: ret 1071entry: 1072 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1073 %1 = insertelement <1 x float> undef, float %0, i32 0 1074 %2 = extractelement <1 x float> %1, i32 0 1075 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1076 ret <2 x float> %vecinit1.i 1077} 1078 1079define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1080; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1081; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1082; CHECK-NEXT: ret 1083entry: 1084 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1085 %1 = insertelement <1 x float> undef, float %0, i32 0 1086 %2 = extractelement <1 x float> %1, i32 0 1087 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1088 ret <4 x float> %vecinit1.i 1089} 1090 1091declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1092 1093define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1094; CHECK-LABEL: test_concat_undef_v1i32: 1095; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1096entry: 1097 %0 = extractelement <2 x i32> %a, i32 0 1098 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1099 ret <2 x i32> %vecinit1.i 
1100} 1101 1102declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1103 1104define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1105; CHECK-LABEL: test_concat_v1i32_undef: 1106; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1107; CHECK-NEXT: ret 1108entry: 1109 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1110 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1111 ret <2 x i32> %vecinit.i432 1112} 1113 1114define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1115; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1116; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] 1117entry: 1118 %0 = extractelement <2 x i32> %a, i32 0 1119 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1120 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1121 ret <2 x i32> %vecinit1.i 1122} 1123 1124define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1125; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1126; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1127; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1128; CHECK: mov {{v[0-9]+}}.s[1], w{{[0-9]+}} 1129entry: 1130 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1131 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1132 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1133 %f = insertelement <2 x i32> undef, i32 %e, i32 0 1134 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1135 ret <2 x i32> %h 1136} 1137 1138define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1139; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1140; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1141entry: 1142 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1143 ret <16 x i8> %vecinit30 1144} 1145 1146define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1147; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1148; CHECK: mov 
{{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1149entry: 1150 %vecext = extractelement <8 x i8> %x, i32 0 1151 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1152 %vecext1 = extractelement <8 x i8> %x, i32 1 1153 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1154 %vecext3 = extractelement <8 x i8> %x, i32 2 1155 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1156 %vecext5 = extractelement <8 x i8> %x, i32 3 1157 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1158 %vecext7 = extractelement <8 x i8> %x, i32 4 1159 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1160 %vecext9 = extractelement <8 x i8> %x, i32 5 1161 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1162 %vecext11 = extractelement <8 x i8> %x, i32 6 1163 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1164 %vecext13 = extractelement <8 x i8> %x, i32 7 1165 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1166 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1167 ret <16 x i8> %vecinit30 1168} 1169 1170define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1171; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1172; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1173entry: 1174 %vecext = extractelement <16 x i8> %x, i32 0 1175 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1176 %vecext1 = extractelement <16 x i8> %x, i32 1 1177 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1178 %vecext3 = extractelement <16 x i8> %x, i32 2 1179 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1180 %vecext5 = extractelement <16 x i8> %x, i32 3 1181 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1182 %vecext7 = extractelement <16 x i8> %x, i32 4 1183 %vecinit8 = 
insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1184 %vecext9 = extractelement <16 x i8> %x, i32 5 1185 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1186 %vecext11 = extractelement <16 x i8> %x, i32 6 1187 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1188 %vecext13 = extractelement <16 x i8> %x, i32 7 1189 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1190 %vecext15 = extractelement <8 x i8> %y, i32 0 1191 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1192 %vecext17 = extractelement <8 x i8> %y, i32 1 1193 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1194 %vecext19 = extractelement <8 x i8> %y, i32 2 1195 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1196 %vecext21 = extractelement <8 x i8> %y, i32 3 1197 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1198 %vecext23 = extractelement <8 x i8> %y, i32 4 1199 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1200 %vecext25 = extractelement <8 x i8> %y, i32 5 1201 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1202 %vecext27 = extractelement <8 x i8> %y, i32 6 1203 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1204 %vecext29 = extractelement <8 x i8> %y, i32 7 1205 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1206 ret <16 x i8> %vecinit30 1207} 1208 1209define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1210; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1211; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1212entry: 1213 %vecext = extractelement <8 x i8> %x, i32 0 1214 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1215 %vecext1 = extractelement <8 x i8> %x, i32 1 1216 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1217 %vecext3 = extractelement <8 x i8> %x, i32 2 1218 %vecinit4 = insertelement <16 x i8> 
%vecinit2, i8 %vecext3, i32 2 1219 %vecext5 = extractelement <8 x i8> %x, i32 3 1220 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1221 %vecext7 = extractelement <8 x i8> %x, i32 4 1222 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1223 %vecext9 = extractelement <8 x i8> %x, i32 5 1224 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1225 %vecext11 = extractelement <8 x i8> %x, i32 6 1226 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1227 %vecext13 = extractelement <8 x i8> %x, i32 7 1228 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1229 %vecext15 = extractelement <8 x i8> %y, i32 0 1230 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1231 %vecext17 = extractelement <8 x i8> %y, i32 1 1232 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1233 %vecext19 = extractelement <8 x i8> %y, i32 2 1234 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1235 %vecext21 = extractelement <8 x i8> %y, i32 3 1236 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1237 %vecext23 = extractelement <8 x i8> %y, i32 4 1238 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1239 %vecext25 = extractelement <8 x i8> %y, i32 5 1240 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1241 %vecext27 = extractelement <8 x i8> %y, i32 6 1242 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1243 %vecext29 = extractelement <8 x i8> %y, i32 7 1244 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1245 ret <16 x i8> %vecinit30 1246} 1247 1248define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { 1249; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: 1250; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1251entry: 1252 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 
9, i32 10, i32 11> 1253 ret <8 x i16> %vecinit14 1254} 1255 1256define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { 1257; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1258; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1259entry: 1260 %vecext = extractelement <4 x i16> %x, i32 0 1261 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1262 %vecext1 = extractelement <4 x i16> %x, i32 1 1263 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1264 %vecext3 = extractelement <4 x i16> %x, i32 2 1265 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1266 %vecext5 = extractelement <4 x i16> %x, i32 3 1267 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1268 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1269 ret <8 x i16> %vecinit14 1270} 1271 1272define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1273; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1274; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1275entry: 1276 %vecext = extractelement <8 x i16> %x, i32 0 1277 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1278 %vecext1 = extractelement <8 x i16> %x, i32 1 1279 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1280 %vecext3 = extractelement <8 x i16> %x, i32 2 1281 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1282 %vecext5 = extractelement <8 x i16> %x, i32 3 1283 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1284 %vecext7 = extractelement <4 x i16> %y, i32 0 1285 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1286 %vecext9 = extractelement <4 x i16> %y, i32 1 1287 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1288 %vecext11 = extractelement <4 x i16> %y, i32 2 1289 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1290 %vecext13 = 
extractelement <4 x i16> %y, i32 3 1291 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1292 ret <8 x i16> %vecinit14 1293} 1294 1295define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1296; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1297; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1298entry: 1299 %vecext = extractelement <4 x i16> %x, i32 0 1300 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1301 %vecext1 = extractelement <4 x i16> %x, i32 1 1302 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1303 %vecext3 = extractelement <4 x i16> %x, i32 2 1304 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1305 %vecext5 = extractelement <4 x i16> %x, i32 3 1306 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1307 %vecext7 = extractelement <4 x i16> %y, i32 0 1308 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1309 %vecext9 = extractelement <4 x i16> %y, i32 1 1310 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1311 %vecext11 = extractelement <4 x i16> %y, i32 2 1312 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1313 %vecext13 = extractelement <4 x i16> %y, i32 3 1314 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1315 ret <8 x i16> %vecinit14 1316} 1317 1318define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1319; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1320; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1321entry: 1322 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1323 ret <4 x i32> %vecinit6 1324} 1325 1326define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { 1327; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1328; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1329entry: 1330 %vecext = extractelement <2 x i32> %x, i32 0 1331 %vecinit = insertelement <4 x i32> 
undef, i32 %vecext, i32 0 1332 %vecext1 = extractelement <2 x i32> %x, i32 1 1333 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1334 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1335 ret <4 x i32> %vecinit6 1336} 1337 1338define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1339; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1340; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1341entry: 1342 %vecext = extractelement <4 x i32> %x, i32 0 1343 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1344 %vecext1 = extractelement <4 x i32> %x, i32 1 1345 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1346 %vecext3 = extractelement <2 x i32> %y, i32 0 1347 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1348 %vecext5 = extractelement <2 x i32> %y, i32 1 1349 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1350 ret <4 x i32> %vecinit6 1351} 1352 1353define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1354; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1355; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1356entry: 1357 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1358 ret <4 x i32> %vecinit6 1359} 1360 1361define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1362; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1363; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1364entry: 1365 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1366 ret <2 x i64> %vecinit2 1367} 1368 1369define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { 1370; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: 1371; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1372entry: 1373 %vecext = extractelement <1 x i64> %x, i32 0 1374 %vecinit = insertelement <2 x i64> undef, i64 
%vecext, i32 0 1375 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1376 ret <2 x i64> %vecinit2 1377} 1378 1379define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1380; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1381; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1382entry: 1383 %vecext = extractelement <2 x i64> %x, i32 0 1384 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1385 %vecext1 = extractelement <1 x i64> %y, i32 0 1386 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1387 ret <2 x i64> %vecinit2 1388} 1389 1390define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1391; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1392; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1393entry: 1394 %vecext = extractelement <1 x i64> %x, i32 0 1395 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1396 %vecext1 = extractelement <1 x i64> %y, i32 0 1397 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1398 ret <2 x i64> %vecinit2 1399} 1400 1401 1402define <4 x i16> @concat_vector_v4i16_const() { 1403; CHECK-LABEL: concat_vector_v4i16_const: 1404; CHECK: movi {{d[0-9]+}}, #0 1405 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1406 ret <4 x i16> %r 1407} 1408 1409define <4 x i16> @concat_vector_v4i16_const_one() { 1410; CHECK-LABEL: concat_vector_v4i16_const_one: 1411; CHECK: movi {{v[0-9]+}}.4h, #1 1412 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1413 ret <4 x i16> %r 1414} 1415 1416define <4 x i32> @concat_vector_v4i32_const() { 1417; CHECK-LABEL: concat_vector_v4i32_const: 1418; CHECK: movi {{v[0-9]+}}.2d, #0 1419 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1420 ret <4 x i32> %r 1421} 1422 1423define <8 x i8> @concat_vector_v8i8_const() { 1424; CHECK-LABEL: concat_vector_v8i8_const: 1425; 
CHECK: movi {{d[0-9]+}}, #0 1426 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1427 ret <8 x i8> %r 1428} 1429 1430define <8 x i16> @concat_vector_v8i16_const() { 1431; CHECK-LABEL: concat_vector_v8i16_const: 1432; CHECK: movi {{v[0-9]+}}.2d, #0 1433 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1434 ret <8 x i16> %r 1435} 1436 1437define <8 x i16> @concat_vector_v8i16_const_one() { 1438; CHECK-LABEL: concat_vector_v8i16_const_one: 1439; CHECK: movi {{v[0-9]+}}.8h, #1 1440 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1441 ret <8 x i16> %r 1442} 1443 1444define <16 x i8> @concat_vector_v16i8_const() { 1445; CHECK-LABEL: concat_vector_v16i8_const: 1446; CHECK: movi {{v[0-9]+}}.2d, #0 1447 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1448 ret <16 x i8> %r 1449} 1450 1451define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1452; CHECK-LABEL: concat_vector_v4i16: 1453; CHECK: dup v0.4h, v0.h[0] 1454 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1455 ret <4 x i16> %r 1456} 1457 1458define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { 1459; CHECK-LABEL: concat_vector_v4i32: 1460; CHECK: dup v0.4s, v0.s[0] 1461 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer 1462 ret <4 x i32> %r 1463} 1464 1465define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { 1466; CHECK-LABEL: concat_vector_v8i8: 1467; CHECK: dup v0.8b, v0.b[0] 1468 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer 1469 ret <8 x i8> %r 1470} 1471 1472define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { 1473; CHECK-LABEL: concat_vector_v8i16: 1474; CHECK: dup v0.8h, v0.h[0] 1475 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer 1476 ret <8 x i16> %r 1477} 1478 1479define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { 1480; CHECK-LABEL: 
concat_vector_v16i8: 1481; CHECK: dup v0.16b, v0.b[0] 1482 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer 1483 ret <16 x i8> %r 1484} 1485