; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Codegen tests for AArch64 NEON lane copies: inserting scalars and vector
; lanes (INS), moving lanes to general-purpose registers (UMOV/SMOV/MOV),
; broadcasting (DUP), and bitcasts between i64 and 64-bit vectors (FMOV).
; Each function is compiled by llc and the emitted assembly is matched against
; the FileCheck patterns written next to the function's label.

;------------------------------------------------------------------------------
; Insert a general-purpose register value into a vector lane.
;------------------------------------------------------------------------------

define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}

define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}

define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}

define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}

define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}

define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}

define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

;------------------------------------------------------------------------------
; Copy one lane of a 128-bit vector into a lane of another 128-bit vector.
;------------------------------------------------------------------------------

define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

;------------------------------------------------------------------------------
; Copy one lane of a 64-bit vector into a lane of a 128-bit vector.
;------------------------------------------------------------------------------

define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

;------------------------------------------------------------------------------
; Copy one lane of a 128-bit vector into a lane of a 64-bit vector.
;------------------------------------------------------------------------------

define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; The only lane of the <1 x double> result is overwritten, so a scalar move of
; the source lane into a d register is expected instead of an INS.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

;------------------------------------------------------------------------------
; Copy one lane of a 64-bit vector into a lane of another 64-bit vector.
;------------------------------------------------------------------------------

define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Negative test: replacing the single lane of a <1 x double> with the single
; lane of another <1 x double> must not produce a vector INS.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: ins {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

;------------------------------------------------------------------------------
; Move a lane to a general-purpose register (zero-extended where narrower).
; Byte and halfword lanes are expected as "umov"; word and doubleword lanes are
; expected under the "mov" spelling.
;------------------------------------------------------------------------------

define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

; The full "umov" mnemonic is required here (as in umovw16b) so that the
; pattern cannot be satisfied merely by the "mov" substring of another opcode.
define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

; Same tightening as umovw8b, matching the 128-bit sibling umovw8h.
define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

;------------------------------------------------------------------------------
; Move a lane to a general-purpose register with sign extension.
; Several of the i32 variants add the extended value to itself before
; returning it.
;------------------------------------------------------------------------------

define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

; Despite the "x" in the name this function returns i32; the pattern accepts
; either an x or a w destination register.
define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

; As with smovx8b, the result is i32 and either register width is accepted.
define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

;------------------------------------------------------------------------------
; Single-lane copies written as a shufflevector whose mask differs from the
; identity of one operand in exactly one position.
;------------------------------------------------------------------------------

define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

; "swap" variants: the mask is mostly the second operand, with one lane taken
; from the first operand.
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

;------------------------------------------------------------------------------
; Splat of a scalar argument, built as a chain of insertelement into every
; lane. A single DUP from a general-purpose register is expected (FMOV for the
; one-lane <1 x i64> case).
;------------------------------------------------------------------------------

define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

;------------------------------------------------------------------------------
; Splat of one vector lane, written as a shufflevector with a constant mask.
; Every pattern in this group names the "dup" mnemonic explicitly so that only
; a DUP (element) instruction can satisfy it.
;------------------------------------------------------------------------------

define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

;------------------------------------------------------------------------------
; Bitcast of a 64-bit vector to i64: a single FMOV from a d register to an
; x register is expected.
;------------------------------------------------------------------------------

define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
  %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
  %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
  %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
  %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
  %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
  %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

;------------------------------------------------------------------------------
; Bitcast of i64 to a 64-bit vector: a single FMOV from an x register to a
; d register is expected.
;------------------------------------------------------------------------------

define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
  %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
  %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
  %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
  %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
  %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
  %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x double> %res
}

;------------------------------------------------------------------------------
; Bitcast from a 64-bit vector to <1 x double>, sandwiched between a vector
; negate and an fptosi. The two expected instructions must be adjacent, i.e.
; the bitcast itself must not cost an instruction.
;------------------------------------------------------------------------------

define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

;------------------------------------------------------------------------------
; Bitcast from <1 x double> to a 64-bit vector, sandwiched between a sitofp
; and a vector negate. Again the two expected instructions must be adjacent.
;------------------------------------------------------------------------------

define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}

define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}

define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}

define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}

define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}

;------------------------------------------------------------------------------
; Test insert element into an undef vector: only lane 0 is defined, so a
; plain FMOV of the scalar into the low part of the register is expected.
;------------------------------------------------------------------------------

define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}

define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}

define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}

define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}

define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}

define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}

define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}

;------------------------------------------------------------------------------
; Splat of the only lane of a one-element vector argument. The patterns pin
; both registers to v0.
;------------------------------------------------------------------------------

define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
  %e = insertelement <4 x i32> %d, i32 %b, i32 2
  %f = insertelement <4 x i32> %e, i32 %b, i32 3
  ret <4 x i32> %f
}

; Rebuilds an <8 x i8> from lanes 0..7 of a <16 x i8>, lane by lane. The
; pattern only requires that a "ret" appears after the label.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %vecext = extractelement <16 x i8> %x, i32 0
  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
  %vecext1 = extractelement <16 x i8> %x, i32 1
  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
  %vecext3 = extractelement <16 x i8> %x, i32 2
  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
  %vecext5 = extractelement <16 x i8> %x, i32 3
  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
  %vecext7 = extractelement <16 x i8> %x, i32 4
  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
  %vecext9 = extractelement <16 x i8> %x, i32 5
  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
  %vecext11 = extractelement <16 x i8> %x, i32 6
  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
  %vecext13 = extractelement <16 x i8> %x, i32 7
  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
  ret <8 x i8> %vecinit14
}

;------------------------------------------------------------------------------
; Splat of a truncated lane. The lane is extracted from a wider-element
; vector, truncated, and inserted into every lane of the result. The expected
; DUP reads the narrow lane directly from v0; in each case the narrow lane
; index equals the source lane index multiplied by the ratio of source to
; destination element width.
;------------------------------------------------------------------------------

define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <2 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %x = extractelement <4 x i32> %a, i32 3
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
  ret <2 x i32> %vecinit1.i
}

define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v8i16:
; CHECK: dup v0.8h, v0.h[4]
entry:
  %x = extractelement <2 x i64> %a, i32 1
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v4i32:
; CHECK: dup v0.4s, v0.s[2]
entry:
  %x = extractelement <2 x i64> %a, i32 1
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
  ret <4 x i32> %vecinit3.i
}

define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <4 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <2 x i64> %a, i32 0
%vget_lane = trunc i64 %x to i16 1008 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1009 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1010 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1011 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1012 ret <4 x i16> %vecinit3.i 1013} 1014 1015define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1016; CHECK-LABEL: test_dup_v2i64_v2i32: 1017; CHECK: dup v0.2s, v0.s[0] 1018entry: 1019 %x = extractelement <2 x i64> %a, i32 0 1020 %vget_lane = trunc i64 %x to i32 1021 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1022 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1023 ret <2 x i32> %vecinit1.i 1024} 1025 1026 1027define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1028; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1029; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1030; CHECK-NEXT: ret 1031entry: 1032 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1033 %1 = insertelement <1 x float> undef, float %0, i32 0 1034 %2 = extractelement <1 x float> %1, i32 0 1035 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1036 ret <2 x float> %vecinit1.i 1037} 1038 1039define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1040; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1041; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1042; CHECK-NEXT: ret 1043entry: 1044 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1045 %1 = insertelement <1 x float> undef, float %0, i32 0 1046 %2 = extractelement <1 x float> %1, i32 0 1047 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1048 ret <4 x float> %vecinit1.i 1049} 1050 1051declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1052 1053define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1054; CHECK-LABEL: test_concat_undef_v1i32: 1055; CHECK: dup 
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1056entry: 1057 %0 = extractelement <2 x i32> %a, i32 0 1058 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1059 ret <2 x i32> %vecinit1.i 1060} 1061 1062declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1063 1064define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1065; CHECK-LABEL: test_concat_v1i32_undef: 1066; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1067; CHECK-NEXT: ret 1068entry: 1069 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1070 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1071 ret <2 x i32> %vecinit.i432 1072} 1073 1074define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1075; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1076; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] 1077entry: 1078 %0 = extractelement <2 x i32> %a, i32 0 1079 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1080 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1081 ret <2 x i32> %vecinit1.i 1082} 1083 1084define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1085; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1086; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1087; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1088; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}} 1089entry: 1090 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1091 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1092 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1093 %f = insertelement <2 x i32> undef, i32 %e, i32 0 1094 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1095 ret <2 x i32> %h 1096} 1097 1098define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1099; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1100; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1101entry: 1102 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1103 
ret <16 x i8> %vecinit30 1104} 1105 1106define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1107; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1108; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1109entry: 1110 %vecext = extractelement <8 x i8> %x, i32 0 1111 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1112 %vecext1 = extractelement <8 x i8> %x, i32 1 1113 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1114 %vecext3 = extractelement <8 x i8> %x, i32 2 1115 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1116 %vecext5 = extractelement <8 x i8> %x, i32 3 1117 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1118 %vecext7 = extractelement <8 x i8> %x, i32 4 1119 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1120 %vecext9 = extractelement <8 x i8> %x, i32 5 1121 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1122 %vecext11 = extractelement <8 x i8> %x, i32 6 1123 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1124 %vecext13 = extractelement <8 x i8> %x, i32 7 1125 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1126 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1127 ret <16 x i8> %vecinit30 1128} 1129 1130define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1131; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1132; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1133entry: 1134 %vecext = extractelement <16 x i8> %x, i32 0 1135 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1136 %vecext1 = extractelement <16 x i8> %x, i32 1 1137 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1138 %vecext3 = extractelement <16 x i8> %x, i32 2 1139 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1140 
%vecext5 = extractelement <16 x i8> %x, i32 3 1141 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1142 %vecext7 = extractelement <16 x i8> %x, i32 4 1143 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1144 %vecext9 = extractelement <16 x i8> %x, i32 5 1145 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1146 %vecext11 = extractelement <16 x i8> %x, i32 6 1147 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1148 %vecext13 = extractelement <16 x i8> %x, i32 7 1149 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1150 %vecext15 = extractelement <8 x i8> %y, i32 0 1151 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1152 %vecext17 = extractelement <8 x i8> %y, i32 1 1153 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1154 %vecext19 = extractelement <8 x i8> %y, i32 2 1155 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1156 %vecext21 = extractelement <8 x i8> %y, i32 3 1157 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1158 %vecext23 = extractelement <8 x i8> %y, i32 4 1159 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1160 %vecext25 = extractelement <8 x i8> %y, i32 5 1161 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1162 %vecext27 = extractelement <8 x i8> %y, i32 6 1163 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1164 %vecext29 = extractelement <8 x i8> %y, i32 7 1165 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1166 ret <16 x i8> %vecinit30 1167} 1168 1169define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1170; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1171; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1172entry: 1173 %vecext = extractelement <8 x i8> %x, i32 0 1174 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1175 %vecext1 = extractelement 
<8 x i8> %x, i32 1 1176 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1177 %vecext3 = extractelement <8 x i8> %x, i32 2 1178 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1179 %vecext5 = extractelement <8 x i8> %x, i32 3 1180 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1181 %vecext7 = extractelement <8 x i8> %x, i32 4 1182 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1183 %vecext9 = extractelement <8 x i8> %x, i32 5 1184 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1185 %vecext11 = extractelement <8 x i8> %x, i32 6 1186 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1187 %vecext13 = extractelement <8 x i8> %x, i32 7 1188 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1189 %vecext15 = extractelement <8 x i8> %y, i32 0 1190 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1191 %vecext17 = extractelement <8 x i8> %y, i32 1 1192 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1193 %vecext19 = extractelement <8 x i8> %y, i32 2 1194 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1195 %vecext21 = extractelement <8 x i8> %y, i32 3 1196 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1197 %vecext23 = extractelement <8 x i8> %y, i32 4 1198 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1199 %vecext25 = extractelement <8 x i8> %y, i32 5 1200 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1201 %vecext27 = extractelement <8 x i8> %y, i32 6 1202 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1203 %vecext29 = extractelement <8 x i8> %y, i32 7 1204 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1205 ret <16 x i8> %vecinit30 1206} 1207 1208define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { 1209; CHECK-LABEL: 
test_concat_v8i16_v8i16_v8i16: 1210; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1211entry: 1212 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1213 ret <8 x i16> %vecinit14 1214} 1215 1216define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { 1217; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1218; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1219entry: 1220 %vecext = extractelement <4 x i16> %x, i32 0 1221 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1222 %vecext1 = extractelement <4 x i16> %x, i32 1 1223 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1224 %vecext3 = extractelement <4 x i16> %x, i32 2 1225 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1226 %vecext5 = extractelement <4 x i16> %x, i32 3 1227 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1228 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1229 ret <8 x i16> %vecinit14 1230} 1231 1232define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1233; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1234; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1235entry: 1236 %vecext = extractelement <8 x i16> %x, i32 0 1237 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1238 %vecext1 = extractelement <8 x i16> %x, i32 1 1239 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1240 %vecext3 = extractelement <8 x i16> %x, i32 2 1241 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1242 %vecext5 = extractelement <8 x i16> %x, i32 3 1243 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1244 %vecext7 = extractelement <4 x i16> %y, i32 0 1245 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1246 %vecext9 = extractelement <4 x i16> %y, i32 1 1247 %vecinit10 = 
insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1248 %vecext11 = extractelement <4 x i16> %y, i32 2 1249 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1250 %vecext13 = extractelement <4 x i16> %y, i32 3 1251 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1252 ret <8 x i16> %vecinit14 1253} 1254 1255define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1256; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1257; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1258entry: 1259 %vecext = extractelement <4 x i16> %x, i32 0 1260 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1261 %vecext1 = extractelement <4 x i16> %x, i32 1 1262 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1263 %vecext3 = extractelement <4 x i16> %x, i32 2 1264 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1265 %vecext5 = extractelement <4 x i16> %x, i32 3 1266 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1267 %vecext7 = extractelement <4 x i16> %y, i32 0 1268 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1269 %vecext9 = extractelement <4 x i16> %y, i32 1 1270 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1271 %vecext11 = extractelement <4 x i16> %y, i32 2 1272 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1273 %vecext13 = extractelement <4 x i16> %y, i32 3 1274 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1275 ret <8 x i16> %vecinit14 1276} 1277 1278define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1279; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1280; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1281entry: 1282 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1283 ret <4 x i32> %vecinit6 1284} 1285 1286define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { 
1287; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1288; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1289entry: 1290 %vecext = extractelement <2 x i32> %x, i32 0 1291 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1292 %vecext1 = extractelement <2 x i32> %x, i32 1 1293 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1294 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1295 ret <4 x i32> %vecinit6 1296} 1297 1298define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1299; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1300; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1301entry: 1302 %vecext = extractelement <4 x i32> %x, i32 0 1303 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1304 %vecext1 = extractelement <4 x i32> %x, i32 1 1305 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1306 %vecext3 = extractelement <2 x i32> %y, i32 0 1307 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1308 %vecext5 = extractelement <2 x i32> %y, i32 1 1309 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1310 ret <4 x i32> %vecinit6 1311} 1312 1313define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1314; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1315; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1316entry: 1317 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1318 ret <4 x i32> %vecinit6 1319} 1320 1321define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1322; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1323; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1324entry: 1325 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1326 ret <2 x i64> %vecinit2 1327} 1328 1329define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { 1330; CHECK-LABEL: 
test_concat_v2i64_v1i64_v2i64: 1331; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1332entry: 1333 %vecext = extractelement <1 x i64> %x, i32 0 1334 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1335 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1336 ret <2 x i64> %vecinit2 1337} 1338 1339define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1340; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1341; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1342entry: 1343 %vecext = extractelement <2 x i64> %x, i32 0 1344 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1345 %vecext1 = extractelement <1 x i64> %y, i32 0 1346 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1347 ret <2 x i64> %vecinit2 1348} 1349 1350define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1351; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1352; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1353entry: 1354 %vecext = extractelement <1 x i64> %x, i32 0 1355 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1356 %vecext1 = extractelement <1 x i64> %y, i32 0 1357 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1358 ret <2 x i64> %vecinit2 1359} 1360 1361 1362define <4 x i16> @concat_vector_v4i16_const() { 1363; CHECK-LABEL: concat_vector_v4i16_const: 1364; CHECK: movi {{d[0-9]+}}, #0 1365 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1366 ret <4 x i16> %r 1367} 1368 1369define <4 x i16> @concat_vector_v4i16_const_one() { 1370; CHECK-LABEL: concat_vector_v4i16_const_one: 1371; CHECK: movi {{v[0-9]+}}.4h, #0x1 1372 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1373 ret <4 x i16> %r 1374} 1375 1376define <4 x i32> @concat_vector_v4i32_const() { 1377; CHECK-LABEL: concat_vector_v4i32_const: 1378; CHECK: movi {{v[0-9]+}}.2d, #0 1379 %r = shufflevector <1 x 
i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1380 ret <4 x i32> %r 1381} 1382 1383define <8 x i8> @concat_vector_v8i8_const() { 1384; CHECK-LABEL: concat_vector_v8i8_const: 1385; CHECK: movi {{d[0-9]+}}, #0 1386 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1387 ret <8 x i8> %r 1388} 1389 1390define <8 x i16> @concat_vector_v8i16_const() { 1391; CHECK-LABEL: concat_vector_v8i16_const: 1392; CHECK: movi {{v[0-9]+}}.2d, #0 1393 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1394 ret <8 x i16> %r 1395} 1396 1397define <8 x i16> @concat_vector_v8i16_const_one() { 1398; CHECK-LABEL: concat_vector_v8i16_const_one: 1399; CHECK: movi {{v[0-9]+}}.8h, #0x1 1400 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1401 ret <8 x i16> %r 1402} 1403 1404define <16 x i8> @concat_vector_v16i8_const() { 1405; CHECK-LABEL: concat_vector_v16i8_const: 1406; CHECK: movi {{v[0-9]+}}.2d, #0 1407 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1408 ret <16 x i8> %r 1409} 1410 1411define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1412; CHECK-LABEL: concat_vector_v4i16: 1413; CHECK: dup v0.4h, v0.h[0] 1414 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1415 ret <4 x i16> %r 1416} 1417 1418define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { 1419; CHECK-LABEL: concat_vector_v4i32: 1420; CHECK: dup v0.4s, v0.s[0] 1421 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer 1422 ret <4 x i32> %r 1423} 1424 1425define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { 1426; CHECK-LABEL: concat_vector_v8i8: 1427; CHECK: dup v0.8b, v0.b[0] 1428 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer 1429 ret <8 x i8> %r 1430} 1431 1432define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { 1433; CHECK-LABEL: concat_vector_v8i16: 1434; CHECK: dup v0.8h, v0.h[0] 1435 
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer 1436 ret <8 x i16> %r 1437} 1438 1439define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { 1440; CHECK-LABEL: concat_vector_v16i8: 1441; CHECK: dup v0.16b, v0.b[0] 1442 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer 1443 ret <16 x i8> %r 1444} 1445