; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; Instruction-selection tests for NEON lane copies on arm64: inserting a
; scalar or another vector's lane into a vector lane, and moving a lane out
; to a general-purpose register.

; ---------------------------------------------------------------------------
; insertelement of a general-purpose register value into a constant lane.
; ---------------------------------------------------------------------------

define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins16bw:
; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
  ret <16 x i8> %tmp3
}

define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins8hw:
; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
  ret <8 x i16> %tmp3
}

define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins4sw:
; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
  ret <4 x i32> %tmp3
}

define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
; CHECK-LABEL: ins2dw:
; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
  %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
  ret <2 x i64> %tmp3
}

define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
; CHECK-LABEL: ins8bw:
; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
  ret <8 x i8> %tmp3
}

define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
; CHECK-LABEL: ins4hw:
; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
  ret <4 x i16> %tmp3
}

define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
; CHECK-LABEL: ins2sw:
; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
  ret <2 x i32> %tmp3
}

; ---------------------------------------------------------------------------
; extractelement + insertelement between two 128-bit vectors: one
; lane-to-lane ins is expected.
; ---------------------------------------------------------------------------

define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins16b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins8h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins4s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins2d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins4f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins2df2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Source lane taken from a 64-bit vector, destination is a 128-bit vector.
; ---------------------------------------------------------------------------

define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
; CHECK-LABEL: ins8b16:
; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
  ret <16 x i8> %tmp4
}

define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
; CHECK-LABEL: ins4h8:
; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
  ret <8 x i16> %tmp4
}

define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
; CHECK-LABEL: ins2s4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
  ret <4 x i32> %tmp4
}

define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
; CHECK-LABEL: ins1d2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
  ret <2 x i64> %tmp4
}

define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
; CHECK-LABEL: ins2f4:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x float> %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
  ret <4 x float> %tmp4
}

define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
; CHECK-LABEL: ins1f2:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
  ret <2 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Source lane taken from a 128-bit vector, destination is a 64-bit vector.
; ---------------------------------------------------------------------------

define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins16b8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins8h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins4s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins2d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins4f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x float> %tmp1, i32 2
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; The whole single-lane result is overwritten, so a scalar move out of the
; source lane is expected here rather than an ins.
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x double> %tmp1, i32 1
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Source and destination are both 64-bit vectors.
; ---------------------------------------------------------------------------

define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-LABEL: ins8b8:
; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 2
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
  ret <8 x i8> %tmp4
}

define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
; CHECK-LABEL: ins4h4:
; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
  ret <4 x i16> %tmp4
}

define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
; CHECK-LABEL: ins2s2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 0
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
; CHECK-LABEL: ins1d1:
; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
  ret <1 x i64> %tmp4
}

define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
; CHECK-LABEL: ins2f2:
; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
  %tmp3 = extractelement <2 x float> %tmp1, i32 0
  %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
  ret <2 x float> %tmp4
}

; Negative test: no vector ins may be emitted for this function.
define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins1df1:
; CHECK-NOT: ins {{v[0-9]+}}
  %tmp3 = extractelement <1 x double> %tmp1, i32 0
  %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
  ret <1 x double> %tmp4
}

; ---------------------------------------------------------------------------
; Lane to general-purpose register. Byte/halfword lanes are zero-extended
; with umov; word/doubleword lanes are printed with the plain mov alias.
; ---------------------------------------------------------------------------

define i32 @umovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: umovw16b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: umovw8h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4s(<4 x i32> %tmp1) {
; CHECK-LABEL: umovw4s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  ret i32 %tmp3
}

define i64 @umovx2d(<2 x i64> %tmp1) {
; CHECK-LABEL: umovx2d:
; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
  %tmp3 = extractelement <2 x i64> %tmp1, i32 1
  ret i64 %tmp3
}

; The full umov mnemonic is spelled out for the two zext tests below: the
; patterns are substring matches, so a bare "mov" would also accept a
; sign-extending smov, which would be a miscompile of the zext.
define i32 @umovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: umovw8b:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 7
  %tmp4 = zext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: umovw4h:
; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = zext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @umovw2s(<2 x i32> %tmp1) {
; CHECK-LABEL: umovw2s:
; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  ret i32 %tmp3
}

; A single-lane 64-bit vector is moved to a general-purpose register with fmov.
define i64 @umovx1d(<1 x i64> %tmp1) {
; CHECK-LABEL: umovx1d:
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  %tmp3 = extractelement <1 x i64> %tmp1, i32 0
  ret i64 %tmp3
}

; ---------------------------------------------------------------------------
; Lane to general-purpose register with sign extension (smov).
; ---------------------------------------------------------------------------

define i32 @smovw16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovw16b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovw8h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
  %tmp3 = extractelement <16 x i8> %tmp1, i32 8
  %tmp4 = sext i8 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <8 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i64
  ret i64 %tmp4
}

define i64 @smovx4s(<4 x i32> %tmp1) {
; CHECK-LABEL: smovx4s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
  %tmp3 = extractelement <4 x i32> %tmp1, i32 2
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

define i32 @smovw8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovw8b:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 4
  %tmp4 = sext i8 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

define i32 @smovw4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovw4h:
; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  %tmp5 = add i32 %tmp4, %tmp4
  ret i32 %tmp5
}

; The next two tests accept either an x or a w destination register.
define i32 @smovx8b(<8 x i8> %tmp1) {
; CHECK-LABEL: smovx8b:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
  %tmp3 = extractelement <8 x i8> %tmp1, i32 6
  %tmp4 = sext i8 %tmp3 to i32
  ret i32 %tmp4
}

define i32 @smovx4h(<4 x i16> %tmp1) {
; CHECK-LABEL: smovx4h:
; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
  %tmp3 = extractelement <4 x i16> %tmp1, i32 2
  %tmp4 = sext i16 %tmp3 to i32
  ret i32 %tmp4
}

define i64 @smovx2s(<2 x i32> %tmp1) {
; CHECK-LABEL: smovx2s:
; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
  %tmp3 = extractelement <2 x i32> %tmp1, i32 1
  %tmp4 = sext i32 %tmp3 to i64
  ret i64 %tmp4
}

; ---------------------------------------------------------------------------
; shufflevector masks that differ from one input in exactly one lane: a
; single lane-to-lane ins is expected.
; ---------------------------------------------------------------------------

define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_s8:
; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_s8:
; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
  ret <16 x i8> %vset_lane
}

; "swap" variants: every lane but one comes from the second operand.
define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
; CHECK-LABEL: test_vcopy_lane_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
  %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <8 x i8> %vset_lane
}

define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
  %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %vset_lane
}

; ---------------------------------------------------------------------------
; Splat of a scalar built from an insertelement chain: dup from a
; general-purpose register.
; ---------------------------------------------------------------------------

define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u8:
; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
  ret <8 x i8> %vecinit7.i
}

define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u16:
; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u32:
; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
  %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
  ret <2 x i32> %vecinit1.i
}

; A one-element vector needs no dup; a plain fmov into the d register is
; expected.
define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdup_n_u64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
  ret <1 x i64> %vecinit.i
}

define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u8:
; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
  %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
  %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
  %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
  %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
  %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
  %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
  %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
  %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
  %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
  %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
  %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
  %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
  %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
  %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
  %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
  %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
  ret <16 x i8> %vecinit15.i
}

define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u16:
; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
  %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u32:
; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
  %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
  %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
  %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
  %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
  ret <4 x i32> %vecinit3.i
}

define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
; CHECK-LABEL: test_vdupq_n_u64:
; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
  %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
  %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
  ret <2 x i64> %vecinit1.i
}

; ---------------------------------------------------------------------------
; Splat of one vector lane via shufflevector: dup from a lane.
; ---------------------------------------------------------------------------

define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_lane_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

; The patterns for the four tests below name the dup mnemonic explicitly,
; like the other lane-splat tests in this group, instead of matching the
; operand list alone.
define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_lane_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s8:
; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i8> %shuffle
}

define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s16:
; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i16> %shuffle
}

define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdup_laneq_s32:
; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %shuffle
}

define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s8:
; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
  %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i8> %shuffle
}

; As above, the dup mnemonic is part of the pattern.
define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s16:
; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
  %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i16> %shuffle
}

define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s32:
; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shuffle
}

define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
; CHECK-LABEL: test_vdupq_laneq_s64:
; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
  %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %shuffle
}

; ---------------------------------------------------------------------------
; Bitcasts between 64-bit vectors and i64: a single fmov between the d and
; x register files is expected in either direction.
; ---------------------------------------------------------------------------

define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
; CHECK-LABEL: test_bitcastv8i8toi64:
  %res = bitcast <8 x i8> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
; CHECK-LABEL: test_bitcastv4i16toi64:
  %res = bitcast <4 x i16> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
; CHECK-LABEL: test_bitcastv2i32toi64:
  %res = bitcast <2 x i32> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
; CHECK-LABEL: test_bitcastv2f32toi64:
  %res = bitcast <2 x float> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
; CHECK-LABEL: test_bitcastv1i64toi64:
  %res = bitcast <1 x i64> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
; CHECK-LABEL: test_bitcastv1f64toi64:
  %res = bitcast <1 x double> %in to i64
; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
  ret i64 %res
}

define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov8i8:
  %res = bitcast i64 %in to <8 x i8>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <8 x i8> %res
}

define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov4i16:
  %res = bitcast i64 %in to <4 x i16>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <4 x i16> %res
}

define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2i32:
  %res = bitcast i64 %in to <2 x i32>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x i32> %res
}

define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov2f32:
  %res = bitcast i64 %in to <2 x float>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <2 x float> %res
}

define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1i64:
  %res = bitcast i64 %in to <1 x i64>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x i64> %res
}

define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
; CHECK-LABEL: test_bitcasti64tov1f64:
  %res = bitcast i64 %in to <1 x double>
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  ret <1 x double> %res
}

; ---------------------------------------------------------------------------
; Bitcasts between 64-bit vectors and <1 x double>, sandwiched between a
; vector negate and a conversion. The second pattern must match the line
; immediately after the first, i.e. the bitcast itself emits no instruction.
; ---------------------------------------------------------------------------

define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
; CHECK-LABEL: test_bitcastv8i8tov1f64:
; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <8 x i8> zeroinitializer, %a
  %1 = bitcast <8 x i8> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
; CHECK-LABEL: test_bitcastv4i16tov1f64:
; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <4 x i16> zeroinitializer, %a
  %1 = bitcast <4 x i16> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
; CHECK-LABEL: test_bitcastv2i32tov1f64:
; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <2 x i32> zeroinitializer, %a
  %1 = bitcast <2 x i32> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1i64tov1f64:
; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
  %sub.i = sub <1 x i64> zeroinitializer, %a
  %1 = bitcast <1 x i64> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
; CHECK-LABEL: test_bitcastv2f32tov1f64:
; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  %1 = bitcast <2 x float> %sub.i to <1 x double>
  %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
  ret <1 x i64> %vcvt.i
}

define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov8i8:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
  %sub.i = sub <8 x i8> zeroinitializer, %1
  ret <8 x i8> %sub.i
}

define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov4i16:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
  %sub.i = sub <4 x i16> zeroinitializer, %1
  ret <4 x i16> %sub.i
}

define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2i32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
  %sub.i = sub <2 x i32> zeroinitializer, %1
  ret <2 x i32> %sub.i
}

define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov1i64:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
  %sub.i = sub <1 x i64> zeroinitializer, %1
  ret <1 x i64> %sub.i
}

define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
; CHECK-LABEL: test_bitcastv1f64tov2f32:
; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  %vcvt.i = sitofp <1 x i64> %a to <1 x double>
  %1 = bitcast <1 x double> %vcvt.i to <2 x float>
  %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
  ret <2 x float> %sub.i
}

; Test insert element into an undef vector
define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v8i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i8> undef, i8 %a, i32 0
  ret <8 x i8> %b
}

define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
; CHECK-LABEL: scalar_to_vector.v16i8:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  ret <16 x i8> %b
}

define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v4i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i16> undef, i16 %a, i32 0
  ret <4 x i16> %b
}

define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
; CHECK-LABEL: scalar_to_vector.v8i16:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  ret <8 x i16> %b
}

define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v2i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <2 x i32> undef, i32 %a, i32 0
  ret <2 x i32> %b
}

define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
; CHECK-LABEL: scalar_to_vector.v4i32:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  ret <4 x i32> %b
}

define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
; CHECK-LABEL: scalar_to_vector.v2i64:
; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
  %b = insertelement <2 x i64> undef, i64 %a, i32 0
  ret <2 x i64> %b
}

; ---------------------------------------------------------------------------
; Splat of the only element of a one-element vector: dup from lane 0.
; ---------------------------------------------------------------------------

define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
; CHECK-LABEL: testDUP.v1i8:
; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
  %e = insertelement <8 x i8> %d, i8 %b, i32 2
  %f = insertelement <8 x i8> %e, i8 %b, i32 3
  %g = insertelement <8 x i8> %f, i8 %b, i32 4
  %h = insertelement <8 x i8> %g, i8 %b, i32 5
  %i = insertelement <8 x i8> %h, i8 %b, i32 6
  %j = insertelement <8 x i8> %i, i8 %b, i32 7
  ret <8 x i8> %j
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-LABEL: testDUP.v1i16:
; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
  %e = insertelement <8 x i16> %d, i16 %b, i32 2
  %f = insertelement <8 x i16> %e, i16 %b, i32 3
  %g = insertelement <8 x i16> %f, i16 %b, i32 4
  %h = insertelement <8 x i16> %g, i16 %b, i32 5
  %i = insertelement <8 x i16> %h, i16 %b, i32 6
  %j = insertelement <8 x i16> %i, i16 %b, i32 7
  ret <8 x i16> %j
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-LABEL: testDUP.v1i32:
; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
  %e = insertelement <4 x i32> %d, i32 %b, i32 2
  %f = insertelement <4 x i32> %e, i32 %b, i32 3
  ret <4 x i32> %f
}

; Rebuilds an <8 x i8> from lanes 0..7 of a <16 x i8>, in order; the only
; pattern required after the label is the return.
define <8 x i8> @getl(<16 x i8> %x) #0 {
; CHECK-LABEL: getl:
; CHECK: ret
  %vecext = extractelement <16 x i8> %x, i32 0
  %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
  %vecext1 = extractelement <16 x i8> %x, i32 1
  %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
  %vecext3 = extractelement <16 x i8> %x, i32 2
  %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
  %vecext5 = extractelement <16 x i8> %x, i32 3
  %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
  %vecext7 = extractelement <16 x i8> %x, i32 4
  %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
  %vecext9 = extractelement <16 x i8> %x, i32 5
  %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
  %vecext11 = extractelement <16 x i8> %x, i32 6
  %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
  %vecext13 = extractelement <16 x i8> %x, i32 7
  %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
  ret <8 x i8> %vecinit14
}

; Variable-index extract: the source vector is stored and the selected
; element is loaded back into lane 0; the constant lanes still use ins.
; CHECK-LABEL: test_extracts_inserts_varidx_extract:
; CHECK: str q0
; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
  %tmp = extractelement <8 x i16> %x, i32 %idx
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Variable-index insert: the element is stored at the indexed slot and the
; vector is loaded back; the constant lanes still use ins.
; CHECK-LABEL: test_extracts_inserts_varidx_insert:
; CHECK: str h0, [{{.*}}, w0, sxtw #1]
; CHECK-DAG: ldr d[[R:[0-9]+]]
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
  %tmp = extractelement <8 x i16> %x, i32 0
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; ---------------------------------------------------------------------------
; Splat of a truncated wider lane: expected as a dup of the narrower lane
; that overlays the low part of the source lane (e.g. .s[1] -> .h[2]).
; ---------------------------------------------------------------------------

define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
; CHECK-LABEL: test_dup_v2i32_v4i16:
; CHECK: dup v0.4h, v0.h[2]
entry:
  %x = extractelement <2 x i32> %a, i32 1
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
; CHECK-LABEL: test_dup_v4i32_v8i16:
; CHECK: dup v0.8h, v0.h[6]
entry:
  %x = extractelement <4 x i32> %a, i32 3
  %vget_lane = trunc i32 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
  %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
  %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
  %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
  ret <8 x i16> %vecinit7.i
}

define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v4i16:
; CHECK: dup v0.4h, v0.h[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  ret <4 x i16> %vecinit3.i
}

define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
; CHECK-LABEL: test_dup_v1i64_v2i32:
; CHECK: dup v0.2s, v0.s[0]
entry:
  %x = extractelement <1 x i64> %a, i32 0
  %vget_lane = trunc i64 %x to i32
  %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
  %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
  ret <2 x i32> %vecinit1.i
}

define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
; CHECK-LABEL: test_dup_v2i64_v8i16:
; CHECK: dup v0.8h, v0.h[4]
entry:
  %x = extractelement <2 x i64> %a, i32 1
  %vget_lane = trunc i64 %x to i16
  %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
  %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
  %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
  %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
  %vecinit4.i =
insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1007 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1008 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1009 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1010 ret <8 x i16> %vecinit7.i 1011} 1012 1013define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { 1014; CHECK-LABEL: test_dup_v2i64_v4i32: 1015; CHECK: dup v0.4s, v0.s[2] 1016entry: 1017 %x = extractelement <2 x i64> %a, i32 1 1018 %vget_lane = trunc i64 %x to i32 1019 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 1020 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 1021 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 1022 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 1023 ret <4 x i32> %vecinit3.i 1024} 1025 1026define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { 1027; CHECK-LABEL: test_dup_v4i32_v4i16: 1028; CHECK: dup v0.4h, v0.h[2] 1029entry: 1030 %x = extractelement <4 x i32> %a, i32 1 1031 %vget_lane = trunc i32 %x to i16 1032 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1033 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1034 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1035 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1036 ret <4 x i16> %vecinit3.i 1037} 1038 1039define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { 1040; CHECK-LABEL: test_dup_v2i64_v4i16: 1041; CHECK: dup v0.4h, v0.h[0] 1042entry: 1043 %x = extractelement <2 x i64> %a, i32 0 1044 %vget_lane = trunc i64 %x to i16 1045 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1046 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1047 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1048 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 
%vget_lane, i32 3 1049 ret <4 x i16> %vecinit3.i 1050} 1051 1052define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1053; CHECK-LABEL: test_dup_v2i64_v2i32: 1054; CHECK: dup v0.2s, v0.s[0] 1055entry: 1056 %x = extractelement <2 x i64> %a, i32 0 1057 %vget_lane = trunc i64 %x to i32 1058 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1059 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1060 ret <2 x i32> %vecinit1.i 1061} 1062 1063 1064define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { 1065; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: 1066; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1067; CHECK-NEXT: ret 1068entry: 1069 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1070 %1 = insertelement <1 x float> undef, float %0, i32 0 1071 %2 = extractelement <1 x float> %1, i32 0 1072 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 1073 ret <2 x float> %vecinit1.i 1074} 1075 1076define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { 1077; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: 1078; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s 1079; CHECK-NEXT: ret 1080entry: 1081 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 1082 %1 = insertelement <1 x float> undef, float %0, i32 0 1083 %2 = extractelement <1 x float> %1, i32 0 1084 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 1085 ret <4 x float> %vecinit1.i 1086} 1087 1088declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>) 1089 1090define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1091; CHECK-LABEL: test_concat_undef_v1i32: 1092; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1093entry: 1094 %0 = extractelement <2 x i32> %a, i32 0 1095 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1096 ret <2 x i32> %vecinit1.i 1097} 1098 1099declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4 1100 1101define <2 x i32> @test_concat_v1i32_undef(i32 %a) { 1102; 
CHECK-LABEL: test_concat_v1i32_undef: 1103; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1104; CHECK-NEXT: ret 1105entry: 1106 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1107 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 1108 ret <2 x i32> %vecinit.i432 1109} 1110 1111define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1112; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1113; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] 1114entry: 1115 %0 = extractelement <2 x i32> %a, i32 0 1116 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1117 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1118 ret <2 x i32> %vecinit1.i 1119} 1120 1121define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { 1122; CHECK-LABEL: test_concat_diff_v1i32_v1i32: 1123; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1124; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} 1125; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}} 1126entry: 1127 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 1128 %d = insertelement <2 x i32> undef, i32 %c, i32 0 1129 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b) 1130 %f = insertelement <2 x i32> undef, i32 %e, i32 0 1131 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> 1132 ret <2 x i32> %h 1133} 1134 1135define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1136; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1137; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1138entry: 1139 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1140 ret <16 x i8> %vecinit30 1141} 1142 1143define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1144; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1145; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1146entry: 1147 %vecext = extractelement <8 x i8> %x, i32 0 1148 %vecinit = insertelement <16 x 
i8> undef, i8 %vecext, i32 0 1149 %vecext1 = extractelement <8 x i8> %x, i32 1 1150 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1151 %vecext3 = extractelement <8 x i8> %x, i32 2 1152 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1153 %vecext5 = extractelement <8 x i8> %x, i32 3 1154 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1155 %vecext7 = extractelement <8 x i8> %x, i32 4 1156 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1157 %vecext9 = extractelement <8 x i8> %x, i32 5 1158 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1159 %vecext11 = extractelement <8 x i8> %x, i32 6 1160 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1161 %vecext13 = extractelement <8 x i8> %x, i32 7 1162 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1163 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1164 ret <16 x i8> %vecinit30 1165} 1166 1167define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1168; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1169; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1170entry: 1171 %vecext = extractelement <16 x i8> %x, i32 0 1172 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1173 %vecext1 = extractelement <16 x i8> %x, i32 1 1174 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1175 %vecext3 = extractelement <16 x i8> %x, i32 2 1176 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1177 %vecext5 = extractelement <16 x i8> %x, i32 3 1178 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1179 %vecext7 = extractelement <16 x i8> %x, i32 4 1180 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1181 %vecext9 = extractelement <16 x i8> %x, i32 5 1182 %vecinit10 = 
insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1183 %vecext11 = extractelement <16 x i8> %x, i32 6 1184 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1185 %vecext13 = extractelement <16 x i8> %x, i32 7 1186 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1187 %vecext15 = extractelement <8 x i8> %y, i32 0 1188 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1189 %vecext17 = extractelement <8 x i8> %y, i32 1 1190 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1191 %vecext19 = extractelement <8 x i8> %y, i32 2 1192 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1193 %vecext21 = extractelement <8 x i8> %y, i32 3 1194 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1195 %vecext23 = extractelement <8 x i8> %y, i32 4 1196 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1197 %vecext25 = extractelement <8 x i8> %y, i32 5 1198 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1199 %vecext27 = extractelement <8 x i8> %y, i32 6 1200 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1201 %vecext29 = extractelement <8 x i8> %y, i32 7 1202 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1203 ret <16 x i8> %vecinit30 1204} 1205 1206define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1207; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1208; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1209entry: 1210 %vecext = extractelement <8 x i8> %x, i32 0 1211 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1212 %vecext1 = extractelement <8 x i8> %x, i32 1 1213 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1214 %vecext3 = extractelement <8 x i8> %x, i32 2 1215 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1216 %vecext5 = extractelement <8 x i8> %x, i32 3 1217 %vecinit6 = insertelement <16 x i8> 
%vecinit4, i8 %vecext5, i32 3 1218 %vecext7 = extractelement <8 x i8> %x, i32 4 1219 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1220 %vecext9 = extractelement <8 x i8> %x, i32 5 1221 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1222 %vecext11 = extractelement <8 x i8> %x, i32 6 1223 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1224 %vecext13 = extractelement <8 x i8> %x, i32 7 1225 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1226 %vecext15 = extractelement <8 x i8> %y, i32 0 1227 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1228 %vecext17 = extractelement <8 x i8> %y, i32 1 1229 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1230 %vecext19 = extractelement <8 x i8> %y, i32 2 1231 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1232 %vecext21 = extractelement <8 x i8> %y, i32 3 1233 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1234 %vecext23 = extractelement <8 x i8> %y, i32 4 1235 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1236 %vecext25 = extractelement <8 x i8> %y, i32 5 1237 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1238 %vecext27 = extractelement <8 x i8> %y, i32 6 1239 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1240 %vecext29 = extractelement <8 x i8> %y, i32 7 1241 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1242 ret <16 x i8> %vecinit30 1243} 1244 1245define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { 1246; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: 1247; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1248entry: 1249 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1250 ret <8 x i16> %vecinit14 1251} 1252 1253define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> 
%x, <8 x i16> %y) #0 { 1254; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1255; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1256entry: 1257 %vecext = extractelement <4 x i16> %x, i32 0 1258 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1259 %vecext1 = extractelement <4 x i16> %x, i32 1 1260 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1261 %vecext3 = extractelement <4 x i16> %x, i32 2 1262 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1263 %vecext5 = extractelement <4 x i16> %x, i32 3 1264 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1265 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1266 ret <8 x i16> %vecinit14 1267} 1268 1269define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1270; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1271; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1272entry: 1273 %vecext = extractelement <8 x i16> %x, i32 0 1274 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1275 %vecext1 = extractelement <8 x i16> %x, i32 1 1276 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1277 %vecext3 = extractelement <8 x i16> %x, i32 2 1278 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1279 %vecext5 = extractelement <8 x i16> %x, i32 3 1280 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1281 %vecext7 = extractelement <4 x i16> %y, i32 0 1282 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1283 %vecext9 = extractelement <4 x i16> %y, i32 1 1284 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1285 %vecext11 = extractelement <4 x i16> %y, i32 2 1286 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1287 %vecext13 = extractelement <4 x i16> %y, i32 3 1288 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1289 ret <8 x i16> 
%vecinit14 1290} 1291 1292define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1293; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1294; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1295entry: 1296 %vecext = extractelement <4 x i16> %x, i32 0 1297 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1298 %vecext1 = extractelement <4 x i16> %x, i32 1 1299 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1300 %vecext3 = extractelement <4 x i16> %x, i32 2 1301 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1302 %vecext5 = extractelement <4 x i16> %x, i32 3 1303 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1304 %vecext7 = extractelement <4 x i16> %y, i32 0 1305 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1306 %vecext9 = extractelement <4 x i16> %y, i32 1 1307 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1308 %vecext11 = extractelement <4 x i16> %y, i32 2 1309 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1310 %vecext13 = extractelement <4 x i16> %y, i32 3 1311 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1312 ret <8 x i16> %vecinit14 1313} 1314 1315define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1316; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1317; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1318entry: 1319 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1320 ret <4 x i32> %vecinit6 1321} 1322 1323define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { 1324; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1325; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1326entry: 1327 %vecext = extractelement <2 x i32> %x, i32 0 1328 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1329 %vecext1 = extractelement <2 x i32> %x, i32 1 1330 %vecinit2 = insertelement <4 x i32> %vecinit, 
i32 %vecext1, i32 1 1331 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1332 ret <4 x i32> %vecinit6 1333} 1334 1335define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1336; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1337; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1338entry: 1339 %vecext = extractelement <4 x i32> %x, i32 0 1340 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1341 %vecext1 = extractelement <4 x i32> %x, i32 1 1342 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1343 %vecext3 = extractelement <2 x i32> %y, i32 0 1344 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1345 %vecext5 = extractelement <2 x i32> %y, i32 1 1346 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1347 ret <4 x i32> %vecinit6 1348} 1349 1350define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1351; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1352; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1353entry: 1354 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1355 ret <4 x i32> %vecinit6 1356} 1357 1358define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1359; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1360; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1361entry: 1362 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1363 ret <2 x i64> %vecinit2 1364} 1365 1366define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { 1367; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: 1368; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 1369entry: 1370 %vecext = extractelement <1 x i64> %x, i32 0 1371 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1372 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1373 ret <2 x i64> 
%vecinit2 1374} 1375 1376define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1377; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1378; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1379entry: 1380 %vecext = extractelement <2 x i64> %x, i32 0 1381 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1382 %vecext1 = extractelement <1 x i64> %y, i32 0 1383 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1384 ret <2 x i64> %vecinit2 1385} 1386 1387define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1388; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1389; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 1390entry: 1391 %vecext = extractelement <1 x i64> %x, i32 0 1392 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1393 %vecext1 = extractelement <1 x i64> %y, i32 0 1394 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1395 ret <2 x i64> %vecinit2 1396} 1397 1398 1399define <4 x i16> @concat_vector_v4i16_const() { 1400; CHECK-LABEL: concat_vector_v4i16_const: 1401; CHECK: movi {{d[0-9]+}}, #0 1402 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1403 ret <4 x i16> %r 1404} 1405 1406define <4 x i16> @concat_vector_v4i16_const_one() { 1407; CHECK-LABEL: concat_vector_v4i16_const_one: 1408; CHECK: movi {{v[0-9]+}}.4h, #1 1409 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1410 ret <4 x i16> %r 1411} 1412 1413define <4 x i32> @concat_vector_v4i32_const() { 1414; CHECK-LABEL: concat_vector_v4i32_const: 1415; CHECK: movi {{v[0-9]+}}.2d, #0 1416 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1417 ret <4 x i32> %r 1418} 1419 1420define <8 x i8> @concat_vector_v8i8_const() { 1421; CHECK-LABEL: concat_vector_v8i8_const: 1422; CHECK: movi {{d[0-9]+}}, #0 1423 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1424 ret <8 x i8> 
%r 1425} 1426 1427define <8 x i16> @concat_vector_v8i16_const() { 1428; CHECK-LABEL: concat_vector_v8i16_const: 1429; CHECK: movi {{v[0-9]+}}.2d, #0 1430 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1431 ret <8 x i16> %r 1432} 1433 1434define <8 x i16> @concat_vector_v8i16_const_one() { 1435; CHECK-LABEL: concat_vector_v8i16_const_one: 1436; CHECK: movi {{v[0-9]+}}.8h, #1 1437 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1438 ret <8 x i16> %r 1439} 1440 1441define <16 x i8> @concat_vector_v16i8_const() { 1442; CHECK-LABEL: concat_vector_v16i8_const: 1443; CHECK: movi {{v[0-9]+}}.2d, #0 1444 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1445 ret <16 x i8> %r 1446} 1447 1448define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1449; CHECK-LABEL: concat_vector_v4i16: 1450; CHECK: dup v0.4h, v0.h[0] 1451 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1452 ret <4 x i16> %r 1453} 1454 1455define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { 1456; CHECK-LABEL: concat_vector_v4i32: 1457; CHECK: dup v0.4s, v0.s[0] 1458 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer 1459 ret <4 x i32> %r 1460} 1461 1462define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { 1463; CHECK-LABEL: concat_vector_v8i8: 1464; CHECK: dup v0.8b, v0.b[0] 1465 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer 1466 ret <8 x i8> %r 1467} 1468 1469define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { 1470; CHECK-LABEL: concat_vector_v8i16: 1471; CHECK: dup v0.8h, v0.h[0] 1472 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer 1473 ret <8 x i16> %r 1474} 1475 1476define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { 1477; CHECK-LABEL: concat_vector_v16i8: 1478; CHECK: dup v0.16b, v0.b[0] 1479 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer 1480 ret <16 x i8> %r 
1481} 1482