; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Tests for lowering shufflevector pairs/concatenations to the NEON vtrn
; instruction.  Naming used below:
;   <name>        - both transpose halves are computed separately and combined
;                   with an arithmetic op, so one vtrn must feed both uses.
;   <name>_Qres   - 64-bit inputs, both halves returned as one 128-bit vector.
;   <name>_QQres  - 128-bit inputs, both halves returned as one 256-bit vector.
;   *_undef*      - same masks with some lanes replaced by undef.

; <8 x i8>: even-lane mask <0,8,2,10,...> plus odd-lane mask <1,9,3,11,...>,
; summed.  Expected: a single vtrn.8 on D registers followed by vadd.i8.
define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtrn.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Same two masks concatenated into one <16 x i32> mask.  Expected: one vtrn.8,
; with both D registers moved out through r0-r3.  Register numbers are captured
; with FileCheck variables rather than hard-coded.
define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_Qres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <16 x i8> %tmp3
}

; <4 x i16> variant of vtrni8: masks <0,4,2,6> and <1,5,3,7>, summed.
; Expected: vtrn.16 followed by vadd.i16.
define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtrn.16 d17, d16
; CHECK-NEXT:    vadd.i16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; <4 x i16> inputs, concatenated <8 x i16> result.  Expected: one vtrn.16.
define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16_Qres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vtrn.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i16> %tmp3
}

; <2 x i32> variant: masks <0,2> and <1,3>.  Note that the two halves are
; combined with mul here (vmul.i32 in the expected output), not add.
define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtrn.32 d17, d16
; CHECK-NEXT:    vmul.i32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = mul <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; <2 x i32> inputs, concatenated <4 x i32> result.  Expected: one vtrn.32.
define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32_Qres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %tmp3
}

; <2 x float> variant: masks <0,2> and <1,3>, combined with fadd.
; Expected: vtrn.32 followed by vadd.f32.
define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtrn.32 d17, d16
; CHECK-NEXT:    vadd.f32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; <2 x float> inputs, concatenated <4 x float> result.  Expected: one vtrn.32.
define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf_Qres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %tmp3
}

; <16 x i8>: 128-bit version of vtrni8.  Expected: vtrn.8 on Q registers
; followed by vadd.i8.
define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtrnQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtrn.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; <16 x i8> inputs, concatenated <32 x i8> result.  In the expected output the
; inputs are loaded from r1/r2 and both transposed Q registers are stored
; through r0.
define <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtrnQi8_QQres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vtrn.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <32 x i8> %tmp3
}

; <8 x i16>: 128-bit version of vtrni16.  Expected: vtrn.16 on Q registers
; followed by vadd.i16.
define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtrn.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; <8 x i16> inputs, concatenated <16 x i16> result stored through r0.
define <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_QQres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vtrn.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <16 x i16> %tmp3
}

; <4 x i32>: masks <0,4,2,6> and <1,5,3,7>, summed.  Expected: vtrn.32 on
; Q registers followed by vadd.i32.
define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtrnQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtrn.32 q9, q8
; CHECK-NEXT:    vadd.i32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; <4 x i32> inputs, concatenated <8 x i32> result stored through r0.
define <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtrnQi32_QQres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vtrn.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i32> %tmp3
}

; <4 x float>: same masks as vtrnQi32, combined with fadd.  Expected: vtrn.32
; on Q registers followed by vadd.f32.
define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vtrnQf:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtrn.32 q9, q8
; CHECK-NEXT:    vadd.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; <4 x float> inputs, concatenated <8 x float> result stored through r0.
define <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vtrnQf_QQres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vtrn.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x float> %tmp3
}


; vtrni8 with several mask lanes replaced by undef.  The expected output is
; the same vtrn.8 + vadd.i8 sequence as for the fully specified masks.
define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vtrn.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; vtrni8_Qres with the same undef lanes as vtrni8_undef.
define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef_Qres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT:    vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  ret <16 x i8> %tmp3
}

; vtrnQi16 with several mask lanes replaced by undef.  The expected output is
; the same vtrn.16 + vadd.i16 sequence as for the fully specified masks.
define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vtrn.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; vtrnQi16_QQres with the same undef lanes as vtrnQi16_undef.
define <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_undef_QQres:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vtrn.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  ret <16 x i16> %tmp3
}

; The low four lanes of the <8 x i32> mask are all undef; the high four are
; the odd-lane pattern <1,5,3,7>.  Only the presence of a vtrn is required.
; The label pattern carries a trailing ':' like every other label in this
; file, so it matches the label definition rather than any mention of the
; symbol name.
define <8 x i16> @vtrn_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
  ; CHECK-LABEL: vtrn_lower_shufflemask_undef:
  ; CHECK: vtrn
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i16> %0
}

; Here we get a build_vector node, where all the incoming extract_element
; values do modify the type.
; However, we get different input types, as some of
; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
                                             <4 x i32> %cmp0, <4 x i32> %cmp1,
                                             <4 x i16> %cmp2, <4 x i16> %cmp3) {
  ; CHECK-LABEL: vtrn_mismatched_builvector0:
  ; CHECK: vmovn.i32
  ; CHECK: vbsl
  ; %c0 is a <4 x i1> produced from an i32 compare, %c1 a <4 x i1> produced
  ; from an i16 compare.
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c1 = icmp ult <4 x i16> %cmp2, %cmp3
  ; Interleave the two compare results lane by lane (mask <0,4,1,5,2,6,3,7>)
  ; and use the combined <8 x i1> to select between %tr0 and %tr1.
  %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Here we get a build_vector node, where half the incoming extract_element
; values do not modify the type (the values from cmp2), but half of them do
; (from the icmp operation).
define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
  ; CHECK-LABEL: vtrn_mismatched_builvector1:
  ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
  ; CHECK: vmovl
  ; CHECK: vbsl
  ; %cmp2 comes from a loaded <4 x i8> truncated to <4 x i1>; %c0 comes from
  ; an i32 compare.
  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  ; Interleave %c0 and %cmp2 lane by lane and select between %tr0 and %tr1.
  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; The shuffle mask is half a vtrn; we duplicate the half to produce the
; full result.
define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
entry:
  ; CHECK-LABEL: lower_twice_no_vtrn:
  ; CHECK: @ %bb.0:
  ; CHECK-NEXT: vldr d16, [r1]
  ; CHECK-NEXT: vldr d18, [r0]
  ; CHECK-NEXT: vtrn.16 d18, d16
  ; CHECK-NEXT: vorr d17, d16, d16
  ; CHECK-NEXT: vst1.64 {d16, d17}, [r2]
  ; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  ; Both 4-lane halves of the mask request the odd-lane pattern <1,5,3,7>
  ; (lane 0 is undef).  The expected code transposes once, copies d16 into
  ; d17 with vorr, and stores the {d16, d17} pair.
  %odd.twice = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
  store <8 x i16> %odd.twice, <8 x i16>* %C
  ret void
}

; Counterpart of the test above for the other vtrn half: the mask repeats the
; even-lane pattern, so the first vtrn result is the one that is duplicated.
define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
entry:
  ; CHECK-LABEL: upper_twice_no_vtrn:
  ; CHECK: @ %bb.0:
  ; CHECK-NEXT: vldr d16, [r1]
  ; CHECK-NEXT: vldr d18, [r0]
  ; CHECK-NEXT: vtrn.16 d18, d16
  ; CHECK-NEXT: vorr d19, d18, d18
  ; CHECK-NEXT: vst1.64 {d18, d19}, [r2]
  ; CHECK-NEXT: mov pc, lr
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  ; Both 4-lane halves of the mask request the even-lane pattern <0,4,2,6>
  ; (lane 1 is undef).  The expected code transposes once, copies d18 into
  ; d19 with vorr, and stores the {d18, d19} pair.
  %even.twice = shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
  store <8 x i16> %even.twice, <8 x i16>* %C
  ret void
}