; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Instruction-selection test for shufflevector on 128-bit MSA vector types.
;
; Every function has the same shape: load one or both source vectors from %a
; and %b (matched as 0($5) and 0($6)), apply a single shufflevector, and store
; the result through %c (matched as 0($4)). The FileCheck directive placed
; after each IR instruction names the MSA instruction expected for it.
;
; Function names are <expected-instruction>_<vector-type>_<variant>. Where a
; cheaper instruction is expected than the name suggests, a comment next to
; the directive says why.

; ---------------------------------------------------------------------------
; vshf: masks with no special structure. The expected sequence materialises an
; address with %lo(...), loads a vector from it and feeds that vector to
; vshf.<fmt> (presumably the shuffle-control vector). Masks that turn out to be
; splats or in-group permutations are expected to use splati/shf instead.
; ---------------------------------------------------------------------------

define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_1:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_2:

  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_3:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_4:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_2:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_3:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_4:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
;       instruction when using a single vector.

define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_2:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_3:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_4:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
  ; The two operand vectors are the same so element 1 and 5 are equivalent.
  ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_1:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_2:

  %1 = load <2 x i64>, <2 x i64>* %a
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_3:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_4:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; ---------------------------------------------------------------------------
; shf: single-source masks that apply the same permutation to every group of
; four elements. The expected immediates in this file are consistent with
; packing the four element selectors two bits each, selector 0 in the low
; bits: <3, 2, 1, 0> -> 27 and <1, 3, 2, 0> -> 45.
; ---------------------------------------------------------------------------

define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: shf_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: shf_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: shf_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

; shf.d does not exist

; ---------------------------------------------------------------------------
; ilvev: masks that interleave the even-indexed elements of the two sources.
; ---------------------------------------------------------------------------

define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; Interleaving one operand with itself.
define void @ilvev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v16i8_1:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v2i64_1:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
  ; ilvev.d with two identical operands is equivalent to splati.d
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @ilvev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v16i8_2:

  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v8i16_2:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v4i32_2:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 6, i32 6>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v2i64_2:

  %1 = load <2 x i64>, <2 x i64>* %a
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
  ; ilvev.d with two identical operands is equivalent to splati.d
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; ---------------------------------------------------------------------------
; ilvod: masks that interleave the odd-indexed elements of the two sources.
; The _1 and _2 variants interleave a single source with itself.
; ---------------------------------------------------------------------------

define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @ilvod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v16i8_1:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v2i64_1:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
  ; ilvod.d with two identical operands is equivalent to splati.d
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @ilvod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v16i8_2:

  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v8i16_2:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v4i32_2:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 5, i32 7, i32 7>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v2i64_2:

  %1 = load <2 x i64>, <2 x i64>* %a
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
  ; ilvod.d with two identical operands is equivalent to splati.d
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; ---------------------------------------------------------------------------
; ilvr: masks that interleave the low-numbered half of the elements of the two
; sources (indices 0 .. N/2-1 of each). The _1 and _2 variants interleave a
; single source with itself.
; ---------------------------------------------------------------------------

define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; ilvr.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @ilvr_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v16i8_1:

  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvr_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvr_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 5, i32 5>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvr_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v2i64_1:

  %1 = load <2 x i64>, <2 x i64>* %a
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
  ; ilvr.d and splati.d are equivalent for v2i64
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @ilvr_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v16i8_2:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvr_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v8i16_2:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvr_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v4i32_2:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvr_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v2i64_2:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
  ; ilvr.d and splati.d are equivalent for v2i64
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; ---------------------------------------------------------------------------
; ilvl: masks that interleave the high-numbered half of the elements of the
; two sources (indices N/2 .. N-1 of each).
; ---------------------------------------------------------------------------

define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; ilvl.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

define void @ilvl_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_1:

  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @ilvl_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @ilvl_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 
0($6) 979 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 6, i32 6, i32 7, i32 7> 980 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] 981 store <4 x i32> %3, <4 x i32>* %c 982 ; CHECK-DAG: st.w [[R3]], 0($4) 983 984 ret void 985} 986 987define void @ilvl_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 988 ; CHECK-LABEL: ilvl_v2i64_1: 989 990 %1 = load <2 x i64>, <2 x i64>* %a 991 %2 = load <2 x i64>, <2 x i64>* %b 992 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 993 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3> 994 ; ilvl.d and splati.d are equivalent for v2i64 995 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1] 996 store <2 x i64> %3, <2 x i64>* %c 997 ; CHECK-DAG: st.d [[R3]], 0($4) 998 999 ret void 1000} 1001 1002define void @ilvl_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1003 ; CHECK-LABEL: ilvl_v16i8_2: 1004 1005 %1 = load <16 x i8>, <16 x i8>* %a 1006 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1007 %2 = load <16 x i8>, <16 x i8>* %b 1008 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, 1009 <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15> 1010 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R1]] 1011 store <16 x i8> %3, <16 x i8>* %c 1012 ; CHECK-DAG: st.b [[R3]], 0($4) 1013 1014 ret void 1015} 1016 1017define void @ilvl_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1018 ; CHECK-LABEL: ilvl_v8i16_2: 1019 1020 %1 = load <8 x i16>, <8 x i16>* %a 1021 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1022 %2 = load <8 x i16>, <8 x i16>* %b 1023 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7> 1024 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R1]] 1025 store <8 x i16> %3, <8 x i16>* %c 1026 ; CHECK-DAG: st.h [[R3]], 0($4) 1027 1028 ret void 1029} 1030 1031define void @ilvl_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* 
%b) nounwind { 1032 ; CHECK-LABEL: ilvl_v4i32_2: 1033 1034 %1 = load <4 x i32>, <4 x i32>* %a 1035 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1036 %2 = load <4 x i32>, <4 x i32>* %b 1037 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 2, i32 3, i32 3> 1038 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R1]] 1039 store <4 x i32> %3, <4 x i32>* %c 1040 ; CHECK-DAG: st.w [[R3]], 0($4) 1041 1042 ret void 1043} 1044 1045define void @ilvl_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1046 ; CHECK-LABEL: ilvl_v2i64_2: 1047 1048 %1 = load <2 x i64>, <2 x i64>* %a 1049 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1050 %2 = load <2 x i64>, <2 x i64>* %b 1051 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1> 1052 ; ilvl.d and splati.d are equivalent for v2i64 1053 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1] 1054 store <2 x i64> %3, <2 x i64>* %c 1055 ; CHECK-DAG: st.d [[R3]], 0($4) 1056 1057 ret void 1058} 1059 1060define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1061 ; CHECK-LABEL: pckev_v16i8_0: 1062 1063 %1 = load <16 x i8>, <16 x i8>* %a 1064 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) 1065 %2 = load <16 x i8>, <16 x i8>* %b 1066 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1067 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, 1068 <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 1069 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]] 1070 store <16 x i8> %3, <16 x i8>* %c 1071 ; CHECK-DAG: st.b [[R3]], 0($4) 1072 1073 ret void 1074} 1075 1076define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { 1077 ; CHECK-LABEL: pckev_v8i16_0: 1078 1079 %1 = load <8 x i16>, <8 x i16>* %a 1080 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) 1081 %2 = load <8 x i16>, <8 x i16>* %b 1082 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) 1083 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> 
<i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1084 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]] 1085 store <8 x i16> %3, <8 x i16>* %c 1086 ; CHECK-DAG: st.h [[R3]], 0($4) 1087 1088 ret void 1089} 1090 1091define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { 1092 ; CHECK-LABEL: pckev_v4i32_0: 1093 1094 %1 = load <4 x i32>, <4 x i32>* %a 1095 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) 1096 %2 = load <4 x i32>, <4 x i32>* %b 1097 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) 1098 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 1099 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]] 1100 store <4 x i32> %3, <4 x i32>* %c 1101 ; CHECK-DAG: st.w [[R3]], 0($4) 1102 1103 ret void 1104} 1105 1106define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { 1107 ; CHECK-LABEL: pckev_v2i64_0: 1108 1109 %1 = load <2 x i64>, <2 x i64>* %a 1110 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) 1111 %2 = load <2 x i64>, <2 x i64>* %b 1112 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) 1113 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> 1114 ; pckev.d and ilvev.d are equivalent for v2i64 1115 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]] 1116 store <2 x i64> %3, <2 x i64>* %c 1117 ; CHECK-DAG: st.d [[R3]], 0($4) 1118 1119 ret void 1120} 1121 1122define void @pckev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { 1123 ; CHECK-LABEL: pckev_v16i8_1: 1124 1125 %1 = load <16 x i8>, <16 x i8>* %a 1126 %2 = load <16 x i8>, <16 x i8>* %b 1127 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) 1128 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, 1129 <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 1130 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]] 1131 store <16 x i8> %3, <16 x i8>* %c 1132 ; CHECK-DAG: st.b [[R3]], 0($4) 1133 1134 ret void 1135} 1136 
; pckev_*_1: the mask lists the even-numbered lanes of the second operand
; (%2, loaded from %b) twice, so only the load of %b is matched and the
; expected instruction takes [[R2]] for both sources.
define void @pckev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @pckev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 6, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @pckev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_1:

  %1 = load <2 x i64>, <2 x i64>* %a
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
  ; pckev.d and splati.d are equivalent for v2i64
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; pckev_*_2: the mask lists the even-numbered lanes of the first operand
; (%1, loaded from %a) twice, so only the load of %a is matched and the
; expected instruction takes [[R1]] for both sources.
define void @pckev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckev_v16i8_2:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @pckev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_2:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @pckev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_2:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @pckev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_2:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
  ; pckev.d and splati.d are equivalent for v2i64
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; pckod_*_0: the mask lists the odd-numbered lanes of %1 followed by the
; odd-numbered lanes of %2. Both loads are matched and both registers appear
; as sources.
define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; pckod.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; pckod_*_1: the mask lists the odd-numbered lanes of the second operand
; (%2, loaded from %b) twice, so only the load of %b is matched and the
; expected instruction takes [[R2]] for both sources.
define void @pckod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_1:

  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = load <16 x i8>, <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @pckod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_1:

  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = load <8 x i16>, <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @pckod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_1:

  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = load <4 x i32>, <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 7, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @pckod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_1:

  %1 = load <2 x i64>, <2 x i64>* %a
  %2 = load <2 x i64>, <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
  ; pckod.d and splati.d are equivalent for v2i64
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; pckod_*_2: the mask lists the odd-numbered lanes of the first operand
; (%1, loaded from %a) twice, so only the load of %a is matched and the
; expected instruction takes [[R1]] for both sources.
define void @pckod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_2:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>, <16 x i8>* %b
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @pckod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_2:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>, <8 x i16>* %b
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @pckod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_2:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>, <4 x i32>* %b
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @pckod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_2:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>, <2 x i64>* %b
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
  ; pckod.d and splati.d are equivalent for v2i64
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}

; splati_*_0: the second shufflevector operand is undef and every mask index
; names the same lane of %1, so a single splati of that lane from [[R1]] is
; expected.
define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: splati_v16i8_0:

  %1 = load <16 x i8>, <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
}

define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: splati_v8i16_0:

  %1 = load <8 x i16>, <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
}

define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: splati_v4i32_0:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][3]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
}

define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: splati_v2i64_0:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
}