1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -enable-misched=false | FileCheck %s 2 3define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 4;CHECK-LABEL: sqshl8b: 5;CHECK: sqshl.8b 6 %tmp1 = load <8 x i8>, <8 x i8>* %A 7 %tmp2 = load <8 x i8>, <8 x i8>* %B 8 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) 9 ret <8 x i8> %tmp3 10} 11 12define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 13;CHECK-LABEL: sqshl4h: 14;CHECK: sqshl.4h 15 %tmp1 = load <4 x i16>, <4 x i16>* %A 16 %tmp2 = load <4 x i16>, <4 x i16>* %B 17 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) 18 ret <4 x i16> %tmp3 19} 20 21define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 22;CHECK-LABEL: sqshl2s: 23;CHECK: sqshl.2s 24 %tmp1 = load <2 x i32>, <2 x i32>* %A 25 %tmp2 = load <2 x i32>, <2 x i32>* %B 26 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) 27 ret <2 x i32> %tmp3 28} 29 30define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 31;CHECK-LABEL: uqshl8b: 32;CHECK: uqshl.8b 33 %tmp1 = load <8 x i8>, <8 x i8>* %A 34 %tmp2 = load <8 x i8>, <8 x i8>* %B 35 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) 36 ret <8 x i8> %tmp3 37} 38 39define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 40;CHECK-LABEL: uqshl4h: 41;CHECK: uqshl.4h 42 %tmp1 = load <4 x i16>, <4 x i16>* %A 43 %tmp2 = load <4 x i16>, <4 x i16>* %B 44 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) 45 ret <4 x i16> %tmp3 46} 47 48define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 49;CHECK-LABEL: uqshl2s: 50;CHECK: uqshl.2s 51 %tmp1 = load <2 x i32>, <2 x i32>* %A 52 %tmp2 = load <2 x i32>, <2 x i32>* %B 53 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) 54 ret <2 x i32> %tmp3 55} 56 57define <16 x i8> @sqshl16b(<16 x 
i8>* %A, <16 x i8>* %B) nounwind { 58;CHECK-LABEL: sqshl16b: 59;CHECK: sqshl.16b 60 %tmp1 = load <16 x i8>, <16 x i8>* %A 61 %tmp2 = load <16 x i8>, <16 x i8>* %B 62 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) 63 ret <16 x i8> %tmp3 64} 65 66define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 67;CHECK-LABEL: sqshl8h: 68;CHECK: sqshl.8h 69 %tmp1 = load <8 x i16>, <8 x i16>* %A 70 %tmp2 = load <8 x i16>, <8 x i16>* %B 71 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 72 ret <8 x i16> %tmp3 73} 74 75define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 76;CHECK-LABEL: sqshl4s: 77;CHECK: sqshl.4s 78 %tmp1 = load <4 x i32>, <4 x i32>* %A 79 %tmp2 = load <4 x i32>, <4 x i32>* %B 80 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) 81 ret <4 x i32> %tmp3 82} 83 84define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 85;CHECK-LABEL: sqshl2d: 86;CHECK: sqshl.2d 87 %tmp1 = load <2 x i64>, <2 x i64>* %A 88 %tmp2 = load <2 x i64>, <2 x i64>* %B 89 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) 90 ret <2 x i64> %tmp3 91} 92 93define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 94;CHECK-LABEL: uqshl16b: 95;CHECK: uqshl.16b 96 %tmp1 = load <16 x i8>, <16 x i8>* %A 97 %tmp2 = load <16 x i8>, <16 x i8>* %B 98 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) 99 ret <16 x i8> %tmp3 100} 101 102define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 103;CHECK-LABEL: uqshl8h: 104;CHECK: uqshl.8h 105 %tmp1 = load <8 x i16>, <8 x i16>* %A 106 %tmp2 = load <8 x i16>, <8 x i16>* %B 107 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 108 ret <8 x i16> %tmp3 109} 110 111define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 112;CHECK-LABEL: uqshl4s: 113;CHECK: uqshl.4s 114 
%tmp1 = load <4 x i32>, <4 x i32>* %A 115 %tmp2 = load <4 x i32>, <4 x i32>* %B 116 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) 117 ret <4 x i32> %tmp3 118} 119 120define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 121;CHECK-LABEL: uqshl2d: 122;CHECK: uqshl.2d 123 %tmp1 = load <2 x i64>, <2 x i64>* %A 124 %tmp2 = load <2 x i64>, <2 x i64>* %B 125 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) 126 ret <2 x i64> %tmp3 127} 128 129declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 130declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 131declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 132declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 133 134declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 135declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 136declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 137declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 138 139declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 140declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 141declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 142declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone 143 144declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 145declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 146declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 147declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind 
readnone 148 149define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 150;CHECK-LABEL: srshl8b: 151;CHECK: srshl.8b 152 %tmp1 = load <8 x i8>, <8 x i8>* %A 153 %tmp2 = load <8 x i8>, <8 x i8>* %B 154 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) 155 ret <8 x i8> %tmp3 156} 157 158define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 159;CHECK-LABEL: srshl4h: 160;CHECK: srshl.4h 161 %tmp1 = load <4 x i16>, <4 x i16>* %A 162 %tmp2 = load <4 x i16>, <4 x i16>* %B 163 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) 164 ret <4 x i16> %tmp3 165} 166 167define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 168;CHECK-LABEL: srshl2s: 169;CHECK: srshl.2s 170 %tmp1 = load <2 x i32>, <2 x i32>* %A 171 %tmp2 = load <2 x i32>, <2 x i32>* %B 172 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) 173 ret <2 x i32> %tmp3 174} 175 176define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 177;CHECK-LABEL: urshl8b: 178;CHECK: urshl.8b 179 %tmp1 = load <8 x i8>, <8 x i8>* %A 180 %tmp2 = load <8 x i8>, <8 x i8>* %B 181 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) 182 ret <8 x i8> %tmp3 183} 184 185define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 186;CHECK-LABEL: urshl4h: 187;CHECK: urshl.4h 188 %tmp1 = load <4 x i16>, <4 x i16>* %A 189 %tmp2 = load <4 x i16>, <4 x i16>* %B 190 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) 191 ret <4 x i16> %tmp3 192} 193 194define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 195;CHECK-LABEL: urshl2s: 196;CHECK: urshl.2s 197 %tmp1 = load <2 x i32>, <2 x i32>* %A 198 %tmp2 = load <2 x i32>, <2 x i32>* %B 199 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) 200 ret <2 x i32> %tmp3 201} 202 203define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) 
nounwind { 204;CHECK-LABEL: srshl16b: 205;CHECK: srshl.16b 206 %tmp1 = load <16 x i8>, <16 x i8>* %A 207 %tmp2 = load <16 x i8>, <16 x i8>* %B 208 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) 209 ret <16 x i8> %tmp3 210} 211 212define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 213;CHECK-LABEL: srshl8h: 214;CHECK: srshl.8h 215 %tmp1 = load <8 x i16>, <8 x i16>* %A 216 %tmp2 = load <8 x i16>, <8 x i16>* %B 217 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 218 ret <8 x i16> %tmp3 219} 220 221define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 222;CHECK-LABEL: srshl4s: 223;CHECK: srshl.4s 224 %tmp1 = load <4 x i32>, <4 x i32>* %A 225 %tmp2 = load <4 x i32>, <4 x i32>* %B 226 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) 227 ret <4 x i32> %tmp3 228} 229 230define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 231;CHECK-LABEL: srshl2d: 232;CHECK: srshl.2d 233 %tmp1 = load <2 x i64>, <2 x i64>* %A 234 %tmp2 = load <2 x i64>, <2 x i64>* %B 235 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) 236 ret <2 x i64> %tmp3 237} 238 239define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 240;CHECK-LABEL: urshl16b: 241;CHECK: urshl.16b 242 %tmp1 = load <16 x i8>, <16 x i8>* %A 243 %tmp2 = load <16 x i8>, <16 x i8>* %B 244 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) 245 ret <16 x i8> %tmp3 246} 247 248define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 249;CHECK-LABEL: urshl8h: 250;CHECK: urshl.8h 251 %tmp1 = load <8 x i16>, <8 x i16>* %A 252 %tmp2 = load <8 x i16>, <8 x i16>* %B 253 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 254 ret <8 x i16> %tmp3 255} 256 257define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 258;CHECK-LABEL: urshl4s: 
259;CHECK: urshl.4s 260 %tmp1 = load <4 x i32>, <4 x i32>* %A 261 %tmp2 = load <4 x i32>, <4 x i32>* %B 262 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) 263 ret <4 x i32> %tmp3 264} 265 266define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 267;CHECK-LABEL: urshl2d: 268;CHECK: urshl.2d 269 %tmp1 = load <2 x i64>, <2 x i64>* %A 270 %tmp2 = load <2 x i64>, <2 x i64>* %B 271 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) 272 ret <2 x i64> %tmp3 273} 274 275declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 276declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 277declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 278declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 279 280declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 281declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 282declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 283declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 284 285declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 286declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 287declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 288declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone 289 290declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 291declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 292declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 293declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x 
i64>, <2 x i64>) nounwind readnone 294 295define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 296;CHECK-LABEL: sqrshl8b: 297;CHECK: sqrshl.8b 298 %tmp1 = load <8 x i8>, <8 x i8>* %A 299 %tmp2 = load <8 x i8>, <8 x i8>* %B 300 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) 301 ret <8 x i8> %tmp3 302} 303 304define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 305;CHECK-LABEL: sqrshl4h: 306;CHECK: sqrshl.4h 307 %tmp1 = load <4 x i16>, <4 x i16>* %A 308 %tmp2 = load <4 x i16>, <4 x i16>* %B 309 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) 310 ret <4 x i16> %tmp3 311} 312 313define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 314;CHECK-LABEL: sqrshl2s: 315;CHECK: sqrshl.2s 316 %tmp1 = load <2 x i32>, <2 x i32>* %A 317 %tmp2 = load <2 x i32>, <2 x i32>* %B 318 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) 319 ret <2 x i32> %tmp3 320} 321 322define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 323;CHECK-LABEL: uqrshl8b: 324;CHECK: uqrshl.8b 325 %tmp1 = load <8 x i8>, <8 x i8>* %A 326 %tmp2 = load <8 x i8>, <8 x i8>* %B 327 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) 328 ret <8 x i8> %tmp3 329} 330 331define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 332;CHECK-LABEL: uqrshl4h: 333;CHECK: uqrshl.4h 334 %tmp1 = load <4 x i16>, <4 x i16>* %A 335 %tmp2 = load <4 x i16>, <4 x i16>* %B 336 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) 337 ret <4 x i16> %tmp3 338} 339 340define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 341;CHECK-LABEL: uqrshl2s: 342;CHECK: uqrshl.2s 343 %tmp1 = load <2 x i32>, <2 x i32>* %A 344 %tmp2 = load <2 x i32>, <2 x i32>* %B 345 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) 346 ret <2 x i32> %tmp3 347} 348 349define 
<16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 350;CHECK-LABEL: sqrshl16b: 351;CHECK: sqrshl.16b 352 %tmp1 = load <16 x i8>, <16 x i8>* %A 353 %tmp2 = load <16 x i8>, <16 x i8>* %B 354 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) 355 ret <16 x i8> %tmp3 356} 357 358define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 359;CHECK-LABEL: sqrshl8h: 360;CHECK: sqrshl.8h 361 %tmp1 = load <8 x i16>, <8 x i16>* %A 362 %tmp2 = load <8 x i16>, <8 x i16>* %B 363 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 364 ret <8 x i16> %tmp3 365} 366 367define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 368;CHECK-LABEL: sqrshl4s: 369;CHECK: sqrshl.4s 370 %tmp1 = load <4 x i32>, <4 x i32>* %A 371 %tmp2 = load <4 x i32>, <4 x i32>* %B 372 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) 373 ret <4 x i32> %tmp3 374} 375 376define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 377;CHECK-LABEL: sqrshl2d: 378;CHECK: sqrshl.2d 379 %tmp1 = load <2 x i64>, <2 x i64>* %A 380 %tmp2 = load <2 x i64>, <2 x i64>* %B 381 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) 382 ret <2 x i64> %tmp3 383} 384 385define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 386;CHECK-LABEL: uqrshl16b: 387;CHECK: uqrshl.16b 388 %tmp1 = load <16 x i8>, <16 x i8>* %A 389 %tmp2 = load <16 x i8>, <16 x i8>* %B 390 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) 391 ret <16 x i8> %tmp3 392} 393 394define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 395;CHECK-LABEL: uqrshl8h: 396;CHECK: uqrshl.8h 397 %tmp1 = load <8 x i16>, <8 x i16>* %A 398 %tmp2 = load <8 x i16>, <8 x i16>* %B 399 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) 400 ret <8 x i16> %tmp3 401} 402 403define <4 x i32> 
@uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 404;CHECK-LABEL: uqrshl4s: 405;CHECK: uqrshl.4s 406 %tmp1 = load <4 x i32>, <4 x i32>* %A 407 %tmp2 = load <4 x i32>, <4 x i32>* %B 408 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) 409 ret <4 x i32> %tmp3 410} 411 412define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 413;CHECK-LABEL: uqrshl2d: 414;CHECK: uqrshl.2d 415 %tmp1 = load <2 x i64>, <2 x i64>* %A 416 %tmp2 = load <2 x i64>, <2 x i64>* %B 417 %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) 418 ret <2 x i64> %tmp3 419} 420 421declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 422declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 423declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 424declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 425 426declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 427declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 428declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 429declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 430 431declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 432declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 433declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 434declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone 435 436declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 437declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 438declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 
x i32>, <4 x i32>) nounwind readnone 439declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone 440 441define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind { 442;CHECK-LABEL: urshr8b: 443;CHECK: urshr.8b 444 %tmp1 = load <8 x i8>, <8 x i8>* %A 445 %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 446 ret <8 x i8> %tmp3 447} 448 449define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind { 450;CHECK-LABEL: urshr4h: 451;CHECK: urshr.4h 452 %tmp1 = load <4 x i16>, <4 x i16>* %A 453 %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 454 ret <4 x i16> %tmp3 455} 456 457define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind { 458;CHECK-LABEL: urshr2s: 459;CHECK: urshr.2s 460 %tmp1 = load <2 x i32>, <2 x i32>* %A 461 %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>) 462 ret <2 x i32> %tmp3 463} 464 465define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind { 466;CHECK-LABEL: urshr16b: 467;CHECK: urshr.16b 468 %tmp1 = load <16 x i8>, <16 x i8>* %A 469 %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 470 ret <16 x i8> %tmp3 471} 472 473define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind { 474;CHECK-LABEL: urshr8h: 475;CHECK: urshr.8h 476 %tmp1 = load <8 x i16>, <8 x i16>* %A 477 %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 478 ret <8 x i16> %tmp3 479} 480 481define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind { 482;CHECK-LABEL: urshr4s: 483;CHECK: urshr.4s 484 %tmp1 = load <4 x i32>, <4 x i32>* %A 485 %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 486 ret <4 x 
i32> %tmp3 487} 488 489define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind { 490;CHECK-LABEL: urshr2d: 491;CHECK: urshr.2d 492 %tmp1 = load <2 x i64>, <2 x i64>* %A 493 %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>) 494 ret <2 x i64> %tmp3 495} 496 497define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind { 498;CHECK-LABEL: srshr8b: 499;CHECK: srshr.8b 500 %tmp1 = load <8 x i8>, <8 x i8>* %A 501 %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 502 ret <8 x i8> %tmp3 503} 504 505define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind { 506;CHECK-LABEL: srshr4h: 507;CHECK: srshr.4h 508 %tmp1 = load <4 x i16>, <4 x i16>* %A 509 %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>) 510 ret <4 x i16> %tmp3 511} 512 513define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind { 514;CHECK-LABEL: srshr2s: 515;CHECK: srshr.2s 516 %tmp1 = load <2 x i32>, <2 x i32>* %A 517 %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>) 518 ret <2 x i32> %tmp3 519} 520 521define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind { 522;CHECK-LABEL: srshr16b: 523;CHECK: srshr.16b 524 %tmp1 = load <16 x i8>, <16 x i8>* %A 525 %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 526 ret <16 x i8> %tmp3 527} 528 529define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind { 530;CHECK-LABEL: srshr8h: 531;CHECK: srshr.8h 532 %tmp1 = load <8 x i16>, <8 x i16>* %A 533 %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>) 534 ret <8 x i16> %tmp3 535} 536 537define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind { 538;CHECK-LABEL: srshr4s: 539;CHECK: srshr.4s 540 %tmp1 
= load <4 x i32>, <4 x i32>* %A 541 %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) 542 ret <4 x i32> %tmp3 543} 544 545define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind { 546;CHECK-LABEL: srshr2d: 547;CHECK: srshr.2d 548 %tmp1 = load <2 x i64>, <2 x i64>* %A 549 %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>) 550 ret <2 x i64> %tmp3 551} 552 553define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind { 554;CHECK-LABEL: sqshlu8b: 555;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1 556 %tmp1 = load <8 x i8>, <8 x i8>* %A 557 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 558 ret <8 x i8> %tmp3 559} 560 561define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind { 562;CHECK-LABEL: sqshlu4h: 563;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1 564 %tmp1 = load <4 x i16>, <4 x i16>* %A 565 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) 566 ret <4 x i16> %tmp3 567} 568 569define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind { 570;CHECK-LABEL: sqshlu2s: 571;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1 572 %tmp1 = load <2 x i32>, <2 x i32>* %A 573 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>) 574 ret <2 x i32> %tmp3 575} 576 577define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind { 578;CHECK-LABEL: sqshlu16b: 579;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1 580 %tmp1 = load <16 x i8>, <16 x i8>* %A 581 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) 582 ret <16 x i8> %tmp3 583} 584 585define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind { 586;CHECK-LABEL: sqshlu8h: 587;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1 588 %tmp1 = load <8 x i16>, <8 x i16>* %A 589 %tmp3 = call <8 x i16> 
@llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) 590 ret <8 x i16> %tmp3 591} 592 593define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind { 594;CHECK-LABEL: sqshlu4s: 595;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1 596 %tmp1 = load <4 x i32>, <4 x i32>* %A 597 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) 598 ret <4 x i32> %tmp3 599} 600 601define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind { 602;CHECK-LABEL: sqshlu2d: 603;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1 604 %tmp1 = load <2 x i64>, <2 x i64>* %A 605 %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>) 606 ret <2 x i64> %tmp3 607} 608 609declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone 610declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 611declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 612declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone 613 614declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 615declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 616declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 617declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone 618 619define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind { 620;CHECK-LABEL: rshrn8b: 621;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1 622 %tmp1 = load <8 x i16>, <8 x i16>* %A 623 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) 624 ret <8 x i8> %tmp3 625} 626 627define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind { 628;CHECK-LABEL: rshrn4h: 629;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1 630 %tmp1 = load <4 x i32>, <4 x i32>* %A 631 %tmp3 = call <4 x i16> 
@llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) 632 ret <4 x i16> %tmp3 633} 634 635define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind { 636;CHECK-LABEL: rshrn2s: 637;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1 638 %tmp1 = load <2 x i64>, <2 x i64>* %A 639 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) 640 ret <2 x i32> %tmp3 641} 642 643define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind { 644;CHECK-LABEL: rshrn16b: 645;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1 646 %out = load <8 x i8>, <8 x i8>* %ret 647 %tmp1 = load <8 x i16>, <8 x i16>* %A 648 %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) 649 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 650 ret <16 x i8> %tmp4 651} 652 653define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 654;CHECK-LABEL: rshrn8h: 655;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1 656 %out = load <4 x i16>, <4 x i16>* %ret 657 %tmp1 = load <4 x i32>, <4 x i32>* %A 658 %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) 659 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 660 ret <8 x i16> %tmp4 661} 662 663define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 664;CHECK-LABEL: rshrn4s: 665;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1 666 %out = load <2 x i32>, <2 x i32>* %ret 667 %tmp1 = load <2 x i64>, <2 x i64>* %A 668 %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) 669 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 670 ret <4 x i32> %tmp4 671} 672 673declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone 674declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone 675declare <2 x i32> 
@llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone 676 677define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind { 678;CHECK-LABEL: shrn8b: 679;CHECK: shrn.8b v0, {{v[0-9]+}}, #1 680 %tmp1 = load <8 x i16>, <8 x i16>* %A 681 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 682 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> 683 ret <8 x i8> %tmp3 684} 685 686define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind { 687;CHECK-LABEL: shrn4h: 688;CHECK: shrn.4h v0, {{v[0-9]+}}, #1 689 %tmp1 = load <4 x i32>, <4 x i32>* %A 690 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 691 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> 692 ret <4 x i16> %tmp3 693} 694 695define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind { 696;CHECK-LABEL: shrn2s: 697;CHECK: shrn.2s v0, {{v[0-9]+}}, #1 698 %tmp1 = load <2 x i64>, <2 x i64>* %A 699 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1> 700 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> 701 ret <2 x i32> %tmp3 702} 703 704define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { 705;CHECK-LABEL: shrn16b: 706;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1 707 %out = load <8 x i8>, <8 x i8>* %ret 708 %tmp1 = load <8 x i16>, <8 x i16>* %A 709 %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 710 %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> 711 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 712 ret <16 x i8> %tmp4 713} 714 715define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 716;CHECK-LABEL: shrn8h: 717;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1 718 %out = load <4 x i16>, <4 x i16>* %ret 719 %tmp1 = load <4 x i32>, <4 x i32>* %A 720 %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 721 %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> 722 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 
4, i32 5, i32 6, i32 7> 723 ret <8 x i16> %tmp4 724} 725 726define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 727;CHECK-LABEL: shrn4s: 728;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1 729 %out = load <2 x i32>, <2 x i32>* %ret 730 %tmp1 = load <2 x i64>, <2 x i64>* %A 731 %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1> 732 %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> 733 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 734 ret <4 x i32> %tmp4 735} 736 737declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone 738declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone 739declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone 740 741define i32 @sqshrn1s(i64 %A) nounwind { 742; CHECK-LABEL: sqshrn1s: 743; CHECK: sqshrn {{s[0-9]+}}, d0, #1 744 %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1) 745 ret i32 %tmp 746} 747 748define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind { 749;CHECK-LABEL: sqshrn8b: 750;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1 751 %tmp1 = load <8 x i16>, <8 x i16>* %A 752 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) 753 ret <8 x i8> %tmp3 754} 755 756define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind { 757;CHECK-LABEL: sqshrn4h: 758;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1 759 %tmp1 = load <4 x i32>, <4 x i32>* %A 760 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) 761 ret <4 x i16> %tmp3 762} 763 764define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind { 765;CHECK-LABEL: sqshrn2s: 766;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1 767 %tmp1 = load <2 x i64>, <2 x i64>* %A 768 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) 769 ret <2 x i32> %tmp3 770} 771 772 773define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind { 774;CHECK-LABEL: sqshrn16b: 775;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1 776 %out = load <8 x i8>, <8 x i8>* %ret 777 
%tmp1 = load <8 x i16>, <8 x i16>* %A 778 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) 779 %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 780 ret <16 x i8> %tmp4 781} 782 783define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind { 784;CHECK-LABEL: sqshrn8h: 785;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1 786 %out = load <4 x i16>, <4 x i16>* %ret 787 %tmp1 = load <4 x i32>, <4 x i32>* %A 788 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) 789 %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 790 ret <8 x i16> %tmp4 791} 792 793define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind { 794;CHECK-LABEL: sqshrn4s: 795;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1 796 %out = load <2 x i32>, <2 x i32>* %ret 797 %tmp1 = load <2 x i64>, <2 x i64>* %A 798 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) 799 %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 800 ret <4 x i32> %tmp4 801} 802 803declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone 804declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone 805declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone 806declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone 807 808define i32 @sqshrun1s(i64 %A) nounwind { 809; CHECK-LABEL: sqshrun1s: 810; CHECK: sqshrun {{s[0-9]+}}, d0, #1 811 %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1) 812 ret i32 %tmp 813} 814 815define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind { 816;CHECK-LABEL: sqshrun8b: 817;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1 818 %tmp1 = load <8 x i16>, <8 x i16>* %A 819 %tmp3 = call <8 x i8> 
@llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

; sqshrun intrinsic with a constant shift of 1: expects sqshrun ... #1.
define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun4h:
;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun2s:
;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

; Same intrinsic, but the narrowed result is shuffled in after %out, so the
; "2" form (sqshrun2) is expected.
define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun16b:
;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun8h:
;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun4s:
;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 =
shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone

; llvm.aarch64.neon.sqrshrn.* called with a constant shift amount of 1: each
; test expects the immediate form (#1) of sqrshrn.
define i32 @sqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrn1s:
; CHECK: sqrshrn {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrn8b:
;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrn4h:
;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrn2s:
;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrn16b:
;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32
13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

; sqrshrn by 1 whose result is shuffled in after %out: the expected instruction
; is the "2" form (sqrshrn2) writing v0.
define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrn8h:
;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrn4s:
;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone

; llvm.aarch64.neon.sqrshrun.* called with a constant shift amount of 1: each
; test expects the immediate form (#1) of sqrshrun.
define i32 @sqrshrun1s(i64 %A) nounwind {
; CHECK-LABEL: sqrshrun1s:
; CHECK: sqrshrun {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrun8b:
;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrun4h:
;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x
i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

; sqrshrun intrinsic with a constant shift of 1: expects sqrshrun ... #1.
define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrun2s:
;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

; Same intrinsic, but the narrowed result is shuffled in after %out, so the
; "2" form (sqrshrun2) is expected.
define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrun16b:
;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrun8h:
;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrun4s:
;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)
nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone

; llvm.aarch64.neon.uqrshrn.* called with a constant shift amount of 1: each
; test expects the immediate form (#1) of uqrshrn.
define i32 @uqrshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqrshrn1s:
; CHECK: uqrshrn {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqrshrn8b:
;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqrshrn4h:
;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: uqrshrn2s:
;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

; Same intrinsic, but the narrowed result is shuffled in after %out, so the
; "2" form (uqrshrn2) is expected.
define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: uqrshrn16b:
;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: uqrshrn8h:
;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load
<4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

; uqrshrn by 1 whose result is shuffled in after %out: the expected instruction
; is the "2" form (uqrshrn2) writing v0.
define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: uqrshrn4s:
;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone

; llvm.aarch64.neon.uqshrn.* called with a constant shift amount of 1: each
; test expects the immediate form (#1) of uqshrn.
define i32 @uqshrn1s(i64 %A) nounwind {
; CHECK-LABEL: uqshrn1s:
; CHECK: uqshrn {{s[0-9]+}}, d0, #1
  %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
  ret i32 %tmp
}

define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshrn8b:
;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshrn4h:
;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: uqshrn2s:
;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  ret <2 x i32> %tmp3
}

; Same intrinsic, but the narrowed result is shuffled in after %out, so the
; "2" form (uqshrn2) is expected.
define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshrn16b:
;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
  %out = load <8 x i8>, <8 x i8>* %ret
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp4
}

define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshrn8h:
;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
  %out = load <4 x i16>, <4 x i16>* %ret
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp4
}

define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: uqshrn4s:
;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
  %out = load <2 x i32>, <2 x i32>* %ret
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp4
}

declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
; zext followed by shl by a constant 1: each test expects ushll ... #1.
define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: ushll8h:
;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: ushll4s:
;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: ushll2d:
;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

; Same pattern, but the zext input is the upper half of the loaded vector
; (selected with shufflevector): each test expects ushll2 ... #1.
define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: ushll2_8h:
;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
  %load1 = load <16 x i8>, <16 x i8>* %A
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: ushll2_4s:
;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
  %load1 = load <8 x i16>, <8 x i16>* %A
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ushll2_2d:
;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
  %load1 =
load <4 x i32>, <4 x i32>* %A
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

; sext followed by shl by a constant 1: each test expects sshll ... #1.
define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sshll8h:
;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sshll4s:
;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sshll2d:
;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

; Same pattern, but the sext input is the upper half of the loaded vector
; (selected with shufflevector): each test expects sshll2 ... #1.
define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sshll2_8h:
;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
  %load1 = load <16 x i8>, <16 x i8>* %A
  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %tmp3
}

define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sshll2_4s:
;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
  %load1 = load <8 x i16>, <8 x i16>* %A
  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp3
}

; sext of the upper half of the loaded vector followed by shl by 1: expects
; sshll2 ... #1.
define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sshll2_2d:
;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
  %load1 = load <4 x i32>, <4 x i32>* %A
  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
  ret <2 x i64> %tmp3
}

; llvm.aarch64.neon.sqshl.* with a constant splat-of-1 shift operand: each
; test expects the immediate form sqshl ... #1.
define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqshli8b:
;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqshli4h:
;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqshli2s:
;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqshli16b:
;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshli8h:
;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>,
<8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

; sqshl intrinsic with a constant splat-of-1 shift operand: expects the
; immediate form sqshl ... #1.
define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshli4s:
;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshli2d:
;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

; llvm.aarch64.neon.uqshl.* with a constant splat-of-1 shift operand: expects
; the immediate form uqshl ... #1.
define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli8b:
;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <8 x i8> %tmp3
}

; Splat shift of 8 on i8 lanes: the expected output materialises the shift
; amount with movi and uses the register form of uqshl instead of an immediate.
define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli8b_1:
;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
;CHECK: uqshl.8b v0, v0, [[REG]]
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli4h:
;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: uqshli2s:
;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x
i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
  ret <2 x i32> %tmp3
}

; uqshl intrinsic with a constant splat-of-1 shift operand: expects the
; immediate form uqshl ... #1. The expectation pins the operands and the
; immediate so that a fallback to the register form cannot pass unnoticed.
define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli16b:
;CHECK: uqshl.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli8h:
;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshli4s:
;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: uqshli2d:
;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
  ret <2 x i64> %tmp3
}

; llvm.aarch64.neon.urshl.* with a splat shift of -1, added to the value loaded
; from %B: each test expects ursra ... #1.
define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ursra8b:
;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <8 x i8>, <8 x i8>* %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B)
nounwind {
;CHECK-LABEL: ursra4h:
;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <4 x i16>, <4 x i16>* %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; urshl intrinsic with a splat shift of -1, added to the value loaded from %B:
; each test expects ursra ... #1.
define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: ursra2s:
;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  %tmp4 = load <2 x i32>, <2 x i32>* %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ursra16b:
;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <16 x i8>, <16 x i8>* %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ursra8h:
;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <8 x i16>, <8 x i16>* %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ursra4s:
;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1,
i32 -1, i32 -1, i32 -1>)
  %tmp4 = load <4 x i32>, <4 x i32>* %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; urshl intrinsic with a splat shift of -1, added to the value loaded from %B:
; expects ursra ... #1.
define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: ursra2d:
;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  %tmp4 = load <2 x i64>, <2 x i64>* %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; llvm.aarch64.neon.srshl.* with a splat shift of -1, added to the value loaded
; from %B: each test expects srsra ... #1.
define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srsra8b:
;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <8 x i8>, <8 x i8>* %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srsra4h:
;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <4 x i16>, <4 x i16>* %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srsra2s:
;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
  %tmp4 = load <2 x i32>, <2 x i32>* %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srsra16b:
;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %tmp4 = load <16 x i8>, <16 x i8>* %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; srshl intrinsic with a splat shift of -1, added to the value loaded from %B:
; each test expects srsra ... #1.
define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srsra8h:
;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
  %tmp4 = load <8 x i16>, <8 x i16>* %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srsra4s:
;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
  %tmp4 = load <4 x i32>, <4 x i32>* %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: srsra2d:
;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
  %tmp4 = load <2 x i64>, <2 x i64>* %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; lshr by a constant 1 followed by add of the value loaded from %B: each test
; expects usra ... #1.
define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usra8b:
;CHECK: usra.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <8 x i8>, <8 x i8>* %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @usra4h(<4 x
i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usra4h:
;CHECK: usra.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <4 x i16>, <4 x i16>* %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; lshr by a constant 1 followed by add of the value loaded from %B: each test
; expects usra ... #1.
define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usra2s:
;CHECK: usra.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp4 = load <2 x i32>, <2 x i32>* %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usra16b:
;CHECK: usra.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <16 x i8>, <16 x i8>* %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usra8h:
;CHECK: usra.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <8 x i16>, <8 x i16>* %B
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usra4s:
;CHECK: usra.4s v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
  %tmp4 = load <4 x i32>, <4 x i32>* %B
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: usra2d:
;CHECK: usra.2d v0, {{v[0-9]+}}, #1
%tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
  %tmp4 = load <2 x i64>, <2 x i64>* %B
  %tmp5 = add <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; ashr by a constant 1 followed by add of the value loaded from %B: each test
; expects ssra ... #1.
define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ssra8b:
;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <8 x i8>, <8 x i8>* %B
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: ssra4h:
;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
  %tmp4 = load <4 x i16>, <4 x i16>* %B
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: ssra2s:
;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
  %tmp4 = load <2 x i32>, <2 x i32>* %B
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ssra16b:
;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %tmp4 = load <16 x i8>, <16 x i8>* %B
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ssra8h:
;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1628 %tmp4 = load <8 x i16>, <8 x i16>* %B 1629 %tmp5 = add <8 x i16> %tmp3, %tmp4 1630 ret <8 x i16> %tmp5 1631} 1632 1633define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1634;CHECK-LABEL: ssra4s: 1635;CHECK: ssra.4s v0, {{v[0-9]+}}, #1 1636 %tmp1 = load <4 x i32>, <4 x i32>* %A 1637 %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 1638 %tmp4 = load <4 x i32>, <4 x i32>* %B 1639 %tmp5 = add <4 x i32> %tmp3, %tmp4 1640 ret <4 x i32> %tmp5 1641} 1642 1643define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1644;CHECK-LABEL: ssra2d: 1645;CHECK: ssra.2d v0, {{v[0-9]+}}, #1 1646 %tmp1 = load <2 x i64>, <2 x i64>* %A 1647 %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1> 1648 %tmp4 = load <2 x i64>, <2 x i64>* %B 1649 %tmp5 = add <2 x i64> %tmp3, %tmp4 1650 ret <2 x i64> %tmp5 1651} 1652 1653define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1654;CHECK-LABEL: shr_orr8b: 1655;CHECK: shr.8b v0, {{v[0-9]+}}, #1 1656;CHECK-NEXT: orr.8b 1657;CHECK-NEXT: ret 1658 %tmp1 = load <8 x i8>, <8 x i8>* %A 1659 %tmp4 = load <8 x i8>, <8 x i8>* %B 1660 %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1661 %tmp5 = or <8 x i8> %tmp3, %tmp4 1662 ret <8 x i8> %tmp5 1663} 1664 1665define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1666;CHECK-LABEL: shr_orr4h: 1667;CHECK: shr.4h v0, {{v[0-9]+}}, #1 1668;CHECK-NEXT: orr.8b 1669;CHECK-NEXT: ret 1670 %tmp1 = load <4 x i16>, <4 x i16>* %A 1671 %tmp4 = load <4 x i16>, <4 x i16>* %B 1672 %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> 1673 %tmp5 = or <4 x i16> %tmp3, %tmp4 1674 ret <4 x i16> %tmp5 1675} 1676 1677define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1678;CHECK-LABEL: shr_orr2s: 1679;CHECK: shr.2s v0, {{v[0-9]+}}, #1 1680;CHECK-NEXT: orr.8b 1681;CHECK-NEXT: ret 1682 %tmp1 = load <2 x i32>, <2 x i32>* %A 1683 %tmp4 = load <2 x i32>, <2 x i32>* %B 1684 %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1> 1685 %tmp5 = or 
<2 x i32> %tmp3, %tmp4 1686 ret <2 x i32> %tmp5 1687} 1688 1689define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1690;CHECK-LABEL: shr_orr16b: 1691;CHECK: shr.16b v0, {{v[0-9]+}}, #1 1692;CHECK-NEXT: orr.16b 1693;CHECK-NEXT: ret 1694 %tmp1 = load <16 x i8>, <16 x i8>* %A 1695 %tmp4 = load <16 x i8>, <16 x i8>* %B 1696 %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1697 %tmp5 = or <16 x i8> %tmp3, %tmp4 1698 ret <16 x i8> %tmp5 1699} 1700 1701define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1702;CHECK-LABEL: shr_orr8h: 1703;CHECK: shr.8h v0, {{v[0-9]+}}, #1 1704;CHECK-NEXT: orr.16b 1705;CHECK-NEXT: ret 1706 %tmp1 = load <8 x i16>, <8 x i16>* %A 1707 %tmp4 = load <8 x i16>, <8 x i16>* %B 1708 %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1709 %tmp5 = or <8 x i16> %tmp3, %tmp4 1710 ret <8 x i16> %tmp5 1711} 1712 1713define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1714;CHECK-LABEL: shr_orr4s: 1715;CHECK: shr.4s v0, {{v[0-9]+}}, #1 1716;CHECK-NEXT: orr.16b 1717;CHECK-NEXT: ret 1718 %tmp1 = load <4 x i32>, <4 x i32>* %A 1719 %tmp4 = load <4 x i32>, <4 x i32>* %B 1720 %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 1721 %tmp5 = or <4 x i32> %tmp3, %tmp4 1722 ret <4 x i32> %tmp5 1723} 1724 1725define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1726;CHECK-LABEL: shr_orr2d: 1727;CHECK: shr.2d v0, {{v[0-9]+}}, #1 1728;CHECK-NEXT: orr.16b 1729;CHECK-NEXT: ret 1730 %tmp1 = load <2 x i64>, <2 x i64>* %A 1731 %tmp4 = load <2 x i64>, <2 x i64>* %B 1732 %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1> 1733 %tmp5 = or <2 x i64> %tmp3, %tmp4 1734 ret <2 x i64> %tmp5 1735} 1736 1737define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1738;CHECK-LABEL: shl_orr8b: 1739;CHECK: shl.8b v0, {{v[0-9]+}}, #1 1740;CHECK-NEXT: orr.8b 1741;CHECK-NEXT: ret 1742 %tmp1 = load <8 x 
i8>, <8 x i8>* %A 1743 %tmp4 = load <8 x i8>, <8 x i8>* %B 1744 %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1745 %tmp5 = or <8 x i8> %tmp3, %tmp4 1746 ret <8 x i8> %tmp5 1747} 1748 1749define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1750;CHECK-LABEL: shl_orr4h: 1751;CHECK: shl.4h v0, {{v[0-9]+}}, #1 1752;CHECK-NEXT: orr.8b 1753;CHECK-NEXT: ret 1754 %tmp1 = load <4 x i16>, <4 x i16>* %A 1755 %tmp4 = load <4 x i16>, <4 x i16>* %B 1756 %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> 1757 %tmp5 = or <4 x i16> %tmp3, %tmp4 1758 ret <4 x i16> %tmp5 1759} 1760 1761define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1762;CHECK-LABEL: shl_orr2s: 1763;CHECK: shl.2s v0, {{v[0-9]+}}, #1 1764;CHECK-NEXT: orr.8b 1765;CHECK-NEXT: ret 1766 %tmp1 = load <2 x i32>, <2 x i32>* %A 1767 %tmp4 = load <2 x i32>, <2 x i32>* %B 1768 %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1> 1769 %tmp5 = or <2 x i32> %tmp3, %tmp4 1770 ret <2 x i32> %tmp5 1771} 1772 1773define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1774;CHECK-LABEL: shl_orr16b: 1775;CHECK: shl.16b v0, {{v[0-9]+}}, #1 1776;CHECK-NEXT: orr.16b 1777;CHECK-NEXT: ret 1778 %tmp1 = load <16 x i8>, <16 x i8>* %A 1779 %tmp4 = load <16 x i8>, <16 x i8>* %B 1780 %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1781 %tmp5 = or <16 x i8> %tmp3, %tmp4 1782 ret <16 x i8> %tmp5 1783} 1784 1785define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1786;CHECK-LABEL: shl_orr8h: 1787;CHECK: shl.8h v0, {{v[0-9]+}}, #1 1788;CHECK-NEXT: orr.16b 1789;CHECK-NEXT: ret 1790 %tmp1 = load <8 x i16>, <8 x i16>* %A 1791 %tmp4 = load <8 x i16>, <8 x i16>* %B 1792 %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1793 %tmp5 = or <8 x i16> %tmp3, %tmp4 1794 ret <8 x i16> %tmp5 1795} 1796 1797define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* 
%B) nounwind { 1798;CHECK-LABEL: shl_orr4s: 1799;CHECK: shl.4s v0, {{v[0-9]+}}, #1 1800;CHECK-NEXT: orr.16b 1801;CHECK-NEXT: ret 1802 %tmp1 = load <4 x i32>, <4 x i32>* %A 1803 %tmp4 = load <4 x i32>, <4 x i32>* %B 1804 %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> 1805 %tmp5 = or <4 x i32> %tmp3, %tmp4 1806 ret <4 x i32> %tmp5 1807} 1808 1809define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1810;CHECK-LABEL: shl_orr2d: 1811;CHECK: shl.2d v0, {{v[0-9]+}}, #1 1812;CHECK-NEXT: orr.16b 1813;CHECK-NEXT: ret 1814 %tmp1 = load <2 x i64>, <2 x i64>* %A 1815 %tmp4 = load <2 x i64>, <2 x i64>* %B 1816 %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1> 1817 %tmp5 = or <2 x i64> %tmp3, %tmp4 1818 ret <2 x i64> %tmp5 1819} 1820 1821define <8 x i16> @shll(<8 x i8> %in) { 1822; CHECK-LABEL: shll: 1823; CHECK: shll.8h v0, {{v[0-9]+}}, #8 1824 %ext = zext <8 x i8> %in to <8 x i16> 1825 %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1826 ret <8 x i16> %res 1827} 1828 1829define <4 x i32> @shll_high(<8 x i16> %in) { 1830; CHECK-LABEL: shll_high 1831; CHECK: shll2.4s v0, {{v[0-9]+}}, #16 1832 %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1833 %ext = zext <4 x i16> %extract to <4 x i32> 1834 %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16> 1835 ret <4 x i32> %res 1836} 1837 1838define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind { 1839;CHECK-LABEL: sli8b: 1840;CHECK: sli.8b v0, {{v[0-9]+}}, #1 1841 %tmp1 = load <8 x i8>, <8 x i8>* %A 1842 %tmp2 = load <8 x i8>, <8 x i8>* %B 1843 %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) 1844 ret <8 x i8> %tmp3 1845} 1846 1847define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind { 1848;CHECK-LABEL: sli4h: 1849;CHECK: sli.4h v0, {{v[0-9]+}}, #1 1850 %tmp1 = load <4 x i16>, <4 x i16>* %A 1851 %tmp2 = load <4 x i16>, <4 x i16>* %B 1852 %tmp3 = call <4 x i16> 
@llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) 1853 ret <4 x i16> %tmp3 1854} 1855 1856define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind { 1857;CHECK-LABEL: sli2s: 1858;CHECK: sli.2s v0, {{v[0-9]+}}, #1 1859 %tmp1 = load <2 x i32>, <2 x i32>* %A 1860 %tmp2 = load <2 x i32>, <2 x i32>* %B 1861 %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) 1862 ret <2 x i32> %tmp3 1863} 1864 1865define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind { 1866;CHECK-LABEL: sli1d: 1867;CHECK: sli d0, {{d[0-9]+}}, #1 1868 %tmp1 = load <1 x i64>, <1 x i64>* %A 1869 %tmp2 = load <1 x i64>, <1 x i64>* %B 1870 %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) 1871 ret <1 x i64> %tmp3 1872} 1873 1874define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind { 1875;CHECK-LABEL: sli16b: 1876;CHECK: sli.16b v0, {{v[0-9]+}}, #1 1877 %tmp1 = load <16 x i8>, <16 x i8>* %A 1878 %tmp2 = load <16 x i8>, <16 x i8>* %B 1879 %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) 1880 ret <16 x i8> %tmp3 1881} 1882 1883define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind { 1884;CHECK-LABEL: sli8h: 1885;CHECK: sli.8h v0, {{v[0-9]+}}, #1 1886 %tmp1 = load <8 x i16>, <8 x i16>* %A 1887 %tmp2 = load <8 x i16>, <8 x i16>* %B 1888 %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) 1889 ret <8 x i16> %tmp3 1890} 1891 1892define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind { 1893;CHECK-LABEL: sli4s: 1894;CHECK: sli.4s v0, {{v[0-9]+}}, #1 1895 %tmp1 = load <4 x i32>, <4 x i32>* %A 1896 %tmp2 = load <4 x i32>, <4 x i32>* %B 1897 %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) 1898 ret <4 x i32> %tmp3 1899} 1900 1901define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind { 1902;CHECK-LABEL: sli2d: 1903;CHECK: sli.2d v0, 
{{v[0-9]+}}, #1 1904 %tmp1 = load <2 x i64>, <2 x i64>* %A 1905 %tmp2 = load <2 x i64>, <2 x i64>* %B 1906 %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) 1907 ret <2 x i64> %tmp3 1908} 1909 1910declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone 1911declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone 1912declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone 1913declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone 1914 1915declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone 1916declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone 1917declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone 1918declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone 1919 1920define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) { 1921; CHECK-LABEL: ashr_v1i64: 1922; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} 1923; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} 1924 %c = ashr <1 x i64> %a, %b 1925 ret <1 x i64> %c 1926} 1927