; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
;
; Tests that the AArch64 NEON vector min/max intrinsics select the expected
; Apple-syntax instructions: smax/umax/smin/umin, their pairwise variants
; (smaxp/umaxp/sminp/uminp), and the FP forms fmax/fmin, pairwise
; fmaxp/fminp, and NaN-propagating pairwise fmaxnmp/fminnmp.
; NOTE(review): two duplicate mid-file RUN lines (identical to the one above,
; left over from concatenating three test files) were removed; lit was running
; the same command three times for no additional coverage.

define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smax_8b:
;CHECK: smax.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smax_16b:
;CHECK: smax.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smax_4h:
;CHECK: smax.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smax_8h:
;CHECK: smax.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smax_2s:
;CHECK: smax.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smax_4s:
;CHECK: smax.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umax_8b:
;CHECK: umax.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umax_16b:
;CHECK: umax.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umax_4h:
;CHECK: umax.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umax_8h:
;CHECK: umax.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umax_2s:
;CHECK: umax.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umax_4s:
;CHECK: umax.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smin_8b:
;CHECK: smin.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smin_16b:
;CHECK: smin.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smin_4h:
;CHECK: smin.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smin_8h:
;CHECK: smin.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smin_2s:
;CHECK: smin.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smin_4s:
;CHECK: smin.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umin_8b:
;CHECK: umin.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umin_16b:
;CHECK: umin.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umin_4h:
;CHECK: umin.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umin_8h:
;CHECK: umin.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umin_2s:
;CHECK: umin.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umin_4s:
;CHECK: umin.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smaxp_8b:
;CHECK: smaxp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smaxp_16b:
;CHECK: smaxp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smaxp_4h:
;CHECK: smaxp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smaxp_8h:
;CHECK: smaxp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smaxp_2s:
;CHECK: smaxp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smaxp_4s:
;CHECK: smaxp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umaxp_8b:
;CHECK: umaxp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umaxp_16b:
;CHECK: umaxp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umaxp_4h:
;CHECK: umaxp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umaxp_8h:
;CHECK: umaxp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umaxp_2s:
;CHECK: umaxp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umaxp_4s:
;CHECK: umaxp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sminp_8b:
;CHECK: sminp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sminp_16b:
;CHECK: sminp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sminp_4h:
;CHECK: sminp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sminp_8h:
;CHECK: sminp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sminp_2s:
;CHECK: sminp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sminp_4s:
;CHECK: sminp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uminp_8b:
;CHECK: uminp.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uminp_16b:
;CHECK: uminp.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uminp_4h:
;CHECK: uminp.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uminp_8h:
;CHECK: uminp.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uminp_2s:
;CHECK: uminp.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uminp_4s:
;CHECK: uminp.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmax_2s:
;CHECK: fmax.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmax_4s:
;CHECK: fmax.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmax_2d:
;CHECK: fmax.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmaxp_2s:
;CHECK: fmaxp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmaxp_4s:
;CHECK: fmaxp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmaxp_2d:
;CHECK: fmaxp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmin_2s:
;CHECK: fmin.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmin_4s:
;CHECK: fmin.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmin_2d:
;CHECK: fmin.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fminp_2s:
;CHECK: fminp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fminp_4s:
;CHECK: fminp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fminp_2d:
;CHECK: fminp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fminnmp_2s:
;CHECK: fminnmp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fminnmp_4s:
;CHECK: fminnmp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fminnmp_2d:
;CHECK: fminnmp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone

define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_2s:
;CHECK: fmaxnmp.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_4s:
;CHECK: fmaxnmp.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_2d:
;CHECK: fmaxnmp.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone