; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_smax_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umax v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umax_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umax v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umax_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umax v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}


declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umax_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umax v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}


declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umax_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umax v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umax_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umax v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; Using registers other than v0, v1 is possible, but would be odd.
; CHECK: test_smin_v8i8:
  %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: smin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
  %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: umin v0.8b, v0.8b, v1.8b
  ret <8 x i8> %tmp1
}

declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_smin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: smin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK: test_umin_v16i8:
  %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: umin v0.16b, v0.16b, v1.16b
  ret <16 x i8> %tmp1
}

declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_smin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: smin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}

define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK: test_umin_v4i16:
  %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: umin v0.4h, v0.4h, v1.4h
  ret <4 x i16> %tmp1
}


declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_smin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: smin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}

define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK: test_umin_v8i16:
  %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: umin v0.8h, v0.8h, v1.8h
  ret <8 x i16> %tmp1
}


declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_smin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: smin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK: test_umin_v2i32:
  %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: umin v0.2s, v0.2s, v1.2s
  ret <2 x i32> %tmp1
}

declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_smin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: smin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK: test_umin_v4i32:
  %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: umin v0.4s, v0.4s, v1.4s
  ret <4 x i32> %tmp1
}

declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmax_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmax v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmax_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmax v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmax_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmax v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmin_v2f32:
  %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmin v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmin_v4f32:
  %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmin v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmin_v2f64:
  %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmin v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}


declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fmaxnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fmaxnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fmaxnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fmaxnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fmaxnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fmaxnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}

declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>)

define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK: test_fminnm_v2f32:
  %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fminnm v0.2s, v0.2s, v1.2s
  ret <2 x float> %val
}

define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK: test_fminnm_v4f32:
  %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fminnm v0.4s, v0.4s, v1.4s
  ret <4 x float> %val
}

define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK: test_fminnm_v2f64:
  %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fminnm v0.2d, v0.2d, v1.2d
  ret <2 x double> %val
}