; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
; Tests that the NEON absolute-difference (uabd/sabd/fabd) and
; absolute-difference-and-accumulate (uaba/saba) intrinsics select to the
; expected AArch64 instructions for each supported vector type.
; CHECK-LABEL (rather than bare CHECK) is used on function names so FileCheck
; cannot match an instruction from a different function's output.

declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)

define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_uabd_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: uabd v0.8b, v0.8b, v1.8b
  ret <8 x i8> %abd
}

define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_uaba_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %aba = add <8 x i8> %lhs, %abd
; CHECK: uaba v0.8b, v0.8b, v1.8b
  ret <8 x i8> %aba
}

define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_sabd_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
; CHECK: sabd v0.8b, v0.8b, v1.8b
  ret <8 x i8> %abd
}

define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: test_saba_v8i8:
  %abd = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %aba = add <8 x i8> %lhs, %abd
; CHECK: saba v0.8b, v0.8b, v1.8b
  ret <8 x i8> %aba
}

declare <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_uabd_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: uabd v0.16b, v0.16b, v1.16b
  ret <16 x i8> %abd
}

define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_uaba_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  %aba = add <16 x i8> %lhs, %abd
; CHECK: uaba v0.16b, v0.16b, v1.16b
  ret <16 x i8> %aba
}

define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_sabd_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
; CHECK: sabd v0.16b, v0.16b, v1.16b
  ret <16 x i8> %abd
}

define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LABEL: test_saba_v16i8:
  %abd = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  %aba = add <16 x i8> %lhs, %abd
; CHECK: saba v0.16b, v0.16b, v1.16b
  ret <16 x i8> %aba
}

declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)

define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_uabd_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: uabd v0.4h, v0.4h, v1.4h
  ret <4 x i16> %abd
}

define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_uaba_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %aba = add <4 x i16> %lhs, %abd
; CHECK: uaba v0.4h, v0.4h, v1.4h
  ret <4 x i16> %aba
}

define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_sabd_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
; CHECK: sabd v0.4h, v0.4h, v1.4h
  ret <4 x i16> %abd
}

define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; CHECK-LABEL: test_saba_v4i16:
  %abd = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %aba = add <4 x i16> %lhs, %abd
; CHECK: saba v0.4h, v0.4h, v1.4h
  ret <4 x i16> %aba
}

declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_uabd_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: uabd v0.8h, v0.8h, v1.8h
  ret <8 x i16> %abd
}

define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_uaba_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  %aba = add <8 x i16> %lhs, %abd
; CHECK: uaba v0.8h, v0.8h, v1.8h
  ret <8 x i16> %aba
}

define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_sabd_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
; CHECK: sabd v0.8h, v0.8h, v1.8h
  ret <8 x i16> %abd
}

define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_saba_v8i16:
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  %aba = add <8 x i16> %lhs, %abd
; CHECK: saba v0.8h, v0.8h, v1.8h
  ret <8 x i16> %aba
}

declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)

define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_uabd_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: uabd v0.2s, v0.2s, v1.2s
  ret <2 x i32> %abd
}

define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_uaba_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  %aba = add <2 x i32> %lhs, %abd
; CHECK: uaba v0.2s, v0.2s, v1.2s
  ret <2 x i32> %aba
}

define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_sabd_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
; CHECK: sabd v0.2s, v0.2s, v1.2s
  ret <2 x i32> %abd
}

; Constant-folding guard: sabd on constant operands must still emit the
; materialized constant plus a real sabd, not be mis-folded at compile time
; (the operands are chosen so the signed difference overflows i32).
define <2 x i32> @test_sabd_v2i32_const() {
; CHECK-LABEL: test_sabd_v2i32_const:
; CHECK: movi d1, #0x00ffffffff0000
; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
  %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
    <2 x i32> <i32 -2147483648, i32 2147450880>,
    <2 x i32> <i32 -65536, i32 65535>)
  ret <2 x i32> %1
}

define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_saba_v2i32:
  %abd = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  %aba = add <2 x i32> %lhs, %abd
; CHECK: saba v0.2s, v0.2s, v1.2s
  ret <2 x i32> %aba
}

declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>)

define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_uabd_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: uabd v0.4s, v0.4s, v1.4s
  ret <4 x i32> %abd
}

define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_uaba_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  %aba = add <4 x i32> %lhs, %abd
; CHECK: uaba v0.4s, v0.4s, v1.4s
  ret <4 x i32> %aba
}

define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_sabd_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
; CHECK: sabd v0.4s, v0.4s, v1.4s
  ret <4 x i32> %abd
}

define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_saba_v4i32:
  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  %aba = add <4 x i32> %lhs, %abd
; CHECK: saba v0.4s, v0.4s, v1.4s
  ret <4 x i32> %aba
}

declare <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float>, <2 x float>)

define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: test_fabd_v2f32:
  %abd = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %lhs, <2 x float> %rhs)
; CHECK: fabd v0.2s, v0.2s, v1.2s
  ret <2 x float> %abd
}

declare <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float>, <4 x float>)

define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: test_fabd_v4f32:
  %abd = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %lhs, <4 x float> %rhs)
; CHECK: fabd v0.4s, v0.4s, v1.4s
  ret <4 x float> %abd
}

declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>)

define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: test_fabd_v2f64:
  %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
; CHECK: fabd v0.2d, v0.2d, v1.2d
  ret <2 x double> %abd
}