target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; INTRINSICS ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Element-wise maximum: signed/float ("vmaxs") and unsigned ("vmaxu") forms.
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; Element-wise minimum: signed/float ("vmins") and unsigned ("vminu") forms.
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; Saturating narrowing shifts. NOTE(review): in the part of the file reviewed
; here these are only referenced from commented-out code; confirm whether they
; are still needed before removing them.
declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

; Reciprocal and reciprocal-square-root intrinsics.
declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; HELPERS ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Broadcast the scalar %in into all four lanes of a <4 x float>.
define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x float> undef, float %in, i32 0
  %2 = insertelement <4 x float> %1, float %in, i32 1
  %3 = insertelement <4 x float> %2, float %in, i32 2
  %4 = insertelement <4 x float> %3, float %in, i32 3
  ret <4 x float> %4
}

; Broadcast the scalar %in into both lanes of a <2 x float>.
define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x float> undef, float %in, i32 0
  %2 = insertelement <2 x float> %1, float %in, i32 1
  ret <2 x float> %2
}

; Broadcast the scalar %in into all four lanes of a <4 x i32>.
define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
  %4 = insertelement <4 x i32> %3, i32 %in, i32 3
  ret <4 x i32> %4
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; CLAMP ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Every clamp below computes max(min(value, high), low) per element.
; None of these functions touches memory, so they are marked readnone (not
; merely readonly), matching the readnone min/max wrappers elsewhere in this
; file that call into this family.

; clamp(float4 value, float4 low, float4 high)
define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
  ret <4 x float> %2
}

; clamp(float4 value, float low, float high): the scalar bounds are broadcast
; and the work is forwarded to the all-vector overload.
define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readnone {
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readnone
  ret <4 x float> %out
}

; clamp(float3 value, float3 low, float3 high)
; The 3-element operands are widened to 4 lanes so the 128-bit intrinsics can
; be used. Shuffle index 3 selects from the undef second operand, so lane 3 is
; undefined; it is dropped again by the final shuffle.
define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readnone {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(float3 value, float low, float high)
define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readnone {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(float2 value, float2 low, float2 high)
define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
  ret <2 x float> %2
}

; clamp(float2 value, float low, float high)
define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readnone {
  %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
  %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
  %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
  %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
  ret <2 x float> %b
}

; clamp(float value, float low, float high)
; Scalar form uses ordered compares + selects. An ordered compare is false
; when either operand is NaN, so in that case the bound (%high, then %low) is
; the value that gets selected.
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readnone {
  %1 = fcmp olt float %value, %high
  %2 = select i1 %1, float %value, float %high
  %3 = fcmp ogt float %2, %low
  %4 = select i1 %3, float %2, float %low
  ret float %4
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; FMAX ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmax(float4, float4)
define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmax(float4, float): the scalar is broadcast to all lanes first.
define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmax(float3, float3): widened to 4 lanes (lane 3 undefined), then narrowed.
define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmax(float3, float)
define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmax(float2, float2)
define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmax(float2, float)
define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmax(float, float): returns %v1 when %v1 > %v2 (ordered), otherwise %v2.
define float @_Z4fmaxff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp ogt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; FMIN ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmin(float4, float4)
define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; The fmin overloads below perform no memory access, so they are marked
; readnone like the scalar overload at the end of this group (they were
; previously only readonly).

; fmin(float4, float): the scalar is broadcast to all lanes first.
define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmin(float3, float3)
; Operands are widened to 4 lanes for the 128-bit intrinsic. Shuffle index 3
; selects from the undef second operand, so lane 3 is undefined and is dropped
; again by the final shuffle.
define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmin(float3, float)
define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmin(float2, float2)
define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmin(float2, float)
define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmin(float, float): returns %v1 when %v1 < %v2 (ordered), otherwise %v2.
define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp olt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; MAX ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Shape of the integer overloads in this section:
;   * scalars           - compare + select;
;   * i8 / i16 vectors  - extend to i32 lanes (sext for signed, zext for
;                         unsigned), take the i32 vector max, truncate back;
;   * 3-lane vectors    - additionally widened to 4 lanes around the call;
;                         shuffle index 3 picks from the undef operand, so the
;                         extra lane is undefined and is dropped afterwards.

;---- signed char ----

define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %v1_gt = icmp sgt i8 %v1, %v2
  %max = select i1 %v1_gt, i8 %v1, i8 %v2
  ret i8 %max
}

define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %a32 = sext <2 x i8> %v1 to <2 x i32>
  %b32 = sext <2 x i8> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %max = trunc <2 x i32> %max32 to <2 x i8>
  ret <2 x i8> %max
}

define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %a32 = sext <3 x i8> %v1 to <3 x i32>
  %b32 = sext <3 x i8> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max = trunc <3 x i32> %max3 to <3 x i8>
  ret <3 x i8> %max
}

define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %a32 = sext <4 x i8> %v1 to <4 x i32>
  %b32 = sext <4 x i8> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %max = trunc <4 x i32> %max32 to <4 x i8>
  ret <4 x i8> %max
}

;---- signed short ----

define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %v1_gt = icmp sgt i16 %v1, %v2
  %max = select i1 %v1_gt, i16 %v1, i16 %v2
  ret i16 %max
}

define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %a32 = sext <2 x i16> %v1 to <2 x i32>
  %b32 = sext <2 x i16> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %max = trunc <2 x i32> %max32 to <2 x i16>
  ret <2 x i16> %max
}

define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %a32 = sext <3 x i16> %v1 to <3 x i32>
  %b32 = sext <3 x i16> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max = trunc <3 x i32> %max3 to <3 x i16>
  ret <3 x i16> %max
}

define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %a32 = sext <4 x i16> %v1 to <4 x i32>
  %b32 = sext <4 x i16> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %max = trunc <4 x i32> %max32 to <4 x i16>
  ret <4 x i16> %max
}

;---- signed int ----

define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
  %v1_gt = icmp sgt i32 %v1, %v2
  %max = select i1 %v1_gt, i32 %v1, i32 %v2
  ret i32 %max
}

define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %max = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %max
}

define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %a4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %max = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %max
}

define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %max = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %max
}

;---- signed long ----

define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
  %v1_gt = icmp sgt i64 %v1, %v2
  %max = select i1 %v1_gt, i64 %v1, i64 %v2
  ret i64 %max
}

; TODO: long vector types

;---- unsigned char ----

define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %v1_gt = icmp ugt i8 %v1, %v2
  %max = select i1 %v1_gt, i8 %v1, i8 %v2
  ret i8 %max
}

define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %a32 = zext <2 x i8> %v1 to <2 x i32>
  %b32 = zext <2 x i8> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %max = trunc <2 x i32> %max32 to <2 x i8>
  ret <2 x i8> %max
}

define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %a32 = zext <3 x i8> %v1 to <3 x i32>
  %b32 = zext <3 x i8> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max = trunc <3 x i32> %max3 to <3 x i8>
  ret <3 x i8> %max
}

define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %a32 = zext <4 x i8> %v1 to <4 x i32>
  %b32 = zext <4 x i8> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %max = trunc <4 x i32> %max32 to <4 x i8>
  ret <4 x i8> %max
}

;---- unsigned short ----

define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %v1_gt = icmp ugt i16 %v1, %v2
  %max = select i1 %v1_gt, i16 %v1, i16 %v2
  ret i16 %max
}

define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %a32 = zext <2 x i16> %v1 to <2 x i32>
  %b32 = zext <2 x i16> %v2 to <2 x i32>
  %max32 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %max = trunc <2 x i32> %max32 to <2 x i16>
  ret <2 x i16> %max
}

define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %a32 = zext <3 x i16> %v1 to <3 x i32>
  %b32 = zext <3 x i16> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %max = trunc <3 x i32> %max3 to <3 x i16>
  ret <3 x i16> %max
}

define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %a32 = zext <4 x i16> %v1 to <4 x i32>
  %b32 = zext <4 x i16> %v2 to <4 x i32>
  %max32 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %max = trunc <4 x i32> %max32 to <4 x i16>
  ret <4 x i16> %max
}

;---- unsigned int ----

define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
  %v1_gt = icmp ugt i32 %v1, %v2
  %max = select i1 %v1_gt, i32 %v1, i32 %v2
  ret i32 %max
}

define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %max = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %max
}

define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %a4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %max = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %max
}

define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %max = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %max
}

;---- unsigned long ----

define i64 @_Z3maxyy(i64 %v1, i64 %v2) nounwind readnone {
  %v1_gt = icmp ugt i64 %v1, %v2
  %max = select i1 %v1_gt, i64 %v1, i64 %v2
  ret i64 %max
}

; TODO: long vector types

;---- float: each overload forwards to the fmax overload of the same shape ----

define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
  %max = tail call float @_Z4fmaxff(float %v1, float %v2)
  ret float %max
}

define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %max = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %max
}

define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %max = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %max
}

define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %max = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %max
}

define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %max = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %max
}

define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %max = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %max
}

define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %max = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %max
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; MIN ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Shape of the integer overloads in this section:
;   * scalars           - compare + select;
;   * i8 / i16 vectors  - extend to i32 lanes (sext for signed, zext for
;                         unsigned), take the i32 vector min, truncate back;
;   * 3-lane vectors    - additionally widened to 4 lanes around the call;
;                         shuffle index 3 picks from the undef operand, so the
;                         extra lane is undefined and is dropped afterwards.

;---- signed char ----

define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %v1_lt = icmp slt i8 %v1, %v2
  %min = select i1 %v1_lt, i8 %v1, i8 %v2
  ret i8 %min
}

define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %a32 = sext <2 x i8> %v1 to <2 x i32>
  %b32 = sext <2 x i8> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %min = trunc <2 x i32> %min32 to <2 x i8>
  ret <2 x i8> %min
}

define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %a32 = sext <3 x i8> %v1 to <3 x i32>
  %b32 = sext <3 x i8> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min3 to <3 x i8>
  ret <3 x i8> %min
}

define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %a32 = sext <4 x i8> %v1 to <4 x i32>
  %b32 = sext <4 x i8> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %min = trunc <4 x i32> %min32 to <4 x i8>
  ret <4 x i8> %min
}

;---- signed short ----

define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %v1_lt = icmp slt i16 %v1, %v2
  %min = select i1 %v1_lt, i16 %v1, i16 %v2
  ret i16 %min
}

define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %a32 = sext <2 x i16> %v1 to <2 x i32>
  %b32 = sext <2 x i16> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %min = trunc <2 x i32> %min32 to <2 x i16>
  ret <2 x i16> %min
}

define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %a32 = sext <3 x i16> %v1 to <3 x i32>
  %b32 = sext <3 x i16> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min3 to <3 x i16>
  ret <3 x i16> %min
}

define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %a32 = sext <4 x i16> %v1 to <4 x i32>
  %b32 = sext <4 x i16> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %min = trunc <4 x i32> %min32 to <4 x i16>
  ret <4 x i16> %min
}

;---- signed int ----

define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
  %v1_lt = icmp slt i32 %v1, %v2
  %min = select i1 %v1_lt, i32 %v1, i32 %v2
  ret i32 %min
}

define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %min = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %min
}

define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %a4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %min = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %min
}

define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %min = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %min
}

;---- signed long ----

define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
  %v1_lt = icmp slt i64 %v1, %v2
  %min = select i1 %v1_lt, i64 %v1, i64 %v2
  ret i64 %min
}

; TODO: long vector types

;---- unsigned char ----

define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %v1_lt = icmp ult i8 %v1, %v2
  %min = select i1 %v1_lt, i8 %v1, i8 %v2
  ret i8 %min
}

define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %a32 = zext <2 x i8> %v1 to <2 x i32>
  %b32 = zext <2 x i8> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %min = trunc <2 x i32> %min32 to <2 x i8>
  ret <2 x i8> %min
}

define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %a32 = zext <3 x i8> %v1 to <3 x i32>
  %b32 = zext <3 x i8> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min3 to <3 x i8>
  ret <3 x i8> %min
}

define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %a32 = zext <4 x i8> %v1 to <4 x i32>
  %b32 = zext <4 x i8> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %min = trunc <4 x i32> %min32 to <4 x i8>
  ret <4 x i8> %min
}

;---- unsigned short ----

define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %v1_lt = icmp ult i16 %v1, %v2
  %min = select i1 %v1_lt, i16 %v1, i16 %v2
  ret i16 %min
}

define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %a32 = zext <2 x i16> %v1 to <2 x i32>
  %b32 = zext <2 x i16> %v2 to <2 x i32>
  %min32 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a32, <2 x i32> %b32) nounwind readnone
  %min = trunc <2 x i32> %min32 to <2 x i16>
  ret <2 x i16> %min
}

define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %a32 = zext <3 x i16> %v1 to <3 x i32>
  %b32 = zext <3 x i16> %v2 to <3 x i32>
  %a4 = shufflevector <3 x i32> %a32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %b32, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min3 to <3 x i16>
  ret <3 x i16> %min
}

define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %a32 = zext <4 x i16> %v1 to <4 x i32>
  %b32 = zext <4 x i16> %v2 to <4 x i32>
  %min32 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a32, <4 x i32> %b32) nounwind readnone
  %min = trunc <4 x i32> %min32 to <4 x i16>
  ret <4 x i16> %min
}

;---- unsigned int ----

define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone {
  %v1_lt = icmp ult i32 %v1, %v2
  %min = select i1 %v1_lt, i32 %v1, i32 %v2
  ret i32 %min
}

define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %min = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %min
}

define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %a4 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a4, <4 x i32> %b4) nounwind readnone
  %min = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %min
}

define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %min = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %min
}

;---- unsigned long ----

define i64 @_Z3minyy(i64 %v1, i64 %v2) nounwind readnone {
  %v1_lt = icmp ult i64 %v1, %v2
  %min = select i1 %v1_lt, i64 %v1, i64 %v2
  ret i64 %min
}

; TODO: long vector types

;---- float: each overload forwards to the fmin overload of the same shape ----

define float @_Z3minff(float %v1, float %v2) nounwind readnone {
  %min = tail call float @_Z4fminff(float %v1, float %v2)
  ret float %min
}

define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %min = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %min
}

define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %min = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %min
}

define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %min = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %min
}

define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %min = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %min
}

define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %min = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %min
}

define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %min = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %min
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; YUV ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Per-lane fixed-point coefficients (scaled by 256) applied to (U - 128) and
; (V - 128). Lanes follow the R, G, B, A order implied by the function name.
@yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
@yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
; Clamp bounds for the pre-shift value: 65535 >> 8 == 255, so clamping to
; [0, 65535] before the shift bounds every channel to [0, 255].
@yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
@yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16


; rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v)
; Converts one YUV sample to an RGBA pixel:
;   lane = clamp(298 * (Y - 16) + cU * (U - 128) + cV * (V - 128), 0, 65535) >> 8
; with cU / cV taken from @yuv_U / @yuv_V.
; The alpha lane has zero U/V coefficients, so the formula alone would make
; alpha a function of luma. It is therefore overwritten with 255 (opaque)
; before returning.
define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
  %_sy = zext i8 %pY to i32
  %_su = zext i8 %pU to i32
  %_sv = zext i8 %pV to i32

  ; Remove the offsets (16 for Y, 128 for U and V) and pre-scale luma.
  %_sy2 = add i32 -16, %_sy
  %_sy3 = mul i32 298, %_sy2
  %_su2 = add i32 -128, %_su
  %_sv2 = add i32 -128, %_sv
  %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone
  %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
  %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone

  ; Accumulate the chroma contributions lane by lane.
  %mu = load <4 x i32>* @yuv_U, align 8
  %mv = load <4 x i32>* @yuv_V, align 8
  %_u2 = mul <4 x i32> %_u, %mu
  %_v2 = mul <4 x i32> %_v, %mv
  %_y2 = add <4 x i32> %_y, %_u2
  %_y3 = add <4 x i32> %_y2, %_v2

  ; Disabled alternative using a saturating narrowing shift:
  ; %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone
  ; %r2 = trunc <4 x i16> %r1 to <4 x i8>
  ; ret <4 x i8> %r2

  ; Clamp to [0, 65535], then drop the 8 fractional bits.
  %c0 = load <4 x i32>* @yuv_0, align 8
  %c255 = load <4 x i32>* @yuv_255, align 8
  %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone
  %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone
  %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
  %r4 = trunc <4 x i32> %r3 to <4 x i8>

  ; Force opaque alpha (i8 -1 == 0xFF) instead of the luma-derived value.
  %r5 = insertelement <4 x i8> %r4, i8 -1, i32 3
  ret <4 x i8> %r5
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; half_RECIP ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; half_recip(float): runs the 2-lane vrecpe intrinsic with %v in lane 0 (lane 1
; is left undef) and returns lane 0 of the result.
; Marked nounwind readnone like the vector overloads below.
define float @_Z10half_recipf(float %v) nounwind readnone {
  %1 = insertelement <2 x float> undef, float %v, i32 0
  %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
  %3 = extractelement <2 x float> %2, i32 0
  ret float %3
}

; 2-lane half_recip: a single vrecpe call.
define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
  ret <2 x float> %1
}

; 3-lane half_recip: widened to 4 lanes (lane 3 undefined), then narrowed.
define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
  %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
  %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %3
}

; 4-lane half_recip: a single vrecpe call.
define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
  ret <4 x float> %1
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; half_SQRT ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; half_sqrt(float): computed as vrecpe(vrsqrte(v)), i.e. the reciprocal
; intrinsic applied to the reciprocal-square-root intrinsic's result. %v goes
; in lane 0 (lane 1 is left undef) and lane 0 of the result is returned.
; Marked nounwind readnone like the vector overloads below.
define float @_Z9half_sqrtf(float %v) nounwind readnone {
  %1 = insertelement <2 x float> undef, float %v, i32 0
  %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
  %3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
  %4 = extractelement <2 x float> %3, i32 0
  ret float %4
}

; 2-lane half_sqrt: vrecpe(vrsqrte(v)).
define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
  %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail
call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone 789 %3 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %2) nounwind readnone 790 %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 791 ret <3 x float> %4 792} 793 794define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone { 795 %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone 796 %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone 797 ret <4 x float> %2 798} 799 800 801;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 802;;;;;;;;; half_RSQRT ;;;;;;;;;; 803;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 804 805define float @_Z10half_rsqrtf(float %v) { 806 %1 = insertelement <2 x float> undef, float %v, i32 0 807 %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone 808 %3 = extractelement <2 x float> %2, i32 0 809 ret float %3 810} 811 812define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone { 813 %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone 814 ret <2 x float> %1 815} 816 817define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone { 818 %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 819 %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone 820 %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 821 ret <3 x float> %3 822} 823 824define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone { 825 %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone 826 ret <4 x float> %1 827} 828 829