target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          INTRINSICS               ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; ARM NEON intrinsic declarations. Not every declaration is referenced in
; this part of the file (only the f32 and i32 vmax/vmin forms are used below).

; Element-wise maximum: vmaxs = signed / floating point, vmaxu = unsigned.
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone

; Element-wise minimum: vmins = signed / floating point, vminu = unsigned.
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone

; Saturating narrowing shifts (result lanes are half the width of the inputs).
declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

; Reciprocal / reciprocal-square-root estimate and refinement step.
declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          HELPERS                  ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; The smear_* helpers broadcast ("smear") one scalar into every lane of a
; vector. They are internal and alwaysinline, so they only exist as building
; blocks for the public functions in this module.

; Broadcast a float into all 4 lanes.
define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x float> undef, float %in, i32 0
  %2 = insertelement <4 x float> %1, float %in, i32 1
  %3 = insertelement <4 x float> %2, float %in, i32 2
  %4 = insertelement <4 x float> %3, float %in, i32 3
  ret <4 x float> %4
}

; Broadcast an i32 into all 4 lanes.
define internal <4 x i32> @smear_4i(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
  %4 = insertelement <4 x i32> %3, i32 %in, i32 3
  ret <4 x i32> %4
}

; Broadcast an i16 into all 4 lanes.
define internal <4 x i16> @smear_4s(i16 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i16> undef, i16 %in, i32 0
  %2 = insertelement <4 x i16> %1, i16 %in, i32 1
  %3 = insertelement <4 x i16> %2, i16 %in, i32 2
  %4 = insertelement <4 x i16> %3, i16 %in, i32 3
  ret <4 x i16> %4
}

; Broadcast a float into both lanes.
define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x float> undef, float %in, i32 0
  %2 = insertelement <2 x float> %1, float %in, i32 1
  ret <2 x float> %2
}

; Broadcast an i32 into both lanes.
define internal <2 x i32> @smear_2i(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x i32> undef, i32 %in, i32 0
  %2 = insertelement <2 x i32> %1, i32 %in, i32 1
  ret <2 x i32> %2
}

; Broadcast an i16 into both lanes.
define internal <2 x i16> @smear_2s(i16 %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x i16> undef, i16 %in, i32 0
  %2 = insertelement <2 x i16> %1, i16 %in, i32 1
  ret <2 x i16> %2
}

; Same body as @smear_4i. NOTE(review): not referenced in this part of the
; file; kept because code further down may use this name - confirm before
; folding the two helpers together.
define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
  %4 = insertelement <4 x i32> %3, i32 %in, i32 3
  ret <4 x i32> %4
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          CLAMP                    ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Every clamp below computes max(min(value, high), low) per lane.
;
; Symbol names follow Itanium C++ mangling: DvN_<t> is an N-lane vector of
; <t>, with f = float, i = int, j = unsigned int; "S_" repeats the first
; parameter type.
;
; 3-lane variants widen their operands to 4 lanes with shufflevector (mask
; index 3 selects element 0 of the undef second operand, so lane 3 is undef),
; run the 128-bit NEON operation, and then drop lane 3 again.

; ---- float ----

; clamp(float4 value, float4 low, float4 high)
define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readonly {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
  ret <4 x float> %2
}

; clamp(float4 value, float low, float high): broadcasts the scalar bounds and
; forwards to the all-vector overload.
define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readonly
  ret <4 x float> %out
}

; clamp(float3 value, float3 low, float3 high)
define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readonly {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(float3 value, float low, float high)
define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(float2 value, float2 low, float2 high)
define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readonly {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
  ret <2 x float> %2
}

; clamp(float2 value, float low, float high)
define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
  %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
  %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
  %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
  %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
  ret <2 x float> %b
}

; clamp(float value, float low, float high), scalar compare/select form.
; Both compares are ordered, so when a compare sees a NaN it is false and the
; select yields its second operand (%high for the first, %low for the second).
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
  %1 = fcmp olt float %value, %high
  %2 = select i1 %1, float %value, float %high
  %3 = fcmp ogt float %2, %low
  %4 = select i1 %3, float %2, float %low
  ret float %4
}

; ---- signed int ----

; clamp(int4 value, int4 low, int4 high)
define <4 x i32> @_Z5clampDv4_iS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readonly {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(int4 value, int low, int high)
define <4 x i32> @_Z5clampDv4_iii(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(int3 value, int3 low, int3 high)
define <3 x i32> @_Z5clampDv3_iS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(int3 value, int low, int high)
define <3 x i32> @_Z5clampDv3_iii(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(int2 value, int2 low, int2 high)
define <2 x i32> @_Z5clampDv2_iS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readonly {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %2 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %2
}

; clamp(int2 value, int low, int high)
define <2 x i32> @_Z5clampDv2_iii(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %a = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind readnone
  %b = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readnone
  ret <2 x i32> %b
}

; ---- unsigned int (same shape as the signed versions, using vminu/vmaxu) ----

; clamp(uint4 value, uint4 low, uint4 high)
define <4 x i32> @_Z5clampDv4_jS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readonly {
  %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(uint4 value, uint low, uint high)
define <4 x i32> @_Z5clampDv4_jjj(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(uint3 value, uint3 low, uint3 high)
define <3 x i32> @_Z5clampDv3_jS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(uint3 value, uint low, uint high)
define <3 x i32> @_Z5clampDv3_jjj(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(uint2 value, uint2 low, uint2 high)
define <2 x i32> @_Z5clampDv2_jS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readonly {
  %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %2 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %2
}

; clamp(uint2 value, uint low, uint high)
define <2 x i32> @_Z5clampDv2_jjj(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %a = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind readnone
  %b = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readnone
  ret <2 x i32> %b
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          FMAX                     ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmax per lane. Vector forms map onto NEON vmaxs; the "..._ff" forms take a
; scalar second operand and broadcast it first; 3-lane forms go through a
; 4-lane temporary whose last lane is undef.

; fmax(float4, float4)
define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
  %1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmax(float4, float)
define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmax(float3, float3)
define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmax(float3, float)
define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmax(float2, float2)
define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
  %1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmax(float2, float)
define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmax(float, float): returns %v1 when %v1 > %v2 (ordered), otherwise %v2.
; Marked readnone to agree with its twin @_Z4fminff and with the readnone
; wrapper @_Z3maxff that calls it; the body touches no memory.
define float @_Z4fmaxff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp ogt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          FMIN                     ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmin per lane; mirrors the FMAX section with vmins in place of vmaxs.

; fmin(float4, float4)
define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmin(float4, float)
define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmin(float3, float3)
define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmin(float3, float)
define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmin(float2, float2)
define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmin(float2, float)
define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmin(float, float): returns %v1 when %v1 < %v2 (ordered), otherwise %v2.
define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp olt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          MAX                      ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Integer and float max. Scalars use compare + select. 8- and 16-bit vectors
; are extended to i32 lanes (sext for signed, zext for unsigned), reduced with
; the 32-bit NEON max, and truncated back to the original lane width.
; Mangling letters: c = char, h = unsigned char, s = short,
; t = unsigned short, i = int, j = unsigned int, x = long long, f = float.

; ---- signed 8-bit ----

; max(char, char)
define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %1 = icmp sgt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

; max(char2, char2)
define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = sext <2 x i8> %v1 to <2 x i32>
  %2 = sext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

; max(char3, char3)
define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = sext <3 x i8> %v1 to <3 x i32>
  %2 = sext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

; max(char4, char4)
define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = sext <4 x i8> %v1 to <4 x i32>
  %2 = sext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; ---- signed 16-bit ----

; max(short, short)
define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %1 = icmp sgt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

; max(short2, short2)
define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = sext <2 x i16> %v1 to <2 x i32>
  %2 = sext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

; max(short3, short3)
define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = sext <3 x i16> %v1 to <3 x i32>
  %2 = sext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

; max(short4, short4)
define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = sext <4 x i16> %v1 to <4 x i32>
  %2 = sext <4 x i16> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i16>
  ret <4 x i16> %4
}

; ---- signed 32-bit ----

; max(int, int)
define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp sgt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

; max(int2, int2)
define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

; max(int3, int3)
define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

; max(int4, int4)
define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; ---- signed 64-bit ----

; max(long long, long long)
define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
  %1 = icmp sgt i64 %v1, %v2
  %2 = select i1 %1, i64 %v1, i64 %v2
  ret i64 %2
}

; TODO: long vector types

; ---- unsigned 8-bit ----

; max(uchar, uchar)
define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %1 = icmp ugt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

; max(uchar2, uchar2)
define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = zext <2 x i8> %v1 to <2 x i32>
  %2 = zext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

; max(uchar3, uchar3)
define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = zext <3 x i8> %v1 to <3 x i32>
  %2 = zext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

; max(uchar4, uchar4)
define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = zext <4 x i8> %v1 to <4 x i32>
  %2 = zext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; ---- unsigned 16-bit ----

; max(ushort, ushort)
define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %1 = icmp ugt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

; max(ushort2, ushort2)
define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = zext <2 x i16> %v1 to <2 x i32>
  %2 = zext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

; max(ushort3, ushort3)
define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = zext <3 x i16> %v1 to <3 x i32>
  %2 = zext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

; max(ushort4, ushort4)
define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = zext <4 x i16> %v1 to <4 x i32>
  %2 = zext <4 x i16> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i16>
  ret <4 x i16> %4
}

; ---- unsigned 32-bit ----

; max(uint, uint)
define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp ugt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

; max(uint2, uint2)
define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

; max(uint3, uint3)
define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

; max(uint4, uint4)
define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; TODO: long vector types

; ---- float: max() simply forwards to the matching fmax() overload ----

; max(float, float)
define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
  %1 = tail call float @_Z4fmaxff(float %v1, float %v2)
  ret float %1
}

; max(float2, float2)
define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %1
}

; max(float2, float)
define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %1
}

; max(float3, float3)
define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %1
}

; max(float3, float)
define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %1
}

; max(float4, float4)
define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %1
}

; max(float4, float)
define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %1
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;          MIN                      ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define signext i8 @_Z3mincc(i8 signext %v1, i8 signext
%v2) nounwind readnone { 602 %1 = icmp slt i8 %v1, %v2 603 %2 = select i1 %1, i8 %v1, i8 %v2 604 ret i8 %2 605} 606 607define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { 608 %1 = sext <2 x i8> %v1 to <2 x i32> 609 %2 = sext <2 x i8> %v2 to <2 x i32> 610 %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone 611 %4 = trunc <2 x i32> %3 to <2 x i8> 612 ret <2 x i8> %4 613} 614 615define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone { 616 %1 = sext <3 x i8> %v1 to <3 x i32> 617 %2 = sext <3 x i8> %v2 to <3 x i32> 618 %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 619 %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 620 %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone 621 %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 622 %7 = trunc <3 x i32> %6 to <3 x i8> 623 ret <3 x i8> %7 624} 625 626define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { 627 %1 = sext <4 x i8> %v1 to <4 x i32> 628 %2 = sext <4 x i8> %v2 to <4 x i32> 629 %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 630 %4 = trunc <4 x i32> %3 to <4 x i8> 631 ret <4 x i8> %4 632} 633 634define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone { 635 %1 = icmp slt i16 %v1, %v2 636 %2 = select i1 %1, i16 %v1, i16 %v2 637 ret i16 %2 638} 639 640define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { 641 %1 = sext <2 x i16> %v1 to <2 x i32> 642 %2 = sext <2 x i16> %v2 to <2 x i32> 643 %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone 644 %4 = trunc <2 x i32> %3 to <2 x i16> 645 ret <2 x i16> %4 646} 647 648define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone { 649 %1 
= sext <3 x i16> %v1 to <3 x i32> 650 %2 = sext <3 x i16> %v2 to <3 x i32> 651 %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 652 %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 653 %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone 654 %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 655 %7 = trunc <3 x i32> %6 to <3 x i16> 656 ret <3 x i16> %7 657} 658 659define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { 660 %1 = sext <4 x i16> %v1 to <4 x i32> 661 %2 = sext <4 x i16> %v2 to <4 x i32> 662 %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 663 %4 = trunc <4 x i32> %3 to <4 x i16> 664 ret <4 x i16> %4 665} 666 667define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone { 668 %1 = icmp slt i32 %v1, %v2 669 %2 = select i1 %1, i32 %v1, i32 %v2 670 ret i32 %2 671} 672 673define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { 674 %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone 675 ret <2 x i32> %1 676} 677 678define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone { 679 %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 680 %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 681 %3 = tail call <4 x i32 > @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 682 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 683 ret <3 x i32> %4 684} 685 686define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone { 687 %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone 688 ret <4 x i32> %1 689} 690 691define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind 
readnone { 692 %1 = icmp slt i64 %v1, %v2 693 %2 = select i1 %1, i64 %v1, i64 %v2 694 ret i64 %2 695} 696 697; TODO: long vector types 698 699define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone { 700 %1 = icmp ult i8 %v1, %v2 701 %2 = select i1 %1, i8 %v1, i8 %v2 702 ret i8 %2 703} 704 705define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { 706 %1 = zext <2 x i8> %v1 to <2 x i32> 707 %2 = zext <2 x i8> %v2 to <2 x i32> 708 %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone 709 %4 = trunc <2 x i32> %3 to <2 x i8> 710 ret <2 x i8> %4 711} 712 713define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone { 714 %1 = zext <3 x i8> %v1 to <3 x i32> 715 %2 = zext <3 x i8> %v2 to <3 x i32> 716 %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 717 %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 718 %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone 719 %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 720 %7 = trunc <3 x i32> %6 to <3 x i8> 721 ret <3 x i8> %7 722} 723 724define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { 725 %1 = zext <4 x i8> %v1 to <4 x i32> 726 %2 = zext <4 x i8> %v2 to <4 x i32> 727 %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 728 %4 = trunc <4 x i32> %3 to <4 x i8> 729 ret <4 x i8> %4 730} 731 732define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone { 733 %1 = icmp ult i16 %v1, %v2 734 %2 = select i1 %1, i16 %v1, i16 %v2 735 ret i16 %2 736} 737 738define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { 739 %1 = zext <2 x i16> %v1 to <2 x i32> 740 %2 = zext <2 x i16> %v2 to <2 x i32> 741 %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> 
%1, <2 x i32> %2) nounwind readnone 742 %4 = trunc <2 x i32> %3 to <2 x i16> 743 ret <2 x i16> %4 744} 745 746define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone { 747 %1 = zext <3 x i16> %v1 to <3 x i32> 748 %2 = zext <3 x i16> %v2 to <3 x i32> 749 %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 750 %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 751 %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone 752 %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 753 %7 = trunc <3 x i32> %6 to <3 x i16> 754 ret <3 x i16> %7 755} 756 757define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { 758 %1 = zext <4 x i16> %v1 to <4 x i32> 759 %2 = zext <4 x i16> %v2 to <4 x i32> 760 %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 761 %4 = trunc <4 x i32> %3 to <4 x i16> 762 ret <4 x i16> %4 763} 764 765define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone { 766 %1 = icmp ult i32 %v1, %v2 767 %2 = select i1 %1, i32 %v1, i32 %v2 768 ret i32 %2 769} 770 771define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { 772 %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone 773 ret <2 x i32> %1 774} 775 776define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone { 777 %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 778 %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 779 %3 = tail call <4 x i32 > @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 780 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 781 ret <3 x i32> %4 782} 783 784define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) 
nounwind readnone { 785 %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone 786 ret <4 x i32> %1 787} 788 789 790; TODO: long vector types 791 792define float @_Z3minff(float %v1, float %v2) nounwind readnone { 793 %1 = tail call float @_Z4fminff(float %v1, float %v2) 794 ret float %1 795} 796 797define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone { 798 %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) 799 ret <2 x float> %1 800} 801 802define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone { 803 %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) 804 ret <2 x float> %1 805} 806 807define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone { 808 %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) 809 ret <3 x float> %1 810} 811 812define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone { 813 %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) 814 ret <3 x float> %1 815} 816 817define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone { 818 %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) 819 ret <4 x float> %1 820} 821 822define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone { 823 %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) 824 ret <4 x float> %1 825} 826 827 828;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 829;;;;;;;;; YUV ;;;;;;;;;; 830;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 831 832@yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16 833@yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16 834@yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 835@yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, 
i32 65535, i32 65535>, align 16 836 837 838define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline { 839 %_sy = zext i8 %pY to i32 840 %_su = zext i8 %pU to i32 841 %_sv = zext i8 %pV to i32 842 843 %_sy2 = add i32 -16, %_sy 844 %_sy3 = mul i32 298, %_sy2 845 %_su2 = add i32 -128, %_su 846 %_sv2 = add i32 -128, %_sv 847 %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone 848 %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone 849 %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone 850 851 %mu = load <4 x i32>, <4 x i32>* @yuv_U, align 8 852 %mv = load <4 x i32>, <4 x i32>* @yuv_V, align 8 853 %_u2 = mul <4 x i32> %_u, %mu 854 %_v2 = mul <4 x i32> %_v, %mv 855 %_y2 = add <4 x i32> %_y, %_u2 856 %_y3 = add <4 x i32> %_y2, %_v2 857 858 ; %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone 859; %r2 = trunc <4 x i16> %r1 to <4 x i8> 860; ret <4 x i8> %r2 861 862 %c0 = load <4 x i32>, <4 x i32>* @yuv_0, align 8 863 %c255 = load <4 x i32>, <4 x i32>* @yuv_255, align 8 864 %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone 865 %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone 866 %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8> 867 %r4 = trunc <4 x i32> %r3 to <4 x i8> 868 ret <4 x i8> %r4 869} 870 871;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 872;;;;;;;;; half_RECIP ;;;;;;;;;; 873;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 874 875define <2 x float> @_Z10half_recipDv2_f(<2 x float> %v) nounwind readnone { 876 %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone 877 %2 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %1, <2 x float> %v) nounwind readnone 878 %3 = fmul <2 x float> %1, %2 879 %4 = tail call <2 x float> 
@llvm.arm.neon.vrecps.v2f32(<2 x float> %3, <2 x float> %v) nounwind readnone 880 %5 = fmul <2 x float> %4, %3 881 ret <2 x float> %5 882} 883 884define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone { 885 %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone 886 %2 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %1, <4 x float> %v) nounwind readnone 887 %3 = fmul <4 x float> %1, %2 888 %4 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %3, <4 x float> %v) nounwind readnone 889 %5 = fmul <4 x float> %4, %3 890 ret <4 x float> %5 891} 892 893define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone { 894 %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 895 %2 = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %1) nounwind readnone 896 %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 897 ret <3 x float> %3 898} 899 900 901;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 902;;;;;;;;; half_RSQRT ;;;;;;;;;; 903;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 904 905define float @_Z10half_rsqrtf(float %v) { 906 %1 = insertelement <2 x float> undef, float %v, i32 0 907 %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone 908 %3 = extractelement <2 x float> %2, i32 0 909 ret float %3 910} 911 912define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone { 913 %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone 914 ret <2 x float> %1 915} 916 917define <3 x float> @_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone { 918 %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 919 %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone 920 %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> 
<i32 0, i32 1, i32 2> 921 ret <3 x float> %3 922} 923 924define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone { 925 %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone 926 ret <4 x float> %1 927} 928 929;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 930;;;;;;;;; matrix ;;;;;;;;;; 931;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 932 933declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly 934 935%struct.rs_matrix4x4 = type { [16 x float] } 936%struct.rs_matrix3x3 = type { [9 x float] } 937%struct.rs_matrix2x2 = type { [4 x float] } 938 939define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline { 940 %1 = insertelement <4 x float> undef, float %in, i32 0 941 %2 = insertelement <4 x float> %1, float %in, i32 1 942 %3 = insertelement <4 x float> %2, float %in, i32 2 943 %4 = insertelement <4 x float> %3, float %in, i32 3 944 ret <4 x float> %4 945} 946 947 948define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly { 949 %x0 = extractelement <3 x float> %in, i32 0 950 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 951 %y0 = extractelement <3 x float> %in, i32 1 952 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 953 %z0 = extractelement <3 x float> %in, i32 2 954 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone 955 956 %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 957 %px2 = bitcast float* %px to i8* 958 %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind 959 960 %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 961 %py2 = bitcast float* %py to i8* 962 %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind 963 964 %pz = getelementptr inbounds %struct.rs_matrix3x3, 
%struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5 965 %pz2 = bitcast float* %pz to i8* 966 %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind 967 %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 968 969 %a1 = fmul <4 x float> %x, %xm 970 %a2 = fmul <4 x float> %y, %ym 971 %a3 = fadd <4 x float> %a1, %a2 972 %a4 = fmul <4 x float> %z, %zm 973 %a5 = fadd <4 x float> %a4, %a3 974 %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 975 ret <3 x float> %a6 976} 977 978define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly { 979 %x0 = extractelement <2 x float> %in, i32 0 980 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 981 %y0 = extractelement <2 x float> %in, i32 1 982 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 983 984 %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 985 %px2 = bitcast float* %px to <4 x float>* 986 %xm = load <4 x float>, <4 x float>* %px2, align 4 987 %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 988 %py2 = bitcast float* %py to <4 x float>* 989 %ym = load <4 x float>, <4 x float>* %py2, align 4 990 991 %a1 = fmul <4 x float> %x, %xm 992 %a2 = fmul <4 x float> %y, %ym 993 %a3 = fadd <4 x float> %a1, %a2 994 %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 995 ret <3 x float> %a4 996} 997 998define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly { 999 %x0 = extractelement <4 x float> %in, i32 0 1000 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 1001 %y0 = extractelement <4 x float> %in, i32 1 1002 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 1003 %z0 = extractelement <4 x float> 
%in, i32 2 1004 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone 1005 %w0 = extractelement <4 x float> %in, i32 3 1006 %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone 1007 1008 %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 1009 %px2 = bitcast float* %px to <4 x float>* 1010 %xm = load <4 x float>, <4 x float>* %px2, align 4 1011 %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 1012 %py2 = bitcast float* %py to <4 x float>* 1013 %ym = load <4 x float>, <4 x float>* %py2, align 4 1014 %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 1015 %pz2 = bitcast float* %pz to <4 x float>* 1016 %zm = load <4 x float>, <4 x float>* %pz2, align 4 1017 %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 1018 %pw2 = bitcast float* %pw to <4 x float>* 1019 %wm = load <4 x float>, <4 x float>* %pw2, align 4 1020 1021 %a1 = fmul <4 x float> %x, %xm 1022 %a2 = fmul <4 x float> %y, %ym 1023 %a3 = fadd <4 x float> %a1, %a2 1024 %a4 = fmul <4 x float> %z, %zm 1025 %a5 = fadd <4 x float> %a3, %a4 1026 %a6 = fmul <4 x float> %w, %wm 1027 %a7 = fadd <4 x float> %a5, %a6 1028 ret <4 x float> %a7 1029} 1030 1031define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly { 1032 %x0 = extractelement <3 x float> %in, i32 0 1033 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 1034 %y0 = extractelement <3 x float> %in, i32 1 1035 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 1036 %z0 = extractelement <3 x float> %in, i32 2 1037 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone 1038 1039 %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 1040 %px2 = bitcast float* %px to <4 x float>* 1041 %xm = load <4 x 
float>, <4 x float>* %px2, align 4 1042 %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 1043 %py2 = bitcast float* %py to <4 x float>* 1044 %ym = load <4 x float>, <4 x float>* %py2, align 4 1045 %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 1046 %pz2 = bitcast float* %pz to <4 x float>* 1047 %zm = load <4 x float>, <4 x float>* %pz2, align 4 1048 %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 1049 %pw2 = bitcast float* %pw to <4 x float>* 1050 %wm = load <4 x float>, <4 x float>* %pw2, align 4 1051 1052 %a1 = fmul <4 x float> %x, %xm 1053 %a2 = fadd <4 x float> %wm, %a1 1054 %a3 = fmul <4 x float> %y, %ym 1055 %a4 = fadd <4 x float> %a2, %a3 1056 %a5 = fmul <4 x float> %z, %zm 1057 %a6 = fadd <4 x float> %a4, %a5 1058 ret <4 x float> %a6 1059} 1060 1061define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly { 1062 %x0 = extractelement <2 x float> %in, i32 0 1063 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 1064 %y0 = extractelement <2 x float> %in, i32 1 1065 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 1066 1067 %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 1068 %px2 = bitcast float* %px to <4 x float>* 1069 %xm = load <4 x float>, <4 x float>* %px2, align 4 1070 %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 1071 %py2 = bitcast float* %py to <4 x float>* 1072 %ym = load <4 x float>, <4 x float>* %py2, align 4 1073 %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 1074 %pw2 = bitcast float* %pw to <4 x float>* 1075 %wm = load <4 x float>, <4 x float>* %pw2, align 4 1076 1077 %a1 = fmul <4 x float> %x, %xm 1078 %a2 = fadd <4 x float> %wm, %a1 1079 %a3 = 
fmul <4 x float> %y, %ym 1080 %a4 = fadd <4 x float> %a2, %a3 1081 ret <4 x float> %a4 1082} 1083 1084 1085 1086;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1087;;;;;;;;; pixel ops ;;;;;;;;;; 1088;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1089 1090 1091@fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16 1092@fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16 1093@fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16 1094 1095declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone 1096declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone 1097 1098; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color) 1099define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone { 1100 %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16 1101 %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16 1102 %f0 = load <4 x float>, <4 x float>* @fc_0, align 16 1103 %v1 = fmul <4 x float> %f255, %color 1104 %v2 = fadd <4 x float> %f05, %v1 1105 %v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255) nounwind readnone 1106 %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone 1107 ret <4 x i8> %v4 1108} 1109 1110; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color) 1111define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone { 1112 %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1113 %2 = insertelement <4 x float> %1, float 1.0, i32 3 1114 %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone 1115 ret <4 x i8> %3 1116} 1117 1118; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b) 1119define <4 x i8> 
@_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone { 1120 %1 = insertelement <4 x float> undef, float %r, i32 0 1121 %2 = insertelement <4 x float> %1, float %g, i32 1 1122 %3 = insertelement <4 x float> %2, float %b, i32 2 1123 %4 = insertelement <4 x float> %3, float 1.0, i32 3 1124 %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone 1125 ret <4 x i8> %5 1126} 1127 1128; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a) 1129define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone { 1130 %1 = insertelement <4 x float> undef, float %r, i32 0 1131 %2 = insertelement <4 x float> %1, float %g, i32 1 1132 %3 = insertelement <4 x float> %2, float %b, i32 2 1133 %4 = insertelement <4 x float> %3, float %a, i32 3 1134 %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone 1135 ret <4 x i8> %5 1136} 1137 1138