neon.ll - OpenGrok cross reference for /frameworks/rs/driver/runtime/arch/neon.ll

Lines Matching refs:float
8 declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
9 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
17 declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
18 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
38 declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
39 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
41 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
42 declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
48 define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
49   %1 = insertelement <4 x float> undef, float %in, i32 0
50   %2 = insertelement <4 x float> %1, float %in, i32 1
51   %3 = insertelement <4 x float> %2, float %in, i32 2
52   %4 = insertelement <4 x float> %3, float %in, i32 3
53   ret <4 x float> %4
74 define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
75   %1 = insertelement <2 x float> undef, float %in, i32 0
76   %2 = insertelement <2 x float> %1, float %in, i32 1
77   ret <2 x float> %2
106 define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) noun…
107 …%1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounw…
108 …%2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind r…
109   ret <4 x float> %2
112 define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
113   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
114   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
115 …%out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float>…
116   ret <4 x float> %out
119 define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) noun…
120 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
121   %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
122 …%_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
123 …%a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nou…
124 …%b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind …
125   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
126   ret <3 x float> %c
129 define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
130 …%_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32…
131   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
132   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
133 …%a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nou…
134 …%b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind …
135   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
136   ret <3 x float> %c
139 define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) noun…
140 …%1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounw…
141 …%2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind r…
142   ret <2 x float> %2
145 define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
146   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
147   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
148 …%a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) noun…
149 …%b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind …
150   ret <2 x float> %b
153 define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
154   %1 = fcmp olt float %value, %high
155   %2 = select i1 %1, float %value, float %high
156   %3 = fcmp ogt float %2, %low
157   %4 = select i1 %3, float %2, float %low
158   ret float %4
266 define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
267 …%1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind r…
268   ret <4 x float> %1
271 define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
272   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
273 …%2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind re…
274   ret <4 x float> %2
277 define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
278   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
279   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
280 …%3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
281   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
282   ret <3 x float> %4
285 define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
286   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
287   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
288 …%3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
289   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
290   ret <3 x float> %c
293 define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
294 …%1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind r…
295   ret <2 x float> %1
298 define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
299   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
300 …%2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind re…
301   ret <2 x float> %2
304 define float @_Z4fmaxff(float %v1, float %v2) nounwind readonly {
305   %1 = fcmp ogt float %v1, %v2
306   %2 = select i1 %1, float %v1, float %v2
307   ret float %2
315 define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
316 …%1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind r…
317   ret <4 x float> %1
320 define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
321   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
322 …%2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind re…
323   ret <4 x float> %2
326 define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
327   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
328   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
329 …%3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
330   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
331   ret <3 x float> %4
334 define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
335   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
336   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
337 …%3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind rea…
338   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
339   ret <3 x float> %c
342 define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
343 …%1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind r…
344   ret <2 x float> %1
347 define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
348   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
349 …%2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind re…
350   ret <2 x float> %2
353 define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
354   %1 = fcmp olt float %v1, %v2
355   %2 = select i1 %1, float %v1, float %v2
356   ret float %2
560 define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
561   %1 = tail call float @_Z4fmaxff(float %v1, float %v2)
562   ret float %1
565 define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
566   %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
567   ret <2 x float> %1
570 define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
571   %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
572   ret <2 x float> %1
575 define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
576   %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
577   ret <3 x float> %1
580 define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
581   %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
582   ret <3 x float> %1
585 define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
586   %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
587   ret <4 x float> %1
590 define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
591   %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
592   ret <4 x float> %1
796 define float @_Z3minff(float %v1, float %v2) nounwind readnone {
797   %1 = tail call float @_Z4fminff(float %v1, float %v2)
798   ret float %1
801 define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
802   %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
803   ret <2 x float> %1
806 define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
807   %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
808   ret <2 x float> %1
811 define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
812   %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
813   ret <3 x float> %1
816 define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
817   %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
818   ret <3 x float> %1
821 define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
822   %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
823   ret <4 x float> %1
826 define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
827   %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
828   ret <4 x float> %1
879 define float @_Z10half_recipf(float %v) {
880   %1 = insertelement <2 x float> undef, float %v, i32 0
881   %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
882   %3 = extractelement <2 x float> %2, i32 0
883   ret float %3
886 define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
887   %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
888   ret <2 x float> %1
891 define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
892   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
893   %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
894   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
895   ret <3 x float> %3
898 define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
899   %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
900   ret <4 x float> %1
907 define float @_Z9half_sqrtf(float %v) {
908   %1 = insertelement <2 x float> undef, float %v, i32 0
909   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
910   %3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
911   %4 = extractelement <2 x float> %3, i32 0
912   ret float %4
915 define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
916   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
917   %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
918   ret <2 x float> %2
921 define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
922   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
923   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
924   %3 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %2) nounwind readnone
925   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
926   ret <3 x float> %4
929 define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
930   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
931   %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
932   ret <4 x float> %2
940 define float @_Z10half_rsqrtf(float %v) {
941   %1 = insertelement <2 x float> undef, float %v, i32 0
942   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
943   %3 = extractelement <2 x float> %2, i32 0
944   ret float %3
947 define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
948   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
949   ret <2 x float> %1
952 define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
953   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
954   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
955   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
956   ret <3 x float> %3
959 define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
960   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
961   ret <4 x float> %1
968 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
970 %struct.rs_matrix4x4 = type { [16 x float] }
971 %struct.rs_matrix3x3 = type { [9 x float] }
972 %struct.rs_matrix2x2 = type { [4 x float] }
974 define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
975   %1 = insertelement <4 x float> undef, float %in, i32 0
976   %2 = insertelement <4 x float> %1, float %in, i32 1
977   %3 = insertelement <4 x float> %2, float %in, i32 2
978   %4 = insertelement <4 x float> %3, float %in, i32 3
979   ret <4 x float> %4
983 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <…
984   %x0 = extractelement <3 x float> %in, i32 0
985   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
986   %y0 = extractelement <3 x float> %in, i32 1
987   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
988   %z0 = extractelement <3 x float> %in, i32 2
989   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
992   %px2 = bitcast float* %px to i8*
993   %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind
996   %py2 = bitcast float* %py to i8*
997   %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind
1000   %pz2 = bitcast float* %pz to i8*
1001   %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
1002   %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
1004   %a1 = fmul <4 x float> %x, %xm
1005   %a2 = fmul <4 x float> %y, %ym
1006   %a3 = fadd <4 x float> %a1, %a2
1007   %a4 = fmul <4 x float> %z, %zm
1008   %a5 = fadd <4 x float> %a4, %a3
1009   %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1010   ret <3 x float> %a6
1013 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <…
1014   %x0 = extractelement <2 x float> %in, i32 0
1015   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1016   %y0 = extractelement <2 x float> %in, i32 1
1017   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1020   %px2 = bitcast float* %px to <4 x float>*
1021   %xm = load <4 x float>* %px2, align 4
1023   %py2 = bitcast float* %py to <4 x float>*
1024   %ym = load <4 x float>* %py2, align 4
1026   %a1 = fmul <4 x float> %x, %xm
1027   %a2 = fmul <4 x float> %y, %ym
1028   %a3 = fadd <4 x float> %a1, %a2
1029   %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
1030   ret <3 x float> %a4
1033 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <…
1034   %x0 = extractelement <4 x float> %in, i32 0
1035   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1036   %y0 = extractelement <4 x float> %in, i32 1
1037   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1038   %z0 = extractelement <4 x float> %in, i32 2
1039   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
1040   %w0 = extractelement <4 x float> %in, i32 3
1041   %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
1044   %px2 = bitcast float* %px to <4 x float>*
1045   %xm = load <4 x float>* %px2, align 4
1047   %py2 = bitcast float* %py to <4 x float>*
1048   %ym = load <4 x float>* %py2, align 4
1050   %pz2 = bitcast float* %pz to <4 x float>*
1051   %zm = load <4 x float>* %pz2, align 4
1053   %pw2 = bitcast float* %pw to <4 x float>*
1054   %wm = load <4 x float>* %pw2, align 4
1056   %a1 = fmul <4 x float> %x, %xm
1057   %a2 = fmul <4 x float> %y, %ym
1058   %a3 = fadd <4 x float> %a1, %a2
1059   %a4 = fmul <4 x float> %z, %zm
1060   %a5 = fadd <4 x float> %a3, %a4
1061   %a6 = fmul <4 x float> %w, %wm
1062   %a7 = fadd <4 x float> %a5, %a6
1063   ret <4 x float> %a7
1066 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <…
1067   %x0 = extractelement <3 x float> %in, i32 0
1068   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1069   %y0 = extractelement <3 x float> %in, i32 1
1070   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1071   %z0 = extractelement <3 x float> %in, i32 2
1072   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
1075   %px2 = bitcast float* %px to <4 x float>*
1076   %xm = load <4 x float>* %px2, align 4
1078   %py2 = bitcast float* %py to <4 x float>*
1079   %ym = load <4 x float>* %py2, align 4
1081   %pz2 = bitcast float* %pz to <4 x float>*
1082   %zm = load <4 x float>* %pz2, align 4
1084   %pw2 = bitcast float* %pw to <4 x float>*
1085   %wm = load <4 x float>* %pw2, align 4
1087   %a1 = fmul <4 x float> %x, %xm
1088   %a2 = fadd <4 x float> %wm, %a1
1089   %a3 = fmul <4 x float> %y, %ym
1090   %a4 = fadd <4 x float> %a2, %a3
1091   %a5 = fmul <4 x float> %z, %zm
1092   %a6 = fadd <4 x float> %a4, %a5
1093   ret <4 x float> %a6
1096 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <…
1097   %x0 = extractelement <2 x float> %in, i32 0
1098   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
1099   %y0 = extractelement <2 x float> %in, i32 1
1100   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
1103   %px2 = bitcast float* %px to <4 x float>*
1104   %xm = load <4 x float>* %px2, align 4
1106   %py2 = bitcast float* %py to <4 x float>*
1107   %ym = load <4 x float>* %py2, align 4
1109   %pw2 = bitcast float* %pw to <4 x float>*
1110   %wm = load <4 x float>* %pw2, align 4
1112   %a1 = fmul <4 x float> %x, %xm
1113   %a2 = fadd <4 x float> %wm, %a1
1114   %a3 = fmul <4 x float> %y, %ym
1115   %a4 = fadd <4 x float> %a2, %a3
1116   ret <4 x float> %a4
1126 @fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, ali…
1127 @fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
1128 @fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
1130 declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
1131 declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
1134 define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
1135     %f255 = load <4 x float>* @fc_255.0, align 16
1136     %f05 = load <4 x float>* @fc_0.5, align 16
1137     %f0 = load <4 x float>* @fc_0, align 16
1138     %v1 = fmul <4 x float> %f255, %color
1139     %v2 = fadd <4 x float> %f05, %v1
1140 …%v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255…
1141     %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone
1146 define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
1147     %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1148     %2 = insertelement <4 x float> %1, float 1.0, i32 3
1149     %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
1153 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
1154 define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
1155     %1 = insertelement <4 x float> undef, float %r, i32 0
1156     %2 = insertelement <4 x float> %1, float %g, i32 1
1157     %3 = insertelement <4 x float> %2, float %b, i32 2
1158     %4 = insertelement <4 x float> %3, float 1.0, i32 3
1159     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
1163 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
1164 define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnon…
1165     %1 = insertelement <4 x float> undef, float %r, i32 0
1166     %2 = insertelement <4 x float> %1, float %g, i32 1
1167     %3 = insertelement <4 x float> %2, float %b, i32 2
1168     %4 = insertelement <4 x float> %3, float %a, i32 3
1169     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone