Lines Matching refs:v16u8
48 out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \
49 out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \
50 out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \
127 v16u8 dst0_m, dst1_m; \
130 dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m); \
131 dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m); \
139 v16u8 vec0_m, vec1_m, vec2_m, vec3_m; \
142 vec0_m = (v16u8)__msa_pckev_h((v8i16)argb1, (v8i16)argb0); \
143 vec1_m = (v16u8)__msa_pckev_h((v8i16)argb3, (v8i16)argb2); \
144 vec2_m = (v16u8)__msa_pckod_h((v8i16)argb1, (v8i16)argb0); \
145 vec3_m = (v16u8)__msa_pckod_h((v8i16)argb3, (v8i16)argb2); \
154 y_out = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
160 v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \
161 v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
162 v16u8 vec8_m, vec9_m; \
166 src0_m = (v16u8)__msa_ld_b((void*)s, 0); \
167 src1_m = (v16u8)__msa_ld_b((void*)s, 16); \
168 src2_m = (v16u8)__msa_ld_b((void*)s, 32); \
169 src3_m = (v16u8)__msa_ld_b((void*)s, 48); \
170 src4_m = (v16u8)__msa_ld_b((void*)t, 0); \
171 src5_m = (v16u8)__msa_ld_b((void*)t, 16); \
172 src6_m = (v16u8)__msa_ld_b((void*)t, 32); \
173 src7_m = (v16u8)__msa_ld_b((void*)t, 48); \
174 vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
175 vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
176 vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
177 vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
178 vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
179 vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
180 vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
181 vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
202 argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
203 argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
204 src0_m = (v16u8)__msa_ld_b((void*)s, 64); \
205 src1_m = (v16u8)__msa_ld_b((void*)s, 80); \
206 src2_m = (v16u8)__msa_ld_b((void*)s, 96); \
207 src3_m = (v16u8)__msa_ld_b((void*)s, 112); \
208 src4_m = (v16u8)__msa_ld_b((void*)t, 64); \
209 src5_m = (v16u8)__msa_ld_b((void*)t, 80); \
210 src6_m = (v16u8)__msa_ld_b((void*)t, 96); \
211 src7_m = (v16u8)__msa_ld_b((void*)t, 112); \
212 vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \
213 vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \
214 vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \
215 vec5_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \
216 vec6_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \
217 vec7_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \
218 vec8_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \
219 vec9_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \
240 argb2 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \
241 argb3 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \
248 v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
251 vec0_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb1, (v16i8)argb0); \
252 vec1_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb3, (v16i8)argb2); \
253 vec2_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb1, (v16i8)argb0); \
254 vec3_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb3, (v16i8)argb2); \
255 vec4_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb1, (v16i8)argb0); \
256 vec5_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb3, (v16i8)argb2); \
257 vec6_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb1, (v16i8)argb0); \
258 vec7_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb3, (v16i8)argb2); \
271 v_out = (v16u8)__msa_pckod_b((v16i8)reg1_m, (v16i8)reg0_m); \
272 u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \
283 out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \
284 out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \
285 out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \
290 v16u8 src0, src1, src2, src3; in MirrorRow_MSA()
291 v16u8 dst0, dst1, dst2, dst3; in MirrorRow_MSA()
307 v16u8 src0, src1, src2, src3; in ARGBMirrorRow_MSA()
308 v16u8 dst0, dst1, dst2, dst3; in ARGBMirrorRow_MSA()
328 v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; in I422ToYUY2Row_MSA()
329 v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3; in I422ToYUY2Row_MSA()
352 v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; in I422ToUYVYRow_MSA()
353 v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3; in I422ToUYVYRow_MSA()
377 v16u8 src0, src1, src2; in I422ToARGBRow_MSA()
381 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in I422ToARGBRow_MSA()
390 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422ToARGBRow_MSA()
408 v16u8 src0, src1, src2; in I422ToRGBARow_MSA()
412 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in I422ToRGBARow_MSA()
421 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422ToRGBARow_MSA()
441 v16u8 src0, src1, src2, src3; in I422AlphaToARGBRow_MSA()
455 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422AlphaToARGBRow_MSA()
456 src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a); in I422AlphaToARGBRow_MSA()
459 src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3); in I422AlphaToARGBRow_MSA()
477 v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; in I422ToRGB24Row_MSA()
481 v16u8 reg0, reg1, reg2, reg3; in I422ToRGB24Row_MSA()
494 src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0); in I422ToRGB24Row_MSA()
497 src1 = (v16u8)__msa_insert_d(zero, 0, data_u); in I422ToRGB24Row_MSA()
498 src2 = (v16u8)__msa_insert_d(zero, 0, data_v); in I422ToRGB24Row_MSA()
499 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422ToRGB24Row_MSA()
500 src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8); in I422ToRGB24Row_MSA()
501 src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 8); in I422ToRGB24Row_MSA()
506 reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); in I422ToRGB24Row_MSA()
507 reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3); in I422ToRGB24Row_MSA()
508 reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2); in I422ToRGB24Row_MSA()
509 reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11); in I422ToRGB24Row_MSA()
510 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0); in I422ToRGB24Row_MSA()
511 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1); in I422ToRGB24Row_MSA()
512 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2); in I422ToRGB24Row_MSA()
530 v16u8 src0, src1, src2, dst0; in I422ToRGB565Row_MSA()
542 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422ToRGB565Row_MSA()
551 dst0 = (v16u8)(vec2 | vec0); in I422ToRGB565Row_MSA()
568 v16u8 src0, src1, src2, dst0; in I422ToARGB4444Row_MSA()
582 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422ToARGB4444Row_MSA()
592 dst0 = (v16u8)(reg1 | reg0); in I422ToARGB4444Row_MSA()
608 v16u8 src0, src1, src2, dst0; in I422ToARGB1555Row_MSA()
622 src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); in I422ToARGB1555Row_MSA()
632 dst0 = (v16u8)(reg1 | reg0); in I422ToARGB1555Row_MSA()
643 v16u8 src0, src1, src2, src3, dst0, dst1; in YUY2ToYRow_MSA()
647 dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in YUY2ToYRow_MSA()
648 dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in YUY2ToYRow_MSA()
662 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; in YUY2ToUVRow_MSA()
663 v16u8 vec0, vec1, dst0, dst1; in YUY2ToUVRow_MSA()
668 src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in YUY2ToUVRow_MSA()
669 src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in YUY2ToUVRow_MSA()
670 src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); in YUY2ToUVRow_MSA()
671 src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); in YUY2ToUVRow_MSA()
674 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in YUY2ToUVRow_MSA()
675 dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); in YUY2ToUVRow_MSA()
690 v16u8 src0, src1, src2, src3, dst0, dst1; in YUY2ToUV422Row_MSA()
694 src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in YUY2ToUV422Row_MSA()
695 src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in YUY2ToUV422Row_MSA()
696 dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in YUY2ToUV422Row_MSA()
697 dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in YUY2ToUV422Row_MSA()
708 v16u8 src0, src1, src2, src3, dst0, dst1; in UYVYToYRow_MSA()
712 dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in UYVYToYRow_MSA()
713 dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in UYVYToYRow_MSA()
727 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; in UYVYToUVRow_MSA()
728 v16u8 vec0, vec1, dst0, dst1; in UYVYToUVRow_MSA()
733 src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in UYVYToUVRow_MSA()
734 src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in UYVYToUVRow_MSA()
735 src2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); in UYVYToUVRow_MSA()
736 src3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); in UYVYToUVRow_MSA()
739 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in UYVYToUVRow_MSA()
740 dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); in UYVYToUVRow_MSA()
755 v16u8 src0, src1, src2, src3, dst0, dst1; in UYVYToUV422Row_MSA()
759 src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in UYVYToUV422Row_MSA()
760 src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in UYVYToUV422Row_MSA()
761 dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in UYVYToUV422Row_MSA()
762 dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in UYVYToUV422Row_MSA()
773 v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; in ARGBToYRow_MSA()
782 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0); in ARGBToYRow_MSA()
783 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16); in ARGBToYRow_MSA()
784 src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32); in ARGBToYRow_MSA()
785 src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48); in ARGBToYRow_MSA()
786 vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in ARGBToYRow_MSA()
787 vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in ARGBToYRow_MSA()
788 vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in ARGBToYRow_MSA()
789 vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in ARGBToYRow_MSA()
810 dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); in ARGBToYRow_MSA()
824 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; in ARGBToUVRow_MSA()
825 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; in ARGBToUVRow_MSA()
827 v16u8 dst0, dst1; in ARGBToUVRow_MSA()
836 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0); in ARGBToUVRow_MSA()
837 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16); in ARGBToUVRow_MSA()
838 src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32); in ARGBToUVRow_MSA()
839 src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48); in ARGBToUVRow_MSA()
840 src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64); in ARGBToUVRow_MSA()
841 src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80); in ARGBToUVRow_MSA()
842 src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96); in ARGBToUVRow_MSA()
843 src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112); in ARGBToUVRow_MSA()
844 vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in ARGBToUVRow_MSA()
845 vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in ARGBToUVRow_MSA()
846 vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); in ARGBToUVRow_MSA()
847 vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); in ARGBToUVRow_MSA()
848 vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in ARGBToUVRow_MSA()
849 vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in ARGBToUVRow_MSA()
850 vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); in ARGBToUVRow_MSA()
851 vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); in ARGBToUVRow_MSA()
852 vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBToUVRow_MSA()
853 vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in ARGBToUVRow_MSA()
854 vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); in ARGBToUVRow_MSA()
855 vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); in ARGBToUVRow_MSA()
856 vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); in ARGBToUVRow_MSA()
857 vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2); in ARGBToUVRow_MSA()
864 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0); in ARGBToUVRow_MSA()
865 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16); in ARGBToUVRow_MSA()
866 src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32); in ARGBToUVRow_MSA()
867 src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48); in ARGBToUVRow_MSA()
868 src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64); in ARGBToUVRow_MSA()
869 src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80); in ARGBToUVRow_MSA()
870 src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96); in ARGBToUVRow_MSA()
871 src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112); in ARGBToUVRow_MSA()
872 vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in ARGBToUVRow_MSA()
873 vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in ARGBToUVRow_MSA()
874 vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); in ARGBToUVRow_MSA()
875 vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); in ARGBToUVRow_MSA()
876 vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in ARGBToUVRow_MSA()
877 vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in ARGBToUVRow_MSA()
878 vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); in ARGBToUVRow_MSA()
879 vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); in ARGBToUVRow_MSA()
880 vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBToUVRow_MSA()
881 vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in ARGBToUVRow_MSA()
882 vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); in ARGBToUVRow_MSA()
883 vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); in ARGBToUVRow_MSA()
884 vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); in ARGBToUVRow_MSA()
885 vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2); in ARGBToUVRow_MSA()
924 dst0 = (v16u8)__msa_pckev_b((v16i8)reg7, (v16i8)reg6); in ARGBToUVRow_MSA()
925 dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); in ARGBToUVRow_MSA()
937 v16u8 src0, src1, src2, src3, dst0, dst1, dst2; in ARGBToRGB24Row_MSA()
945 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToRGB24Row_MSA()
946 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToRGB24Row_MSA()
947 src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); in ARGBToRGB24Row_MSA()
948 src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); in ARGBToRGB24Row_MSA()
949 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); in ARGBToRGB24Row_MSA()
950 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); in ARGBToRGB24Row_MSA()
951 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); in ARGBToRGB24Row_MSA()
961 v16u8 src0, src1, src2, src3, dst0, dst1, dst2; in ARGBToRAWRow_MSA()
969 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToRAWRow_MSA()
970 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToRAWRow_MSA()
971 src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); in ARGBToRAWRow_MSA()
972 src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); in ARGBToRAWRow_MSA()
973 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); in ARGBToRAWRow_MSA()
974 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); in ARGBToRAWRow_MSA()
975 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); in ARGBToRAWRow_MSA()
985 v16u8 src0, src1, dst0; in ARGBToRGB565Row_MSA()
986 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; in ARGBToRGB565Row_MSA()
990 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToRGB565Row_MSA()
991 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToRGB565Row_MSA()
992 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); in ARGBToRGB565Row_MSA()
993 vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3); in ARGBToRGB565Row_MSA()
994 vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5); in ARGBToRGB565Row_MSA()
995 vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3); in ARGBToRGB565Row_MSA()
996 vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3); in ARGBToRGB565Row_MSA()
997 vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5); in ARGBToRGB565Row_MSA()
998 vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); in ARGBToRGB565Row_MSA()
999 vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); in ARGBToRGB565Row_MSA()
1000 vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1); in ARGBToRGB565Row_MSA()
1001 vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); in ARGBToRGB565Row_MSA()
1002 vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2); in ARGBToRGB565Row_MSA()
1003 vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2); in ARGBToRGB565Row_MSA()
1008 vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); in ARGBToRGB565Row_MSA()
1009 vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4); in ARGBToRGB565Row_MSA()
1010 dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0); in ARGBToRGB565Row_MSA()
1021 v16u8 src0, src1, dst0; in ARGBToARGB1555Row_MSA()
1022 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; in ARGBToARGB1555Row_MSA()
1026 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToARGB1555Row_MSA()
1027 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToARGB1555Row_MSA()
1028 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); in ARGBToARGB1555Row_MSA()
1029 vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2); in ARGBToARGB1555Row_MSA()
1030 vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3); in ARGBToARGB1555Row_MSA()
1031 vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); in ARGBToARGB1555Row_MSA()
1032 vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); in ARGBToARGB1555Row_MSA()
1033 vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1); in ARGBToARGB1555Row_MSA()
1034 vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3); in ARGBToARGB1555Row_MSA()
1035 vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2); in ARGBToARGB1555Row_MSA()
1036 vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3); in ARGBToARGB1555Row_MSA()
1037 vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); in ARGBToARGB1555Row_MSA()
1038 vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1); in ARGBToARGB1555Row_MSA()
1039 vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1); in ARGBToARGB1555Row_MSA()
1040 vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2); in ARGBToARGB1555Row_MSA()
1041 vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2); in ARGBToARGB1555Row_MSA()
1042 vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3); in ARGBToARGB1555Row_MSA()
1043 vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3); in ARGBToARGB1555Row_MSA()
1050 vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); in ARGBToARGB1555Row_MSA()
1051 vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5); in ARGBToARGB1555Row_MSA()
1052 dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); in ARGBToARGB1555Row_MSA()
1063 v16u8 src0, src1; in ARGBToARGB4444Row_MSA()
1064 v16u8 vec0, vec1; in ARGBToARGB4444Row_MSA()
1065 v16u8 dst0; in ARGBToARGB4444Row_MSA()
1069 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToARGB4444Row_MSA()
1070 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToARGB4444Row_MSA()
1071 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4); in ARGBToARGB4444Row_MSA()
1072 vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4); in ARGBToARGB4444Row_MSA()
1073 src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1); in ARGBToARGB4444Row_MSA()
1074 src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1); in ARGBToARGB4444Row_MSA()
1077 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBToARGB4444Row_MSA()
1089 v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1; in ARGBToUV444Row_MSA()
1101 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToUV444Row_MSA()
1102 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToUV444Row_MSA()
1103 src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); in ARGBToUV444Row_MSA()
1104 src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); in ARGBToUV444Row_MSA()
1105 reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in ARGBToUV444Row_MSA()
1106 reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in ARGBToUV444Row_MSA()
1107 reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in ARGBToUV444Row_MSA()
1108 reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in ARGBToUV444Row_MSA()
1109 src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); in ARGBToUV444Row_MSA()
1110 src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); in ARGBToUV444Row_MSA()
1111 src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0); in ARGBToUV444Row_MSA()
1146 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBToUV444Row_MSA()
1147 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); in ARGBToUV444Row_MSA()
1161 v16u8 src0, src1, dst0; in ARGBMultiplyRow_MSA()
1167 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in ARGBMultiplyRow_MSA()
1168 src1 = (v16u8)__msa_ld_b((void*)src_argb1, 0); in ARGBMultiplyRow_MSA()
1187 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBMultiplyRow_MSA()
1200 v16u8 src0, src1, src2, src3, dst0, dst1; in ARGBAddRow_MSA()
1203 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in ARGBAddRow_MSA()
1204 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in ARGBAddRow_MSA()
1205 src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); in ARGBAddRow_MSA()
1206 src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); in ARGBAddRow_MSA()
1221 v16u8 src0, src1, src2, src3, dst0, dst1; in ARGBSubtractRow_MSA()
1224 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in ARGBSubtractRow_MSA()
1225 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in ARGBSubtractRow_MSA()
1226 src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); in ARGBSubtractRow_MSA()
1227 src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); in ARGBSubtractRow_MSA()
1241 v16u8 src0, src1, dst0, dst1; in ARGBAttenuateRow_MSA()
1245 v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; in ARGBAttenuateRow_MSA()
1248 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBAttenuateRow_MSA()
1249 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBAttenuateRow_MSA()
1294 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBAttenuateRow_MSA()
1295 dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in ARGBAttenuateRow_MSA()
1309 v16u8 src0, src1, dst0, vec0, vec1; in ARGBToRGB565DitherRow_MSA()
1319 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBToRGB565DitherRow_MSA()
1320 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBToRGB565DitherRow_MSA()
1321 vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in ARGBToRGB565DitherRow_MSA()
1322 vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in ARGBToRGB565DitherRow_MSA()
1341 dst0 = (v16u8)(reg0 | reg2); in ARGBToRGB565DitherRow_MSA()
1353 v16u8 src0, src1, dst0, dst1; in ARGBShuffleRow_MSA()
1362 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); in ARGBShuffleRow_MSA()
1363 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); in ARGBShuffleRow_MSA()
1364 dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0); in ARGBShuffleRow_MSA()
1365 dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1); in ARGBShuffleRow_MSA()
1377 v16u8 src0, dst0; in ARGBShadeRow_MSA()
1387 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); in ARGBShadeRow_MSA()
1404 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBShadeRow_MSA()
1413 v16u8 src0, src1, vec0, vec1, dst0, dst1; in ARGBGrayRow_MSA()
1415 v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26); in ARGBGrayRow_MSA()
1416 v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); in ARGBGrayRow_MSA()
1419 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); in ARGBGrayRow_MSA()
1420 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); in ARGBGrayRow_MSA()
1421 vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); in ARGBGrayRow_MSA()
1422 vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); in ARGBGrayRow_MSA()
1426 vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0); in ARGBGrayRow_MSA()
1427 vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0); in ARGBGrayRow_MSA()
1428 dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0); in ARGBGrayRow_MSA()
1429 dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0); in ARGBGrayRow_MSA()
1438 v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5; in ARGBSepiaRow_MSA()
1440 v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411); in ARGBSepiaRow_MSA()
1441 v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23); in ARGBSepiaRow_MSA()
1442 v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816); in ARGBSepiaRow_MSA()
1443 v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D); in ARGBSepiaRow_MSA()
1444 v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218); in ARGBSepiaRow_MSA()
1445 v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32); in ARGBSepiaRow_MSA()
1449 src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0); in ARGBSepiaRow_MSA()
1450 src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16); in ARGBSepiaRow_MSA()
1451 vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); in ARGBSepiaRow_MSA()
1452 vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); in ARGBSepiaRow_MSA()
1453 vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1); in ARGBSepiaRow_MSA()
1465 vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0); in ARGBSepiaRow_MSA()
1466 vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1); in ARGBSepiaRow_MSA()
1467 vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2); in ARGBSepiaRow_MSA()
1468 vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); in ARGBSepiaRow_MSA()
1469 vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); in ARGBSepiaRow_MSA()
1470 dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4); in ARGBSepiaRow_MSA()
1471 dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4); in ARGBSepiaRow_MSA()
1481 v16u8 src0, src1; in ARGB4444ToARGBRow_MSA()
1483 v16u8 dst0, dst1, dst2, dst3; in ARGB4444ToARGBRow_MSA()
1486 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); in ARGB4444ToARGBRow_MSA()
1487 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 16); in ARGB4444ToARGBRow_MSA()
1496 dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); in ARGB4444ToARGBRow_MSA()
1497 dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); in ARGB4444ToARGBRow_MSA()
1498 dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); in ARGB4444ToARGBRow_MSA()
1499 dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); in ARGB4444ToARGBRow_MSA()
1512 v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6; in ARGB1555ToARGBRow_MSA()
1513 v16u8 dst0, dst1, dst2, dst3; in ARGB1555ToARGBRow_MSA()
1531 reg0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGB1555ToARGBRow_MSA()
1532 reg1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in ARGB1555ToARGBRow_MSA()
1533 reg2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); in ARGB1555ToARGBRow_MSA()
1534 reg3 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in ARGB1555ToARGBRow_MSA()
1535 reg4 = (v16u8)__msa_slli_b((v16i8)reg0, 3); in ARGB1555ToARGBRow_MSA()
1536 reg5 = (v16u8)__msa_slli_b((v16i8)reg1, 3); in ARGB1555ToARGBRow_MSA()
1537 reg6 = (v16u8)__msa_slli_b((v16i8)reg2, 3); in ARGB1555ToARGBRow_MSA()
1538 reg4 |= (v16u8)__msa_srai_b((v16i8)reg0, 2); in ARGB1555ToARGBRow_MSA()
1539 reg5 |= (v16u8)__msa_srai_b((v16i8)reg1, 2); in ARGB1555ToARGBRow_MSA()
1540 reg6 |= (v16u8)__msa_srai_b((v16i8)reg2, 2); in ARGB1555ToARGBRow_MSA()
1542 reg0 = (v16u8)__msa_ilvr_b((v16i8)reg6, (v16i8)reg4); in ARGB1555ToARGBRow_MSA()
1543 reg1 = (v16u8)__msa_ilvl_b((v16i8)reg6, (v16i8)reg4); in ARGB1555ToARGBRow_MSA()
1544 reg2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg5); in ARGB1555ToARGBRow_MSA()
1545 reg3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg5); in ARGB1555ToARGBRow_MSA()
1546 dst0 = (v16u8)__msa_ilvr_b((v16i8)reg2, (v16i8)reg0); in ARGB1555ToARGBRow_MSA()
1547 dst1 = (v16u8)__msa_ilvl_b((v16i8)reg2, (v16i8)reg0); in ARGB1555ToARGBRow_MSA()
1548 dst2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg1); in ARGB1555ToARGBRow_MSA()
1549 dst3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg1); in ARGB1555ToARGBRow_MSA()
1562 v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3; in RGB565ToARGBRow_MSA()
1563 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in RGB565ToARGBRow_MSA()
1589 res0 = (v16u8)__msa_ilvev_b((v16i8)reg2, (v16i8)reg0); in RGB565ToARGBRow_MSA()
1590 res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg1); in RGB565ToARGBRow_MSA()
1591 res2 = (v16u8)__msa_ilvev_b((v16i8)reg5, (v16i8)reg3); in RGB565ToARGBRow_MSA()
1592 res3 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg4); in RGB565ToARGBRow_MSA()
1593 dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); in RGB565ToARGBRow_MSA()
1594 dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); in RGB565ToARGBRow_MSA()
1595 dst2 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res2); in RGB565ToARGBRow_MSA()
1596 dst3 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res2); in RGB565ToARGBRow_MSA()
1607 v16u8 src0, src1, src2; in RGB24ToARGBRow_MSA()
1608 v16u8 vec0, vec1, vec2; in RGB24ToARGBRow_MSA()
1609 v16u8 dst0, dst1, dst2, dst3; in RGB24ToARGBRow_MSA()
1610 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in RGB24ToARGBRow_MSA()
1614 src0 = (v16u8)__msa_ld_b((void*)src_rgb24, 0); in RGB24ToARGBRow_MSA()
1615 src1 = (v16u8)__msa_ld_b((void*)src_rgb24, 16); in RGB24ToARGBRow_MSA()
1616 src2 = (v16u8)__msa_ld_b((void*)src_rgb24, 32); in RGB24ToARGBRow_MSA()
1617 vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); in RGB24ToARGBRow_MSA()
1618 vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); in RGB24ToARGBRow_MSA()
1619 vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); in RGB24ToARGBRow_MSA()
1620 dst0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)src0); in RGB24ToARGBRow_MSA()
1621 dst1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec0); in RGB24ToARGBRow_MSA()
1622 dst2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec1); in RGB24ToARGBRow_MSA()
1623 dst3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec2); in RGB24ToARGBRow_MSA()
1632 v16u8 src0, src1, src2; in RAWToARGBRow_MSA()
1633 v16u8 vec0, vec1, vec2; in RAWToARGBRow_MSA()
1634 v16u8 dst0, dst1, dst2, dst3; in RAWToARGBRow_MSA()
1635 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in RAWToARGBRow_MSA()
1639 src0 = (v16u8)__msa_ld_b((void*)src_raw, 0); in RAWToARGBRow_MSA()
1640 src1 = (v16u8)__msa_ld_b((void*)src_raw, 16); in RAWToARGBRow_MSA()
1641 src2 = (v16u8)__msa_ld_b((void*)src_raw, 32); in RAWToARGBRow_MSA()
1642 vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); in RAWToARGBRow_MSA()
1643 vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); in RAWToARGBRow_MSA()
1644 vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); in RAWToARGBRow_MSA()
1645 dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)src0); in RAWToARGBRow_MSA()
1646 dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec0); in RAWToARGBRow_MSA()
1647 dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec1); in RAWToARGBRow_MSA()
1648 dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec2); in RAWToARGBRow_MSA()
1661 v16u8 dst0; in ARGB1555ToYRow_MSA()
1707 dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); in ARGB1555ToYRow_MSA()
1719 v16u8 dst0; in RGB565ToYRow_MSA()
1770 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in RGB565ToYRow_MSA()
1779 v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; in RGB24ToYRow_MSA()
1792 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in RGB24ToYRow_MSA()
1793 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in RGB24ToYRow_MSA()
1794 src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); in RGB24ToYRow_MSA()
1795 reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); in RGB24ToYRow_MSA()
1796 reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); in RGB24ToYRow_MSA()
1797 reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); in RGB24ToYRow_MSA()
1798 reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); in RGB24ToYRow_MSA()
1803 vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8119); in RGB24ToYRow_MSA()
1804 vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8119); in RGB24ToYRow_MSA()
1805 vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x42); in RGB24ToYRow_MSA()
1806 vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x42); in RGB24ToYRow_MSA()
1811 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in RGB24ToYRow_MSA()
1820 v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; in RAWToYRow_MSA()
1833 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in RAWToYRow_MSA()
1834 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in RAWToYRow_MSA()
1835 src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); in RAWToYRow_MSA()
1836 reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); in RAWToYRow_MSA()
1837 reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); in RAWToYRow_MSA()
1838 reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); in RAWToYRow_MSA()
1839 reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); in RAWToYRow_MSA()
1844 vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8142); in RAWToYRow_MSA()
1845 vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8142); in RAWToYRow_MSA()
1846 vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x19); in RAWToYRow_MSA()
1847 vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x19); in RAWToYRow_MSA()
1852 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in RAWToYRow_MSA()
1870 v16u8 dst0; in ARGB1555ToUVRow_MSA()
1907 vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); in ARGB1555ToUVRow_MSA()
1908 vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); in ARGB1555ToUVRow_MSA()
1909 vec4 = __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); in ARGB1555ToUVRow_MSA()
1928 dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); in ARGB1555ToUVRow_MSA()
1951 v16u8 dst0; in RGB565ToUVRow_MSA()
1989 vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); in RGB565ToUVRow_MSA()
1990 vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); in RGB565ToUVRow_MSA()
1991 vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); in RGB565ToUVRow_MSA()
2008 dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); in RGB565ToUVRow_MSA()
2029 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; in RGB24ToUVRow_MSA()
2030 v16u8 inp0, inp1, inp2, inp3, inp4, inp5; in RGB24ToUVRow_MSA()
2033 v16u8 dst0; in RGB24ToUVRow_MSA()
2044 inp0 = (v16u8)__msa_ld_b((void*)s, 0); in RGB24ToUVRow_MSA()
2045 inp1 = (v16u8)__msa_ld_b((void*)s, 16); in RGB24ToUVRow_MSA()
2046 inp2 = (v16u8)__msa_ld_b((void*)s, 32); in RGB24ToUVRow_MSA()
2047 inp3 = (v16u8)__msa_ld_b((void*)t, 0); in RGB24ToUVRow_MSA()
2048 inp4 = (v16u8)__msa_ld_b((void*)t, 16); in RGB24ToUVRow_MSA()
2049 inp5 = (v16u8)__msa_ld_b((void*)t, 32); in RGB24ToUVRow_MSA()
2050 src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); in RGB24ToUVRow_MSA()
2051 src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); in RGB24ToUVRow_MSA()
2052 src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); in RGB24ToUVRow_MSA()
2053 src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); in RGB24ToUVRow_MSA()
2054 src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); in RGB24ToUVRow_MSA()
2055 src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); in RGB24ToUVRow_MSA()
2056 src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); in RGB24ToUVRow_MSA()
2057 src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); in RGB24ToUVRow_MSA()
2058 src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); in RGB24ToUVRow_MSA()
2059 src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); in RGB24ToUVRow_MSA()
2060 src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); in RGB24ToUVRow_MSA()
2061 src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); in RGB24ToUVRow_MSA()
2062 src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); in RGB24ToUVRow_MSA()
2063 src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); in RGB24ToUVRow_MSA()
2072 vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); in RGB24ToUVRow_MSA()
2073 vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); in RGB24ToUVRow_MSA()
2074 vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); in RGB24ToUVRow_MSA()
2075 vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); in RGB24ToUVRow_MSA()
2076 vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); in RGB24ToUVRow_MSA()
2077 vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); in RGB24ToUVRow_MSA()
2078 vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); in RGB24ToUVRow_MSA()
2079 vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); in RGB24ToUVRow_MSA()
2113 dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); in RGB24ToUVRow_MSA()
2134 v16u8 inp0, inp1, inp2, inp3, inp4, inp5; in RAWToUVRow_MSA()
2135 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; in RAWToUVRow_MSA()
2138 v16u8 dst0; in RAWToUVRow_MSA()
2149 inp0 = (v16u8)__msa_ld_b((void*)s, 0); in RAWToUVRow_MSA()
2150 inp1 = (v16u8)__msa_ld_b((void*)s, 16); in RAWToUVRow_MSA()
2151 inp2 = (v16u8)__msa_ld_b((void*)s, 32); in RAWToUVRow_MSA()
2152 inp3 = (v16u8)__msa_ld_b((void*)t, 0); in RAWToUVRow_MSA()
2153 inp4 = (v16u8)__msa_ld_b((void*)t, 16); in RAWToUVRow_MSA()
2154 inp5 = (v16u8)__msa_ld_b((void*)t, 32); in RAWToUVRow_MSA()
2155 src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); in RAWToUVRow_MSA()
2156 src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); in RAWToUVRow_MSA()
2157 src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); in RAWToUVRow_MSA()
2158 src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); in RAWToUVRow_MSA()
2159 src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); in RAWToUVRow_MSA()
2160 src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); in RAWToUVRow_MSA()
2161 src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); in RAWToUVRow_MSA()
2162 src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); in RAWToUVRow_MSA()
2163 src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); in RAWToUVRow_MSA()
2164 src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); in RAWToUVRow_MSA()
2165 src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); in RAWToUVRow_MSA()
2166 src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); in RAWToUVRow_MSA()
2167 src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); in RAWToUVRow_MSA()
2168 src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); in RAWToUVRow_MSA()
2177 vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); in RAWToUVRow_MSA()
2178 vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); in RAWToUVRow_MSA()
2179 vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); in RAWToUVRow_MSA()
2180 vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); in RAWToUVRow_MSA()
2181 vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); in RAWToUVRow_MSA()
2182 vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); in RAWToUVRow_MSA()
2183 vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); in RAWToUVRow_MSA()
2184 vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); in RAWToUVRow_MSA()
2218 dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); in RAWToUVRow_MSA()
2237 v16u8 src0, src1, res0, res1, dst0, dst1; in NV12ToARGBRow_MSA()
2241 v16u8 zero = {0}; in NV12ToARGBRow_MSA()
2242 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in NV12ToARGBRow_MSA()
2252 src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); in NV12ToARGBRow_MSA()
2253 src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); in NV12ToARGBRow_MSA()
2256 res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); in NV12ToARGBRow_MSA()
2257 res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); in NV12ToARGBRow_MSA()
2258 dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); in NV12ToARGBRow_MSA()
2259 dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); in NV12ToARGBRow_MSA()
2274 v16u8 src0, src1, dst0; in NV12ToRGB565Row_MSA()
2278 v16u8 zero = {0}; in NV12ToRGB565Row_MSA()
2288 src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); in NV12ToRGB565Row_MSA()
2289 src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); in NV12ToRGB565Row_MSA()
2295 dst0 = (v16u8)(vec0 | vec1 | vec2); in NV12ToRGB565Row_MSA()
2310 v16u8 src0, src1, res0, res1, dst0, dst1; in NV21ToARGBRow_MSA()
2314 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in NV21ToARGBRow_MSA()
2315 v16u8 zero = {0}; in NV21ToARGBRow_MSA()
2326 src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); in NV21ToARGBRow_MSA()
2327 src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); in NV21ToARGBRow_MSA()
2328 src1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); in NV21ToARGBRow_MSA()
2331 res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); in NV21ToARGBRow_MSA()
2332 res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); in NV21ToARGBRow_MSA()
2333 dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); in NV21ToARGBRow_MSA()
2334 dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); in NV21ToARGBRow_MSA()
2347 v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3; in SobelRow_MSA()
2353 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in SobelRow_MSA()
2356 src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); in SobelRow_MSA()
2357 src1 = (v16u8)__msa_ld_b((void*)src_sobely, 0); in SobelRow_MSA()
2359 dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0); in SobelRow_MSA()
2360 dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0); in SobelRow_MSA()
2361 dst2 = (v16u8)__msa_vshf_b(mask2, (v16i8)alpha, (v16i8)vec0); in SobelRow_MSA()
2362 dst3 = (v16u8)__msa_vshf_b(mask3, (v16i8)alpha, (v16i8)vec0); in SobelRow_MSA()
2375 v16u8 src0, src1, src2, src3, dst0, dst1; in SobelToPlaneRow_MSA()
2378 src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); in SobelToPlaneRow_MSA()
2379 src1 = (v16u8)__msa_ld_b((void*)src_sobelx, 16); in SobelToPlaneRow_MSA()
2380 src2 = (v16u8)__msa_ld_b((void*)src_sobely, 0); in SobelToPlaneRow_MSA()
2381 src3 = (v16u8)__msa_ld_b((void*)src_sobely, 16); in SobelToPlaneRow_MSA()
2396 v16u8 src0, src1, vec0, vec1, vec2; in SobelXYRow_MSA()
2397 v16u8 reg0, reg1, dst0, dst1, dst2, dst3; in SobelXYRow_MSA()
2398 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in SobelXYRow_MSA()
2401 src0 = (v16u8)__msa_ld_b((void*)src_sobelx, 0); in SobelXYRow_MSA()
2402 src1 = (v16u8)__msa_ld_b((void*)src_sobely, 0); in SobelXYRow_MSA()
2404 vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1); in SobelXYRow_MSA()
2405 vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1); in SobelXYRow_MSA()
2406 reg0 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)vec0); in SobelXYRow_MSA()
2407 reg1 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)vec0); in SobelXYRow_MSA()
2408 dst0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)vec1); in SobelXYRow_MSA()
2409 dst1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)vec1); in SobelXYRow_MSA()
2410 dst2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)vec2); in SobelXYRow_MSA()
2411 dst3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)vec2); in SobelXYRow_MSA()
2421 v16u8 src0, src1, src2, src3, dst0; in ARGBToYJRow_MSA()
2422 v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); in ARGBToYJRow_MSA()
2423 v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26); in ARGBToYJRow_MSA()
2427 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in ARGBToYJRow_MSA()
2428 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in ARGBToYJRow_MSA()
2429 src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); in ARGBToYJRow_MSA()
2430 src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); in ARGBToYJRow_MSA()
2441 v16u8 src0, src1, src2, src3, dst0; in BGRAToYRow_MSA()
2442 v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200); in BGRAToYRow_MSA()
2443 v16u8 const_0x1981 = (v16u8)__msa_fill_h(0x1981); in BGRAToYRow_MSA()
2447 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in BGRAToYRow_MSA()
2448 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in BGRAToYRow_MSA()
2449 src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); in BGRAToYRow_MSA()
2450 src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); in BGRAToYRow_MSA()
2461 v16u8 src0, src1, src2, src3, dst0; in ABGRToYRow_MSA()
2462 v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142); in ABGRToYRow_MSA()
2463 v16u8 const_0x19 = (v16u8)__msa_fill_h(0x19); in ABGRToYRow_MSA()
2467 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in ABGRToYRow_MSA()
2468 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in ABGRToYRow_MSA()
2469 src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); in ABGRToYRow_MSA()
2470 src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); in ABGRToYRow_MSA()
2481 v16u8 src0, src1, src2, src3, dst0; in RGBAToYRow_MSA()
2482 v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900); in RGBAToYRow_MSA()
2483 v16u8 const_0x4281 = (v16u8)__msa_fill_h(0x4281); in RGBAToYRow_MSA()
2487 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in RGBAToYRow_MSA()
2488 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in RGBAToYRow_MSA()
2489 src2 = (v16u8)__msa_ld_b((void*)src_argb0, 32); in RGBAToYRow_MSA()
2490 src3 = (v16u8)__msa_ld_b((void*)src_argb0, 48); in RGBAToYRow_MSA()
2507 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; in ARGBToUVJRow_MSA()
2508 v16u8 vec0, vec1, vec2, vec3; in ARGBToUVJRow_MSA()
2509 v16u8 dst0, dst1; in ARGBToUVJRow_MSA()
2515 v16u8 const_0x7F = (v16u8)__msa_fill_h(0x7F); in ARGBToUVJRow_MSA()
2516 v16u8 const_0x6B14 = (v16u8)__msa_fill_h(0x6B14); in ARGBToUVJRow_MSA()
2517 v16u8 const_0x2B54 = (v16u8)__msa_fill_h(0x2B54); in ARGBToUVJRow_MSA()
2521 src0 = (v16u8)__msa_ld_b((void*)s, 0); in ARGBToUVJRow_MSA()
2522 src1 = (v16u8)__msa_ld_b((void*)s, 16); in ARGBToUVJRow_MSA()
2523 src2 = (v16u8)__msa_ld_b((void*)s, 32); in ARGBToUVJRow_MSA()
2524 src3 = (v16u8)__msa_ld_b((void*)s, 48); in ARGBToUVJRow_MSA()
2525 src4 = (v16u8)__msa_ld_b((void*)t, 0); in ARGBToUVJRow_MSA()
2526 src5 = (v16u8)__msa_ld_b((void*)t, 16); in ARGBToUVJRow_MSA()
2527 src6 = (v16u8)__msa_ld_b((void*)t, 32); in ARGBToUVJRow_MSA()
2528 src7 = (v16u8)__msa_ld_b((void*)t, 48); in ARGBToUVJRow_MSA()
2533 src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); in ARGBToUVJRow_MSA()
2534 src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); in ARGBToUVJRow_MSA()
2535 src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); in ARGBToUVJRow_MSA()
2536 src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); in ARGBToUVJRow_MSA()
2539 src0 = (v16u8)__msa_ld_b((void*)s, 64); in ARGBToUVJRow_MSA()
2540 src1 = (v16u8)__msa_ld_b((void*)s, 80); in ARGBToUVJRow_MSA()
2541 src2 = (v16u8)__msa_ld_b((void*)s, 96); in ARGBToUVJRow_MSA()
2542 src3 = (v16u8)__msa_ld_b((void*)s, 112); in ARGBToUVJRow_MSA()
2543 src4 = (v16u8)__msa_ld_b((void*)t, 64); in ARGBToUVJRow_MSA()
2544 src5 = (v16u8)__msa_ld_b((void*)t, 80); in ARGBToUVJRow_MSA()
2545 src6 = (v16u8)__msa_ld_b((void*)t, 96); in ARGBToUVJRow_MSA()
2546 src7 = (v16u8)__msa_ld_b((void*)t, 112); in ARGBToUVJRow_MSA()
2551 src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); in ARGBToUVJRow_MSA()
2552 src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); in ARGBToUVJRow_MSA()
2553 src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); in ARGBToUVJRow_MSA()
2554 src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); in ARGBToUVJRow_MSA()
2577 v16u8 dst0, dst1, vec0, vec1, vec2, vec3; in BGRAToUVRow_MSA()
2583 v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); in BGRAToUVRow_MSA()
2584 v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); in BGRAToUVRow_MSA()
2585 v16u8 const_0x264A = (v16u8)__msa_fill_h(0x264A); in BGRAToUVRow_MSA()
2610 v16u8 src0, src1, src2, src3; in ABGRToUVRow_MSA()
2611 v16u8 dst0, dst1; in ABGRToUVRow_MSA()
2617 v16u8 const_0x4A26 = (v16u8)__msa_fill_h(0x4A26); in ABGRToUVRow_MSA()
2618 v16u8 const_0x0070 = (v16u8)__msa_fill_h(0x0070); in ABGRToUVRow_MSA()
2619 v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); in ABGRToUVRow_MSA()
2644 v16u8 dst0, dst1, vec0, vec1, vec2, vec3; in RGBAToUVRow_MSA()
2650 v16u8 const_0x125E = (v16u8)__msa_fill_h(0x264A); in RGBAToUVRow_MSA()
2651 v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); in RGBAToUVRow_MSA()
2652 v16u8 const_0x264A = (v16u8)__msa_fill_h(0x125E); in RGBAToUVRow_MSA()
2676 v16u8 src0, src1, src2, dst0, dst1; in I444ToARGBRow_MSA()
2680 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in I444ToARGBRow_MSA()
2727 dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0); in I444ToARGBRow_MSA()
2728 dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0); in I444ToARGBRow_MSA()
2739 v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3; in I400ToARGBRow_MSA()
2744 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in I400ToARGBRow_MSA()
2749 src0 = (v16u8)__msa_ld_b((void*)src_y, 0); in I400ToARGBRow_MSA()
2774 res0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in I400ToARGBRow_MSA()
2775 res1 = (v16u8)__msa_ilvr_b((v16i8)res0, (v16i8)res0); in I400ToARGBRow_MSA()
2776 res2 = (v16u8)__msa_ilvl_b((v16i8)res0, (v16i8)res0); in I400ToARGBRow_MSA()
2777 res3 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)res0); in I400ToARGBRow_MSA()
2778 res4 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)res0); in I400ToARGBRow_MSA()
2779 dst0 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res1); in I400ToARGBRow_MSA()
2780 dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1); in I400ToARGBRow_MSA()
2781 dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2); in I400ToARGBRow_MSA()
2782 dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2); in I400ToARGBRow_MSA()
2791 v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3; in J400ToARGBRow_MSA()
2792 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in J400ToARGBRow_MSA()
2795 src0 = (v16u8)__msa_ld_b((void*)src_y, 0); in J400ToARGBRow_MSA()
2796 vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0); in J400ToARGBRow_MSA()
2797 vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0); in J400ToARGBRow_MSA()
2798 vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0); in J400ToARGBRow_MSA()
2799 vec3 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)src0); in J400ToARGBRow_MSA()
2800 dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); in J400ToARGBRow_MSA()
2801 dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); in J400ToARGBRow_MSA()
2802 dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); in J400ToARGBRow_MSA()
2803 dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); in J400ToARGBRow_MSA()
2815 v16u8 src0, src1, src2; in YUY2ToARGBRow_MSA()
2819 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in YUY2ToARGBRow_MSA()
2827 src0 = (v16u8)__msa_ld_b((void*)src_yuy2, 0); in YUY2ToARGBRow_MSA()
2828 src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); in YUY2ToARGBRow_MSA()
2829 src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); in YUY2ToARGBRow_MSA()
2843 v16u8 src0, src1, src2; in UYVYToARGBRow_MSA()
2847 v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); in UYVYToARGBRow_MSA()
2855 src0 = (v16u8)__msa_ld_b((void*)src_uyvy, 0); in UYVYToARGBRow_MSA()
2856 src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); in UYVYToARGBRow_MSA()
2857 src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); in UYVYToARGBRow_MSA()
2877 v16u8 src0, src1, src2, src3, dst0, dst1; in InterpolateRow_MSA()
2887 src0 = (v16u8)__msa_ld_b((void*)s, 0); in InterpolateRow_MSA()
2888 src1 = (v16u8)__msa_ld_b((void*)s, 16); in InterpolateRow_MSA()
2889 src2 = (v16u8)__msa_ld_b((void*)t, 0); in InterpolateRow_MSA()
2890 src3 = (v16u8)__msa_ld_b((void*)t, 16); in InterpolateRow_MSA()
2905 src0 = (v16u8)__msa_ld_b((void*)s, 0); in InterpolateRow_MSA()
2906 src1 = (v16u8)__msa_ld_b((void*)s, 16); in InterpolateRow_MSA()
2907 src2 = (v16u8)__msa_ld_b((void*)t, 0); in InterpolateRow_MSA()
2908 src3 = (v16u8)__msa_ld_b((void*)t, 16); in InterpolateRow_MSA()
2913 vec0 = (v8u16)__msa_dotp_u_h((v16u8)vec0, (v16u8)y_frac); in InterpolateRow_MSA()
2914 vec1 = (v8u16)__msa_dotp_u_h((v16u8)vec1, (v16u8)y_frac); in InterpolateRow_MSA()
2915 vec2 = (v8u16)__msa_dotp_u_h((v16u8)vec2, (v16u8)y_frac); in InterpolateRow_MSA()
2916 vec3 = (v8u16)__msa_dotp_u_h((v16u8)vec3, (v16u8)y_frac); in InterpolateRow_MSA()
2921 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in InterpolateRow_MSA()
2922 dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in InterpolateRow_MSA()
2942 v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; in RAWToRGB24Row_MSA()
2950 src0 = (v16u8)__msa_ld_b((void*)src_raw, 0); in RAWToRGB24Row_MSA()
2951 src1 = (v16u8)__msa_ld_b((void*)src_raw, 16); in RAWToRGB24Row_MSA()
2952 src2 = (v16u8)__msa_ld_b((void*)src_raw, 32); in RAWToRGB24Row_MSA()
2953 src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8); in RAWToRGB24Row_MSA()
2954 src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); in RAWToRGB24Row_MSA()
2955 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); in RAWToRGB24Row_MSA()
2956 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src4, (v16i8)src3); in RAWToRGB24Row_MSA()
2957 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src2, (v16i8)src1); in RAWToRGB24Row_MSA()
2970 v16u8 src0, src1, dst0, dst1; in MergeUVRow_MSA()
2973 src0 = (v16u8)__msa_ld_b((void*)src_u, 0); in MergeUVRow_MSA()
2974 src1 = (v16u8)__msa_ld_b((void*)src_v, 0); in MergeUVRow_MSA()
2975 dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0); in MergeUVRow_MSA()
2976 dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0); in MergeUVRow_MSA()
2988 v16u8 src0, src1, src2, src3, vec0, vec1, dst0; in ARGBExtractAlphaRow_MSA()
2991 src0 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBExtractAlphaRow_MSA()
2992 src1 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBExtractAlphaRow_MSA()
2993 src2 = (v16u8)__msa_ld_b((void*)src_argb, 32); in ARGBExtractAlphaRow_MSA()
2994 src3 = (v16u8)__msa_ld_b((void*)src_argb, 48); in ARGBExtractAlphaRow_MSA()
2995 vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in ARGBExtractAlphaRow_MSA()
2996 vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in ARGBExtractAlphaRow_MSA()
2997 dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); in ARGBExtractAlphaRow_MSA()
3009 v16u8 src0, src1, src2, src3, dst0, dst1; in ARGBBlendRow_MSA()
3013 v16u8 const_255 = (v16u8)__msa_ldi_b(255); in ARGBBlendRow_MSA()
3014 v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; in ARGBBlendRow_MSA()
3018 src0 = (v16u8)__msa_ld_b((void*)src_argb0, 0); in ARGBBlendRow_MSA()
3019 src1 = (v16u8)__msa_ld_b((void*)src_argb0, 16); in ARGBBlendRow_MSA()
3020 src2 = (v16u8)__msa_ld_b((void*)src_argb1, 0); in ARGBBlendRow_MSA()
3021 src3 = (v16u8)__msa_ld_b((void*)src_argb1, 16); in ARGBBlendRow_MSA()
3058 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBBlendRow_MSA()
3059 dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in ARGBBlendRow_MSA()
3075 v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; in ARGBQuantizeRow_MSA()
3080 v16u8 vec_int_sz = (v16u8)__msa_fill_b(interval_size); in ARGBQuantizeRow_MSA()
3081 v16u8 vec_int_ofst = (v16u8)__msa_fill_b(interval_offset); in ARGBQuantizeRow_MSA()
3086 src0 = (v16u8)__msa_ld_b((void*)dst_argb, 0); in ARGBQuantizeRow_MSA()
3087 src1 = (v16u8)__msa_ld_b((void*)dst_argb, 16); in ARGBQuantizeRow_MSA()
3088 src2 = (v16u8)__msa_ld_b((void*)dst_argb, 32); in ARGBQuantizeRow_MSA()
3089 src3 = (v16u8)__msa_ld_b((void*)dst_argb, 48); in ARGBQuantizeRow_MSA()
3154 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in ARGBQuantizeRow_MSA()
3155 dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); in ARGBQuantizeRow_MSA()
3156 dst2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); in ARGBQuantizeRow_MSA()
3157 dst3 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); in ARGBQuantizeRow_MSA()
3166 dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)src0, (v16i8)dst0); in ARGBQuantizeRow_MSA()
3167 dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)dst1); in ARGBQuantizeRow_MSA()
3168 dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)src2, (v16i8)dst2); in ARGBQuantizeRow_MSA()
3169 dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)src3, (v16i8)dst3); in ARGBQuantizeRow_MSA()
3181 v16u8 src1, src2, dst0, dst1; in ARGBColorMatrixRow_MSA()
3194 src1 = (v16u8)__msa_ld_b((void*)src_argb, 0); in ARGBColorMatrixRow_MSA()
3195 src2 = (v16u8)__msa_ld_b((void*)src_argb, 16); in ARGBColorMatrixRow_MSA()
3276 dst0 = (v16u8)__msa_pckev_b((v16i8)vec11, (v16i8)vec10); in ARGBColorMatrixRow_MSA()
3277 dst1 = (v16u8)__msa_pckev_b((v16i8)vec13, (v16i8)vec12); in ARGBColorMatrixRow_MSA()
3289 v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; in SplitUVRow_MSA()
3292 src0 = (v16u8)__msa_ld_b((void*)src_uv, 0); in SplitUVRow_MSA()
3293 src1 = (v16u8)__msa_ld_b((void*)src_uv, 16); in SplitUVRow_MSA()
3294 src2 = (v16u8)__msa_ld_b((void*)src_uv, 32); in SplitUVRow_MSA()
3295 src3 = (v16u8)__msa_ld_b((void*)src_uv, 48); in SplitUVRow_MSA()
3296 dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); in SplitUVRow_MSA()
3297 dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); in SplitUVRow_MSA()
3298 dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); in SplitUVRow_MSA()
3299 dst3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); in SplitUVRow_MSA()
3310 v16u8 dst0 = (v16u8)__msa_fill_b(v8); in SetRow_MSA()
3323 v16u8 src0, src1, src2, src3; in MirrorUVRow_MSA()
3324 v16u8 dst0, dst1, dst2, dst3; in MirrorUVRow_MSA()
3332 src2 = (v16u8)__msa_ld_b((void*)src_uv, 0); in MirrorUVRow_MSA()
3333 src3 = (v16u8)__msa_ld_b((void*)src_uv, 16); in MirrorUVRow_MSA()
3334 src0 = (v16u8)__msa_ld_b((void*)src_uv, 32); in MirrorUVRow_MSA()
3335 src1 = (v16u8)__msa_ld_b((void*)src_uv, 48); in MirrorUVRow_MSA()
3336 dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); in MirrorUVRow_MSA()
3337 dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); in MirrorUVRow_MSA()
3338 dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); in MirrorUVRow_MSA()
3339 dst3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); in MirrorUVRow_MSA()
3353 v16u8 src0, src1, src2, src3, src4, src5, dst0; in SobelXRow_MSA()
3362 src0 = (v16u8)__msa_ld_b((void*)src_y0, 0); in SobelXRow_MSA()
3363 src1 = (v16u8)__msa_ld_b((void*)src_y0, 16); in SobelXRow_MSA()
3364 src2 = (v16u8)__msa_ld_b((void*)src_y1, 0); in SobelXRow_MSA()
3365 src3 = (v16u8)__msa_ld_b((void*)src_y1, 16); in SobelXRow_MSA()
3366 src4 = (v16u8)__msa_ld_b((void*)src_y2, 0); in SobelXRow_MSA()
3367 src5 = (v16u8)__msa_ld_b((void*)src_y2, 16); in SobelXRow_MSA()
3374 vec0 = (v8i16)__msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); in SobelXRow_MSA()
3375 vec1 = (v8i16)__msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); in SobelXRow_MSA()
3376 vec2 = (v8i16)__msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); in SobelXRow_MSA()
3377 vec3 = (v8i16)__msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); in SobelXRow_MSA()
3378 vec4 = (v8i16)__msa_hsub_u_h((v16u8)vec4, (v16u8)vec4); in SobelXRow_MSA()
3379 vec5 = (v8i16)__msa_hsub_u_h((v16u8)vec5, (v16u8)vec5); in SobelXRow_MSA()
3392 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in SobelXRow_MSA()
3406 v16u8 src0, src1, dst0; in SobelYRow_MSA()
3412 src0 = (v16u8)__msa_ld_b((void*)src_y0, 0); in SobelYRow_MSA()
3413 src1 = (v16u8)__msa_ld_b((void*)src_y1, 0); in SobelYRow_MSA()
3438 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); in SobelYRow_MSA()