1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ 13 #define AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ 14 15 #include "aom_dsp/mips/macros_msa.h" 16 #include "aom_dsp/aom_filter.h" 17 18 extern const uint8_t mc_filt_mask_arr[16 * 3]; 19 20 #define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \ 21 filt3) \ 22 ({ \ 23 v8i16 tmp_dpadd_0, tmp_dpadd_1; \ 24 \ 25 tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \ 26 tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \ 27 tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \ 28 tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \ 29 tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \ 30 \ 31 tmp_dpadd_0; \ 32 }) 33 34 #define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ 35 mask2, mask3, filt0, filt1, filt2, filt3, \ 36 out0, out1) \ 37 { \ 38 v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ 39 v8i16 res0_m, res1_m, res2_m, res3_m; \ 40 \ 41 VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ 42 DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \ 43 VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ 44 DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \ 45 VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \ 46 DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \ 47 VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \ 48 DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m); \ 49 ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \ 50 } 51 52 #define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ 53 mask2, mask3, filt0, filt1, filt2, filt3, \ 54 out0, out1, out2, out3) \ 55 { \ 56 v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ 57 v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \ 58 \ 59 VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ 60 VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ 61 DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ 62 res0_m, res1_m, res2_m, res3_m); \ 63 VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \ 64 VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \ 65 DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \ 66 res4_m, res5_m, res6_m, res7_m); \ 67 VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \ 68 VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \ 69 DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \ 70 res0_m, res1_m, res2_m, res3_m); \ 71 VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \ 72 VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \ 73 DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \ 74 res4_m, res5_m, res6_m, res7_m); \ 75 ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \ 76 res7_m, out0, out1, out2, out3); \ 77 } 78 79 #endif // AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ 80