• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11 #include "./vp8_rtcd.h"
12 #include "vp8/common/mips/msa/vp8_macros_msa.h"
13 #include "vp8/encoder/block.h"
14 
vp8_block_error_msa(int16_t * coeff_ptr,int16_t * dq_coeff_ptr)15 int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr)
16 {
17     int32_t err = 0;
18     uint32_t loop_cnt;
19     v8i16 coeff, dq_coeff, coeff0, coeff1;
20     v4i32 diff0, diff1;
21     v2i64 err0 = { 0 };
22     v2i64 err1 = { 0 };
23 
24     for (loop_cnt = 2; loop_cnt--;)
25     {
26         coeff = LD_SH(coeff_ptr);
27         dq_coeff = LD_SH(dq_coeff_ptr);
28         ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
29         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
30         DPADD_SD2_SD(diff0, diff1, err0, err1);
31         coeff_ptr += 8;
32         dq_coeff_ptr += 8;
33     }
34 
35     err0 += __msa_splati_d(err0, 1);
36     err1 += __msa_splati_d(err1, 1);
37     err = __msa_copy_s_d(err0, 0);
38     err += __msa_copy_s_d(err1, 0);
39 
40     return err;
41 }
42 
vp8_mbblock_error_msa(MACROBLOCK * mb,int32_t dc)43 int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc)
44 {
45     BLOCK *be;
46     BLOCKD *bd;
47     int16_t *coeff_ptr, *dq_coeff_ptr;
48     int32_t err = 0;
49     uint32_t loop_cnt;
50     v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
51     v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
52     v4i32 diff0, diff1;
53     v2i64 err0, err1;
54     v16u8 zero  = { 0 };
55     v16u8 mask0 = (v16u8)__msa_ldi_b(255);
56 
57     if (1 == dc)
58     {
59         mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
60     }
61 
62     for (loop_cnt = 0; loop_cnt < 8; loop_cnt++)
63     {
64         be = &mb->block[2 * loop_cnt];
65         bd = &mb->e_mbd.block[2 * loop_cnt];
66         coeff_ptr = be->coeff;
67         dq_coeff_ptr = bd->dqcoeff;
68         coeff = LD_SH(coeff_ptr);
69         dq_coeff = LD_SH(dq_coeff_ptr);
70         coeff_ptr += 8;
71         dq_coeff_ptr += 8;
72         coeff2 = LD_SH(coeff_ptr);
73         dq_coeff2 = LD_SH(dq_coeff_ptr);
74         be = &mb->block[2 * loop_cnt + 1];
75         bd = &mb->e_mbd.block[2 * loop_cnt + 1];
76         coeff_ptr = be->coeff;
77         dq_coeff_ptr = bd->dqcoeff;
78         coeff3 = LD_SH(coeff_ptr);
79         dq_coeff3 = LD_SH(dq_coeff_ptr);
80         coeff_ptr += 8;
81         dq_coeff_ptr += 8;
82         coeff4 = LD_SH(coeff_ptr);
83         dq_coeff4 = LD_SH(dq_coeff_ptr);
84         ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
85         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
86         diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
87         DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
88         ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
89         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
90         DPADD_SD2_SD(diff0, diff1, err0, err1);
91         err0 += __msa_splati_d(err0, 1);
92         err1 += __msa_splati_d(err1, 1);
93         err += __msa_copy_s_d(err0, 0);
94         err += __msa_copy_s_d(err1, 0);
95 
96         ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
97         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
98         diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
99         DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
100         ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
101         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
102         DPADD_SD2_SD(diff0, diff1, err0, err1);
103         err0 += __msa_splati_d(err0, 1);
104         err1 += __msa_splati_d(err1, 1);
105         err += __msa_copy_s_d(err0, 0);
106         err += __msa_copy_s_d(err1, 0);
107     }
108 
109     return err;
110 }
111 
vp8_mbuverror_msa(MACROBLOCK * mb)112 int32_t vp8_mbuverror_msa(MACROBLOCK *mb)
113 {
114     BLOCK *be;
115     BLOCKD *bd;
116     int16_t *coeff_ptr, *dq_coeff_ptr;
117     int32_t err = 0;
118     uint32_t loop_cnt;
119     v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
120     v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
121     v4i32 diff0, diff1;
122     v2i64 err0, err1, err_dup0, err_dup1;
123 
124     for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2)
125     {
126         be = &mb->block[loop_cnt];
127         bd = &mb->e_mbd.block[loop_cnt];
128         coeff_ptr = be->coeff;
129         dq_coeff_ptr = bd->dqcoeff;
130         coeff = LD_SH(coeff_ptr);
131         dq_coeff = LD_SH(dq_coeff_ptr);
132         coeff_ptr += 8;
133         dq_coeff_ptr += 8;
134         coeff2 = LD_SH(coeff_ptr);
135         dq_coeff2 = LD_SH(dq_coeff_ptr);
136         be = &mb->block[loop_cnt + 1];
137         bd = &mb->e_mbd.block[loop_cnt + 1];
138         coeff_ptr = be->coeff;
139         dq_coeff_ptr = bd->dqcoeff;
140         coeff3 = LD_SH(coeff_ptr);
141         dq_coeff3 = LD_SH(dq_coeff_ptr);
142         coeff_ptr += 8;
143         dq_coeff_ptr += 8;
144         coeff4 = LD_SH(coeff_ptr);
145         dq_coeff4 = LD_SH(dq_coeff_ptr);
146 
147         ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
148         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
149         DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
150 
151         ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
152         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
153         DPADD_SD2_SD(diff0, diff1, err0, err1);
154         err_dup0 = __msa_splati_d(err0, 1);
155         err_dup1 = __msa_splati_d(err1, 1);
156         ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
157         err += __msa_copy_s_d(err0, 0);
158         err += __msa_copy_s_d(err1, 0);
159 
160         ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
161         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
162         DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
163         ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
164         HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
165         DPADD_SD2_SD(diff0, diff1, err0, err1);
166         err_dup0 = __msa_splati_d(err0, 1);
167         err_dup1 = __msa_splati_d(err1, 1);
168         ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
169         err += __msa_copy_s_d(err0, 0);
170         err += __msa_copy_s_d(err1, 0);
171     }
172 
173     return err;
174 }
175