/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
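
/* Each SAD_SRC_REF_ABS_SUB_<w> macro below accumulates the sum of absolute
 * differences for one <w>-pixel row.  The gsldlc1/gsldrc1 pairs are Loongson
 * unaligned left/right loads that fetch 8 bytes into an FP register, pasubub
 * computes the per-byte absolute difference, biadd sums the byte results,
 * and paddw adds that partial sum into the accumulator (ftmp5 here, ftmp3 in
 * the narrow kernels). */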
#define SAD_SRC_REF_ABS_SUB_64                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_32                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_16                                      \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_REF_ABS_SUB_8                                       \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
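
/* The 4-pixel-wide variant differs by ABI.  Under the O32 ABI the four bytes
 * are loaded into a GPR with ulw and moved to an FP register with mtc1; under
 * N32/N64 the gslwlc1/gslwrc1 unaligned word loads are used directly.  In
 * both cases mthc1 with $0 clears the upper 32 bits of the register so that
 * biadd only sums the four valid byte differences.  Note that the O32
 * variants reference a %[tmp0] operand which the 4-wide kernels below do not
 * declare in their inline-asm operand lists. */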
#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4                                       \
  "ulw        %[tmp0],    0x00(%[src])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                            \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                            \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4                                       \
  "gslwlc1    %[ftmp1],   0x03(%[src])                        \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])                        \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
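
/* The SAD_SRC_AVGREF_ABS_SUB_<w> macros implement the "average" SAD used for
 * compound prediction: each ref row is first averaged with the corresponding
 * second_pred row using pavgb (rounding average of unsigned bytes), and the
 * absolute differences are then taken against src as above. */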
#define SAD_SRC_AVGREF_ABS_SUB_64                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x27(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x20(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x2f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x28(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x27(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x20(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x2f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x28(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x37(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x30(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x3f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x38(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x37(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x30(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x3f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x38(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x17(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x10(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x1f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x18(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x17(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x10(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x1f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x18(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16                                   \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp3],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp3],   0x00(%[ref])                        \n\t" \
  "gsldlc1    %[ftmp4],   0x0f(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp4],   0x08(%[ref])                        \n\t" \
  "pavgb      %[ftmp3],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pavgb      %[ftmp4],   %[ftmp2],       %[ftmp4]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "gsldlc1    %[ftmp2],   0x0f(%[src])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x08(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t" \
  "pasubub    %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "biadd      %[ftmp2],   %[ftmp2]                            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp1]            \n\t" \
  "paddw      %[ftmp5],   %[ftmp5],       %[ftmp2]            \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8                                    \
  "gsldlc1    %[ftmp1],   0x07(%[second_pred])                \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gsldlc1    %[ftmp2],   0x07(%[ref])                        \n\t" \
  "gsldrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4                                    \
  "ulw        %[tmp0],    0x00(%[second_pred])                \n\t" \
  "mtc1       %[tmp0],    %[ftmp1]                            \n\t" \
  "ulw        %[tmp0],    0x00(%[ref])                        \n\t" \
  "mtc1       %[tmp0],    %[ftmp2]                            \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4                                    \
  "gslwlc1    %[ftmp1],   0x03(%[second_pred])                \n\t" \
  "gslwrc1    %[ftmp1],   0x00(%[second_pred])                \n\t" \
  "gslwlc1    %[ftmp2],   0x03(%[ref])                        \n\t" \
  "gslwrc1    %[ftmp2],   0x00(%[ref])                        \n\t" \
  "pavgb      %[ftmp2],   %[ftmp1],       %[ftmp2]            \n\t" \
  "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t" \
  "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t" \
  "pasubub    %[ftmp1],   %[ftmp1],       %[ftmp2]            \n\t" \
  "mthc1      $0,         %[ftmp1]                            \n\t" \
  "biadd      %[ftmp1],   %[ftmp1]                            \n\t" \
  "paddw      %[ftmp3],   %[ftmp3],       %[ftmp1]            \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
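
/* sadMxNx4D_mmi() defines the x4d entry point for an MxN block: it computes
 * the SAD of src against each of the four candidate reference blocks by
 * calling the single-reference MMI kernel once per reference. */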
#define sadMxNx4D_mmi(m, n)                                                  \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride,         \
                                 const uint8_t *const ref_array[],           \
                                 int ref_stride, uint32_t *sad_array) {      \
    int i;                                                                   \
    for (i = 0; i < 4; ++i)                                                  \
      sad_array[i] =                                                         \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }
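
/* For illustration, sadMxNx4D_mmi(64, 64) expands to roughly:
 *
 *   void vpx_sad64x64x4d_mmi(const uint8_t *src, int src_stride,
 *                            const uint8_t *const ref_array[],
 *                            int ref_stride, uint32_t *sad_array) {
 *     int i;
 *     for (i = 0; i < 4; ++i)
 *       sad_array[i] =
 *           vpx_sad64x64_mmi(src, src_stride, ref_array[i], ref_stride);
 *   }
 */
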
static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);

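/* A minimal usage sketch (illustrative only; src_buf, ref_buf and the strides
 * are hypothetical):
 *
 *   uint32_t sad = vpx_sad64x64_mmi(src_buf, src_stride, ref_buf, ref_stride);
 *
 * Every block height H passed to these kernels is even, matching the
 * two-rows-per-iteration loops.
 *
 * The _avg kernels below additionally take second_pred, the compound
 * predictor.  It is stored contiguously at the block width (64 bytes per row
 * here), which is why the pointer advances by a constant 0x40 per row rather
 * than by a stride. */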
static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg64xN(H)                                                   \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);

static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);

static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg32xN(H)                                                   \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);

static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride,   \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);

static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp5],   %[ftmp5],       %[ftmp5]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp5]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg16xN(H)                                                   \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                       const uint8_t *ref, int ref_stride,   \
                                       const uint8_t *second_pred) {         \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);

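/* The 8- and 4-pixel-wide kernels below process a single 8-byte (or 4-byte)
 * chunk per row and accumulate into ftmp3, so they need only three FP
 * temporaries. */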
static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride,   \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);

static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg8xN(H)                                                   \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);

static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride,   \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H);              \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);

static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor       %[ftmp3],   %[ftmp3],       %[ftmp3]            \n\t"
    "1:                                                         \n\t"
    // Process two rows per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src],     %[src],         %[src_stride])
    MMI_ADDU(%[ref],     %[ref],         %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez       %[counter], 1b                                  \n\t"
    "mfc1       %[sad],     %[ftmp3]                            \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg4xN(H)                                                   \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride,   \
                                      const uint8_t *ref, int ref_stride,   \
                                      const uint8_t *second_pred) {         \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);