/*
 * Loongson SIMD optimized h264qpel
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 
24 #include "h264dsp_mips.h"
25 #include "hpeldsp_mips.h"
26 #include "libavcodec/bit_depth_template.c"
27 #include "libavutil/mips/mmiutils.h"
28 
copy_block4_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride,int h)29 static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
30         int dstStride, int srcStride, int h)
31 {
32     double ftmp[1];
33     DECLARE_VAR_LOW32;
34 
35     __asm__ volatile (
36         "1:                                                             \n\t"
37         MMI_ULWC1(%[ftmp0], %[src], 0x00)
38         MMI_SWC1(%[ftmp0], %[dst], 0x00)
39         "addi       %[h],       %[h],           -0x01                   \n\t"
40         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
41         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
42         "bnez       %[h],       1b                                      \n\t"
43         : [ftmp0]"=&f"(ftmp[0]),
44           [dst]"+&r"(dst),                  [src]"+&r"(src),
45           RESTRICT_ASM_LOW32
46           [h]"+&r"(h)
47         : [dstStride]"r"((mips_reg)dstStride),
48           [srcStride]"r"((mips_reg)srcStride)
49         : "memory"
50     );
51 }
52 
copy_block8_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride,int h)53 static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
54         int dstStride, int srcStride, int h)
55 {
56     double ftmp[1];
57     DECLARE_VAR_ALL64;
58 
59     __asm__ volatile (
60         "1:                                                             \n\t"
61         MMI_ULDC1(%[ftmp0], %[src], 0x00)
62         MMI_SDC1(%[ftmp0], %[dst], 0x00)
63         "addi       %[h],       %[h],           -0x01                   \n\t"
64         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
65         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
66         "bnez       %[h],       1b                                      \n\t"
67         : [ftmp0]"=&f"(ftmp[0]),
68           RESTRICT_ASM_ALL64
69           [dst]"+&r"(dst),                  [src]"+&r"(src),
70           [h]"+&r"(h)
71         : [dstStride]"r"((mips_reg)dstStride),
72           [srcStride]"r"((mips_reg)srcStride)
73         : "memory"
74     );
75 }
76 
copy_block16_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride,int h)77 static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
78         int dstStride, int srcStride, int h)
79 {
80     double ftmp[1];
81     uint64_t tmp[1];
82     DECLARE_VAR_ALL64;
83 
84     __asm__ volatile (
85         "1:                                                             \n\t"
86         MMI_ULDC1(%[ftmp0], %[src], 0x00)
87         "ldl        %[tmp0],    0x0f(%[src])                            \n\t"
88         "ldr        %[tmp0],    0x08(%[src])                            \n\t"
89         MMI_SDC1(%[ftmp0], %[dst], 0x00)
90         "sdl        %[tmp0],    0x0f(%[dst])                            \n\t"
91         "sdr        %[tmp0],    0x08(%[dst])                            \n\t"
92         "addi       %[h],       %[h],           -0x01                   \n\t"
93         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
94         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
95         "bnez       %[h],       1b                                      \n\t"
96         : [ftmp0]"=&f"(ftmp[0]),
97           [tmp0]"=&r"(tmp[0]),
98           RESTRICT_ASM_ALL64
99           [dst]"+&r"(dst),                  [src]"+&r"(src),
100           [h]"+&r"(h)
101         : [dstStride]"r"((mips_reg)dstStride),
102           [srcStride]"r"((mips_reg)srcStride)
103         : "memory"
104     );
105 }
106 
/*
 * Store helpers for a filter sum b: both add 512, shift right by 10 and pass
 * the result through CLIP (defined outside this file's visible part).
 * op2_put assigns that value to a; op2_avg assigns the rounded-up average of
 * the old a and that value. Note that a is evaluated twice by op2_avg.
 */
#define op2_avg(a, b)  a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
#define op2_put(a, b)  a = CLIP(((b) + 512)>>10)
put_h264_qpel4_h_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)109 static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
110         int dstStride, int srcStride)
111 {
112     double ftmp[10];
113     uint64_t tmp[1];
114     DECLARE_VAR_LOW32;
115 
116     __asm__ volatile (
117         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
118         "dli        %[tmp0],    0x04                                    \n\t"
119         "1:                                                             \n\t"
120         MMI_ULWC1(%[ftmp1], %[src], -0x02)
121         MMI_ULWC1(%[ftmp2], %[src], -0x01)
122         MMI_ULWC1(%[ftmp3], %[src],  0x00)
123         MMI_ULWC1(%[ftmp4], %[src],  0x01)
124         MMI_ULWC1(%[ftmp5], %[src],  0x02)
125         MMI_ULWC1(%[ftmp6], %[src],  0x03)
126 
127         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
128         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
129         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
130         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
131         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
132         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
133         "paddsh     %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
134         "paddsh     %[ftmp8],   %[ftmp2],       %[ftmp5]                \n\t"
135         "paddsh     %[ftmp9],   %[ftmp1],       %[ftmp6]                \n\t"
136         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_20]             \n\t"
137         "pmullh     %[ftmp8],   %[ftmp8],       %[ff_pw_5]              \n\t"
138         "psubsh     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
139         "paddsh     %[ftmp9],   %[ftmp7],       %[ftmp9]                \n\t"
140         "paddsh     %[ftmp9],   %[ftmp9],       %[ff_pw_16]             \n\t"
141         "psrah      %[ftmp9],   %[ftmp9],       %[ff_pw_5]              \n\t"
142         "packushb   %[ftmp9],   %[ftmp9],       %[ftmp0]                \n\t"
143         MMI_SWC1(%[ftmp9], %[dst],  0x00)
144         "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
145         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
146         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
147         "bnez       %[tmp0],    1b                                      \n\t"
148         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
149           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
150           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
151           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
152           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
153           [tmp0]"=&r"(tmp[0]),
154           RESTRICT_ASM_LOW32
155           [dst]"+&r"(dst),                  [src]"+&r"(src)
156         : [dstStride]"r"((mips_reg)dstStride),
157           [srcStride]"r"((mips_reg)srcStride),
158           [ff_pw_20]"f"(ff_pw_20),          [ff_pw_5]"f"(ff_pw_5),
159           [ff_pw_16]"f"(ff_pw_16)
160         : "memory"
161     );
162 }
163 
put_h264_qpel8_h_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)164 static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
165         int dstStride, int srcStride)
166 {
167     double ftmp[11];
168     uint64_t tmp[1];
169     DECLARE_VAR_ALL64;
170 
171     __asm__ volatile (
172         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
173         "dli        %[tmp0],    0x08                                    \n\t"
174         "1:                                                             \n\t"
175         MMI_ULDC1(%[ftmp1], %[src], -0x02)
176         MMI_ULDC1(%[ftmp2], %[src], -0x01)
177         MMI_ULDC1(%[ftmp3], %[src],  0x00)
178         MMI_ULDC1(%[ftmp4], %[src],  0x01)
179         MMI_ULDC1(%[ftmp5], %[src],  0x02)
180         MMI_ULDC1(%[ftmp6], %[src],  0x03)
181         "punpcklbh  %[ftmp7],   %[ftmp3],       %[ftmp0]                \n\t"
182         "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
183         "punpcklbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
184         "punpckhbh  %[ftmp10],  %[ftmp4],       %[ftmp0]                \n\t"
185         "paddsh     %[ftmp3],   %[ftmp7],       %[ftmp9]                \n\t"
186         "paddsh     %[ftmp4],   %[ftmp8],       %[ftmp10]               \n\t"
187         "pmullh     %[ftmp3],   %[ftmp3],       %[ff_pw_20]             \n\t"
188         "pmullh     %[ftmp4],   %[ftmp4],       %[ff_pw_20]             \n\t"
189         "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
190         "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]                \n\t"
191         "punpcklbh  %[ftmp9],   %[ftmp5],       %[ftmp0]                \n\t"
192         "punpckhbh  %[ftmp10],  %[ftmp5],       %[ftmp0]                \n\t"
193         "paddsh     %[ftmp2],   %[ftmp7],       %[ftmp9]                \n\t"
194         "paddsh     %[ftmp5],   %[ftmp8],       %[ftmp10]               \n\t"
195         "pmullh     %[ftmp2],   %[ftmp2],       %[ff_pw_5]              \n\t"
196         "pmullh     %[ftmp5],   %[ftmp5],       %[ff_pw_5]              \n\t"
197         "punpcklbh  %[ftmp7],   %[ftmp1],       %[ftmp0]                \n\t"
198         "punpckhbh  %[ftmp8],   %[ftmp1],       %[ftmp0]                \n\t"
199         "punpcklbh  %[ftmp9],   %[ftmp6],       %[ftmp0]                \n\t"
200         "punpckhbh  %[ftmp10],  %[ftmp6],       %[ftmp0]                \n\t"
201         "paddsh     %[ftmp1],   %[ftmp7],       %[ftmp9]                \n\t"
202         "paddsh     %[ftmp6],   %[ftmp8],       %[ftmp10]               \n\t"
203         "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
204         "psubsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
205         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
206         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
207         "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_16]             \n\t"
208         "paddsh     %[ftmp4],   %[ftmp4],       %[ff_pw_16]             \n\t"
209         "psrah      %[ftmp3],   %[ftmp3],       %[ff_pw_5]              \n\t"
210         "psrah      %[ftmp4],   %[ftmp4],       %[ff_pw_5]              \n\t"
211         "packushb   %[ftmp9],   %[ftmp3],       %[ftmp4]                \n\t"
212         MMI_SDC1(%[ftmp9], %[dst],  0x00)
213         "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
214         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
215         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
216         "bnez       %[tmp0],    1b                                      \n\t"
217         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
218           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
219           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
220           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
221           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
222           [ftmp10]"=&f"(ftmp[10]),
223           [tmp0]"=&r"(tmp[0]),
224           RESTRICT_ASM_ALL64
225           [dst]"+&r"(dst),                  [src]"+&r"(src)
226         : [dstStride]"r"((mips_reg)dstStride),
227           [srcStride]"r"((mips_reg)srcStride),
228           [ff_pw_20]"f"(ff_pw_20),          [ff_pw_5]"f"(ff_pw_5),
229           [ff_pw_16]"f"(ff_pw_16)
230         : "memory"
231     );
232 }
233 
/**
 * Horizontal lowpass filter over a 16x16 block, result stored to dst.
 *
 * The block is processed as four 8x8 tiles by put_h264_qpel8_h_lowpass_mmi:
 * top-left, top-right, then bottom-left and bottom-right after stepping both
 * pointers down by 8 rows.
 *
 * @param dst       destination of the first output row
 * @param src       source pixel aligned with dst
 * @param dstStride byte distance between consecutive destination rows
 * @param srcStride byte distance between consecutive source rows
 */
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Top half: columns 0..7 and 8..15. */
    put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
    /* Move to row 8 for the bottom half. */
    src += 8*srcStride;
    dst += 8*dstStride;
    put_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
}
244 
avg_h264_qpel4_h_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)245 static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
246         int dstStride, int srcStride)
247 {
248     double ftmp[11];
249     uint64_t tmp[1];
250     DECLARE_VAR_LOW32;
251 
252     __asm__ volatile (
253         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
254         "dli        %[tmp0],    0x04                                    \n\t"
255         "1:                                                             \n\t"
256         MMI_ULWC1(%[ftmp1], %[src], -0x02)
257         MMI_ULWC1(%[ftmp2], %[src], -0x01)
258         MMI_ULWC1(%[ftmp3], %[src],  0x00)
259         MMI_ULWC1(%[ftmp4], %[src],  0x01)
260         MMI_ULWC1(%[ftmp5], %[src],  0x02)
261         MMI_ULWC1(%[ftmp6], %[src],  0x03)
262         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
263         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
264         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
265         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
266         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
267         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
268         "paddsh     %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
269         "paddsh     %[ftmp8],   %[ftmp2],       %[ftmp5]                \n\t"
270         "paddsh     %[ftmp9],   %[ftmp1],       %[ftmp6]                \n\t"
271         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_20]             \n\t"
272         "pmullh     %[ftmp8],   %[ftmp8],       %[ff_pw_5]              \n\t"
273         "psubsh     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
274         "paddsh     %[ftmp9],   %[ftmp7],       %[ftmp9]                \n\t"
275         "paddsh     %[ftmp9],   %[ftmp9],       %[ff_pw_16]             \n\t"
276         "psrah      %[ftmp9],   %[ftmp9],       %[ff_pw_5]              \n\t"
277         "packushb   %[ftmp9],   %[ftmp9],       %[ftmp0]                \n\t"
278         MMI_LWC1(%[ftmp10], %[dst],  0x00)
279         "pavgb      %[ftmp9],   %[ftmp9],       %[ftmp10]               \n\t"
280         MMI_SWC1(%[ftmp9], %[dst],  0x00)
281         "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
282         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
283         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
284         "bnez       %[tmp0],    1b                                      \n\t"
285         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
286           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
287           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
288           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
289           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
290           [ftmp10]"=&f"(ftmp[10]),
291           [tmp0]"=&r"(tmp[0]),
292           RESTRICT_ASM_LOW32
293           [dst]"+&r"(dst),                  [src]"+&r"(src)
294         : [dstStride]"r"((mips_reg)dstStride),
295           [srcStride]"r"((mips_reg)srcStride),
296           [ff_pw_20]"f"(ff_pw_20),          [ff_pw_5]"f"(ff_pw_5),
297           [ff_pw_16]"f"(ff_pw_16)
298         : "memory"
299     );
300 }
301 
avg_h264_qpel8_h_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)302 static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
303         int dstStride, int srcStride)
304 {
305     double ftmp[11];
306     uint64_t tmp[1];
307     DECLARE_VAR_ALL64;
308 
309     __asm__ volatile (
310         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
311         "dli        %[tmp0],    0x08                                    \n\t"
312         "1:                                                             \n\t"
313         MMI_ULDC1(%[ftmp1], %[src], -0x02)
314         MMI_ULDC1(%[ftmp2], %[src], -0x01)
315         MMI_ULDC1(%[ftmp3], %[src],  0x00)
316         MMI_ULDC1(%[ftmp4], %[src],  0x01)
317         MMI_ULDC1(%[ftmp5], %[src],  0x02)
318         MMI_ULDC1(%[ftmp6], %[src],  0x03)
319         "punpcklbh  %[ftmp7],   %[ftmp3],       %[ftmp0]                \n\t"
320         "punpckhbh  %[ftmp8],   %[ftmp3],       %[ftmp0]                \n\t"
321         "punpcklbh  %[ftmp9],   %[ftmp4],       %[ftmp0]                \n\t"
322         "punpckhbh  %[ftmp10],  %[ftmp4],       %[ftmp0]                \n\t"
323         "paddsh     %[ftmp3],   %[ftmp7],       %[ftmp9]                \n\t"
324         "paddsh     %[ftmp4],   %[ftmp8],       %[ftmp10]               \n\t"
325         "pmullh     %[ftmp3],   %[ftmp3],       %[ff_pw_20]             \n\t"
326         "pmullh     %[ftmp4],   %[ftmp4],       %[ff_pw_20]             \n\t"
327         "punpcklbh  %[ftmp7],   %[ftmp2],       %[ftmp0]                \n\t"
328         "punpckhbh  %[ftmp8],   %[ftmp2],       %[ftmp0]                \n\t"
329         "punpcklbh  %[ftmp9],   %[ftmp5],       %[ftmp0]                \n\t"
330         "punpckhbh  %[ftmp10],  %[ftmp5],       %[ftmp0]                \n\t"
331         "paddsh     %[ftmp2],   %[ftmp7],       %[ftmp9]                \n\t"
332         "paddsh     %[ftmp5],   %[ftmp8],       %[ftmp10]               \n\t"
333         "pmullh     %[ftmp2],   %[ftmp2],       %[ff_pw_5]              \n\t"
334         "pmullh     %[ftmp5],   %[ftmp5],       %[ff_pw_5]              \n\t"
335         "punpcklbh  %[ftmp7],   %[ftmp1],       %[ftmp0]                \n\t"
336         "punpckhbh  %[ftmp8],   %[ftmp1],       %[ftmp0]                \n\t"
337         "punpcklbh  %[ftmp9],   %[ftmp6],       %[ftmp0]                \n\t"
338         "punpckhbh  %[ftmp10],  %[ftmp6],       %[ftmp0]                \n\t"
339         "paddsh     %[ftmp1],   %[ftmp7],       %[ftmp9]                \n\t"
340         "paddsh     %[ftmp6],   %[ftmp8],       %[ftmp10]               \n\t"
341         "psubsh     %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
342         "psubsh     %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
343         "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp1]                \n\t"
344         "paddsh     %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
345         "paddsh     %[ftmp3],   %[ftmp3],       %[ff_pw_16]             \n\t"
346         "paddsh     %[ftmp4],   %[ftmp4],       %[ff_pw_16]             \n\t"
347         "psrah      %[ftmp3],   %[ftmp3],       %[ff_pw_5]              \n\t"
348         "psrah      %[ftmp4],   %[ftmp4],       %[ff_pw_5]              \n\t"
349         "packushb   %[ftmp9],   %[ftmp3],       %[ftmp4]                \n\t"
350         MMI_LDC1(%[ftmp10], %[dst], 0x00)
351         "pavgb      %[ftmp9],   %[ftmp9],       %[ftmp10]               \n\t"
352         MMI_SDC1(%[ftmp9], %[dst], 0x00)
353         "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
354         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
355         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
356         "bnez       %[tmp0],    1b                                      \n\t"
357         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
358           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
359           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
360           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
361           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
362           [ftmp10]"=&f"(ftmp[10]),
363           [tmp0]"=&r"(tmp[0]),
364           RESTRICT_ASM_ALL64
365           [dst]"+&r"(dst),                  [src]"+&r"(src)
366         : [dstStride]"r"((mips_reg)dstStride),
367           [srcStride]"r"((mips_reg)srcStride),
368           [ff_pw_20]"f"(ff_pw_20),          [ff_pw_5]"f"(ff_pw_5),
369           [ff_pw_16]"f"(ff_pw_16)
370         : "memory"
371     );
372 }
373 
/**
 * Horizontal lowpass filter over a 16x16 block, averaged into dst.
 *
 * The block is processed as four 8x8 tiles by avg_h264_qpel8_h_lowpass_mmi:
 * top-left, top-right, then bottom-left and bottom-right after stepping both
 * pointers down by 8 rows.
 *
 * @param dst       destination of the first row; read and written
 * @param src       source pixel aligned with dst
 * @param dstStride byte distance between consecutive destination rows
 * @param srcStride byte distance between consecutive source rows
 */
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Top half: columns 0..7 and 8..15. */
    avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
    /* Move to row 8 for the bottom half. */
    src += 8*srcStride;
    dst += 8*dstStride;
    avg_h264_qpel8_h_lowpass_mmi(dst, src, dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst+8, src+8, dstStride, srcStride);
}
384 
put_h264_qpel4_v_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)385 static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
386         int dstStride, int srcStride)
387 {
388     double ftmp[12];
389     uint64_t tmp[1];
390     DECLARE_VAR_LOW32;
391 
392     src -= 2 * srcStride;
393 
394     __asm__ volatile (
395         ".set       push                                                \n\t"
396         ".set       noreorder                                           \n\t"
397         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
398         "dli        %[tmp0],    0x02                                    \n\t"
399         MMI_LWC1(%[ftmp1], %[src], 0x00)
400         "mtc1       %[tmp0],    %[ftmp10]                               \n\t"
401         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
402         "dli        %[tmp0],    0x05                                    \n\t"
403         MMI_LWC1(%[ftmp2], %[src], 0x00)
404         "mtc1       %[tmp0],    %[ftmp11]                               \n\t"
405         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
406         MMI_LWC1(%[ftmp3], %[src], 0x00)
407         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
408         MMI_LWC1(%[ftmp4], %[src], 0x00)
409         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
410         MMI_LWC1(%[ftmp5], %[src], 0x00)
411         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
412         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
413         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
414         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
415         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
416         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
417         MMI_LWC1(%[ftmp6], %[src], 0x00)
418         "paddh      %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
419         "psllh      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
420         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
421         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
422         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
423         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_5]              \n\t"
424         "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]             \n\t"
425         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
426         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
427         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
428         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
429         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
430         MMI_SWC1(%[ftmp7], %[dst], 0x00)
431         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
432         MMI_LWC1(%[ftmp1], %[src], 0x00)
433         "paddh      %[ftmp7],   %[ftmp4],       %[ftmp5]                \n\t"
434         "psllh      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
435         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
436         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp6]                \n\t"
437         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
438         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_5]              \n\t"
439         "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]             \n\t"
440         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
441         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
442         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
443         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
444         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
445         MMI_SWC1(%[ftmp7], %[dst], 0x00)
446         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
447         MMI_LWC1(%[ftmp2], %[src], 0x00)
448         "paddh      %[ftmp7],   %[ftmp5],       %[ftmp6]                \n\t"
449         "psllh      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
450         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
451         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp1]                \n\t"
452         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
453         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_5]              \n\t"
454         "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]             \n\t"
455         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
456         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
457         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp3]                \n\t"
458         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
459         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
460         MMI_SWC1(%[ftmp7], %[dst], 0x00)
461         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
462         MMI_LWC1(%[ftmp3], %[src], 0x00)
463         "paddh      %[ftmp7],   %[ftmp6],       %[ftmp1]                \n\t"
464         "psllh      %[ftmp7],   %[ftmp7],       %[ftmp10]               \n\t"
465         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp5]                \n\t"
466         "psubh      %[ftmp7],   %[ftmp7],       %[ftmp2]                \n\t"
467         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
468         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_5]              \n\t"
469         "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]             \n\t"
470         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
471         "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]                \n\t"
472         "paddh      %[ftmp7],   %[ftmp7],       %[ftmp4]                \n\t"
473         "psrah      %[ftmp7],   %[ftmp7],       %[ftmp11]               \n\t"
474         "packushb   %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
475         MMI_SWC1(%[ftmp7], %[dst], 0x00)
476         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
477         ".set       pop                                                 \n\t"
478         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
479           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
480           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
481           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
482           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
483           [ftmp10]"=&f"(ftmp[10]),          [ftmp11]"=&f"(ftmp[11]),
484           [tmp0]"=&r"(tmp[0]),
485           RESTRICT_ASM_LOW32
486           [dst]"+&r"(dst),                  [src]"+&r"(src)
487         : [dstStride]"r"((mips_reg)dstStride),
488           [srcStride]"r"((mips_reg)srcStride),
489           [ff_pw_5]"f"(ff_pw_5),            [ff_pw_16]"f"(ff_pw_16)
490         : "memory"
491     );
492 }
493 
put_h264_qpel8_v_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)494 static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
495         int dstStride, int srcStride)
496 {
497     int w = 2;
498     int h = 8;
499     double ftmp[10];
500     uint64_t tmp[1];
501     DECLARE_VAR_LOW32;
502 
503     src -= 2 * srcStride;
504 
505     while (w--) {
506         __asm__ volatile (
507             ".set       push                                            \n\t"
508             ".set       noreorder                                       \n\t"
509             "dli        %[tmp0],    0x02                                \n\t"
510             MMI_LWC1(%[ftmp0], %[src], 0x00)
511             "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
512             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
513             "dli        %[tmp0],    0x05                                \n\t"
514             MMI_LWC1(%[ftmp1], %[src], 0x00)
515             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
516             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
517             MMI_LWC1(%[ftmp2], %[src], 0x00)
518             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
519             "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"
520             MMI_LWC1(%[ftmp3], %[src], 0x00)
521             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
522             MMI_LWC1(%[ftmp4], %[src], 0x00)
523             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
524             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
525             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
526             MMI_LWC1(%[ftmp5], %[src], 0x00)
527             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
528             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
529             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
530             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
531             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
532             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
533             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
534             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
535             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
536             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
537             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
538             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
539             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
540             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
541             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
542             MMI_SWC1(%[ftmp6], %[dst], 0x00)
543             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
544             MMI_LWC1(%[ftmp0], %[src], 0x00)
545             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
546             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
547             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
548             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
549             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
550             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
551             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
552             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
553             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
554             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
555             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
556             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
557             MMI_SWC1(%[ftmp6], %[dst], 0x00)
558             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
559             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
560             MMI_LWC1(%[ftmp1], %[src], 0x00)
561             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
562             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
563             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
564             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
565             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
566             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
567             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
568             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
569             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
570             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
571             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
572             MMI_SWC1(%[ftmp6], %[dst], 0x00)
573             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
574             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
575             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
576             MMI_LWC1(%[ftmp2], %[src], 0x00)
577             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
578             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
579             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
580             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
581             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
582             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
583             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
584             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
585             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
586             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
587             MMI_SWC1(%[ftmp6], %[dst], 0x00)
588             "paddh      %[ftmp6],   %[ftmp0],       %[ftmp1]            \n\t"
589             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
590             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
591             MMI_LWC1(%[ftmp3], %[src], 0x00)
592             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
593             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
594             "punpcklbh  %[ftmp3] ,  %[ftmp3],       %[ftmp7]            \n\t"
595             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
596             "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]         \n\t"
597             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
598             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
599             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
600             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
601             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
602             MMI_SWC1(%[ftmp6], %[dst], 0x00)
603             "paddh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
604             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
605             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
606             MMI_LWC1(%[ftmp4], %[src], 0x00)
607             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
608             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
609             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
610             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
611             "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]         \n\t"
612             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
613             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"
614             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
615             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
616             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
617             MMI_SWC1(%[ftmp6], %[dst], 0x00)
618             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
619             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
620             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
621             MMI_LWC1(%[ftmp5], %[src], 0x00)
622             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
623             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
624             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
625             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
626             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
627             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
628             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
629             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
630             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
631             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
632             MMI_SWC1(%[ftmp6], %[dst], 0x00)
633             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
634             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
635             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
636             MMI_LWC1(%[ftmp0], %[src], 0x00)
637             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
638             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
639             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
640             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
641             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
642             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
643             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
644             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
645             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
646             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
647             MMI_SWC1(%[ftmp6], %[dst], 0x00)
648             "bne        %[h],       0x10,           2f                  \n\t"
649             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
650             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
651             MMI_LWC1(%[ftmp1], %[src], 0x00)
652             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
653             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
654             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
655             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
656             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
657             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
658             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
659             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
660             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
661             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
662             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
663             MMI_SWC1(%[ftmp6], %[dst], 0x00)
664             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
665             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
666             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
667             MMI_LWC1(%[ftmp2], %[src], 0x00)
668             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
669             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
670             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
671             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
672             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
673             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
674             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
675             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
676             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
677             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
678             MMI_SWC1(%[ftmp6], %[dst], 0x00)
679             "paddh      %[ftmp6],   %[ftmp0],       %[ftmp1]            \n\t"
680             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
681             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
682             MMI_LWC1(%[ftmp3], %[src], 0x00)
683             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
684             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
685             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
686             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
687             "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]         \n\t"
688             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
689             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
690             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
691             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
692             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
693             MMI_SWC1(%[ftmp6], %[dst], 0x00)
694             "paddh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
695             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
696             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
697             MMI_LWC1(%[ftmp4], %[src], 0x00)
698             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
699             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
700             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
701             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
702             "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]         \n\t"
703             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
704             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"
705             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
706             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
707             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
708             MMI_SWC1(%[ftmp6], %[dst], 0x00)
709             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
710             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
711             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
712             MMI_LWC1(%[ftmp5], %[src], 0x00)
713             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
714             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
715             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
716             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
717             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
718             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
719             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
720             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
721             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
722             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
723             MMI_SWC1(%[ftmp6], %[dst], 0x00)
724             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
725             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
726             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
727             MMI_LWC1(%[ftmp0], %[src], 0x00)
728             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
729             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
730             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
731             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
732             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
733             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
734             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
735             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
736             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
737             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
738             MMI_SWC1(%[ftmp6], %[dst], 0x00)
739             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
740             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
741             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
742             MMI_LWC1(%[ftmp1], %[src], 0x00)
743             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
744             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
745             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
746             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
747             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
748             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
749             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
750             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
751             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
752             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
753             MMI_SWC1(%[ftmp6], %[dst], 0x00)
754             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
755             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
756             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
757             MMI_LWC1(%[ftmp2], %[src], 0x00)
758             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
759             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
760             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
761             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
762             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
763             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
764             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
765             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
766             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
767             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
768             MMI_SWC1(%[ftmp6], %[dst], 0x00)
769             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
770             "2:                                                         \n\t"
771             ".set       pop                                             \n\t"
772             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
773               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
774               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
775               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
776               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
777               [tmp0]"=&r"(tmp[0]),
778               RESTRICT_ASM_LOW32
779               [src]"+&r"(src),              [dst]"+&r"(dst),
780               [h]"+&r"(h)
781             : [dstStride]"r"((mips_reg)dstStride),
782               [srcStride]"r"((mips_reg)srcStride),
783               [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
784             : "memory"
785         );
786 
787         src += 4 - (h + 5) * srcStride;
788         dst += 4 - h * dstStride;
789     }
790 }
791 
/*
 * 16-wide "put" vertical lowpass, assembled from four calls to
 * put_h264_qpel8_v_lowpass_mmi(): upper-left, upper-right (8 bytes to the
 * right), lower-left (8 rows down) and lower-right.
 *
 * dst / dstStride: destination pointer and its row pitch in bytes.
 * src / srcStride: source pointer and its row pitch in bytes.
 *
 * NOTE(review): each helper call presumably covers one 8x8 tile -- confirm
 * against put_h264_qpel8_v_lowpass_mmi().
 */
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Start of the lower half: eight rows below the incoming pointers. */
    uint8_t *const dst_lower       = dst + 8 * dstStride;
    const uint8_t *const src_lower = src + 8 * srcStride;

    /* Upper half: left tile, then the tile 8 bytes to its right. */
    put_h264_qpel8_v_lowpass_mmi(dst,     src,     dstStride, srcStride);
    put_h264_qpel8_v_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);

    /* Lower half, same left/right order. */
    put_h264_qpel8_v_lowpass_mmi(dst_lower,     src_lower,     dstStride,
                                 srcStride);
    put_h264_qpel8_v_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride,
                                 srcStride);
}
802 
/*
 * Vertical 6-tap lowpass for four output rows, averaged into dst ("avg"
 * variant): each filtered row is combined with the row already in dst via
 * pavgb before being stored back.
 *
 * dst / dstStride: destination pointer and row pitch in bytes; dst is both
 *                  read and written.
 * src / srcStride: source pointer and row pitch in bytes; nine rows are read,
 *                  from two rows above src to six rows below it.
 *
 * With A..F being six consecutive source rows, each output row is
 *     (((C + D) << 2) - B - E) * ff_pw_5 + A + F + ff_pw_16) >> 5
 * computed per 16-bit lane and packed back to bytes with unsigned saturation.
 *
 * NOTE(review): ff_pw_5 / ff_pw_16 are declared elsewhere; presumably packed
 * 16-bit 5 and 16, which makes this the (1,-5,20,20,-5,1) filter with
 * rounding -- confirm.
 * NOTE(review): MMI_LWC1 / MMI_SWC1 come from mmiutils.h (not visible here);
 * presumably 32-bit loads/stores through an FP register, i.e. 4 pixels per
 * row -- confirm.
 */
avg_h264_qpel4_v_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)803 static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
804         int dstStride, int srcStride)
805 {
806     double ftmp[10];
807     uint64_t tmp[1];
808 
        /* Step back two rows: the first output row needs rows -2..+3. */
809     src -= 2 * srcStride;
810 
811     __asm__ volatile (
812         ".set       push                                                \n\t"
813         ".set       noreorder                                           \n\t"
            /* Constants: ftmp9 = shift count 2, ftmp7 = 0, ftmp8 = shift count 5. */
814         "dli        %[tmp0],    0x02                                    \n\t"
815         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
816         "mtc1       %[tmp0],    %[ftmp9]                                \n\t"
817         "dli        %[tmp0],    0x05                                    \n\t"
            /*
             * Load source rows -2..+2 into ftmp0..ftmp4, advancing src by one
             * row after each load (the first load is interleaved with the
             * shift-count setup).
             */
818         MMI_LWC1(%[ftmp0], %[src], 0x00)
819         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
820         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
821         MMI_LWC1(%[ftmp1], %[src], 0x00)
822         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
823         MMI_LWC1(%[ftmp2], %[src], 0x00)
824         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
825         MMI_LWC1(%[ftmp3], %[src], 0x00)
826         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
827         MMI_LWC1(%[ftmp4], %[src], 0x00)
828         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
            /* Interleave with the zeroed ftmp7: widen each byte to a 16-bit lane. */
829         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
830         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
831         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
832         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
833         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
            /*
             * Output row 0: window A..F = ftmp0..ftmp5 (ftmp5 = row +3).
             * ftmp6 accumulates the filter; ftmp0 is consumed as A + 16 + F.
             */
834         MMI_LWC1(%[ftmp5], %[src], 0x00)
835         "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]                \n\t"
836         "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]                \n\t"
837         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
838         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
839         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
840         "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]              \n\t"
841         "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]             \n\t"
842         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
843         "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]                \n\t"
844         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
845         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
846         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
            /* Average with the existing dst row (ftmp0 reused as scratch), store, next dst row. */
847         MMI_LWC1(%[ftmp0], %[dst], 0x00)
848         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
849         MMI_SWC1(%[ftmp6], %[dst], 0x00)
850         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
            /*
             * Output row 1: window slides down one row; A..F = ftmp1..ftmp5,
             * ftmp0 (reloaded with row +4).
             */
851         MMI_LWC1(%[ftmp0], %[src], 0x00)
852         "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]                \n\t"
853         "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]                \n\t"
854         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
855         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]                \n\t"
856         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
857         "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]              \n\t"
858         "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]             \n\t"
859         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
860         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
861         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
862         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
863         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
            /* Average with dst (ftmp1 reused as scratch), store, next dst row. */
864         MMI_LWC1(%[ftmp1], %[dst], 0x00)
865         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
866         MMI_SWC1(%[ftmp6], %[dst], 0x00)
867         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
            /*
             * Output row 2: A..F = ftmp2..ftmp5, ftmp0, ftmp1 (reloaded with
             * row +5).
             */
868         MMI_LWC1(%[ftmp1], %[src], 0x00)
869         "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]                \n\t"
870         "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]                \n\t"
871         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
872         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
873         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
874         "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]              \n\t"
875         "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]             \n\t"
876         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
877         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]                \n\t"
878         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
879         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
880         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
            /* Average with dst (ftmp2 reused as scratch), store, next dst row. */
881         MMI_LWC1(%[ftmp2], %[dst], 0x00)
882         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp2]                \n\t"
883         MMI_SWC1(%[ftmp6], %[dst], 0x00)
884         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
            /*
             * Output row 3: A..F = ftmp3, ftmp4, ftmp5, ftmp0, ftmp1, ftmp2
             * (reloaded with row +6, the last source row read).
             */
885         MMI_LWC1(%[ftmp2], %[src], 0x00)
886         "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]                \n\t"
887         "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]                \n\t"
888         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
889         "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]                \n\t"
890         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
891         "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]              \n\t"
892         "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]             \n\t"
893         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
894         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]                \n\t"
895         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
896         "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
897         "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
            /* Average with dst (ftmp3 reused as scratch) and store the final row. */
898         MMI_LWC1(%[ftmp3], %[dst], 0x00)
899         "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp3]                \n\t"
900         MMI_SWC1(%[ftmp6], %[dst], 0x00)
901         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
902         ".set       pop                                                 \n\t"
            /*
             * Outputs: all FP temporaries and tmp0 are early-clobber scratch;
             * src and dst are read-write because the asm advances them.
             */
903         : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
904           [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
905           [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
906           [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
907           [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
908           [tmp0]"=&r"(tmp[0]),
909           [src]"+&r"(src),              [dst]"+&r"(dst)
            /* Inputs: strides cast to mips_reg for PTR_ADDU; constants passed in FP registers. */
910         : [dstStride]"r"((mips_reg)dstStride),
911           [srcStride]"r"((mips_reg)srcStride),
912           [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
            /* The asm reads src and reads/writes dst through pointers. */
913         : "memory"
914     );
915 }
916 
avg_h264_qpel8_v_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)917 static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
918         int dstStride, int srcStride)
919 {
920     int w = 2;
921     int h = 8;
922     double ftmp[10];
923     uint64_t tmp[1];
924     DECLARE_VAR_LOW32;
925 
926     src -= 2 * srcStride;
927 
928     while (w--) {
929         __asm__ volatile (
930             ".set       push                                            \n\t"
931             ".set       noreorder                                       \n\t"
932             "dli        %[tmp0],    0x02                                \n\t"
933             "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"
934             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
935             "dli        %[tmp0],    0x05                                \n\t"
936             MMI_LWC1(%[ftmp0], %[src], 0x00)
937             "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
938             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
939             MMI_LWC1(%[ftmp1], %[src], 0x00)
940             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
941             MMI_LWC1(%[ftmp2], %[src], 0x00)
942             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
943             MMI_LWC1(%[ftmp3], %[src], 0x00)
944             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
945             MMI_LWC1(%[ftmp4], %[src], 0x00)
946             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
947             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
948             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
949             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
950             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
951             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
952             MMI_LWC1(%[ftmp5], %[src], 0x00)
953             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
954             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
955             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
956             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
957             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
958             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
959             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
960             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
961             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
962             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
963             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
964             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
965             MMI_LWC1(%[ftmp0], %[dst], 0x00)
966             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
967             MMI_SWC1(%[ftmp6], %[dst], 0x00)
968             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
969             MMI_LWC1(%[ftmp0], %[src], 0x00)
970             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
971             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
972             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
973             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
974             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
975             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
976             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
977             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
978             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
979             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
980             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
981             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
982             MMI_LWC1(%[ftmp1], %[dst], 0x00)
983             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
984             MMI_SWC1(%[ftmp6], %[dst], 0x00)
985             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
986             MMI_LWC1(%[ftmp1], %[src], 0x00)
987             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
988             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
989             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
990             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
991             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
992             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
993             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
994             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
995             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
996             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
997             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
998             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
999             MMI_LWC1(%[ftmp2], %[dst], 0x00)
1000             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1001             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1002             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1003             MMI_LWC1(%[ftmp2], %[src], 0x00)
1004             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
1005             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1006             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1007             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1008             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1009             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1010             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
1011             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1012             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
1013             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1014             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1015             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1016             MMI_LWC1(%[ftmp3], %[dst], 0x00)
1017             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1018             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1019             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1020             MMI_LWC1(%[ftmp3], %[src], 0x00)
1021             "paddh      %[ftmp6],   %[ftmp0],       %[ftmp1]            \n\t"
1022             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1023             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1024             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1025             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
1026             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1027             "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]         \n\t"
1028             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1029             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1030             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1031             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1032             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1033             MMI_LWC1(%[ftmp4], %[dst], 0x00)
1034             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1035             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1036             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1037             MMI_LWC1(%[ftmp4], %[src], 0x00)
1038             "paddh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
1039             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1040             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1041             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1042             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
1043             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1044             "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]         \n\t"
1045             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1046             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"
1047             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1048             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1049             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1050             MMI_LWC1(%[ftmp5], %[dst], 0x00)
1051             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1052             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1053             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1054             MMI_LWC1(%[ftmp5], %[src], 0x00)
1055             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
1056             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1057             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1058             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1059             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
1060             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1061             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
1062             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1063             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
1064             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1065             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1066             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1067             MMI_LWC1(%[ftmp0], %[dst], 0x00)
1068             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1069             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1070             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1071             MMI_LWC1(%[ftmp0], %[src], 0x00)
1072             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
1073             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1074             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1075             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1076             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
1077             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1078             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
1079             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1080             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1081             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1082             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1083             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1084             MMI_LWC1(%[ftmp1], %[dst], 0x00)
1085             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1086             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1087             "bne        %[h],       0x10,           2f                  \n\t"
1088             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1089             MMI_LWC1(%[ftmp1], %[src], 0x00)
1090             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
1091             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1092             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1093             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1094             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1095             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1096             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
1097             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1098             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
1099             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1100             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1101             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1102             MMI_LWC1(%[ftmp2], %[dst], 0x00)
1103             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1104             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1105             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1106             MMI_LWC1(%[ftmp2], %[src], 0x00)
1107             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
1108             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1109             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1110             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1111             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1112             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1113             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
1114             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1115             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
1116             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1117             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1118             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1119             MMI_LWC1(%[ftmp3], %[dst], 0x00)
1120             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1121             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1122             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1123             MMI_LWC1(%[ftmp3], %[src], 0x00)
1124             "paddh      %[ftmp6],   %[ftmp0],       %[ftmp1]            \n\t"
1125             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1126             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1127             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1128             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
1129             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1130             "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]         \n\t"
1131             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1132             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1133             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1134             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1135             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1136             MMI_LWC1(%[ftmp4], %[dst], 0x00)
1137             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1138             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1139             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1140             MMI_LWC1(%[ftmp4], %[src], 0x00)
1141             "paddh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
1142             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1143             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1144             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1145             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
1146             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1147             "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]         \n\t"
1148             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1149             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"
1150             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1151             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1152             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1153             MMI_LWC1(%[ftmp5], %[dst], 0x00)
1154             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1155             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1156             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1157             MMI_LWC1(%[ftmp5], %[src], 0x00)
1158             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
1159             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1160             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1161             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1162             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
1163             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1164             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
1165             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1166             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
1167             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1168             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1169             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1170             MMI_LWC1(%[ftmp0], %[dst], 0x00)
1171             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1172             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1173             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1174             MMI_LWC1(%[ftmp0], %[src], 0x00)
1175             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
1176             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1177             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1178             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1179             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
1180             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1181             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
1182             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1183             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1184             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1185             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1186             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1187             MMI_LWC1(%[ftmp1], %[dst], 0x00)
1188             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1189             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1190             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1191             MMI_LWC1(%[ftmp1], %[src], 0x00)
1192             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
1193             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1194             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1195             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1196             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1197             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1198             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
1199             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1200             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
1201             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1202             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1203             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1204             MMI_LWC1(%[ftmp2], %[dst], 0x00)
1205             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1206             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1207             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1208             MMI_LWC1(%[ftmp2], %[src], 0x00)
1209             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
1210             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp9]            \n\t"
1211             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1212             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1213             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1214             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1215             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
1216             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1217             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
1218             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1219             "psrah      %[ftmp6],   %[ftmp6],       %[ftmp8]            \n\t"
1220             "packushb   %[ftmp6],   %[ftmp6],       %[ftmp6]            \n\t"
1221             MMI_LWC1(%[ftmp3], %[dst], 0x00)
1222             "pavgb      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1223             MMI_SWC1(%[ftmp6], %[dst], 0x00)
1224             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1225             "2:                                                         \n\t"
1226             ".set       pop                                             \n\t"
1227             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1228               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1229               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1230               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1231               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
1232               [tmp0]"=&r"(tmp[0]),
1233               RESTRICT_ASM_LOW32
1234               [src]"+&r"(src),              [dst]"+&r"(dst),
1235               [h]"+&r"(h)
1236             : [dstStride]"r"((mips_reg)dstStride),
1237               [srcStride]"r"((mips_reg)srcStride),
1238               [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
1239             : "memory"
1240         );
1241 
1242         src += 4 - (h + 5) * srcStride;
1243         dst += 4 - h * dstStride;
1244     }
1245 }
1246 
/*
 * Vertical low-pass (avg variant) for a 16-pixel-wide block, built from four
 * calls to avg_h264_qpel8_v_lowpass_mmi(): left and right halves of the upper
 * 8 rows first, then left and right halves of the rows starting 8 lines
 * further down.
 *
 * dst/dstStride: destination pointer and its row pitch in bytes.
 * src/srcStride: source pointer and its row pitch in bytes.
 */
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Start of the lower half: eight rows below the block origin. */
    uint8_t       *dst_lower = dst + 8 * dstStride;
    const uint8_t *src_lower = src + 8 * srcStride;

    /* Upper half: columns 0..7, then columns 8..15. */
    avg_h264_qpel8_v_lowpass_mmi(dst,           src,           dstStride, srcStride);
    avg_h264_qpel8_v_lowpass_mmi(dst + 8,       src + 8,       dstStride, srcStride);

    /* Lower half: columns 0..7, then columns 8..15. */
    avg_h264_qpel8_v_lowpass_mmi(dst_lower,     src_lower,     dstStride, srcStride);
    avg_h264_qpel8_v_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride, srcStride);
}
1257 
put_h264_qpel4_hv_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)1258 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1259         int dstStride, int srcStride)
1260 {
1261     INIT_CLIP
1262     int i;
1263     int16_t _tmp[36];
1264     int16_t *tmp = _tmp;
1265     double ftmp[10];
1266     uint64_t tmp0;
1267     DECLARE_VAR_LOW32;
1268 
1269     src -= 2*srcStride;
1270 
1271     __asm__ volatile (
1272         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1273         "dli        %[tmp0],    0x09                                    \n\t"
1274         "1:                                                             \n\t"
1275         MMI_ULWC1(%[ftmp1], %[src], -0x02)
1276         MMI_ULWC1(%[ftmp2], %[src], -0x01)
1277         MMI_ULWC1(%[ftmp3], %[src],  0x00)
1278         MMI_ULWC1(%[ftmp4], %[src],  0x01)
1279         MMI_ULWC1(%[ftmp5], %[src],  0x02)
1280         MMI_ULWC1(%[ftmp6], %[src],  0x03)
1281         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
1282         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
1283         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1284         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
1285         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1286         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1287         "paddsh     %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1288         "paddsh     %[ftmp8],   %[ftmp2],       %[ftmp5]                \n\t"
1289         "paddsh     %[ftmp9],   %[ftmp1],       %[ftmp6]                \n\t"
1290         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_20]             \n\t"
1291         "pmullh     %[ftmp8],   %[ftmp8],       %[ff_pw_5]              \n\t"
1292         "psubsh     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1293         "paddsh     %[ftmp9],   %[ftmp7],       %[ftmp9]                \n\t"
1294         MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1295         "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
1296         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
1297         PTR_ADDU   "%[tmp],     %[tmp],         %[tmpStride]            \n\t"
1298         "bnez       %[tmp0],    1b                                      \n\t"
1299         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1300           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1301           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1302           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1303           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1304           [tmp0]"=&r"(tmp0),
1305           RESTRICT_ASM_LOW32
1306           [tmp]"+&r"(tmp),                  [src]"+&r"(src)
1307         : [tmpStride]"r"(8),
1308           [srcStride]"r"((mips_reg)srcStride),
1309           [ff_pw_20]"f"(ff_pw_20),          [ff_pw_5]"f"(ff_pw_5)
1310         : "memory"
1311     );
1312 
1313     tmp -= 28;
1314 
1315     for (i=0; i<4; i++) {
1316         const int16_t tmpB= tmp[-8];
1317         const int16_t tmpA= tmp[-4];
1318         const int16_t tmp0= tmp[ 0];
1319         const int16_t tmp1= tmp[ 4];
1320         const int16_t tmp2= tmp[ 8];
1321         const int16_t tmp3= tmp[12];
1322         const int16_t tmp4= tmp[16];
1323         const int16_t tmp5= tmp[20];
1324         const int16_t tmp6= tmp[24];
1325         op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1326         op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1327         op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1328         op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1329         dst++;
1330         tmp++;
1331     }
1332 }
1333 
put_h264_qpel8or16_hv1_lowpass_mmi(int16_t * tmp,const uint8_t * src,ptrdiff_t tmpStride,ptrdiff_t srcStride,int size)1334 static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
1335         const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
1336 {
1337     int w = (size + 8) >> 2;
1338     double ftmp[11];
1339     uint64_t tmp0;
1340     DECLARE_VAR_LOW32;
1341 
1342     src -= 2 * srcStride + 2;
1343 
1344     while (w--) {
1345         __asm__ volatile (
1346             "dli        %[tmp0],    0x02                                \n\t"
1347             MMI_ULWC1(%[ftmp0], %[src], 0x00)
1348             "mtc1       %[tmp0],    %[ftmp10]                           \n\t"
1349             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1350             "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]            \n\t"
1351             MMI_ULWC1(%[ftmp1], %[src], 0x00)
1352             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1353             MMI_ULWC1(%[ftmp2], %[src], 0x00)
1354             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1355             MMI_ULWC1(%[ftmp3], %[src], 0x00)
1356             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1357             MMI_ULWC1(%[ftmp4], %[src], 0x00)
1358             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1359             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
1360             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1361             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1362             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
1363             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
1364             MMI_ULWC1(%[ftmp5], %[src], 0x00)
1365             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
1366             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1367             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
1368             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1369             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1370             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
1371             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1372             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
1373             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1374             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1375             MMI_SDC1(%[ftmp6], %[tmp], 0x00)
1376             MMI_ULWC1(%[ftmp0], %[src], 0x00)
1377             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
1378             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1379             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
1380             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1381             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1382             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
1383             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1384             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1385             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1386             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1387             MMI_SDC1(%[ftmp6], %[tmp], 0x30)
1388             MMI_ULWC1(%[ftmp1], %[src], 0x00)
1389             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
1390             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1391             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
1392             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1393             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1394             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1395             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1396             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
1397             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1398             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1399             MMI_SDC1(%[ftmp6], %[tmp], 0x60)
1400             MMI_ULWC1(%[ftmp2], %[src], 0x00)
1401             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
1402             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1403             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
1404             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1405             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1406             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1407             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1408             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
1409             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1410             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1411             MMI_SDC1(%[ftmp6], %[tmp], 0x90)
1412             MMI_ULWC1(%[ftmp3], %[src], 0x00)
1413             "paddh      %[ftmp6],   %[ftmp0],       %[ftmp1]            \n\t"
1414             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1415             "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]         \n\t"
1416             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1417             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1418             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
1419             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1420             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1421             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1422             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1423             MMI_SDC1(%[ftmp6], %[tmp], 0xc0)
1424             MMI_ULWC1(%[ftmp4], %[src], 0x00)
1425             "paddh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
1426             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1427             "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]         \n\t"
1428             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1429             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1430             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
1431             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1432             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"
1433             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1434             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1435             MMI_SDC1(%[ftmp6], %[tmp], 0xf0)
1436             MMI_ULWC1(%[ftmp5], %[src], 0x00)
1437             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
1438             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1439             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
1440             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1441             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1442             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
1443             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1444             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
1445             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1446             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1447             MMI_SDC1(%[ftmp6], %[tmp], 0x120)
1448             MMI_ULWC1(%[ftmp0], %[src], 0x00)
1449             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
1450             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1451             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
1452             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1453             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1454             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
1455             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1456             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1457             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1458             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1459             MMI_SDC1(%[ftmp6], %[tmp], 0x150)
1460             "bne        %[size],    0x10,           2f                  \n\t"
1461 
1462             MMI_ULWC1(%[ftmp1], %[src], 0x00)
1463             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
1464             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1465             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
1466             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1467             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1468             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1469             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1470             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
1471             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1472             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1473             MMI_SDC1(%[ftmp6], %[tmp], 0x180)
1474             MMI_ULWC1(%[ftmp2], %[src], 0x00)
1475             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
1476             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1477             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
1478             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1479             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1480             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1481             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1482             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
1483             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1484             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1485             MMI_SDC1(%[ftmp6], %[tmp], 0x1b0)
1486             MMI_ULWC1(%[ftmp3], %[src], 0x00)
1487             "paddh      %[ftmp6],   %[ftmp0],       %[ftmp1]            \n\t"
1488             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1489             "paddh      %[ftmp4],   %[ftmp4],       %[ff_pw_16]         \n\t"
1490             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1491             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1492             "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp7]            \n\t"
1493             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1494             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp3]            \n\t"
1495             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1496             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1497             MMI_SDC1(%[ftmp6], %[tmp], 0x1e0)
1498             MMI_ULWC1(%[ftmp4], %[src], 0x00)
1499             "paddh      %[ftmp6],   %[ftmp1],       %[ftmp2]            \n\t"
1500             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1501             "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]         \n\t"
1502             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1503             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1504             "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
1505             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1506             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp4]            \n\t"
1507             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1508             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1509             MMI_SDC1(%[ftmp6], %[tmp], 0x210)
1510             MMI_ULWC1(%[ftmp5], %[src], 0x00)
1511             "paddh      %[ftmp6],   %[ftmp2],       %[ftmp3]            \n\t"
1512             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1513             "paddh      %[ftmp0],   %[ftmp0],       %[ff_pw_16]         \n\t"
1514             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1515             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1516             "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
1517             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1518             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
1519             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1520             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1521             MMI_SDC1(%[ftmp6], %[tmp], 0x240)
1522             MMI_ULWC1(%[ftmp0], %[src], 0x00)
1523             "paddh      %[ftmp6],   %[ftmp3],       %[ftmp4]            \n\t"
1524             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1525             "paddh      %[ftmp1],   %[ftmp1],       %[ff_pw_16]         \n\t"
1526             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1527             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp5]            \n\t"
1528             "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
1529             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1530             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp0]            \n\t"
1531             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1532             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1533             MMI_SDC1(%[ftmp6], %[tmp], 0x270)
1534             MMI_ULWC1(%[ftmp1], %[src], 0x00)
1535             "paddh      %[ftmp6],   %[ftmp4],       %[ftmp5]            \n\t"
1536             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1537             "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]         \n\t"
1538             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1539             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp0]            \n\t"
1540             "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp7]            \n\t"
1541             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1542             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp1]            \n\t"
1543             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1544             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]            \n\t"
1545             MMI_SDC1(%[ftmp6], %[tmp], 0x2a0)
1546             MMI_ULWC1(%[ftmp2], %[src], 0x00)
1547             "paddh      %[ftmp6],   %[ftmp5],       %[ftmp0]            \n\t"
1548             "psllh      %[ftmp6],   %[ftmp6],       %[ftmp10]           \n\t"
1549             "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]         \n\t"
1550             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp4]            \n\t"
1551             "psubh      %[ftmp6],   %[ftmp6],       %[ftmp1]            \n\t"
1552             "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1553             "pmullh     %[ftmp6],   %[ftmp6],       %[ff_pw_5]          \n\t"
1554             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp2]            \n\t"
1555             PTR_ADDU   "%[src],     %[src],         %[srcStride]        \n\t"
1556             "paddh      %[ftmp6],   %[ftmp6],       %[ftmp3]            \n\t"
1557             MMI_SDC1(%[ftmp6], %[tmp], 0x2d0)
1558             "2:                                                         \n\t"
1559             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1560               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1561               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1562               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1563               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
1564               [ftmp10]"=&f"(ftmp[10]),
1565               [tmp0]"=&r"(tmp0),
1566               RESTRICT_ASM_LOW32
1567               [src]"+&r"(src)
1568             : [tmp]"r"(tmp),                [size]"r"(size),
1569               [srcStride]"r"((mips_reg)srcStride),
1570               [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
1571             : "memory"
1572         );
1573 
1574         tmp += 4;
1575         src += 4 - (size + 5) * srcStride;
1576     }
1577 }
1578 
put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t * dst,int16_t * tmp,ptrdiff_t dstStride,ptrdiff_t tmpStride,int size)1579 static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
1580         int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1581 {
1582     int w = size >> 4;
1583     double ftmp[10];
1584     uint64_t tmp0;
1585     DECLARE_VAR_ALL64;
1586 
1587     do {
1588         int h = size;
1589 
1590         __asm__ volatile (
1591             "dli        %[tmp0],    0x02                                \n\t"
1592             "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
1593             "dli        %[tmp0],    0x06                                \n\t"
1594             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
1595             "1:                                                         \n\t"
1596             MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1597             MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1598             MMI_LDC1(%[ftmp6], %[tmp], 0x10)
1599             MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1600             MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1601             MMI_ULDC1(%[ftmp5], %[tmp], 0x12)
1602             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]            \n\t"
1603             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1604             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
1605             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp6]            \n\t"
1606             MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1607             MMI_ULDC1(%[ftmp6], %[tmp], 0x06)
1608             MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1609             MMI_ULDC1(%[ftmp7], %[tmp], 0x0e)
1610             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t"
1611             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp7]            \n\t"
1612             "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"
1613             "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
1614             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp8]            \n\t"
1615             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]            \n\t"
1616             "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"
1617             "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
1618             "paddsh     %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
1619             "paddsh     %[ftmp3] ,  %[ftmp3],       %[ftmp5]            \n\t"
1620             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp8]            \n\t"
1621             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]            \n\t"
1622             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
1623             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
1624             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]            \n\t"
1625             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp9]            \n\t"
1626             "packushb   %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
1627             "addi       %[h],       %[h],           -0x01               \n\t"
1628             MMI_SDC1(%[ftmp0], %[dst], 0x00)
1629             PTR_ADDIU  "%[tmp],     %[tmp],         0x30                \n\t"
1630             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1631             "bnez       %[h],       1b                                  \n\t"
1632             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1633               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1634               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1635               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1636               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
1637               [tmp0]"=&r"(tmp0),
1638               RESTRICT_ASM_ALL64
1639               [tmp]"+&r"(tmp),              [dst]"+&r"(dst),
1640               [h]"+&r"(h)
1641             : [dstStride]"r"((mips_reg)dstStride)
1642             : "memory"
1643         );
1644 
1645         tmp += 8 - size * 24;
1646         dst += 8 - size * dstStride;
1647     } while (w--);
1648 }
1649 
/*
 * Two-pass "hv" lowpass, "put" variant, for 8- or 16-wide blocks.
 *
 * Runs the hv1 pass from src into the 16-bit scratch buffer tmp, then the
 * hv2 pass from tmp into dst. All stride arguments are forwarded unchanged
 * to the two passes.
 */
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride, int size)
{
    put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size);
    put_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size);
}
1657 
/* 8x8 entry point: the shared 8-or-16 "put" hv lowpass with size fixed to 8. */
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
            srcStride, 8);
}
1665 
/* 16x16 entry point: the shared 8-or-16 "put" hv lowpass with size fixed to 16. */
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
            srcStride, 16);
}
1673 
put_h264_qpel8_h_lowpass_l2_mmi(uint8_t * dst,const uint8_t * src,const uint8_t * src2,ptrdiff_t dstStride,ptrdiff_t src2Stride)1674 static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
1675         const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1676 {
1677     int h = 8;
1678     double ftmp[9];
1679     uint64_t tmp[1];
1680     DECLARE_VAR_LOW32;
1681     DECLARE_VAR_ALL64;
1682 
1683     __asm__ volatile (
1684         "dli        %[tmp0],    0x02                                    \n\t"
1685         "mtc1       %[tmp0],    %[ftmp7]                                \n\t"
1686         "dli        %[tmp0],    0x05                                    \n\t"
1687         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1688         "mtc1       %[tmp0],    %[ftmp8]                                \n\t"
1689         "1:                                                             \n\t"
1690         MMI_ULDC1(%[ftmp1], %[src], 0x00)
1691         MMI_ULDC1(%[ftmp3], %[src], 0x01)
1692         "punpckhbh  %[ftmp2],   %[ftmp1],       %[ftmp0]                \n\t"
1693         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
1694         "punpckhbh  %[ftmp4],   %[ftmp3],       %[ftmp0]                \n\t"
1695         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1696         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
1697         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
1698         "psllh      %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
1699         "psllh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
1700         MMI_ULDC1(%[ftmp3], %[src], -0x01)
1701         MMI_ULDC1(%[ftmp5], %[src],  0x02)
1702         "punpckhbh  %[ftmp4],   %[ftmp3],       %[ftmp0]                \n\t"
1703         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1704         "punpckhbh  %[ftmp6],   %[ftmp5],       %[ftmp0]                \n\t"
1705         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1706         "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
1707         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
1708         "psubh      %[ftmp2],   %[ftmp2],       %[ftmp6]                \n\t"
1709         "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
1710         "pmullh     %[ftmp2],   %[ftmp2],       %[ff_pw_5]              \n\t"
1711         "pmullh     %[ftmp1],   %[ftmp1],       %[ff_pw_5]              \n\t"
1712         MMI_ULWC1(%[ftmp3], %[src], -0x02)
1713         MMI_ULWC1(%[ftmp6], %[src], 0x07)
1714         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1715         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1716         "paddh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
1717         "paddh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
1718         "paddh      %[ftmp3],   %[ftmp3],       %[ff_pw_16]             \n\t"
1719         "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]             \n\t"
1720         "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
1721         "paddh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
1722         "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
1723         "psrah      %[ftmp2],   %[ftmp2],       %[ftmp8]                \n\t"
1724         MMI_LDC1(%[ftmp5], %[src2],  0x00)
1725         "packushb   %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
1726         PTR_ADDU   "%[src],     %[src],         %[dstStride]            \n\t"
1727         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
1728         PTR_ADDU   "%[h],       %[h],           -0x01                   \n\t"
1729         MMI_SDC1(%[ftmp1], %[dst], 0x00)
1730         PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
1731         PTR_ADDU   "%[src2],    %[src2],        %[src2Stride]           \n\t"
1732         "bgtz       %[h],       1b                                      \n\t"
1733         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1734           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1735           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1736           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1737           [ftmp8]"=&f"(ftmp[8]),
1738           [tmp0]"=&r"(tmp[0]),
1739           RESTRICT_ASM_LOW32
1740           RESTRICT_ASM_ALL64
1741           [src]"+&r"(src),                  [dst]"+&r"(dst),
1742           [src2]"+&r"(src2),                [h]"+&r"(h)
1743         : [src2Stride]"r"((mips_reg)src2Stride),
1744           [dstStride]"r"((mips_reg)dstStride),
1745           [ff_pw_5]"f"(ff_pw_5),            [ff_pw_16]"f"(ff_pw_16)
1746         : "memory"
1747     );
1748 }
1749 
put_pixels8_l2_shift5_mmi(uint8_t * dst,int16_t * src16,const uint8_t * src8,ptrdiff_t dstStride,ptrdiff_t src8Stride,int h)1750 static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
1751         const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
1752 {
1753     double ftmp[7];
1754     uint64_t tmp0;
1755     DECLARE_VAR_ALL64;
1756     DECLARE_VAR_ADDRT;
1757 
1758     do {
1759         __asm__ volatile (
1760             "dli        %[tmp0],    0x05                                \n\t"
1761             MMI_ULDC1(%[ftmp0], %[src16], 0x00)
1762             "mtc1       %[tmp0],    %[ftmp6]                            \n\t"
1763             MMI_ULDC1(%[ftmp1], %[src16], 0x08)
1764             MMI_ULDC1(%[ftmp2], %[src16], 0x30)
1765             MMI_ULDC1(%[ftmp3], %[src16], 0x38)
1766             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp6]            \n\t"
1767             "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
1768             "psrah      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t"
1769             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
1770             "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"
1771             "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t"
1772             MMI_LDC1(%[ftmp5], %[src8], 0x00)
1773             MMI_LDXC1(%[ftmp4], %[src8], %[src8Stride], 0x00)
1774             "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp5]            \n\t"
1775             "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp4]            \n\t"
1776             MMI_SDC1(%[ftmp0], %[dst], 0x00)
1777             MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
1778             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1779               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1780               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1781               [ftmp6]"=&f"(ftmp[6]),
1782               RESTRICT_ASM_ALL64
1783               RESTRICT_ASM_ADDRT
1784               [tmp0]"=&r"(tmp0)
1785             : [src8]"r"(src8),              [src16]"r"(src16),
1786               [dst]"r"(dst),
1787               [src8Stride]"r"((mips_reg)src8Stride),
1788               [dstStride]"r"((mips_reg)dstStride)
1789             : "memory"
1790         );
1791 
1792         src8  += 2 * src8Stride;
1793         src16 += 48;
1794         dst   += 2 * dstStride;
1795     } while (h -= 2);
1796 }
1797 
/*
 * 16x16 horizontal lowpass averaged with a second source, built from four
 * calls to the 8x8 kernel: left and right halves of the top 8 rows, then
 * left and right halves of the bottom 8 rows.
 *
 * As in the 8x8 kernel, src shares dstStride with dst; only src2 has its
 * own stride.
 */
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    /* top 8 rows */
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
    put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
            src2Stride);

    src += 8 * dstStride;
    dst += 8 * dstStride;
    src2 += 8 * src2Stride;

    /* bottom 8 rows */
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
    put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
            src2Stride);
}
1813 
/*
 * 16-wide version of put_pixels8_l2_shift5_mmi: processes the left 8
 * columns, then the right 8 columns (dst, src16 and src8 each offset by 8
 * elements), both over the same h rows.
 */
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
{
    put_pixels8_l2_shift5_mmi(dst, src16, src8, dstStride, src8Stride, h);
    put_pixels8_l2_shift5_mmi(dst + 8, src16 + 8, src8 + 8, dstStride,
            src8Stride, h);
}
1821 
avg_h264_qpel4_hv_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)1822 static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1823         int dstStride, int srcStride)
1824 {
1825     INIT_CLIP
1826     int i;
1827     int16_t _tmp[36];
1828     int16_t *tmp = _tmp;
1829     double ftmp[10];
1830     uint64_t tmp0;
1831     DECLARE_VAR_LOW32;
1832 
1833     src -= 2*srcStride;
1834 
1835     __asm__ volatile (
1836         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
1837         "dli        %[tmp0],    0x09                                    \n\t"
1838         "1:                                                             \n\t"
1839         MMI_ULWC1(%[ftmp1], %[src], -0x02)
1840         MMI_ULWC1(%[ftmp2], %[src], -0x01)
1841         MMI_ULWC1(%[ftmp3], %[src],  0x00)
1842         MMI_ULWC1(%[ftmp4], %[src],  0x01)
1843         MMI_ULWC1(%[ftmp5], %[src],  0x02)
1844         MMI_ULWC1(%[ftmp6], %[src],  0x03)
1845         "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
1846         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
1847         "punpcklbh  %[ftmp3],   %[ftmp3],       %[ftmp0]                \n\t"
1848         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
1849         "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
1850         "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
1851         "paddsh     %[ftmp7],   %[ftmp3],       %[ftmp4]                \n\t"
1852         "paddsh     %[ftmp8],   %[ftmp2],       %[ftmp5]                \n\t"
1853         "paddsh     %[ftmp9],   %[ftmp1],       %[ftmp6]                \n\t"
1854         "pmullh     %[ftmp7],   %[ftmp7],       %[ff_pw_20]             \n\t"
1855         "pmullh     %[ftmp8],   %[ftmp8],       %[ff_pw_5]              \n\t"
1856         "psubsh     %[ftmp7],   %[ftmp7],       %[ftmp8]                \n\t"
1857         "paddsh     %[ftmp9],   %[ftmp7],       %[ftmp9]                \n\t"
1858         MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1859         "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
1860         PTR_ADDU   "%[src],     %[src],         %[srcStride]            \n\t"
1861         PTR_ADDU   "%[tmp],     %[tmp],         %[tmpStride]            \n\t"
1862         "bnez       %[tmp0],    1b                                      \n\t"
1863         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
1864           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
1865           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
1866           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
1867           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
1868           [tmp0]"=&r"(tmp0),
1869           RESTRICT_ASM_LOW32
1870           [tmp]"+&r"(tmp),                  [src]"+&r"(src)
1871         : [tmpStride]"r"(8),
1872           [srcStride]"r"((mips_reg)srcStride),
1873           [ff_pw_20]"f"(ff_pw_20),          [ff_pw_5]"f"(ff_pw_5)
1874         : "memory"
1875     );
1876 
1877     tmp -= 28;
1878 
1879     for (i=0; i<4; i++) {
1880         const int16_t tmpB= tmp[-8];
1881         const int16_t tmpA= tmp[-4];
1882         const int16_t tmp0= tmp[ 0];
1883         const int16_t tmp1= tmp[ 4];
1884         const int16_t tmp2= tmp[ 8];
1885         const int16_t tmp3= tmp[12];
1886         const int16_t tmp4= tmp[16];
1887         const int16_t tmp5= tmp[20];
1888         const int16_t tmp6= tmp[24];
1889         op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1890         op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1891         op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1892         op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1893         dst++;
1894         tmp++;
1895     }
1896 }
1897 
avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t * dst,int16_t * tmp,ptrdiff_t dstStride,ptrdiff_t tmpStride,int size)1898 static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
1899         int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1900 {
1901     int w = size >> 4;
1902     double ftmp[11];
1903     uint64_t tmp0;
1904     DECLARE_VAR_ALL64;
1905 
1906     do {
1907         int h = size;
1908         __asm__ volatile (
1909             "dli        %[tmp0],    0x02                                \n\t"
1910             "mtc1       %[tmp0],    %[ftmp9]                            \n\t"
1911             "dli        %[tmp0],    0x06                                \n\t"
1912             "mtc1       %[tmp0],    %[ftmp10]                           \n\t"
1913             "1:                                                         \n\t"
1914             MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1915             MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1916             MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1917             MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1918             MMI_LDC1(%[ftmp7], %[tmp], 0x10)
1919             MMI_ULDC1(%[ftmp8], %[tmp], 0x12)
1920             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp4]            \n\t"
1921             "paddh      %[ftmp1],   %[ftmp1],       %[ftmp3]            \n\t"
1922             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp8]            \n\t"
1923             "paddh      %[ftmp4],   %[ftmp4],       %[ftmp7]            \n\t"
1924             MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1925             MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1926             MMI_ULDC1(%[ftmp7], %[tmp], 0x06)
1927             MMI_ULDC1(%[ftmp8], %[tmp], 0x0e)
1928             "paddh      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
1929             "paddh      %[ftmp5],   %[ftmp5],       %[ftmp8]            \n\t"
1930             "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"
1931             "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
1932             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]            \n\t"
1933             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp9]            \n\t"
1934             "psubh      %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"
1935             "psubh      %[ftmp3],   %[ftmp3],       %[ftmp4]            \n\t"
1936             "paddsh     %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
1937             "paddsh     %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
1938             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp9]            \n\t"
1939             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp9]            \n\t"
1940             "paddh      %[ftmp0],   %[ftmp0],       %[ftmp2]            \n\t"
1941             "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]            \n\t"
1942             "psrah      %[ftmp0],   %[ftmp0],       %[ftmp10]           \n\t"
1943             "psrah      %[ftmp3],   %[ftmp3],       %[ftmp10]           \n\t"
1944             "packushb   %[ftmp0],   %[ftmp0],       %[ftmp3]            \n\t"
1945             MMI_LDC1(%[ftmp6], %[dst], 0x00)
1946             "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp6]            \n\t"
1947             MMI_SDC1(%[ftmp0], %[dst], 0x00)
1948             "addi       %[h],       %[h],           -0x01               \n\t"
1949             PTR_ADDI   "%[tmp],     %[tmp],         0x30                \n\t"
1950             PTR_ADDU   "%[dst],     %[dst],         %[dstStride]        \n\t"
1951             "bnez       %[h],       1b                                  \n\t"
1952             : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
1953               [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
1954               [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
1955               [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
1956               [ftmp8]"=&f"(ftmp[8]),        [ftmp9]"=&f"(ftmp[9]),
1957               [ftmp10]"=&f"(ftmp[10]),
1958               [tmp0]"=&r"(tmp0),
1959               RESTRICT_ASM_ALL64
1960               [tmp]"+&r"(tmp),              [dst]"+&r"(dst),
1961               [h]"+&r"(h)
1962             : [dstStride]"r"((mips_reg)dstStride)
1963             : "memory"
1964         );
1965 
1966         tmp += 8 - size * 24;
1967         dst += 8 - size * dstStride;
1968     } while (w--);
1969 }
1970 
/*
 * Two-pass helper shared by the 8- and 16-wide variants.
 *
 * Pass 1 hands src/srcStride and tmp/tmpStride to
 * put_h264_qpel8or16_hv1_lowpass_mmi(); pass 2 hands the same tmp buffer
 * together with dst/dstStride to avg_h264_qpel8or16_hv2_lowpass_mmi().
 * size is forwarded unchanged to both passes.
 */
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                              const uint8_t *src,
                                              ptrdiff_t dstStride,
                                              ptrdiff_t tmpStride,
                                              ptrdiff_t srcStride, int size)
{
    /* Pass 1: src and tmp. */
    put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size);

    /* Pass 2: tmp and dst. */
    avg_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size);
}
1978 
/* Size-8 entry point: forwards everything to the 8or16 helper with size 8. */
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                          const uint8_t *src,
                                          ptrdiff_t dstStride,
                                          ptrdiff_t tmpStride,
                                          ptrdiff_t srcStride)
{
    const int size = 8;

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride, size);
}
1986 
/* Size-16 entry point: forwards everything to the 8or16 helper with size 16. */
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
                                           const uint8_t *src,
                                           ptrdiff_t dstStride,
                                           ptrdiff_t tmpStride,
                                           ptrdiff_t srcStride)
{
    const int size = 16;

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride, size);
}
1994 
/*
 * Processes 8 rows of 8 pixels. For every row a 6-tap filter is evaluated
 * across src[-2 .. 10]:
 *     (20*(s[i] + s[i+1]) - 5*(s[i-1] + s[i+2]) + s[i-2] + s[i+3] + 16) >> 5
 * The result is packed to bytes with unsigned saturation, averaged (pavgb)
 * with the matching row of src2, averaged again with the row already in dst,
 * and stored back to dst.
 *
 * dst        destination; read and written with 8-byte aligned-form accesses
 *            (MMI_LDC1 / MMI_SDC1)
 * src        filter input; note that it is advanced by dstStride per row,
 *            there is no separate source stride parameter
 * src2       second averaging input, advanced by src2Stride per row
 *
 * NOTE(review): ff_pw_5 and ff_pw_16 are presumably 4x16-bit vectors of 5 and
 * 16; their definitions are not part of this file section.
 */
static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    double ftmp[10];
    uint64_t tmp[2];
    DECLARE_VAR_LOW32;
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        /* Setup: ftmp7 = 2 and ftmp8 = 5 (shift counts), tmp0 = 8 (row
         * counter), ftmp0 = 0 (zero source for byte -> halfword unpacking). */
        "dli        %[tmp1],    0x02                                    \n\t"
        "ori        %[tmp0],    $0,             0x8                     \n\t"
        "mtc1       %[tmp1],    %[ftmp7]                                \n\t"
        "dli        %[tmp1],    0x05                                    \n\t"
        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "mtc1       %[tmp1],    %[ftmp8]                                \n\t"
        "1:                                                             \n\t"
        /* Centre taps: ftmp1 = (s[0..3] + s[1..4]) << 2,
         *              ftmp3 = (s[4..7] + s[5..8]) << 2. */
        MMI_ULDC1(%[ftmp1], %[src], 0x00)
        MMI_ULDC1(%[ftmp2], %[src], 0x01)
        "punpckhbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp1],   %[ftmp1],       %[ftmp0]                \n\t"
        "punpckhbh  %[ftmp4],   %[ftmp2],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
        "psllh      %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
        "psllh      %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
        /* Inner taps: after unpacking, ftmp4 = s[3..6] and ftmp5 = s[2..5]
         * stay live for the outer-tap step below.
         * ftmp2 = s[-1..2] + s[2..5], ftmp6 = s[6..9] + s[3..6]. */
        MMI_ULDC1(%[ftmp2], %[src], -0x01)
        MMI_ULDC1(%[ftmp5], %[src],  0x02)
        "punpckhbh  %[ftmp4],   %[ftmp2],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
        "punpckhbh  %[ftmp6],   %[ftmp5],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp5],   %[ftmp5],       %[ftmp0]                \n\t"
        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp5]                \n\t"
        "paddh      %[ftmp6],   %[ftmp6],       %[ftmp4]                \n\t"
        /* (4*centre - inner) * ff_pw_5, i.e. 20*centre - 5*inner when
         * ff_pw_5 holds 5 in each lane. */
        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
        "psubh      %[ftmp3],   %[ftmp3],       %[ftmp6]                \n\t"
        "pmullh     %[ftmp1],   %[ftmp1],       %[ff_pw_5]              \n\t"
        "pmullh     %[ftmp3],   %[ftmp3],       %[ff_pw_5]              \n\t"
        /* Outer taps from two 4-byte loads: s[-2..1] and s[7..10].
         * ftmp2 = s[-2..1] + s[3..6] (reuses ftmp4),
         * ftmp5 = s[2..5]  + s[7..10]. */
        MMI_ULWC1(%[ftmp2], %[src], -0x02)
        MMI_ULWC1(%[ftmp6], %[src],  0x07)
        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp6],   %[ftmp6],       %[ftmp0]                \n\t"
        "paddh      %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
        "paddh      %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
        /* Add the ff_pw_16 term, sum everything, then >> 5. */
        "paddh      %[ftmp2],   %[ftmp2],       %[ff_pw_16]             \n\t"
        "paddh      %[ftmp5],   %[ftmp5],       %[ff_pw_16]             \n\t"
        "paddh      %[ftmp1],   %[ftmp1],       %[ftmp2]                \n\t"
        "paddh      %[ftmp3],   %[ftmp3],       %[ftmp5]                \n\t"
        "psrah      %[ftmp1],   %[ftmp1],       %[ftmp8]                \n\t"
        "psrah      %[ftmp3],   %[ftmp3],       %[ftmp8]                \n\t"
        /* Pack to 8 bytes, average with the src2 row, then with the current
         * dst row, and store the result over dst. */
        MMI_LDC1(%[ftmp5], %[src2], 0x00)
        "packushb   %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
        MMI_LDC1(%[ftmp9], %[dst], 0x00)
        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
        "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
        /* src steps by dstStride: the function has no srcStride argument. */
        PTR_ADDU   "%[src],     %[src],         %[dstStride]            \n\t"
        MMI_SDC1(%[ftmp1], %[dst], 0x00)
        "daddi      %[tmp0],    %[tmp0],        -0x01                   \n\t"
        PTR_ADDU   "%[dst],     %[dst],         %[dstStride]            \n\t"
        PTR_ADDU   "%[src2],    %[src2],        %[src2Stride]           \n\t"
        "bgtz       %[tmp0],    1b                                      \n\t"
        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]),              [tmp1]"=&r"(tmp[1]),
          RESTRICT_ASM_LOW32
          RESTRICT_ASM_ALL64
          [dst]"+&r"(dst),                  [src]"+&r"(src),
          [src2]"+&r"(src2)
        : [dstStride]"r"((mips_reg)dstStride),
          [src2Stride]"r"((mips_reg)src2Stride),
          [ff_pw_5]"f"(ff_pw_5),            [ff_pw_16]"f"(ff_pw_16)
        : "memory"
    );
}
2072 
/*
 * 16x16 variant assembled from four calls to the 8x8 routine:
 * left and right halves of the upper 8 rows, then of the lower 8 rows.
 * As in the original, src is stepped with dstStride (there is no separate
 * source stride parameter) and src2 with src2Stride.
 */
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    /* Pointers to the first pixel of the lower 8 rows. */
    uint8_t       *const dst_lo  = dst  + 8 * dstStride;
    const uint8_t *const src_lo  = src  + 8 * dstStride;
    const uint8_t *const src2_lo = src2 + 8 * src2Stride;

    /* Upper half: columns 0-7, then 8-15. */
    avg_h264_qpel8_h_lowpass_l2_mmi(dst,     src,     src2,
                                    dstStride, src2Stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8,
                                    dstStride, src2Stride);

    /* Lower half: columns 0-7, then 8-15. */
    avg_h264_qpel8_h_lowpass_l2_mmi(dst_lo,     src_lo,     src2_lo,
                                    dstStride, src2Stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst_lo + 8, src_lo + 8, src2_lo + 8,
                                    dstStride, src2Stride);
}
2088 
/*
 * Handles two 8-pixel rows per loop iteration, for b rows in total.
 *
 * For each row: eight 16-bit values are loaded from src16, arithmetically
 * shifted right by 5, packed to bytes with unsigned saturation, averaged
 * (pavgb) with the matching src8 row, averaged again with the row already in
 * dst, and written back to dst.
 *
 * src16 rows are 0x30 bytes (24 int16_t) apart: row 0 is read at byte offsets
 * 0x00/0x08, row 1 at 0x30/0x38, and the pointer then moves on by 48 int16_t.
 *
 * b must be a positive even number: the loop subtracts 2 per iteration and
 * only stops when the counter reaches exactly zero.
 */
static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
{
    double ftmp[8];
    uint64_t tmp0;
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    do {
        __asm__ volatile (
            /* ftmp6 = 5, the shift count; reloaded on every iteration. */
            "dli        %[tmp0],    0x05                                \n\t"
            MMI_ULDC1(%[ftmp0], %[src16], 0x00)
            "mtc1       %[tmp0],    %[ftmp6]                            \n\t"
            MMI_ULDC1(%[ftmp1], %[src16], 0x08)
            MMI_ULDC1(%[ftmp2], %[src16], 0x30)
            MMI_ULDC1(%[ftmp3], %[src16], 0x38)
            /* >> 5 on both rows (ftmp0/ftmp1 = row 0, ftmp2/ftmp3 = row 1). */
            "psrah      %[ftmp0],   %[ftmp0],       %[ftmp6]            \n\t"
            "psrah      %[ftmp1],   %[ftmp1],       %[ftmp6]            \n\t"
            "psrah      %[ftmp2],   %[ftmp2],       %[ftmp6]            \n\t"
            "psrah      %[ftmp3],   %[ftmp3],       %[ftmp6]            \n\t"
            /* Pack each row to 8 bytes and average with the src8 rows at
             * src8 and src8 + src8Stride. */
            "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]            \n\t"
            MMI_LDC1(%[ftmp4], %[src8], 0x00)
            MMI_LDXC1(%[ftmp5], %[src8], %[src8Stride], 0x00)
            "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]            \n\t"
            "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]            \n\t"
            "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp5]            \n\t"
            /* Average with what is already in dst, then store: row 0 at dst,
             * row 1 at dst + dstStride. */
            MMI_LDC1(%[ftmp7], %[dst], 0x00)
            "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp7]            \n\t"
            MMI_SDC1(%[ftmp0], %[dst], 0x00)
            MMI_LDXC1(%[ftmp7], %[dst], %[dstStride], 0x00)
            "pavgb      %[ftmp2],   %[ftmp2],       %[ftmp7]            \n\t"
            MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
            : [ftmp0]"=&f"(ftmp[0]),        [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]),        [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]),        [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),        [ftmp7]"=&f"(ftmp[7]),
              RESTRICT_ASM_ALL64
              RESTRICT_ASM_ADDRT
              [tmp0]"=&r"(tmp0)
            : [src8]"r"(src8),              [src16]"r"(src16),
              [dst]"r"(dst),
              [src8Stride]"r"((mips_reg)src8Stride),
              [dstStride]"r"((mips_reg)dstStride)
            : "memory"
        );

        /* Pointers are advanced in C because the asm takes them as inputs. */
        src8  += 2 * src8Stride;
        src16 += 48;
        dst   += 2 * dstStride;
    } while (b -= 2);
}
2140 
/*
 * 16-wide variant: runs the 8-wide routine on columns 0-7 and then on
 * columns 8-15 (every pointer moved forward by 8 elements), b rows each.
 */
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
{
    /* Left half. */
    avg_pixels8_l2_shift5_mmi(dst, src16, src8,
                              dstStride, src8Stride, b);
    /* Right half. */
    avg_pixels8_l2_shift5_mmi(dst + 8, src16 + 8, src8 + 8,
                              dstStride, src8Stride, b);
}
2148 
2149 //DEF_H264_MC_MMI(put_, 4)
/* Forwards dst, src and stride to ff_put_pixels4_8_mmi() with a final
 * argument of 4. */
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    ff_put_pixels4_8_mmi(dst, src, stride, 4);
}
2155 
/*
 * Runs put_h264_qpel4_h_lowpass_mmi() on src into a 16-byte scratch buffer
 * (stride 4), then passes src and that buffer to ff_put_pixels4_l2_8_mmi()
 * to produce dst.
 */
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(filt, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, src, filt, stride, stride, 4, 4);
}
2163 
/* Calls put_h264_qpel4_h_lowpass_mmi() straight into dst; stride is used for
 * both stride arguments. */
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    put_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
}
2169 
/*
 * Same scheme as the mc10 variant, except that the unfiltered operand given
 * to ff_put_pixels4_l2_8_mmi() is src + 1 instead of src.
 */
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(filt, src, 4, stride);
    ff_put_pixels4_l2_8_mmi(dst, src + 1, filt, stride, stride, 4, 4);
}
2177 
/*
 * Copies 9 rows of 4 bytes, starting two rows above src, into a packed
 * buffer; runs put_h264_qpel4_v_lowpass_mmi() from row 2 of that copy (the
 * row that corresponds to src); then passes that same row and the filter
 * output to ff_put_pixels4_l2_8_mmi() to produce dst.
 */
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];                    /* 9 rows, 4 bytes each */
    uint8_t filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;   /* skip the two leading rows */

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(filt, center, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, center, filt, stride, 4, 4, 4);
}
2188 
/*
 * Copies 9 rows of 4 bytes, starting two rows above src, into a packed
 * buffer and runs put_h264_qpel4_v_lowpass_mmi() from row 2 of that copy
 * directly into dst.
 */
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];                    /* 9 rows, 4 bytes each */
    uint8_t *const center = rows + 2 * 4;   /* skip the two leading rows */

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(dst, center, stride, 4);
}
2197 
/*
 * Same scheme as the mc01 variant, except that the unfiltered operand given
 * to ff_put_pixels4_l2_8_mmi() starts one copied row (4 bytes) further down.
 */
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];                    /* 9 rows, 4 bytes each */
    uint8_t filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;   /* skip the two leading rows */

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(filt, center, 4, 4);
    ff_put_pixels4_l2_8_mmi(dst, center + 4, filt, stride, 4, 4, 4);
}
2208 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src
 * The two intermediates are independent of each other.
 */
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2221 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride + 1 (one byte to the right)
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src
 */
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2234 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src + stride, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2247 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride + 1 (one byte to the right)
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src + stride, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2260 
/* Calls put_h264_qpel4_hv_lowpass_mmi() straight into dst; stride is used
 * for both stride arguments. */
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    put_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
}
2266 
/*
 * Builds two 16-byte intermediates from src and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   h_filt  - put_h264_qpel4_h_lowpass_mmi() over src
 */
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t h_filt[4 * 4];
    uint8_t hv_filt[4 * 4];

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);
    put_h264_qpel4_h_lowpass_mmi(h_filt, src, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, h_filt, hv_filt, stride, 4, 4, 4);
}
2276 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   h_filt  - put_h264_qpel4_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t h_filt[4 * 4];
    uint8_t hv_filt[4 * 4];

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);
    put_h264_qpel4_h_lowpass_mmi(h_filt, src + stride, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, h_filt, hv_filt, stride, 4, 4, 4);
}
2286 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   v_filt  - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *             src - 2*stride
 */
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t hv_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    ff_put_pixels4_l2_8_mmi(dst, v_filt, hv_filt, stride, 4, 4, 4);
}
2299 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_put_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   v_filt  - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *             src - 2*stride + 1 (one byte to the right)
 */
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t hv_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);

    copy_block4_mmi(rows, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    ff_put_pixels4_l2_8_mmi(dst, v_filt, hv_filt, stride, 4, 4, 4);
}
2312 
2313 //DEF_H264_MC_MMI(avg_, 4)
/* Forwards dst, src and stride to ff_avg_pixels4_8_mmi() with a final
 * argument of 4. */
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    ff_avg_pixels4_8_mmi(dst, src, stride, 4);
}
2319 
/*
 * Runs put_h264_qpel4_h_lowpass_mmi() on src into a 16-byte scratch buffer
 * (stride 4), then passes src and that buffer to ff_avg_pixels4_l2_8_mmi()
 * to update dst.
 */
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(filt, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, src, filt, stride, stride, 4, 4);
}
2327 
/* Calls avg_h264_qpel4_h_lowpass_mmi() on dst directly; stride is used for
 * both stride arguments. */
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    avg_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride);
}
2333 
/*
 * Same scheme as the avg mc10 variant, except that the unfiltered operand
 * given to ff_avg_pixels4_l2_8_mmi() is src + 1 instead of src.
 */
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(filt, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, src + 1, filt, stride, stride, 4, 4);
}
2341 
/*
 * Copies 9 rows of 4 bytes, starting two rows above src, into a packed
 * buffer; runs put_h264_qpel4_v_lowpass_mmi() from row 2 of that copy (the
 * row that corresponds to src); then passes that same row and the filter
 * output to ff_avg_pixels4_l2_8_mmi() to update dst.
 */
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];                    /* 9 rows, 4 bytes each */
    uint8_t filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;   /* skip the two leading rows */

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(filt, center, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, center, filt, stride, 4, 4, 4);
}
2352 
/*
 * Copies 9 rows of 4 bytes, starting two rows above src, into a packed
 * buffer and runs avg_h264_qpel4_v_lowpass_mmi() from row 2 of that copy
 * directly on dst.
 */
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];                    /* 9 rows, 4 bytes each */
    uint8_t *const center = rows + 2 * 4;   /* skip the two leading rows */

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    avg_h264_qpel4_v_lowpass_mmi(dst, center, stride, 4);
}
2361 
/*
 * Same scheme as the avg mc01 variant, except that the unfiltered operand
 * given to ff_avg_pixels4_l2_8_mmi() starts one copied row (4 bytes) further
 * down.
 */
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];                    /* 9 rows, 4 bytes each */
    uint8_t filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;   /* skip the two leading rows */

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(filt, center, 4, 4);
    ff_avg_pixels4_l2_8_mmi(dst, center + 4, filt, stride, 4, 4, 4);
}
2372 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src
 */
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2385 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride + 1 (one byte to the right)
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src
 */
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2398 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src + stride, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2411 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   v_filt - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *            src - 2*stride + 1 (one byte to the right)
 *   h_filt - put_h264_qpel4_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t h_filt[4 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    copy_block4_mmi(rows, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_filt, src + stride, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_filt, v_filt, stride, 4, 4, 4);
}
2424 
/* Calls avg_h264_qpel4_hv_lowpass_mmi() on dst directly; stride is used for
 * both stride arguments. */
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    avg_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride);
}
2430 
/*
 * Builds two 16-byte intermediates from src and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   h_filt  - put_h264_qpel4_h_lowpass_mmi() over src
 */
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t h_filt[4 * 4];
    uint8_t hv_filt[4 * 4];

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);
    put_h264_qpel4_h_lowpass_mmi(h_filt, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_filt, hv_filt, stride, 4, 4, 4);
}
2440 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   h_filt  - put_h264_qpel4_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t h_filt[4 * 4];
    uint8_t hv_filt[4 * 4];

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);
    put_h264_qpel4_h_lowpass_mmi(h_filt, src + stride, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_filt, hv_filt, stride, 4, 4, 4);
}
2450 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   v_filt  - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *             src - 2*stride
 */
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t hv_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);

    copy_block4_mmi(rows, src - stride*2, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    ff_avg_pixels4_l2_8_mmi(dst, v_filt, hv_filt, stride, 4, 4, 4);
}
2463 
/*
 * Builds two 16-byte intermediates and hands both to
 * ff_avg_pixels4_l2_8_mmi():
 *   hv_filt - put_h264_qpel4_hv_lowpass_mmi() over src
 *   v_filt  - put_h264_qpel4_v_lowpass_mmi() over a 9x4 copy taken from
 *             src - 2*stride + 1 (one byte to the right)
 */
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[9 * 4];
    uint8_t v_filt[4 * 4];
    uint8_t hv_filt[4 * 4];
    uint8_t *const center = rows + 2 * 4;

    put_h264_qpel4_hv_lowpass_mmi(hv_filt, src, 4, stride);

    copy_block4_mmi(rows, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_filt, center, 4, 4);

    ff_avg_pixels4_l2_8_mmi(dst, v_filt, hv_filt, stride, 4, 4, 4);
}
2476 
2477 //DEF_H264_MC_MMI(put_, 8)
/* Forwards dst, src and stride to ff_put_pixels8_8_mmi() with a final
 * argument of 8. */
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    ff_put_pixels8_8_mmi(dst, src, stride, 8);
}
2483 
/*
 * Runs put_h264_qpel8_h_lowpass_mmi() on src into a 64-byte scratch buffer
 * (stride 8), then passes src and that buffer to ff_put_pixels8_l2_8_mmi()
 * to produce dst.
 */
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[8 * 8];

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);
    ff_put_pixels8_l2_8_mmi(dst, src, filt, stride, stride, 8, 8);
}
2491 
/* Calls put_h264_qpel8_h_lowpass_mmi() straight into dst; stride is used for
 * both stride arguments. */
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
}
2497 
/*
 * Same scheme as the 8-wide mc10 variant, except that the unfiltered operand
 * given to ff_put_pixels8_l2_8_mmi() is src + 1 instead of src.
 */
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst,
                                const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[8 * 8];

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);
    ff_put_pixels8_l2_8_mmi(dst, src + 1, filt, stride, stride, 8, 8);
}
2505 
/*
 * Asks copy_block8_mmi() for 13 rows (destination stride 8) starting two
 * rows above src; runs put_h264_qpel8_v_lowpass_mmi() from byte offset 16 of
 * that copy (two 8-byte rows in); then passes that same position and the
 * filter output to ff_put_pixels8_l2_8_mmi() to produce dst.
 */
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];                   /* 13 rows at stride 8 */
    uint8_t filt[8 * 8];
    uint8_t *const center = rows + 2 * 8;   /* skip the two leading rows */

    copy_block8_mmi(rows, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, center, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, center, filt, stride, 8, 8, 8);
}
2516 
/*
 * Asks copy_block8_mmi() for 13 rows (destination stride 8) starting two
 * rows above src and runs put_h264_qpel8_v_lowpass_mmi() from byte offset 16
 * of that copy directly into dst.
 */
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];                   /* 13 rows at stride 8 */
    uint8_t *const center = rows + 2 * 8;   /* skip the two leading rows */

    copy_block8_mmi(rows, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(dst, center, stride, 8);
}
2525 
/*
 * Same scheme as the 8-wide mc01 variant, except that the unfiltered operand
 * given to ff_put_pixels8_l2_8_mmi() starts 8 bytes (one copied row) further
 * down.
 */
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];                   /* 13 rows at stride 8 */
    uint8_t filt[8 * 8];
    uint8_t *const center = rows + 2 * 8;   /* skip the two leading rows */

    copy_block8_mmi(rows, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, center, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, center + 8, filt, stride, 8, 8, 8);
}
2536 
/*
 * Builds two 64-byte intermediates and hands both to
 * ff_put_pixels8_l2_8_mmi():
 *   v_filt - put_h264_qpel8_v_lowpass_mmi() over a 13-row copy (stride 8)
 *            taken from src - 2*stride
 *   h_filt - put_h264_qpel8_h_lowpass_mmi() over src
 */
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t h_filt[8 * 8];
    uint8_t v_filt[8 * 8];
    uint8_t *const center = rows + 2 * 8;

    copy_block8_mmi(rows, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_filt, center, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_filt, src, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_filt, v_filt, stride, 8, 8, 8);
}
2549 
/*
 * Builds two 64-byte intermediates and hands both to
 * ff_put_pixels8_l2_8_mmi():
 *   v_filt - put_h264_qpel8_v_lowpass_mmi() over a 13-row copy (stride 8)
 *            taken from src - 2*stride + 1 (one byte to the right)
 *   h_filt - put_h264_qpel8_h_lowpass_mmi() over src
 */
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t h_filt[8 * 8];
    uint8_t v_filt[8 * 8];
    uint8_t *const center = rows + 2 * 8;

    copy_block8_mmi(rows, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_filt, center, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_filt, src, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_filt, v_filt, stride, 8, 8, 8);
}
2562 
/*
 * Builds two 64-byte intermediates and hands both to
 * ff_put_pixels8_l2_8_mmi():
 *   v_filt - put_h264_qpel8_v_lowpass_mmi() over a 13-row copy (stride 8)
 *            taken from src - 2*stride
 *   h_filt - put_h264_qpel8_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t h_filt[8 * 8];
    uint8_t v_filt[8 * 8];
    uint8_t *const center = rows + 2 * 8;

    copy_block8_mmi(rows, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_filt, center, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_filt, src + stride, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_filt, v_filt, stride, 8, 8, 8);
}
2575 
/*
 * Builds two 64-byte intermediates and hands both to
 * ff_put_pixels8_l2_8_mmi():
 *   v_filt - put_h264_qpel8_v_lowpass_mmi() over a 13-row copy (stride 8)
 *            taken from src - 2*stride + 1 (one byte to the right)
 *   h_filt - put_h264_qpel8_h_lowpass_mmi() over src + stride (one row down)
 */
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t h_filt[8 * 8];
    uint8_t v_filt[8 * 8];
    uint8_t *const center = rows + 2 * 8;

    copy_block8_mmi(rows, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_filt, center, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_filt, src + stride, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_filt, v_filt, stride, 8, 8, 8);
}
2588 
/*
 * Calls put_h264_qpel8_hv_lowpass_mmi() straight into dst, giving it a local
 * 192-element, 8-byte-aligned 16-bit scratch buffer as its second argument.
 * stride is used for both the destination and the source; the value 8 is
 * passed in the stride slot between them.
 */
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    /* Declared int16_t (not uint16_t) so the buffer has the same element
     * type as the int16_t pointers the other callers in this file pass in
     * this position; this avoids a pointer-signedness mismatch at the call. */
    int16_t __attribute__ ((aligned(8))) temp[192];

    put_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride);
}
2596 
/*
 * Uses one 448-byte, 8-byte-aligned scratch area split in two:
 *   bytes   0..63   8-bit output of put_h264_qpel8_hv_lowpass_mmi() (stride 8)
 *   bytes  64..447  16-bit work buffer handed to the same call
 * The 8-bit output is then passed, together with src, to
 * put_h264_qpel8_h_lowpass_l2_mmi() to produce dst.
 */
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out  = scratch;
    int16_t *const hv_work = (int16_t *) (scratch + 64);

    put_h264_qpel8_hv_lowpass_mmi(hv_out, hv_work, src, 8, 8, stride);
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, hv_out, stride, 8);
}
2607 
/*
 * put, 8x8, mc23.
 *
 * One 448-byte, 8-byte-aligned scratch area is split in two: the first
 * 64 bytes receive the output of put_h264_qpel8_hv_lowpass_mmi, the rest is
 * handed to that filter as its int16_t second argument.
 * put_h264_qpel8_h_lowpass_l2_mmi then takes the row below src
 * (src + stride) together with the 64-byte result and writes dst.
 */
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];
    const uint8_t *row_below;

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    row_below = src + stride;
    put_h264_qpel8_h_lowpass_l2_mmi(dst, row_below, hv_out, stride, 8);
}
2618 
/*
 * put, 8x8, mc12.
 *
 * put_h264_qpel8_hv_lowpass_mmi writes its 8-bit output into the first
 * 64 bytes of an aligned 448-byte scratch area and is given the remainder
 * as its int16_t second argument. put_pixels8_l2_shift5_mmi then reads that
 * int16_t area starting at element 2, plus the 8-bit output, and writes dst.
 */
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    put_pixels8_l2_shift5_mmi(dst, &v16[2], hv_out, stride, 8, 8);
}
2629 
/*
 * put, 8x8, mc32.
 *
 * put_h264_qpel8_hv_lowpass_mmi writes its 8-bit output into the first
 * 64 bytes of an aligned 448-byte scratch area and is given the remainder
 * as its int16_t second argument. put_pixels8_l2_shift5_mmi then reads that
 * int16_t area starting at element 3, plus the 8-bit output, and writes dst.
 */
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    put_pixels8_l2_shift5_mmi(dst, &v16[3], hv_out, stride, 8, 8);
}
2640 
2641 //DEF_H264_MC_MMI(avg_, 8)
/*
 * avg, 8x8, mc00.
 *
 * No filter stage and no temporaries: dst, src and stride are forwarded
 * unchanged to ff_avg_pixels8_8_mmi with a final argument of 8.
 */
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    /* The whole operation is this one call. */
    ff_avg_pixels8_8_mmi(dst, src, stride, 8);
}
2647 
/*
 * avg, 8x8, mc10.
 *
 * put_h264_qpel8_h_lowpass_mmi processes src into a packed 8x8 buffer
 * (pitch 8); ff_avg_pixels8_l2_8_mmi then takes src itself and that buffer
 * as its two inputs and updates dst.
 */
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[8 * 8];

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);
    ff_avg_pixels8_l2_8_mmi(dst, src, filt, stride, stride, 8, 8);
}
2655 
/*
 * avg, 8x8, mc20.
 *
 * Direct call to avg_h264_qpel8_h_lowpass_mmi from src into dst; stride is
 * supplied for both pitch arguments and no temporary buffer is used.
 */
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    /* Same pitch on both sides, so stride appears twice. */
    avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
}
2661 
/*
 * avg, 8x8, mc30.
 *
 * put_h264_qpel8_h_lowpass_mmi processes src into a packed 8x8 buffer
 * (pitch 8); ff_avg_pixels8_l2_8_mmi then takes src shifted one byte to the
 * right and that buffer as its two inputs and updates dst.
 */
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[8 * 8];
    const uint8_t *const right = src + 1;

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);
    ff_avg_pixels8_l2_8_mmi(dst, right, filt, stride, stride, 8, 8);
}
2669 
/*
 * avg, 8x8, mc01.
 *
 * copy_block8_mmi gathers 13 rows, starting two rows above src, into a
 * packed buffer (pitch 8). put_h264_qpel8_v_lowpass_mmi is run from byte
 * offset 16 of that copy (its third row, the one matching src) into a
 * packed 8x8 buffer. ff_avg_pixels8_l2_8_mmi then takes the copied rows from
 * offset 16 and the filtered buffer as its two inputs and updates dst.
 */
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[8 * 8];
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];

    copy_block8_mmi(tall, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, tall_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, tall_mid, filt, stride, 8, 8, 8);
}
2680 
/*
 * avg, 8x8, mc02.
 *
 * copy_block8_mmi gathers 13 rows, starting two rows above src, into a
 * packed buffer (pitch 8); avg_h264_qpel8_v_lowpass_mmi is then run from
 * byte offset 16 of that copy (its third row) directly into dst.
 */
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];

    copy_block8_mmi(tall, src - 2 * stride, 8, stride, 13);
    avg_h264_qpel8_v_lowpass_mmi(dst, tall_mid, stride, 8);
}
2689 
/*
 * avg, 8x8, mc03.
 *
 * copy_block8_mmi gathers 13 rows, starting two rows above src, into a
 * packed buffer (pitch 8). put_h264_qpel8_v_lowpass_mmi is run from byte
 * offset 16 of that copy (its third row) into a packed 8x8 buffer.
 * ff_avg_pixels8_l2_8_mmi then takes the copied rows from one packed row
 * further down (offset 16 + 8) and the filtered buffer, and updates dst.
 */
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t filt[8 * 8];
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];
    const uint8_t *const one_down = tall_mid + 8;

    copy_block8_mmi(tall, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, tall_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, one_down, filt, stride, 8, 8, 8);
}
2700 
/*
 * avg, 8x8, mc11.
 *
 * 1. put_h264_qpel8_h_lowpass_mmi is run on src and writes a packed 8x8
 *    buffer (pitch 8).
 * 2. copy_block8_mmi gathers 13 rows, starting two rows above src, into a
 *    packed buffer; put_h264_qpel8_v_lowpass_mmi is then run from byte
 *    offset 16 of that copy (its third row, the one matching src).
 * 3. ff_avg_pixels8_l2_8_mmi takes both 8x8 results and updates dst.
 */
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t hfilt[8 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];

    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
    copy_block8_mmi(tall, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, tall_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
}
2713 
/*
 * avg, 8x8, mc31.
 *
 * 1. put_h264_qpel8_h_lowpass_mmi is run on src and writes a packed 8x8
 *    buffer (pitch 8).
 * 2. copy_block8_mmi gathers 13 rows, starting two rows above src and one
 *    byte to the right, into a packed buffer; put_h264_qpel8_v_lowpass_mmi
 *    is then run from byte offset 16 of that copy (its third row).
 * 3. ff_avg_pixels8_l2_8_mmi takes both 8x8 results and updates dst.
 */
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t hfilt[8 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];

    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
    copy_block8_mmi(tall, src - 2 * stride + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, tall_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
}
2726 
/*
 * avg, 8x8, mc13.
 *
 * 1. put_h264_qpel8_h_lowpass_mmi is run on the row below src
 *    (src + stride) and writes a packed 8x8 buffer (pitch 8).
 * 2. copy_block8_mmi gathers 13 rows, starting two rows above src, into a
 *    packed buffer; put_h264_qpel8_v_lowpass_mmi is then run from byte
 *    offset 16 of that copy (its third row, the one matching src).
 * 3. ff_avg_pixels8_l2_8_mmi takes both 8x8 results and updates dst.
 */
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t hfilt[8 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];
    const uint8_t *const row_below = src + stride;

    put_h264_qpel8_h_lowpass_mmi(hfilt, row_below, 8, stride);
    copy_block8_mmi(tall, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, tall_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
}
2739 
/*
 * avg, 8x8, mc33.
 *
 * 1. put_h264_qpel8_h_lowpass_mmi is run on the row below src
 *    (src + stride) and writes a packed 8x8 buffer (pitch 8).
 * 2. copy_block8_mmi gathers 13 rows, starting two rows above src and one
 *    byte to the right, into a packed buffer; put_h264_qpel8_v_lowpass_mmi
 *    is then run from byte offset 16 of that copy (its third row).
 * 3. ff_avg_pixels8_l2_8_mmi takes both 8x8 results and updates dst.
 */
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t hfilt[8 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t tall[8 * 13];
    uint8_t *const tall_mid = &tall[2 * 8];
    const uint8_t *const row_below = src + stride;

    put_h264_qpel8_h_lowpass_mmi(hfilt, row_below, 8, stride);
    copy_block8_mmi(tall, src - 2 * stride + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, tall_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
}
2752 
/*
 * avg, 8x8, mc22.
 *
 * Single call to avg_h264_qpel8_hv_lowpass_mmi targeting dst directly.
 * An 8-byte-aligned array of 192 16-bit elements is supplied as the
 * filter's second argument; stride is passed for both pitch arguments.
 */
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) scratch[192];

    avg_h264_qpel8_hv_lowpass_mmi(dst, scratch, src, stride, 8, stride);
}
2760 
/*
 * avg, 8x8, mc21.
 *
 * One 448-byte, 8-byte-aligned scratch area is split in two: the first
 * 64 bytes receive the output of put_h264_qpel8_hv_lowpass_mmi, the rest is
 * handed to that filter as its int16_t second argument.
 * avg_h264_qpel8_h_lowpass_l2_mmi then takes src together with the 64-byte
 * result and updates dst.
 */
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, hv_out, stride, 8);
}
2771 
/*
 * avg, 8x8, mc23.
 *
 * One 448-byte, 8-byte-aligned scratch area is split in two: the first
 * 64 bytes receive the output of put_h264_qpel8_hv_lowpass_mmi, the rest is
 * handed to that filter as its int16_t second argument.
 * avg_h264_qpel8_h_lowpass_l2_mmi then takes the row below src
 * (src + stride) together with the 64-byte result and updates dst.
 */
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];
    const uint8_t *row_below;

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    row_below = src + stride;
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, row_below, hv_out, stride, 8);
}
2782 
/*
 * avg, 8x8, mc12.
 *
 * put_h264_qpel8_hv_lowpass_mmi writes its 8-bit output into the first
 * 64 bytes of an aligned 448-byte scratch area and is given the remainder
 * as its int16_t second argument. avg_pixels8_l2_shift5_mmi then reads that
 * int16_t area starting at element 2, plus the 8-bit output, and updates
 * dst.
 */
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    avg_pixels8_l2_shift5_mmi(dst, &v16[2], hv_out, stride, 8, 8);
}
2793 
/*
 * avg, 8x8, mc32.
 *
 * put_h264_qpel8_hv_lowpass_mmi writes its 8-bit output into the first
 * 64 bytes of an aligned 448-byte scratch area and is given the remainder
 * as its int16_t second argument. avg_pixels8_l2_shift5_mmi then reads that
 * int16_t area starting at element 3, plus the 8-bit output, and updates
 * dst.
 */
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
                                ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[8 * 8];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v16, src, 8, 8, stride);
    avg_pixels8_l2_shift5_mmi(dst, &v16[3], hv_out, stride, 8, 8);
}
2804 
2805 //DEF_H264_MC_MMI(put_, 16)
/*
 * put, 16x16, mc00.
 *
 * No filter stage and no temporaries: dst, src and stride are forwarded
 * unchanged to ff_put_pixels16_8_mmi with a final argument of 16.
 */
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    /* The whole operation is this one call. */
    ff_put_pixels16_8_mmi(dst, src, stride, 16);
}
2811 
/*
 * put, 16x16, mc10.
 *
 * put_h264_qpel16_h_lowpass_mmi processes src into a packed 16x16 buffer
 * (pitch 16); ff_put_pixels16_l2_8_mmi then takes src itself and that
 * buffer as its two inputs and writes dst.
 */
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);
    ff_put_pixels16_l2_8_mmi(dst, src, filt, stride, stride, 16, 16);
}
2819 
/*
 * put, 16x16, mc20.
 *
 * Direct call to put_h264_qpel16_h_lowpass_mmi from src into dst; stride is
 * supplied for both pitch arguments and no temporary buffer is used.
 */
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    /* Same pitch on both sides, so stride appears twice. */
    put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
}
2825 
/*
 * put, 16x16, mc30.
 *
 * put_h264_qpel16_h_lowpass_mmi processes src into a packed 16x16 buffer
 * (pitch 16); ff_put_pixels16_l2_8_mmi then takes src shifted one byte to
 * the right and that buffer as its two inputs and writes dst.
 */
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    const uint8_t *const right = src + 1;

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);
    ff_put_pixels16_l2_8_mmi(dst, right, filt, stride, stride, 16, 16);
}
2833 
/*
 * put, 16x16, mc01.
 *
 * copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 * packed buffer (pitch 16). put_h264_qpel16_v_lowpass_mmi is run from byte
 * offset 32 of that copy (its third row, the one matching src) into a
 * packed 16x16 buffer. ff_put_pixels16_l2_8_mmi then takes the copied rows
 * from offset 32 and the filtered buffer as its two inputs and writes dst.
 */
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, tall_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, tall_mid, filt, stride, 16, 16, 16);
}
2844 
/*
 * put, 16x16, mc02.
 *
 * copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 * packed buffer (pitch 16); put_h264_qpel16_v_lowpass_mmi is then run from
 * byte offset 32 of that copy (its third row) directly into dst.
 */
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(dst, tall_mid, stride, 16);
}
2853 
/*
 * put, 16x16, mc03.
 *
 * copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 * packed buffer (pitch 16). put_h264_qpel16_v_lowpass_mmi is run from byte
 * offset 32 of that copy (its third row) into a packed 16x16 buffer.
 * ff_put_pixels16_l2_8_mmi then takes the copied rows from one packed row
 * further down (offset 32 + 16) and the filtered buffer, and writes dst.
 */
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];
    const uint8_t *const one_down = tall_mid + 16;

    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, tall_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, one_down, filt, stride, 16, 16, 16);
}
2864 
/*
 * put, 16x16, mc11.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on src and writes a packed 16x16
 *    buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 *    packed buffer; put_h264_qpel16_v_lowpass_mmi is then run from byte
 *    offset 32 of that copy (its third row, the one matching src).
 * 3. ff_put_pixels16_l2_8_mmi merges both 16x16 results into dst.
 */
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
2877 
/*
 * put, 16x16, mc31.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on src and writes a packed 16x16
 *    buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src and one
 *    byte to the right, into a packed buffer; put_h264_qpel16_v_lowpass_mmi
 *    is then run from byte offset 32 of that copy (its third row).
 * 3. ff_put_pixels16_l2_8_mmi merges both 16x16 results into dst.
 */
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
2890 
/*
 * put, 16x16, mc13.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on the row below src
 *    (src + stride) and writes a packed 16x16 buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 *    packed buffer; put_h264_qpel16_v_lowpass_mmi is then run from byte
 *    offset 32 of that copy (its third row, the one matching src).
 * 3. ff_put_pixels16_l2_8_mmi merges both 16x16 results into dst.
 */
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];
    const uint8_t *const row_below = src + stride;

    put_h264_qpel16_h_lowpass_mmi(hfilt, row_below, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
2903 
/*
 * put, 16x16, mc33.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on the row below src
 *    (src + stride) and writes a packed 16x16 buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src and one
 *    byte to the right, into a packed buffer; put_h264_qpel16_v_lowpass_mmi
 *    is then run from byte offset 32 of that copy (its third row).
 * 3. ff_put_pixels16_l2_8_mmi merges both 16x16 results into dst.
 */
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];
    const uint8_t *const row_below = src + stride;

    put_h264_qpel16_h_lowpass_mmi(hfilt, row_below, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
2916 
/*
 * put, 16x16, mc22.
 *
 * Single call to put_h264_qpel16_hv_lowpass_mmi writing straight into dst.
 * An 8-byte-aligned array of 384 16-bit elements is supplied as the
 * filter's second argument; stride is passed for both pitch arguments.
 */
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) scratch[384];

    put_h264_qpel16_hv_lowpass_mmi(dst, scratch, src, stride, 16, stride);
}
2924 
/*
 * put, 16x16, mc21.
 *
 * One 1024-byte, 8-byte-aligned scratch area is split in two: the first
 * 256 bytes receive the output of put_h264_qpel16_hv_lowpass_mmi, the rest
 * is handed to that filter as its int16_t second argument.
 * put_h264_qpel16_h_lowpass_l2_mmi then takes src together with the
 * 256-byte result and writes dst.
 */
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    put_h264_qpel16_h_lowpass_l2_mmi(dst, src, hv_out, stride, 16);
}
2935 
/*
 * put, 16x16, mc23.
 *
 * One 1024-byte, 8-byte-aligned scratch area is split in two: the first
 * 256 bytes receive the output of put_h264_qpel16_hv_lowpass_mmi, the rest
 * is handed to that filter as its int16_t second argument.
 * put_h264_qpel16_h_lowpass_l2_mmi then takes the row below src
 * (src + stride) together with the 256-byte result and writes dst.
 */
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];
    const uint8_t *row_below;

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    row_below = src + stride;
    put_h264_qpel16_h_lowpass_l2_mmi(dst, row_below, hv_out, stride, 16);
}
2946 
/*
 * put, 16x16, mc12.
 *
 * put_h264_qpel16_hv_lowpass_mmi writes its 8-bit output into the first
 * 256 bytes of an aligned 1024-byte scratch area and is given the remainder
 * as its int16_t second argument. put_pixels16_l2_shift5_mmi then reads
 * that int16_t area starting at element 2, plus the 8-bit output, and
 * writes dst.
 */
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    put_pixels16_l2_shift5_mmi(dst, &v16[2], hv_out, stride, 16, 16);
}
2957 
/*
 * put, 16x16, mc32.
 *
 * put_h264_qpel16_hv_lowpass_mmi writes its 8-bit output into the first
 * 256 bytes of an aligned 1024-byte scratch area and is given the remainder
 * as its int16_t second argument. put_pixels16_l2_shift5_mmi then reads
 * that int16_t area starting at element 3, plus the 8-bit output, and
 * writes dst.
 */
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    put_pixels16_l2_shift5_mmi(dst, &v16[3], hv_out, stride, 16, 16);
}
2968 
2969 //DEF_H264_MC_MMI(avg_, 16)
/*
 * avg, 16x16, mc00.
 *
 * No filter stage and no temporaries: dst, src and stride are forwarded
 * unchanged to ff_avg_pixels16_8_mmi with a final argument of 16.
 */
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    /* The whole operation is this one call. */
    ff_avg_pixels16_8_mmi(dst, src, stride, 16);
}
2975 
/*
 * avg, 16x16, mc10.
 *
 * put_h264_qpel16_h_lowpass_mmi processes src into a packed 16x16 buffer
 * (pitch 16); ff_avg_pixels16_l2_8_mmi then takes src itself and that
 * buffer as its two inputs and updates dst.
 */
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);
    ff_avg_pixels16_l2_8_mmi(dst, src, filt, stride, stride, 16, 16);
}
2983 
/*
 * avg, 16x16, mc20.
 *
 * Direct call to avg_h264_qpel16_h_lowpass_mmi from src into dst; stride is
 * supplied for both pitch arguments and no temporary buffer is used.
 */
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    /* Same pitch on both sides, so stride appears twice. */
    avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
}
2989 
/*
 * avg, 16x16, mc30.
 *
 * put_h264_qpel16_h_lowpass_mmi processes src into a packed 16x16 buffer
 * (pitch 16); ff_avg_pixels16_l2_8_mmi then takes src shifted one byte to
 * the right and that buffer as its two inputs and updates dst.
 */
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    const uint8_t *const right = src + 1;

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);
    ff_avg_pixels16_l2_8_mmi(dst, right, filt, stride, stride, 16, 16);
}
2997 
/*
 * avg, 16x16, mc01.
 *
 * copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 * packed buffer (pitch 16). put_h264_qpel16_v_lowpass_mmi is run from byte
 * offset 32 of that copy (its third row, the one matching src) into a
 * packed 16x16 buffer. ff_avg_pixels16_l2_8_mmi then takes the copied rows
 * from offset 32 and the filtered buffer as its two inputs and updates dst.
 */
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, tall_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, tall_mid, filt, stride, 16, 16, 16);
}
3008 
/*
 * avg, 16x16, mc02.
 *
 * copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 * packed buffer (pitch 16); avg_h264_qpel16_v_lowpass_mmi is then run from
 * byte offset 32 of that copy (its third row) directly into dst.
 */
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    avg_h264_qpel16_v_lowpass_mmi(dst, tall_mid, stride, 16);
}
3017 
/*
 * avg, 16x16, mc03.
 *
 * copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 * packed buffer (pitch 16). put_h264_qpel16_v_lowpass_mmi is run from byte
 * offset 32 of that copy (its third row) into a packed 16x16 buffer.
 * ff_avg_pixels16_l2_8_mmi then takes the copied rows from one packed row
 * further down (offset 32 + 16) and the filtered buffer, and updates dst.
 */
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];
    const uint8_t *const one_down = tall_mid + 16;

    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, tall_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, one_down, filt, stride, 16, 16, 16);
}
3028 
/*
 * avg, 16x16, mc11.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on src and writes a packed 16x16
 *    buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 *    packed buffer; put_h264_qpel16_v_lowpass_mmi is then run from byte
 *    offset 32 of that copy (its third row, the one matching src).
 * 3. ff_avg_pixels16_l2_8_mmi takes both 16x16 results and updates dst.
 */
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
3041 
/*
 * avg, 16x16, mc31.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on src and writes a packed 16x16
 *    buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src and one
 *    byte to the right, into a packed buffer; put_h264_qpel16_v_lowpass_mmi
 *    is then run from byte offset 32 of that copy (its third row).
 * 3. ff_avg_pixels16_l2_8_mmi takes both 16x16 results and updates dst.
 */
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];

    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
3054 
/*
 * avg, 16x16, mc13.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on the row below src
 *    (src + stride) and writes a packed 16x16 buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src, into a
 *    packed buffer; put_h264_qpel16_v_lowpass_mmi is then run from byte
 *    offset 32 of that copy (its third row, the one matching src).
 * 3. ff_avg_pixels16_l2_8_mmi takes both 16x16 results and updates dst.
 */
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];
    const uint8_t *const row_below = src + stride;

    put_h264_qpel16_h_lowpass_mmi(hfilt, row_below, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
3067 
/*
 * avg, 16x16, mc33.
 *
 * 1. put_h264_qpel16_h_lowpass_mmi is run on the row below src
 *    (src + stride) and writes a packed 16x16 buffer (pitch 16).
 * 2. copy_block16_mmi gathers 21 rows, starting two rows above src and one
 *    byte to the right, into a packed buffer; put_h264_qpel16_v_lowpass_mmi
 *    is then run from byte offset 32 of that copy (its third row).
 * 3. ff_avg_pixels16_l2_8_mmi takes both 16x16 results and updates dst.
 */
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t hfilt[16 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t tall[16 * 21];
    uint8_t *const tall_mid = &tall[2 * 16];
    const uint8_t *const row_below = src + stride;

    put_h264_qpel16_h_lowpass_mmi(hfilt, row_below, 16, stride);
    copy_block16_mmi(tall, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, tall_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
}
3080 
/*
 * avg, 16x16, mc22.
 *
 * Single call to avg_h264_qpel16_hv_lowpass_mmi targeting dst directly.
 * An 8-byte-aligned array of 384 16-bit elements is supplied as the
 * filter's second argument; stride is passed for both pitch arguments.
 */
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) scratch[384];

    avg_h264_qpel16_hv_lowpass_mmi(dst, scratch, src, stride, 16, stride);
}
3088 
/*
 * avg, 16x16, mc21.
 *
 * One 1024-byte, 8-byte-aligned scratch area is split in two: the first
 * 256 bytes receive the output of put_h264_qpel16_hv_lowpass_mmi, the rest
 * is handed to that filter as its int16_t second argument.
 * avg_h264_qpel16_h_lowpass_l2_mmi then takes src together with the
 * 256-byte result and updates dst.
 */
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src, hv_out, stride, 16);
}
3099 
/*
 * avg, 16x16, mc23.
 *
 * One 1024-byte, 8-byte-aligned scratch area is split in two: the first
 * 256 bytes receive the output of put_h264_qpel16_hv_lowpass_mmi, the rest
 * is handed to that filter as its int16_t second argument.
 * avg_h264_qpel16_h_lowpass_l2_mmi then takes the row below src
 * (src + stride) together with the 256-byte result and updates dst.
 */
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];
    const uint8_t *row_below;

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    row_below = src + stride;
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, row_below, hv_out, stride, 16);
}
3110 
/*
 * avg, 16x16, mc12.
 *
 * put_h264_qpel16_hv_lowpass_mmi writes its 8-bit output into the first
 * 256 bytes of an aligned 1024-byte scratch area and is given the remainder
 * as its int16_t second argument. avg_pixels16_l2_shift5_mmi then reads
 * that int16_t area starting at element 2, plus the 8-bit output, and
 * updates dst.
 */
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    avg_pixels16_l2_shift5_mmi(dst, &v16[2], hv_out, stride, 16, 16);
}
3121 
/*
 * avg, 16x16, mc32.
 *
 * put_h264_qpel16_hv_lowpass_mmi writes its 8-bit output into the first
 * 256 bytes of an aligned 1024-byte scratch area and is given the remainder
 * as its int16_t second argument. avg_pixels16_l2_shift5_mmi then reads
 * that int16_t area starting at element 3, plus the 8-bit output, and
 * updates dst.
 */
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
                                 ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_out = &scratch[0];
    int16_t *const v16 = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v16, src, 16, 16, stride);
    avg_pixels16_l2_shift5_mmi(dst, &v16[3], hv_out, stride, 16, 16);
}
3132 
3133 #undef op2_avg
3134 #undef op2_put
3135