1 /*
2 * Loongson SIMD optimized h264qpel
3 *
4 * Copyright (c) 2015 Loongson Technology Corporation Limited
5 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "h264dsp_mips.h"
25 #include "hpeldsp_mips.h"
26 #include "libavcodec/bit_depth_template.c"
27 #include "libavutil/mips/mmiutils.h"
28
copy_block4_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride,int h)29 static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
30 int dstStride, int srcStride, int h)
31 {
32 double ftmp[1];
33 DECLARE_VAR_LOW32;
34
35 __asm__ volatile (
36 "1: \n\t"
37 MMI_ULWC1(%[ftmp0], %[src], 0x00)
38 MMI_SWC1(%[ftmp0], %[dst], 0x00)
39 "addi %[h], %[h], -0x01 \n\t"
40 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
41 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
42 "bnez %[h], 1b \n\t"
43 : [ftmp0]"=&f"(ftmp[0]),
44 [dst]"+&r"(dst), [src]"+&r"(src),
45 RESTRICT_ASM_LOW32
46 [h]"+&r"(h)
47 : [dstStride]"r"((mips_reg)dstStride),
48 [srcStride]"r"((mips_reg)srcStride)
49 : "memory"
50 );
51 }
52
copy_block8_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride,int h)53 static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
54 int dstStride, int srcStride, int h)
55 {
56 double ftmp[1];
57 DECLARE_VAR_ALL64;
58
59 __asm__ volatile (
60 "1: \n\t"
61 MMI_ULDC1(%[ftmp0], %[src], 0x00)
62 MMI_SDC1(%[ftmp0], %[dst], 0x00)
63 "addi %[h], %[h], -0x01 \n\t"
64 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
65 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
66 "bnez %[h], 1b \n\t"
67 : [ftmp0]"=&f"(ftmp[0]),
68 RESTRICT_ASM_ALL64
69 [dst]"+&r"(dst), [src]"+&r"(src),
70 [h]"+&r"(h)
71 : [dstStride]"r"((mips_reg)dstStride),
72 [srcStride]"r"((mips_reg)srcStride)
73 : "memory"
74 );
75 }
76
copy_block16_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride,int h)77 static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
78 int dstStride, int srcStride, int h)
79 {
80 double ftmp[1];
81 uint64_t tmp[1];
82 DECLARE_VAR_ALL64;
83
84 __asm__ volatile (
85 "1: \n\t"
86 MMI_ULDC1(%[ftmp0], %[src], 0x00)
87 "ldl %[tmp0], 0x0f(%[src]) \n\t"
88 "ldr %[tmp0], 0x08(%[src]) \n\t"
89 MMI_SDC1(%[ftmp0], %[dst], 0x00)
90 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
91 "sdr %[tmp0], 0x08(%[dst]) \n\t"
92 "addi %[h], %[h], -0x01 \n\t"
93 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
94 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
95 "bnez %[h], 1b \n\t"
96 : [ftmp0]"=&f"(ftmp[0]),
97 [tmp0]"=&r"(tmp[0]),
98 RESTRICT_ASM_ALL64
99 [dst]"+&r"(dst), [src]"+&r"(src),
100 [h]"+&r"(h)
101 : [dstStride]"r"((mips_reg)dstStride),
102 [srcStride]"r"((mips_reg)srcStride)
103 : "memory"
104 );
105 }
106
/*
 * Store helpers taking a destination lvalue a and an intermediate value b.
 *   op2_put: a = CLIP((b + 512) >> 10)
 *   op2_avg: a = (a + CLIP((b + 512) >> 10) + 1) >> 1
 * Both expand to a bare assignment expression and op2_avg evaluates a twice,
 * so a must be a side-effect-free lvalue.
 * NOTE(review): CLIP is not defined in this part of the file; presumably it
 * comes from an included template header -- confirm.
 */
#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
#define op2_put(a, b) a = CLIP(((b) + 512)>>10)
/*
 * Horizontal 6-tap lowpass over 4 rows, result stored to dst ("put").
 *
 * For every row, six loads at src-2 .. src+3 are widened from bytes to
 * halfwords and combined as
 *     (s[0] + s[1]) * ff_pw_20 - (s[-1] + s[2]) * ff_pw_5
 *         + (s[-2] + s[3]) + ff_pw_16
 * The sum is shifted right arithmetically (psrah, shift operand ff_pw_5),
 * packed back to bytes (packushb) and written with MMI_SWC1.
 * src and dst advance by srcStride / dstStride after each row.
 */
static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    double ftmp[10];
    uint64_t tmp[1];
    DECLARE_VAR_LOW32;

    __asm__ volatile (
        /* ftmp0 = 0, used as the zero half for unpack/pack */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        /* tmp0 = row counter (4 rows) */
        "dli %[tmp0], 0x04 \n\t"
        "1: \n\t"
        /* six overlapping loads: ftmp1..ftmp6 start at src-2 .. src+3 */
        MMI_ULWC1(%[ftmp1], %[src], -0x02)
        MMI_ULWC1(%[ftmp2], %[src], -0x01)
        MMI_ULWC1(%[ftmp3], %[src], 0x00)
        MMI_ULWC1(%[ftmp4], %[src], 0x01)
        MMI_ULWC1(%[ftmp5], %[src], 0x02)
        MMI_ULWC1(%[ftmp6], %[src], 0x03)

        /* interleave each with zero: bytes -> halfwords */
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        /* symmetric tap pairs: centre, inner, outer */
        "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
        "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
        "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
        /* centre * ff_pw_20 - inner * ff_pw_5 + outer + ff_pw_16 */
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
        "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
        "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
        "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
        /* scale down, pack halfwords back to bytes and store the row */
        "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
        "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
        MMI_SWC1(%[ftmp9], %[dst], 0x00)
        /* next row */
        "daddi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        "bnez %[tmp0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_LOW32
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [dstStride]"r"((mips_reg)dstStride),
          [srcStride]"r"((mips_reg)srcStride),
          [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
          [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
163
/*
 * Horizontal 6-tap lowpass over 8 rows, result stored to dst ("put").
 *
 * Per row, six MMI_ULDC1 loads at src-2 .. src+3 are each split into a low
 * and a high half (punpcklbh / punpckhbh against zero) and combined as
 *     (s[0] + s[1]) * ff_pw_20 - (s[-1] + s[2]) * ff_pw_5
 *         + (s[-2] + s[3]) + ff_pw_16
 * Both halves are then shifted right (psrah, shift operand ff_pw_5), packed
 * into one register (packushb) and stored with MMI_SDC1.
 * src and dst advance by srcStride / dstStride after each row.
 */
static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    double ftmp[11];
    uint64_t tmp[1];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        /* ftmp0 = 0 for the byte -> halfword unpacks */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        /* tmp0 = row counter (8 rows) */
        "dli %[tmp0], 0x08 \n\t"
        "1: \n\t"
        /* six overlapping loads: ftmp1..ftmp6 start at src-2 .. src+3 */
        MMI_ULDC1(%[ftmp1], %[src], -0x02)
        MMI_ULDC1(%[ftmp2], %[src], -0x01)
        MMI_ULDC1(%[ftmp3], %[src], 0x00)
        MMI_ULDC1(%[ftmp4], %[src], 0x01)
        MMI_ULDC1(%[ftmp5], %[src], 0x02)
        MMI_ULDC1(%[ftmp6], %[src], 0x03)
        /* centre pair (s[0] + s[1]) * ff_pw_20 -> ftmp3 (low), ftmp4 (high) */
        "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
        "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
        "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
        "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
        "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
        /* inner pair (s[-1] + s[2]) * ff_pw_5 -> ftmp2 (low), ftmp5 (high) */
        "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
        "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
        "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
        "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
        /* outer pair (s[-2] + s[3]) -> ftmp1 (low), ftmp6 (high) */
        "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
        "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
        "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
        /* centre - inner + outer + ff_pw_16, for both halves */
        "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
        "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
        "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
        /* scale down, merge the two halves into bytes and store the row */
        "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
        "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
        "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
        MMI_SDC1(%[ftmp9], %[dst], 0x00)
        /* next row */
        "daddi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        "bnez %[tmp0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [dstStride]"r"((mips_reg)dstStride),
          [srcStride]"r"((mips_reg)srcStride),
          [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
          [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
233
/*
 * 16x16 horizontal lowpass ("put"), built from four calls to the 8-wide
 * kernel: upper-left, upper-right, lower-left, lower-right, in that order.
 */
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* start of the lower half, eight rows below the block origin */
    uint8_t *const dst_lower       = dst + 8 * dstStride;
    const uint8_t *const src_lower = src + 8 * srcStride;

    put_h264_qpel8_h_lowpass_mmi(dst,           src,           dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst + 8,       src + 8,       dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst_lower,     src_lower,     dstStride, srcStride);
    put_h264_qpel8_h_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride, srcStride);
}
244
/*
 * Horizontal 6-tap lowpass over 4 rows, averaged into dst ("avg").
 *
 * Per row the filter value is computed as
 *     (s[0] + s[1]) * ff_pw_20 - (s[-1] + s[2]) * ff_pw_5
 *         + (s[-2] + s[3]) + ff_pw_16
 * shifted right (psrah, shift operand ff_pw_5) and packed to bytes. The
 * packed result is then combined with the bytes already in dst using pavgb
 * and written back with MMI_SWC1.
 * src and dst advance by srcStride / dstStride after each row.
 */
static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    double ftmp[11];
    uint64_t tmp[1];
    DECLARE_VAR_LOW32;

    __asm__ volatile (
        /* ftmp0 = 0, used as the zero half for unpack/pack */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        /* tmp0 = row counter (4 rows) */
        "dli %[tmp0], 0x04 \n\t"
        "1: \n\t"
        /* six overlapping loads: ftmp1..ftmp6 start at src-2 .. src+3 */
        MMI_ULWC1(%[ftmp1], %[src], -0x02)
        MMI_ULWC1(%[ftmp2], %[src], -0x01)
        MMI_ULWC1(%[ftmp3], %[src], 0x00)
        MMI_ULWC1(%[ftmp4], %[src], 0x01)
        MMI_ULWC1(%[ftmp5], %[src], 0x02)
        MMI_ULWC1(%[ftmp6], %[src], 0x03)
        /* interleave each with zero: bytes -> halfwords */
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        /* symmetric tap pairs: centre, inner, outer */
        "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
        "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
        "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
        /* centre * ff_pw_20 - inner * ff_pw_5 + outer + ff_pw_16 */
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
        "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
        "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
        "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
        /* scale down and pack halfwords back to bytes */
        "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
        "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
        /* average with the existing destination row, then store */
        MMI_LWC1(%[ftmp10], %[dst], 0x00)
        "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
        MMI_SWC1(%[ftmp9], %[dst], 0x00)
        /* next row */
        "daddi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        "bnez %[tmp0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_LOW32
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [dstStride]"r"((mips_reg)dstStride),
          [srcStride]"r"((mips_reg)srcStride),
          [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
          [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
301
/*
 * Horizontal 6-tap lowpass over 8 rows, averaged into dst ("avg").
 *
 * Per row, six MMI_ULDC1 loads at src-2 .. src+3 are each split into a low
 * and a high half (punpcklbh / punpckhbh against zero) and combined as
 *     (s[0] + s[1]) * ff_pw_20 - (s[-1] + s[2]) * ff_pw_5
 *         + (s[-2] + s[3]) + ff_pw_16
 * Both halves are shifted right (psrah, shift operand ff_pw_5) and packed
 * into one register. That result is combined with the current contents of
 * dst using pavgb and stored with MMI_SDC1. Note the destination is read
 * with MMI_LDC1, not the MMI_ULDC1 form used for src.
 * src and dst advance by srcStride / dstStride after each row.
 */
static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    double ftmp[11];
    uint64_t tmp[1];
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        /* ftmp0 = 0 for the byte -> halfword unpacks */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        /* tmp0 = row counter (8 rows) */
        "dli %[tmp0], 0x08 \n\t"
        "1: \n\t"
        /* six overlapping loads: ftmp1..ftmp6 start at src-2 .. src+3 */
        MMI_ULDC1(%[ftmp1], %[src], -0x02)
        MMI_ULDC1(%[ftmp2], %[src], -0x01)
        MMI_ULDC1(%[ftmp3], %[src], 0x00)
        MMI_ULDC1(%[ftmp4], %[src], 0x01)
        MMI_ULDC1(%[ftmp5], %[src], 0x02)
        MMI_ULDC1(%[ftmp6], %[src], 0x03)
        /* centre pair (s[0] + s[1]) * ff_pw_20 -> ftmp3 (low), ftmp4 (high) */
        "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
        "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
        "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
        "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
        "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
        /* inner pair (s[-1] + s[2]) * ff_pw_5 -> ftmp2 (low), ftmp5 (high) */
        "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
        "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
        "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
        "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
        "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
        /* outer pair (s[-2] + s[3]) -> ftmp1 (low), ftmp6 (high) */
        "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
        "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
        "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
        "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
        /* centre - inner + outer + ff_pw_16, for both halves */
        "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
        "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
        "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
        "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
        "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
        /* scale down and merge the two halves into bytes */
        "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
        "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
        "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
        /* average with the existing destination row, then store */
        MMI_LDC1(%[ftmp10], %[dst], 0x00)
        "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
        MMI_SDC1(%[ftmp9], %[dst], 0x00)
        /* next row */
        "daddi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        "bnez %[tmp0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_ALL64
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [dstStride]"r"((mips_reg)dstStride),
          [srcStride]"r"((mips_reg)srcStride),
          [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
          [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
373
/*
 * 16x16 horizontal lowpass ("avg"), built from four calls to the 8-wide
 * kernel: upper-left, upper-right, lower-left, lower-right, in that order.
 */
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* start of the lower half, eight rows below the block origin */
    uint8_t *const dst_lower       = dst + 8 * dstStride;
    const uint8_t *const src_lower = src + 8 * srcStride;

    avg_h264_qpel8_h_lowpass_mmi(dst,           src,           dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst + 8,       src + 8,       dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst_lower,     src_lower,     dstStride, srcStride);
    avg_h264_qpel8_h_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride, srcStride);
}
384
/*
 * Vertical 6-tap lowpass producing 4 output rows, stored to dst ("put").
 *
 * src is first moved up two rows. Five rows are preloaded into ftmp1..ftmp5
 * and widened to halfwords; each output row then loads one further source
 * row and evaluates, with r0..r5 being six consecutive source rows,
 *     ((((r2 + r3) << 2) - r1 - r4) * ff_pw_5 + r0 + r5 + ff_pw_16) >> 5
 * before packing to bytes and storing with MMI_SWC1. The registers holding
 * the rows rotate from one output row to the next, so the code is fully
 * unrolled. Nine source rows are read in total.
 *
 * The block runs under ".set noreorder", so the instruction order below is
 * exactly what executes. ftmp8 and ftmp9 are declared as outputs but are
 * not referenced by the template.
 */
static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    double ftmp[12];
    uint64_t tmp[1];
    DECLARE_VAR_LOW32;

    /* the filter needs two rows above the first output row */
    src -= 2 * srcStride;

    __asm__ volatile (
        ".set push \n\t"
        ".set noreorder \n\t"
        /* ftmp0 = 0, ftmp10 = shift count 2, ftmp11 = shift count 5 */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "dli %[tmp0], 0x02 \n\t"
        MMI_LWC1(%[ftmp1], %[src], 0x00)
        "mtc1 %[tmp0], %[ftmp10] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        "dli %[tmp0], 0x05 \n\t"
        MMI_LWC1(%[ftmp2], %[src], 0x00)
        "mtc1 %[tmp0], %[ftmp11] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        MMI_LWC1(%[ftmp3], %[src], 0x00)
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        MMI_LWC1(%[ftmp4], %[src], 0x00)
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        MMI_LWC1(%[ftmp5], %[src], 0x00)
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        /* widen the five preloaded rows to halfwords */
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        /* output row 0: taps ftmp1..ftmp6 (ftmp6 loaded here) */
        MMI_LWC1(%[ftmp6], %[src], 0x00)
        "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
        "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
        "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
        "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        MMI_SWC1(%[ftmp7], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        /* output row 1: taps ftmp2..ftmp6, ftmp1 (ftmp1 reloaded) */
        MMI_LWC1(%[ftmp1], %[src], 0x00)
        "paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t"
        "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
        "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
        "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        MMI_SWC1(%[ftmp7], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        /* output row 2: taps ftmp3..ftmp6, ftmp1, ftmp2 (ftmp2 reloaded) */
        MMI_LWC1(%[ftmp2], %[src], 0x00)
        "paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t"
        "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
        "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
        "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
        "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        MMI_SWC1(%[ftmp7], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        /* output row 3: taps ftmp4..ftmp6, ftmp1..ftmp3 (ftmp3 reloaded) */
        MMI_LWC1(%[ftmp3], %[src], 0x00)
        "paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t"
        "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
        "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
        "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
        "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
        "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
        MMI_SWC1(%[ftmp7], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        ".set pop \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_LOW32
          [dst]"+&r"(dst), [src]"+&r"(src)
        : [dstStride]"r"((mips_reg)dstStride),
          [srcStride]"r"((mips_reg)srcStride),
          [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
493
/*
 * Vertical 6-tap lowpass over an 8x8 block, stored to dst ("put").
 *
 * The block is processed as two passes (w = 2), each handling one column
 * strip that is one MMI_LWC1 / MMI_SWC1 access wide; the strips are 4 bytes
 * apart. src is first moved up two rows. Inside a pass, five rows are
 * preloaded and every output row loads one more source row and evaluates,
 * with r0..r5 being six consecutive source rows,
 *     ((((r2 + r3) << 2) - r1 - r4) * ff_pw_5 + r0 + r5 + ff_pw_16) >> 5
 * before packing to bytes and storing. The six row registers
 * (ftmp0..ftmp5) rotate from one output row to the next, hence the full
 * unrolling.
 *
 * The template also contains code for output rows 8..15, guarded by
 * "bne %[h], 0x10, 2f". h is initialised to 8 and no instruction in the
 * template writes it, so in this function that branch is always taken and
 * only 8 rows are produced per pass.
 *
 * The block runs under ".set noreorder": instruction order is exactly what
 * executes, and the PTR_ADDU on dst right after the bne sits in its delay
 * slot. The pointer rewind after the asm relies on that (dst has advanced
 * h rows, src h + 5 rows).
 */
static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    int w = 2;
    int h = 8;
    double ftmp[10];
    uint64_t tmp[1];
    DECLARE_VAR_LOW32;

    /* the filter needs two rows above the first output row */
    src -= 2 * srcStride;

    while (w--) {
        __asm__ volatile (
            ".set push \n\t"
            ".set noreorder \n\t"
            /* ftmp8 = shift count 2, ftmp9 = shift count 5, ftmp7 = 0 */
            "dli %[tmp0], 0x02 \n\t"
            MMI_LWC1(%[ftmp0], %[src], 0x00)
            "mtc1 %[tmp0], %[ftmp8] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "dli %[tmp0], 0x05 \n\t"
            MMI_LWC1(%[ftmp1], %[src], 0x00)
            "mtc1 %[tmp0], %[ftmp9] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            MMI_LWC1(%[ftmp2], %[src], 0x00)
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
            MMI_LWC1(%[ftmp3], %[src], 0x00)
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            MMI_LWC1(%[ftmp4], %[src], 0x00)
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            /* output row 0 (unpacking of the preloaded rows is interleaved) */
            "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
            "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
            MMI_LWC1(%[ftmp5], %[src], 0x00)
            "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
            "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            /* output row 1 (new source row -> ftmp0) */
            MMI_LWC1(%[ftmp0], %[src], 0x00)
            "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            /* output row 2 (new source row -> ftmp1) */
            "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
            MMI_LWC1(%[ftmp1], %[src], 0x00)
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 3 (new source row -> ftmp2) */
            "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp2], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 4 (new source row -> ftmp3) */
            "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp3], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 5 (new source row -> ftmp4) */
            "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp4], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 6 (new source row -> ftmp5) */
            "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp5], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 7 (new source row -> ftmp0) */
            "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp0], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /*
             * Skip rows 8..15 unless h == 16. The dst increment below is in
             * the branch delay slot and executes either way.
             */
            "bne %[h], 0x10, 2f \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            /* output row 8 (new source row -> ftmp1) */
            "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
            MMI_LWC1(%[ftmp1], %[src], 0x00)
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 9 (new source row -> ftmp2) */
            "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp2], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 10 (new source row -> ftmp3) */
            "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp3], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 11 (new source row -> ftmp4) */
            "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp4], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 12 (new source row -> ftmp5) */
            "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp5], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 13 (new source row -> ftmp0) */
            "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp0], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
            "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 14 (new source row -> ftmp1) */
            "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp1], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
            "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            /* output row 15 (new source row -> ftmp2) */
            "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
            MMI_LWC1(%[ftmp2], %[src], 0x00)
            "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
            "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
            "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
            "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
            PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
            "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
            "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
            "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
            MMI_SWC1(%[ftmp6], %[dst], 0x00)
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "2: \n\t"
            ".set pop \n\t"
            : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
              [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
              [tmp0]"=&r"(tmp[0]),
              RESTRICT_ASM_LOW32
              [src]"+&r"(src), [dst]"+&r"(dst),
              [h]"+&r"(h)
            : [dstStride]"r"((mips_reg)dstStride),
              [srcStride]"r"((mips_reg)srcStride),
              [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
            : "memory"
        );

        /*
         * Rewind to the top of the block and step 4 bytes right for the
         * second strip: the asm advanced src by h + 5 rows and dst by h rows.
         */
        src += 4 - (h + 5) * srcStride;
        dst += 4 - h * dstStride;
    }
}
791
/*
 * 16-wide, 16-row vertical lowpass (put variant).
 *
 * Delegates to the 8-wide helper once per quadrant, visiting them in the
 * order top-left, top-right, bottom-left, bottom-right. Quadrants are 8
 * columns and 8 rows apart in both dst and src.
 */
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    int row, col;

    for (row = 0; row < 16; row += 8) {
        for (col = 0; col < 16; col += 8) {
            put_h264_qpel8_v_lowpass_mmi(dst + row * dstStride + col,
                                         src + row * srcStride + col,
                                         dstStride, srcStride);
        }
    }
}
802
/*
 * Vertical 6-tap lowpass over a 4-pixel-wide strip, four output rows,
 * averaged into dst.
 *
 * src is first moved up by two rows; the asm then loads nine consecutive
 * source rows (one MMI_LWC1 per row) and produces four output rows. For six
 * consecutive rows a..f each output row is computed per halfword as
 *     ((((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
 * then packed to bytes (packushb), combined with the bytes already present
 * in dst (pavgb) and stored back to dst.
 * NOTE(review): assuming ff_pw_5 / ff_pw_16 hold 5 and 16 in every halfword
 * (they are defined elsewhere - confirm), this equals
 *     (20*(c + d) - 5*(b + e) + a + f + 16) >> 5.
 *
 * dst / dstStride: destination pointer and row pitch; dst is read and written.
 * src / srcStride: source pointer and row pitch.
 */
avg_h264_qpel4_v_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)803 static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
804 int dstStride, int srcStride)
805 {
806 double ftmp[10];
807 uint64_t tmp[1];
808
/* The filter needs two rows above the first output row. */
809 src -= 2 * srcStride;
810
811 __asm__ volatile (
812 ".set push \n\t"
813 ".set noreorder \n\t"
/* ftmp9 = 2 (left-shift count), ftmp8 = 5 (right-shift count), ftmp7 = 0. */
814 "dli %[tmp0], 0x02 \n\t"
815 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
816 "mtc1 %[tmp0], %[ftmp9] \n\t"
817 "dli %[tmp0], 0x05 \n\t"
/* Load source rows 0..4 into ftmp0..ftmp4, advancing src one row each time. */
818 MMI_LWC1(%[ftmp0], %[src], 0x00)
819 "mtc1 %[tmp0], %[ftmp8] \n\t"
820 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
821 MMI_LWC1(%[ftmp1], %[src], 0x00)
822 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
823 MMI_LWC1(%[ftmp2], %[src], 0x00)
824 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
825 MMI_LWC1(%[ftmp3], %[src], 0x00)
826 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
827 MMI_LWC1(%[ftmp4], %[src], 0x00)
828 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
/* Interleave each row's bytes with the zero register to get halfwords. */
829 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
830 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
831 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
832 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
833 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
/* Output row 0: taps a..f = ftmp0, ftmp1, ftmp2, ftmp3, ftmp4, ftmp5. */
834 MMI_LWC1(%[ftmp5], %[src], 0x00)
835 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
836 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
837 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
838 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
839 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
840 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
841 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
842 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
843 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
844 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
845 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
846 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
/* Tap a is no longer needed, so its register is reused for the dst bytes. */
847 MMI_LWC1(%[ftmp0], %[dst], 0x00)
848 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
849 MMI_SWC1(%[ftmp6], %[dst], 0x00)
850 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 1: taps a..f = ftmp1, ftmp2, ftmp3, ftmp4, ftmp5, ftmp0. */
851 MMI_LWC1(%[ftmp0], %[src], 0x00)
852 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
853 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
854 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
855 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
856 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
857 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
858 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
859 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
860 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
861 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
862 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
863 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
864 MMI_LWC1(%[ftmp1], %[dst], 0x00)
865 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
866 MMI_SWC1(%[ftmp6], %[dst], 0x00)
867 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 2: taps a..f = ftmp2, ftmp3, ftmp4, ftmp5, ftmp0, ftmp1. */
868 MMI_LWC1(%[ftmp1], %[src], 0x00)
869 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
870 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
871 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
872 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
873 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
874 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
875 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
876 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
877 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
878 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
879 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
880 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
881 MMI_LWC1(%[ftmp2], %[dst], 0x00)
882 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
883 MMI_SWC1(%[ftmp6], %[dst], 0x00)
884 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 3: taps a..f = ftmp3, ftmp4, ftmp5, ftmp0, ftmp1, ftmp2. */
885 MMI_LWC1(%[ftmp2], %[src], 0x00)
886 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
887 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
888 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
889 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
890 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
891 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
892 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
893 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
894 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
895 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
896 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
897 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
898 MMI_LWC1(%[ftmp3], %[dst], 0x00)
899 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
900 MMI_SWC1(%[ftmp6], %[dst], 0x00)
901 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
902 ".set pop \n\t"
/* All temporaries are early-clobber outputs; src and dst are updated in place. */
903 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
904 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
905 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
906 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
907 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
908 [tmp0]"=&r"(tmp[0]),
909 [src]"+&r"(src), [dst]"+&r"(dst)
910 : [dstStride]"r"((mips_reg)dstStride),
911 [srcStride]"r"((mips_reg)srcStride),
912 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
913 : "memory"
914 );
915 }
916
/*
 * Vertical 6-tap lowpass over an 8-pixel-wide, 8-row block, averaged into dst.
 *
 * The block is handled as two column strips (w = 2); each pass of the while
 * loop runs the asm once on a strip and then steps src and dst 4 bytes to the
 * right. Within a strip, for six consecutive source rows a..f each output row
 * is computed per halfword as
 *     ((((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
 * then packed to bytes (packushb), combined with the bytes already present
 * in dst (pavgb) and stored back to dst.
 * NOTE(review): assuming ff_pw_5 / ff_pw_16 hold 5 and 16 in every halfword
 * (they are defined elsewhere - confirm), this equals
 *     (20*(c + d) - 5*(b + e) + a + f + 16) >> 5.
 *
 * The six taps rotate through ftmp0..ftmp5: output row n takes a..f from
 * ftmp[(n + k) % 6] for k = 0..5, and the register that held tap a is reused
 * first for the dst bytes and then for the next source row.
 *
 * The asm is unrolled for 16 rows, but rows 8..15 are only reached when
 * h == 16; h is fixed at 8 in this function.
 *
 * dst / dstStride: destination pointer and row pitch; dst is read and written.
 * src / srcStride: source pointer and row pitch.
 */
avg_h264_qpel8_v_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)917 static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
918 int dstStride, int srcStride)
919 {
920 int w = 2;
921 int h = 8;
922 double ftmp[10];
923 uint64_t tmp[1];
924 DECLARE_VAR_LOW32;
925
/* The filter needs two rows above the first output row. */
926 src -= 2 * srcStride;
927
928 while (w--) {
929 __asm__ volatile (
930 ".set push \n\t"
931 ".set noreorder \n\t"
/* ftmp9 = 2 (left-shift count), ftmp8 = 5 (right-shift count), ftmp7 = 0. */
932 "dli %[tmp0], 0x02 \n\t"
933 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
934 "mtc1 %[tmp0], %[ftmp9] \n\t"
935 "dli %[tmp0], 0x05 \n\t"
/* Load source rows 0..4 into ftmp0..ftmp4, advancing src one row each time. */
936 MMI_LWC1(%[ftmp0], %[src], 0x00)
937 "mtc1 %[tmp0], %[ftmp8] \n\t"
938 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
939 MMI_LWC1(%[ftmp1], %[src], 0x00)
940 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
941 MMI_LWC1(%[ftmp2], %[src], 0x00)
942 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
943 MMI_LWC1(%[ftmp3], %[src], 0x00)
944 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
945 MMI_LWC1(%[ftmp4], %[src], 0x00)
946 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
/* Interleave each row's bytes with the zero register to get halfwords. */
947 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
948 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
949 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
950 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
951 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
/* Output row 0 (tap a in ftmp0, new row f loaded into ftmp5). */
952 MMI_LWC1(%[ftmp5], %[src], 0x00)
953 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
954 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
955 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
956 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
957 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
958 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
959 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
960 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
961 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
962 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
963 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
964 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
/* Tap a is no longer needed, so its register is reused for the dst bytes. */
965 MMI_LWC1(%[ftmp0], %[dst], 0x00)
966 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
967 MMI_SWC1(%[ftmp6], %[dst], 0x00)
968 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 1 (tap a in ftmp1, new row f loaded into ftmp0). */
969 MMI_LWC1(%[ftmp0], %[src], 0x00)
970 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
971 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
972 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
973 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
974 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
975 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
976 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
977 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
978 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
979 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
980 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
981 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
982 MMI_LWC1(%[ftmp1], %[dst], 0x00)
983 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
984 MMI_SWC1(%[ftmp6], %[dst], 0x00)
985 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 2 (tap a in ftmp2, new row f loaded into ftmp1). */
986 MMI_LWC1(%[ftmp1], %[src], 0x00)
987 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
988 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
989 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
990 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
991 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
992 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
993 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
994 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
995 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
996 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
997 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
998 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
999 MMI_LWC1(%[ftmp2], %[dst], 0x00)
1000 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1001 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1002 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 3 (tap a in ftmp3, new row f loaded into ftmp2). */
1003 MMI_LWC1(%[ftmp2], %[src], 0x00)
1004 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1005 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1006 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1007 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1008 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1009 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1010 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1011 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1012 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1013 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1014 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1015 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1016 MMI_LWC1(%[ftmp3], %[dst], 0x00)
1017 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1018 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1019 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 4 (tap a in ftmp4, new row f loaded into ftmp3). */
1020 MMI_LWC1(%[ftmp3], %[src], 0x00)
1021 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1022 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1023 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1024 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1025 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1026 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1027 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1028 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1029 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1030 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1031 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1032 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1033 MMI_LWC1(%[ftmp4], %[dst], 0x00)
1034 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1035 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1036 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 5 (tap a in ftmp5, new row f loaded into ftmp4). */
1037 MMI_LWC1(%[ftmp4], %[src], 0x00)
1038 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1039 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1040 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1041 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1042 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1043 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1044 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1045 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1046 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1047 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1048 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1049 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1050 MMI_LWC1(%[ftmp5], %[dst], 0x00)
1051 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1052 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1053 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 6 (tap a in ftmp0, new row f loaded into ftmp5). */
1054 MMI_LWC1(%[ftmp5], %[src], 0x00)
1055 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1056 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1057 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1058 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1059 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1060 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1061 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1062 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1063 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1064 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1065 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1066 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1067 MMI_LWC1(%[ftmp0], %[dst], 0x00)
1068 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1069 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1070 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 7 (tap a in ftmp1, new row f loaded into ftmp0). */
1071 MMI_LWC1(%[ftmp0], %[src], 0x00)
1072 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1073 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1074 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1075 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1076 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1077 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1078 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1079 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1080 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1081 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1082 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1083 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1084 MMI_LWC1(%[ftmp1], %[dst], 0x00)
1085 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1086 MMI_SWC1(%[ftmp6], %[dst], 0x00)
/*
 * Stop after 8 rows unless h == 16.
 * NOTE(review): under .set noreorder the PTR_ADDU on dst that follows appears
 * to sit in the branch delay slot and so runs on both paths; the rewind by
 * h * dstStride after the asm relies on dst having advanced 8 rows - confirm.
 */
1087 "bne %[h], 0x10, 2f \n\t"
1088 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 8 (only reached when h == 16). */
1089 MMI_LWC1(%[ftmp1], %[src], 0x00)
1090 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1091 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1092 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1093 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1094 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1095 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1096 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1097 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1098 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1099 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1100 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1101 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1102 MMI_LWC1(%[ftmp2], %[dst], 0x00)
1103 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1104 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1105 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 9. */
1106 MMI_LWC1(%[ftmp2], %[src], 0x00)
1107 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1108 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1109 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1110 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1111 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1112 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1113 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1114 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1115 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1116 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1117 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1118 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1119 MMI_LWC1(%[ftmp3], %[dst], 0x00)
1120 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1121 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1122 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 10. */
1123 MMI_LWC1(%[ftmp3], %[src], 0x00)
1124 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1125 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1126 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1127 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1128 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1129 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1130 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1131 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1132 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1133 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1134 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1135 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1136 MMI_LWC1(%[ftmp4], %[dst], 0x00)
1137 "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1138 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1139 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 11. */
1140 MMI_LWC1(%[ftmp4], %[src], 0x00)
1141 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1142 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1143 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1144 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1145 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1146 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1147 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1148 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1149 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1150 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1151 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1152 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1153 MMI_LWC1(%[ftmp5], %[dst], 0x00)
1154 "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1155 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1156 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 12. */
1157 MMI_LWC1(%[ftmp5], %[src], 0x00)
1158 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1159 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1160 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1161 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1162 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1163 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1164 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1165 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1166 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1167 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1168 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1169 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1170 MMI_LWC1(%[ftmp0], %[dst], 0x00)
1171 "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1172 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1173 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 13. */
1174 MMI_LWC1(%[ftmp0], %[src], 0x00)
1175 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1176 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1177 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1178 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1179 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1180 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1181 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1182 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1183 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1184 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1185 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1186 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1187 MMI_LWC1(%[ftmp1], %[dst], 0x00)
1188 "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1189 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1190 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 14. */
1191 MMI_LWC1(%[ftmp1], %[src], 0x00)
1192 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1193 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1194 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1195 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1196 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1197 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1198 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1199 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1200 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1201 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1202 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1203 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1204 MMI_LWC1(%[ftmp2], %[dst], 0x00)
1205 "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1206 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1207 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
/* Output row 15. */
1208 MMI_LWC1(%[ftmp2], %[src], 0x00)
1209 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1210 "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1211 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1212 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1213 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1214 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1215 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1216 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1217 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1218 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1219 "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1220 "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1221 MMI_LWC1(%[ftmp3], %[dst], 0x00)
1222 "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1223 MMI_SWC1(%[ftmp6], %[dst], 0x00)
1224 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1225 "2: \n\t"
1226 ".set pop \n\t"
/* All temporaries are early-clobber outputs; src, dst and h are read-write operands. */
1227 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1228 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1229 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1230 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1231 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1232 [tmp0]"=&r"(tmp[0]),
1233 RESTRICT_ASM_LOW32
1234 [src]"+&r"(src), [dst]"+&r"(dst),
1235 [h]"+&r"(h)
1236 : [dstStride]"r"((mips_reg)dstStride),
1237 [srcStride]"r"((mips_reg)srcStride),
1238 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1239 : "memory"
1240 );
1241
/*
 * Rewind to the top of the strip (the asm consumed h + 5 source rows and
 * wrote h destination rows) and step 4 bytes right to the next strip.
 */
1242 src += 4 - (h + 5) * srcStride;
1243 dst += 4 - h * dstStride;
1244 }
1245 }
1246
/*
 * 16-wide, 16-row vertical lowpass (avg variant).
 *
 * Delegates to the 8-wide helper once per quadrant, visiting them in the
 * order top-left, top-right, bottom-left, bottom-right. Quadrants are 8
 * columns and 8 rows apart in both dst and src.
 */
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    int row, col;

    for (row = 0; row < 16; row += 8) {
        for (col = 0; col < 16; col += 8) {
            avg_h264_qpel8_v_lowpass_mmi(dst + row * dstStride + col,
                                         src + row * srcStride + col,
                                         dstStride, srcStride);
        }
    }
}
1257
put_h264_qpel4_hv_lowpass_mmi(uint8_t * dst,const uint8_t * src,int dstStride,int srcStride)1258 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1259 int dstStride, int srcStride)
1260 {
1261 INIT_CLIP
1262 int i;
1263 int16_t _tmp[36];
1264 int16_t *tmp = _tmp;
1265 double ftmp[10];
1266 uint64_t tmp0;
1267 DECLARE_VAR_LOW32;
1268
1269 src -= 2*srcStride;
1270
1271 __asm__ volatile (
1272 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1273 "dli %[tmp0], 0x09 \n\t"
1274 "1: \n\t"
1275 MMI_ULWC1(%[ftmp1], %[src], -0x02)
1276 MMI_ULWC1(%[ftmp2], %[src], -0x01)
1277 MMI_ULWC1(%[ftmp3], %[src], 0x00)
1278 MMI_ULWC1(%[ftmp4], %[src], 0x01)
1279 MMI_ULWC1(%[ftmp5], %[src], 0x02)
1280 MMI_ULWC1(%[ftmp6], %[src], 0x03)
1281 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1282 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1283 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1284 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1285 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1286 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1287 "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1288 "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1289 "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1290 "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1291 "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1292 "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1293 "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
1294 MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1295 "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1296 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1297 PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1298 "bnez %[tmp0], 1b \n\t"
1299 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1300 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1301 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1302 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1303 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1304 [tmp0]"=&r"(tmp0),
1305 RESTRICT_ASM_LOW32
1306 [tmp]"+&r"(tmp), [src]"+&r"(src)
1307 : [tmpStride]"r"(8),
1308 [srcStride]"r"((mips_reg)srcStride),
1309 [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
1310 : "memory"
1311 );
1312
1313 tmp -= 28;
1314
1315 for (i=0; i<4; i++) {
1316 const int16_t tmpB= tmp[-8];
1317 const int16_t tmpA= tmp[-4];
1318 const int16_t tmp0= tmp[ 0];
1319 const int16_t tmp1= tmp[ 4];
1320 const int16_t tmp2= tmp[ 8];
1321 const int16_t tmp3= tmp[12];
1322 const int16_t tmp4= tmp[16];
1323 const int16_t tmp5= tmp[20];
1324 const int16_t tmp6= tmp[24];
1325 op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1326 op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1327 op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1328 op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1329 dst++;
1330 tmp++;
1331 }
1332 }
1333
put_h264_qpel8or16_hv1_lowpass_mmi(int16_t * tmp,const uint8_t * src,ptrdiff_t tmpStride,ptrdiff_t srcStride,int size)1334 static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
1335 const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
1336 {
1337 int w = (size + 8) >> 2;
1338 double ftmp[11];
1339 uint64_t tmp0;
1340 DECLARE_VAR_LOW32;
1341
1342 src -= 2 * srcStride + 2;
1343
1344 while (w--) {
1345 __asm__ volatile (
1346 "dli %[tmp0], 0x02 \n\t"
1347 MMI_ULWC1(%[ftmp0], %[src], 0x00)
1348 "mtc1 %[tmp0], %[ftmp10] \n\t"
1349 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1350 "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1351 MMI_ULWC1(%[ftmp1], %[src], 0x00)
1352 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1353 MMI_ULWC1(%[ftmp2], %[src], 0x00)
1354 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1355 MMI_ULWC1(%[ftmp3], %[src], 0x00)
1356 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1357 MMI_ULWC1(%[ftmp4], %[src], 0x00)
1358 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1359 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1360 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1361 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1362 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1363 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1364 MMI_ULWC1(%[ftmp5], %[src], 0x00)
1365 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1366 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1367 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1368 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1369 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1370 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1371 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1372 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1373 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1374 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1375 MMI_SDC1(%[ftmp6], %[tmp], 0x00)
1376 MMI_ULWC1(%[ftmp0], %[src], 0x00)
1377 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1378 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1379 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1380 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1381 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1382 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1383 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1384 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1385 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1386 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1387 MMI_SDC1(%[ftmp6], %[tmp], 0x30)
1388 MMI_ULWC1(%[ftmp1], %[src], 0x00)
1389 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1390 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1391 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1392 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1393 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1394 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1395 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1396 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1397 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1398 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1399 MMI_SDC1(%[ftmp6], %[tmp], 0x60)
1400 MMI_ULWC1(%[ftmp2], %[src], 0x00)
1401 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1402 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1403 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1404 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1405 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1406 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1407 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1408 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1409 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1410 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1411 MMI_SDC1(%[ftmp6], %[tmp], 0x90)
1412 MMI_ULWC1(%[ftmp3], %[src], 0x00)
1413 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1414 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1415 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1416 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1417 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1418 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1419 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1420 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1421 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1422 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1423 MMI_SDC1(%[ftmp6], %[tmp], 0xc0)
1424 MMI_ULWC1(%[ftmp4], %[src], 0x00)
1425 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1426 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1427 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1428 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1429 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1430 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1431 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1432 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1433 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1434 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1435 MMI_SDC1(%[ftmp6], %[tmp], 0xf0)
1436 MMI_ULWC1(%[ftmp5], %[src], 0x00)
1437 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1438 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1439 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1440 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1441 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1442 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1443 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1444 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1445 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1446 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1447 MMI_SDC1(%[ftmp6], %[tmp], 0x120)
1448 MMI_ULWC1(%[ftmp0], %[src], 0x00)
1449 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1450 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1451 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1452 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1453 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1454 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1455 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1456 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1457 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1458 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1459 MMI_SDC1(%[ftmp6], %[tmp], 0x150)
1460 "bne %[size], 0x10, 2f \n\t"
1461
1462 MMI_ULWC1(%[ftmp1], %[src], 0x00)
1463 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1464 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1465 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1466 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1467 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1468 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1469 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1470 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1471 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1472 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1473 MMI_SDC1(%[ftmp6], %[tmp], 0x180)
1474 MMI_ULWC1(%[ftmp2], %[src], 0x00)
1475 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1476 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1477 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1478 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1479 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1480 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1481 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1482 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1483 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1484 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1485 MMI_SDC1(%[ftmp6], %[tmp], 0x1b0)
1486 MMI_ULWC1(%[ftmp3], %[src], 0x00)
1487 "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1488 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1489 "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1490 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1491 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1492 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1493 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1494 "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1495 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1496 "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1497 MMI_SDC1(%[ftmp6], %[tmp], 0x1e0)
1498 MMI_ULWC1(%[ftmp4], %[src], 0x00)
1499 "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1500 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1501 "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1502 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1503 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1504 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1505 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1506 "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1507 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1508 "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1509 MMI_SDC1(%[ftmp6], %[tmp], 0x210)
1510 MMI_ULWC1(%[ftmp5], %[src], 0x00)
1511 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1512 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1513 "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1514 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1515 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1516 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1517 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1518 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1519 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1520 "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1521 MMI_SDC1(%[ftmp6], %[tmp], 0x240)
1522 MMI_ULWC1(%[ftmp0], %[src], 0x00)
1523 "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1524 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1525 "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1526 "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1527 "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1528 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1529 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1530 "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1531 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1532 "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1533 MMI_SDC1(%[ftmp6], %[tmp], 0x270)
1534 MMI_ULWC1(%[ftmp1], %[src], 0x00)
1535 "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1536 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1537 "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1538 "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1539 "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1540 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1541 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1542 "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1543 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1544 "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1545 MMI_SDC1(%[ftmp6], %[tmp], 0x2a0)
1546 MMI_ULWC1(%[ftmp2], %[src], 0x00)
1547 "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1548 "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1549 "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1550 "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1551 "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1552 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1553 "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1554 "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1555 PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1556 "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1557 MMI_SDC1(%[ftmp6], %[tmp], 0x2d0)
1558 "2: \n\t"
1559 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1560 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1561 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1562 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1563 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1564 [ftmp10]"=&f"(ftmp[10]),
1565 [tmp0]"=&r"(tmp0),
1566 RESTRICT_ASM_LOW32
1567 [src]"+&r"(src)
1568 : [tmp]"r"(tmp), [size]"r"(size),
1569 [srcStride]"r"((mips_reg)srcStride),
1570 [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1571 : "memory"
1572 );
1573
1574 tmp += 4;
1575 src += 4 - (size + 5) * srcStride;
1576 }
1577 }
1578
/**
 * Second pass of the 8- or 16-wide 2D lowpass filter, "put" flavour:
 * turns rows of 16-bit intermediates in tmp into 8-bit pixels stored to dst.
 *
 * For output pixel x of a row, with t[] the int16 values of that row, the
 * asm forms
 *   a = t[x] + t[x+5],  b = t[x+1] + t[x+4],  c = t[x+2] + t[x+3]
 * and evaluates (((((a - b) >> 2) - b + c) >> 2) + c) >> 6, which is
 * (a - 5*b + 20*c) >> 10 up to the truncation of the intermediate shifts.
 * The eight results of a row are packed to bytes with unsigned saturation.
 *
 * @param dst       destination; 8 bytes are stored per row and column pass
 * @param tmp       intermediate rows; the asm steps 0x30 bytes (24 int16)
 *                  from one row to the next
 * @param dstStride byte distance between destination rows
 * @param tmpStride not read by this function (the row pitch is hard-coded)
 * @param size      number of rows; size >> 4 extra 8-pixel-wide column
 *                  passes are made (none for 8, one for 16)
 */
static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
        int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
{
    int w = size >> 4;
    double ftmp[10];
    uint64_t tmp0;
    DECLARE_VAR_ALL64;

    /* One iteration per 8-pixel-wide column of the block. */
    do {
        int h = size;

        __asm__ volatile (
            /* ftmp8 = shift count 2, ftmp9 = shift count 6 */
            "dli %[tmp0], 0x02 \n\t"
            "mtc1 %[tmp0], %[ftmp8] \n\t"
            "dli %[tmp0], 0x06 \n\t"
            "mtc1 %[tmp0], %[ftmp9] \n\t"
            "1: \n\t"
            /* ftmp0/3/6 = t[0..3], t[4..7], t[8..11];
             * ftmp1/4/5 = t[1..4], t[5..8], t[9..12] */
            MMI_LDC1(%[ftmp0], %[tmp], 0x00)
            MMI_LDC1(%[ftmp3], %[tmp], 0x08)
            MMI_LDC1(%[ftmp6], %[tmp], 0x10)
            MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
            MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
            MMI_ULDC1(%[ftmp5], %[tmp], 0x12)
            /* ftmp0 = a (pixels 0..3), ftmp1 = b (pixels 0..3),
             * ftmp3 = a (pixels 4..7), ftmp4 = b (pixels 4..7) */
            "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
            "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
            "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
            /* ftmp2 = c (pixels 0..3), ftmp5 = c (pixels 4..7) */
            MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
            MMI_ULDC1(%[ftmp6], %[tmp], 0x06)
            MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
            MMI_ULDC1(%[ftmp7], %[tmp], 0x0e)
            "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
            "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
            /* ((a - b) >> 2) - b + c */
            "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
            "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
            "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
            "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
            "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
            "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
            "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
            "paddsh %[ftmp3] , %[ftmp3], %[ftmp5] \n\t"
            /* (... >> 2) + c, then the final >> 6 */
            "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
            "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
            "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
            "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
            "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
            "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
            /* pack both halves to 8 bytes and store one output row */
            "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
            "addi %[h], %[h], -0x01 \n\t"
            MMI_SDC1(%[ftmp0], %[dst], 0x00)
            /* next intermediate row (0x30 bytes) and next output row */
            PTR_ADDIU "%[tmp], %[tmp], 0x30 \n\t"
            PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
            "bnez %[h], 1b \n\t"
            : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
              [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
              [tmp0]"=&r"(tmp0),
              RESTRICT_ASM_ALL64
              [tmp]"+&r"(tmp), [dst]"+&r"(dst),
              [h]"+&r"(h)
            : [dstStride]"r"((mips_reg)dstStride)
            : "memory"
        );

        /* The asm advanced both pointers by `size` rows: rewind to the
         * first row and move 8 columns to the right. */
        tmp += 8 - size * 24;
        dst += 8 - size * dstStride;
    } while (w--);
}
1649
/*
 * Complete 2D lowpass, "put" flavour, for an 8- or 16-sized block.
 * Runs the hv1 pass (src -> 16-bit intermediates in tmp) followed by the
 * put hv2 pass (tmp -> 8-bit pixels in dst). `size` is forwarded unchanged
 * to both passes.
 */
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride, int size)
{
    int16_t *const scratch = tmp;

    /* Pass 1: fill the intermediate buffer from the source pixels. */
    put_h264_qpel8or16_hv1_lowpass_mmi(scratch, src,
                                       tmpStride, srcStride, size);

    /* Pass 2: reduce the intermediates to output pixels. */
    put_h264_qpel8or16_hv2_lowpass_mmi(dst, scratch,
                                       dstStride, tmpStride, size);
}
1657
/* 8-sized entry point of the "put" 2D lowpass: fixes the block size at 8. */
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 8 };

    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      BLOCK_SIZE);
}
1665
/* 16-sized entry point of the "put" 2D lowpass: fixes the block size at 16. */
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 16 };

    put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      BLOCK_SIZE);
}
1673
/**
 * Horizontal 6-tap lowpass over 8 rows of 8 pixels, averaged with a second
 * source and stored to dst.
 *
 * Per output pixel x the asm computes
 *   (20*(s[x] + s[x+1]) - 5*(s[x-1] + s[x+2]) + (s[x-2] + s[x+3]) + 16) >> 5
 * in 16-bit lanes, packs the eight results to bytes with unsigned
 * saturation, and applies pavgb against the 8 bytes read from src2.
 *
 * @param dst        destination, 8 bytes stored per row
 * @param src        source row; bytes src[-2] .. src[10] are read
 * @param src2       second source, 8 bytes read per row
 * @param dstStride  row step applied to BOTH dst and src (there is no
 *                   separate source stride parameter)
 * @param src2Stride row step applied to src2
 */
static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    int h = 8;
    double ftmp[9];
    uint64_t tmp[1];
    DECLARE_VAR_LOW32;
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        /* ftmp7 = shift count 2, ftmp8 = shift count 5, ftmp0 = zero */
        "dli %[tmp0], 0x02 \n\t"
        "mtc1 %[tmp0], %[ftmp7] \n\t"
        "dli %[tmp0], 0x05 \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "mtc1 %[tmp0], %[ftmp8] \n\t"
        "1: \n\t"
        /* centre taps: ftmp1 (low 4 px) / ftmp2 (high 4 px)
         * = (s[x] + s[x+1]) << 2 */
        MMI_ULDC1(%[ftmp1], %[src], 0x00)
        MMI_ULDC1(%[ftmp3], %[src], 0x01)
        "punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        /* inner taps: subtract (s[x-1] + s[x+2]); ftmp4 (high half of
         * src-1) and ftmp5 (low half of src+2) are kept for reuse below */
        MMI_ULDC1(%[ftmp3], %[src], -0x01)
        MMI_ULDC1(%[ftmp5], %[src], 0x02)
        "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
        "psubh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        /* (4*centre - inner) * 5 = 20*centre - 5*inner */
        "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
        "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
        /* outer taps: low half  = s[-2..1] + s[3..6]  (ftmp3 + ftmp4),
         *             high half = s[2..5]  + s[7..10] (ftmp5 + ftmp6) */
        MMI_ULWC1(%[ftmp3], %[src], -0x02)
        MMI_ULWC1(%[ftmp6], %[src], 0x07)
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        /* rounding constant, accumulate, >> 5 */
        "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
        "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
        "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
        /* pack to bytes, average with the src2 row, store */
        MMI_LDC1(%[ftmp5], %[src2], 0x00)
        "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        /* src is stepped by dstStride, not by a stride of its own */
        PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        /* NOTE(review): PTR_ADDU is given an immediate operand here and is
         * applied to the int row counter; sibling loops in this file use
         * "addi"/"daddi" for their counters - confirm this is intended. */
        PTR_ADDU "%[h], %[h], -0x01 \n\t"
        MMI_SDC1(%[ftmp1], %[dst], 0x00)
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
        "bgtz %[h], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]),
          [tmp0]"=&r"(tmp[0]),
          RESTRICT_ASM_LOW32
          RESTRICT_ASM_ALL64
          [src]"+&r"(src), [dst]"+&r"(dst),
          [src2]"+&r"(src2), [h]"+&r"(h)
        : [src2Stride]"r"((mips_reg)src2Stride),
          [dstStride]"r"((mips_reg)dstStride),
          [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
1749
/**
 * Combines 16-bit intermediates with 8-bit pixels, 8 pixels wide, two rows
 * per iteration: each int16 from src16 is arithmetically shifted right by 5,
 * packed to a byte with unsigned saturation, averaged (pavgb) with the
 * matching byte from src8 and stored to dst.
 *
 * @param dst        destination, rows dstStride bytes apart
 * @param src16      16-bit input; consecutive rows are read 0x30 bytes
 *                   (24 int16) apart
 * @param src8       8-bit input, rows src8Stride bytes apart
 * @param dstStride  byte distance between destination rows
 * @param src8Stride byte distance between src8 rows
 * @param h          number of rows; the loop ends only when h reaches
 *                   exactly 0 in steps of 2, so a positive even value is
 *                   expected
 */
static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
{
    double ftmp[7];
    uint64_t tmp0;
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    do {
        __asm__ volatile (
            /* ftmp6 = shift count 5 */
            "dli %[tmp0], 0x05 \n\t"
            /* ftmp0/ftmp1: 8 int16 of the first row,
             * ftmp2/ftmp3: 8 int16 of the row 0x30 bytes further on */
            MMI_ULDC1(%[ftmp0], %[src16], 0x00)
            "mtc1 %[tmp0], %[ftmp6] \n\t"
            MMI_ULDC1(%[ftmp1], %[src16], 0x08)
            MMI_ULDC1(%[ftmp2], %[src16], 0x30)
            MMI_ULDC1(%[ftmp3], %[src16], 0x38)
            "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
            "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
            "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
            "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
            /* narrow each row to 8 unsigned bytes */
            "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
            "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
            /* src8 rows at offset 0 and at offset src8Stride */
            MMI_LDC1(%[ftmp5], %[src8], 0x00)
            MMI_LDXC1(%[ftmp4], %[src8], %[src8Stride], 0x00)
            "pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
            "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
            /* store to dst rows at offset 0 and at offset dstStride */
            MMI_SDC1(%[ftmp0], %[dst], 0x00)
            MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
            : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]),
              RESTRICT_ASM_ALL64
              RESTRICT_ASM_ADDRT
              [tmp0]"=&r"(tmp0)
            : [src8]"r"(src8), [src16]"r"(src16),
              [dst]"r"(dst),
              [src8Stride]"r"((mips_reg)src8Stride),
              [dstStride]"r"((mips_reg)dstStride)
            : "memory"
        );

        /* Two rows were consumed: 48 int16 == 2 * 0x30 bytes of src16. */
        src8 += 2 * src8Stride;
        src16 += 48;
        dst += 2 * dstStride;
    } while (h -= 2);
}
1797
/*
 * 16x16 variant of put_h264_qpel8_h_lowpass_l2_mmi, built from four 8x8
 * calls issued in the order top-left, top-right, bottom-left, bottom-right.
 * As in the 8x8 routine, src rows are stepped with dstStride; only src2 has
 * its own stride.
 */
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    int quad;

    for (quad = 0; quad < 4; quad++) {
        const int       row_off  = (quad >> 1) * 8; /* 0, 0, 8, 8 */
        const int       col_off  = (quad & 1) * 8;  /* 0, 8, 0, 8 */
        const ptrdiff_t pix_off  = row_off * dstStride  + col_off;
        const ptrdiff_t src2_off = row_off * src2Stride + col_off;

        put_h264_qpel8_h_lowpass_l2_mmi(dst + pix_off, src + pix_off,
                                        src2 + src2_off, dstStride,
                                        src2Stride);
    }
}
1813
/*
 * 16-pixel-wide variant of put_pixels8_l2_shift5_mmi: processes the left
 * 8 columns, then the right 8 columns, each over all h rows.
 */
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        put_pixels8_l2_shift5_mmi(dst + col, src16 + col, src8 + col,
                                  dstStride, src8Stride, h);
    }
}
1821
/**
 * 4x4 2D lowpass, "avg" flavour.
 *
 * Step 1 (asm): for 9 source rows starting two rows above src, computes the
 * horizontal 6-tap sum
 *   20*(s[x] + s[x+1]) - 5*(s[x-1] + s[x+2]) + (s[x-2] + s[x+3])
 * for x = 0..3 and stores the four int16 results of each row to _tmp
 * (row pitch 8 bytes, i.e. 4 int16).
 * Step 2 (C): for each of the 4 columns applies the same tap pattern
 * vertically on the intermediates and hands the sum to op2_avg, which
 * writes into dst.
 *
 * NOTE(review): INIT_CLIP and op2_avg are defined outside this block;
 * presumably op2_avg rounds, scales and clips the sum and averages it with
 * the existing dst pixel - confirm against their definitions.
 *
 * @param dst       destination, rows dstStride bytes apart
 * @param src       source; rows src - 2*srcStride .. src + 6*srcStride and
 *                  bytes at horizontal offsets -2 .. +6 are read
 * @param dstStride byte distance between destination rows
 * @param srcStride byte distance between source rows
 */
static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    INIT_CLIP
    int i;
    int16_t _tmp[36];           /* 9 rows x 4 intermediates */
    int16_t *tmp = _tmp;
    double ftmp[10];
    uint64_t tmp0;
    DECLARE_VAR_LOW32;

    /* The vertical filter needs two rows above the block. */
    src -= 2*srcStride;

    __asm__ volatile (
        /* ftmp0 = zero for byte->halfword unpacking, tmp0 = 9 rows to do */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "dli %[tmp0], 0x09 \n\t"
        "1: \n\t"
        /* six overlapping 4-byte windows at offsets -2 .. +3 */
        MMI_ULWC1(%[ftmp1], %[src], -0x02)
        MMI_ULWC1(%[ftmp2], %[src], -0x01)
        MMI_ULWC1(%[ftmp3], %[src], 0x00)
        MMI_ULWC1(%[ftmp4], %[src], 0x01)
        MMI_ULWC1(%[ftmp5], %[src], 0x02)
        MMI_ULWC1(%[ftmp6], %[src], 0x03)
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        /* ftmp7 = centre pair, ftmp8 = inner pair, ftmp9 = outer pair */
        "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
        "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
        "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
        /* 20*centre - 5*inner + outer */
        "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
        "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
        "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
        "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
        /* store 4 int16, then step to the next source / tmp row */
        MMI_SDC1(%[ftmp9], %[tmp], 0x00)
        "daddi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
        PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
        "bnez %[tmp0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp0),
          RESTRICT_ASM_LOW32
          [tmp]"+&r"(tmp), [src]"+&r"(src)
        : [tmpStride]"r"(8),
          [srcStride]"r"((mips_reg)srcStride),
          [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
        : "memory"
    );

    /* The asm left tmp 36 elements past _tmp; stepping back 28 puts it on
     * row 2 (_tmp + 8), so tmp[-8] below addresses row 0. */
    tmp -= 28;

    for (i=0; i<4; i++) {
        /* One column of 9 intermediates, 4 elements apart. These locals
         * (tmp0 in particular) shadow the outer asm counter tmp0. */
        const int16_t tmpB= tmp[-8];
        const int16_t tmpA= tmp[-4];
        const int16_t tmp0= tmp[ 0];
        const int16_t tmp1= tmp[ 4];
        const int16_t tmp2= tmp[ 8];
        const int16_t tmp3= tmp[12];
        const int16_t tmp4= tmp[16];
        const int16_t tmp5= tmp[20];
        const int16_t tmp6= tmp[24];
        op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
        op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
        op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
        op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
        /* next column */
        dst++;
        tmp++;
    }
}
1897
avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t * dst,int16_t * tmp,ptrdiff_t dstStride,ptrdiff_t tmpStride,int size)1898 static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst,
1899 int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1900 {
1901 int w = size >> 4;
1902 double ftmp[11];
1903 uint64_t tmp0;
1904 DECLARE_VAR_ALL64;
1905
1906 do {
1907 int h = size;
1908 __asm__ volatile (
1909 "dli %[tmp0], 0x02 \n\t"
1910 "mtc1 %[tmp0], %[ftmp9] \n\t"
1911 "dli %[tmp0], 0x06 \n\t"
1912 "mtc1 %[tmp0], %[ftmp10] \n\t"
1913 "1: \n\t"
1914 MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1915 MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1916 MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1917 MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1918 MMI_LDC1(%[ftmp7], %[tmp], 0x10)
1919 MMI_ULDC1(%[ftmp8], %[tmp], 0x12)
1920 "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1921 "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1922 "paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1923 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1924 MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1925 MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1926 MMI_ULDC1(%[ftmp7], %[tmp], 0x06)
1927 MMI_ULDC1(%[ftmp8], %[tmp], 0x0e)
1928 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1929 "paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1930 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1931 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1932 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1933 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1934 "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1935 "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1936 "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1937 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1938 "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1939 "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1940 "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1941 "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1942 "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
1943 "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1944 "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
1945 MMI_LDC1(%[ftmp6], %[dst], 0x00)
1946 "pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1947 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1948 "addi %[h], %[h], -0x01 \n\t"
1949 PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t"
1950 PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1951 "bnez %[h], 1b \n\t"
1952 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1953 [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1954 [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1955 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1956 [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1957 [ftmp10]"=&f"(ftmp[10]),
1958 [tmp0]"=&r"(tmp0),
1959 RESTRICT_ASM_ALL64
1960 [tmp]"+&r"(tmp), [dst]"+&r"(dst),
1961 [h]"+&r"(h)
1962 : [dstStride]"r"((mips_reg)dstStride)
1963 : "memory"
1964 );
1965
1966 tmp += 8 - size * 24;
1967 dst += 8 - size * dstStride;
1968 } while (w--);
1969 }
1970
/*
 * Complete 2D lowpass, "avg" flavour, for an 8- or 16-sized block.
 * The first pass is shared with the put flavour (src -> 16-bit
 * intermediates in tmp); only the second pass differs, using the avg hv2
 * routine to produce dst. `size` is forwarded unchanged to both passes.
 */
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride, int size)
{
    int16_t *const scratch = tmp;

    /* Pass 1: fill the intermediate buffer from the source pixels. */
    put_h264_qpel8or16_hv1_lowpass_mmi(scratch, src,
                                       tmpStride, srcStride, size);

    /* Pass 2: reduce the intermediates and merge them into dst. */
    avg_h264_qpel8or16_hv2_lowpass_mmi(dst, scratch,
                                       dstStride, tmpStride, size);
}
1978
/* 8-sized entry point of the "avg" 2D lowpass: fixes the block size at 8. */
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 8 };

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      BLOCK_SIZE);
}
1986
/* 16-sized entry point of the "avg" 2D lowpass: fixes the block size at 16. */
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
        const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
        ptrdiff_t srcStride)
{
    enum { BLOCK_SIZE = 16 };

    avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src,
                                      dstStride, tmpStride, srcStride,
                                      BLOCK_SIZE);
}
1994
/**
 * Horizontal 6-tap lowpass over 8 rows of 8 pixels, averaged first with a
 * second source and then with the pixels already in dst.
 *
 * Per output pixel x the asm computes
 *   (20*(s[x] + s[x+1]) - 5*(s[x-1] + s[x+2]) + (s[x-2] + s[x+3]) + 16) >> 5
 * in 16-bit lanes, packs the eight results to bytes with unsigned
 * saturation, applies pavgb against the src2 row and then pavgb against the
 * current dst row before storing.
 *
 * @param dst        destination, 8 bytes read and rewritten per row
 * @param src        source row; bytes src[-2] .. src[10] are read
 * @param src2       second source, 8 bytes read per row
 * @param dstStride  row step applied to BOTH dst and src (there is no
 *                   separate source stride parameter)
 * @param src2Stride row step applied to src2
 */
static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    double ftmp[10];
    uint64_t tmp[2];
    DECLARE_VAR_LOW32;
    DECLARE_VAR_ALL64;

    __asm__ volatile (
        /* tmp0 = row counter (8); ftmp7 = shift count 2,
         * ftmp8 = shift count 5, ftmp0 = zero */
        "dli %[tmp1], 0x02 \n\t"
        "ori %[tmp0], $0, 0x8 \n\t"
        "mtc1 %[tmp1], %[ftmp7] \n\t"
        "dli %[tmp1], 0x05 \n\t"
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "mtc1 %[tmp1], %[ftmp8] \n\t"
        "1: \n\t"
        /* centre taps: ftmp1 (low 4 px) / ftmp3 (high 4 px)
         * = (s[x] + s[x+1]) << 2 */
        MMI_ULDC1(%[ftmp1], %[src], 0x00)
        MMI_ULDC1(%[ftmp2], %[src], 0x01)
        "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
        "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
        "psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
        /* inner taps: subtract (s[x-1] + s[x+2]); ftmp4 (high half of
         * src-1) and ftmp5 (low half of src+2) are kept for reuse below */
        MMI_ULDC1(%[ftmp2], %[src], -0x01)
        MMI_ULDC1(%[ftmp5], %[src], 0x02)
        "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
        "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
        "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
        /* (4*centre - inner) * 5 = 20*centre - 5*inner */
        "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
        "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
        /* outer taps: low half  = s[-2..1] + s[3..6]  (ftmp2 + ftmp4),
         *             high half = s[2..5]  + s[7..10] (ftmp5 + ftmp6) */
        MMI_ULWC1(%[ftmp2], %[src], -0x02)
        MMI_ULWC1(%[ftmp6], %[src], 0x07)
        "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
        "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
        /* rounding constant, accumulate, >> 5 */
        "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
        "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
        "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
        "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
        "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
        /* pack to bytes, average with src2, then with the existing dst */
        MMI_LDC1(%[ftmp5], %[src2], 0x00)
        "packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
        MMI_LDC1(%[ftmp9], %[dst], 0x00)
        "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
        /* src is stepped by dstStride, not by a stride of its own */
        PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
        MMI_SDC1(%[ftmp1], %[dst], 0x00)
        "daddi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
        PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
        "bgtz %[tmp0], 1b \n\t"
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
          [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
          RESTRICT_ASM_LOW32
          RESTRICT_ASM_ALL64
          [dst]"+&r"(dst), [src]"+&r"(src),
          [src2]"+&r"(src2)
        : [dstStride]"r"((mips_reg)dstStride),
          [src2Stride]"r"((mips_reg)src2Stride),
          [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
        : "memory"
    );
}
2072
/*
 * 16x16 variant of avg_h264_qpel8_h_lowpass_l2_mmi, assembled from four
 * 8x8 calls: top-left, top-right, bottom-left, bottom-right. The lower
 * half of src is reached with dstStride (as in the 8x8 routine); src2 uses
 * its own stride.
 */
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
        const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
{
    const ptrdiff_t lower_pix  = 8 * dstStride;
    const ptrdiff_t lower_src2 = 8 * src2Stride;
    uint8_t *const       dst_lo  = dst  + lower_pix;
    const uint8_t *const src_lo  = src  + lower_pix;
    const uint8_t *const src2_lo = src2 + lower_src2;

    /* upper 8 rows */
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8,
                                    dstStride, src2Stride);

    /* lower 8 rows */
    avg_h264_qpel8_h_lowpass_l2_mmi(dst_lo, src_lo, src2_lo,
                                    dstStride, src2Stride);
    avg_h264_qpel8_h_lowpass_l2_mmi(dst_lo + 8, src_lo + 8, src2_lo + 8,
                                    dstStride, src2Stride);
}
2088
/**
 * Combines 16-bit intermediates with 8-bit pixels and merges the result
 * into dst, 8 pixels wide, two rows per iteration: each int16 from src16 is
 * arithmetically shifted right by 5, packed to a byte with unsigned
 * saturation, averaged (pavgb) with the matching byte from src8, averaged
 * again with the byte already in dst, and stored back to dst.
 *
 * @param dst        destination, read and rewritten; rows dstStride apart
 * @param src16      16-bit input; consecutive rows are read 0x30 bytes
 *                   (24 int16) apart
 * @param src8       8-bit input, rows src8Stride bytes apart
 * @param dstStride  byte distance between destination rows
 * @param src8Stride byte distance between src8 rows
 * @param b          number of rows; the loop ends only when b reaches
 *                   exactly 0 in steps of 2, so a positive even value is
 *                   expected
 */
static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
{
    double ftmp[8];
    uint64_t tmp0;
    DECLARE_VAR_ALL64;
    DECLARE_VAR_ADDRT;

    do {
        __asm__ volatile (
            /* ftmp6 = shift count 5 */
            "dli %[tmp0], 0x05 \n\t"
            /* ftmp0/ftmp1: 8 int16 of the first row,
             * ftmp2/ftmp3: 8 int16 of the row 0x30 bytes further on */
            MMI_ULDC1(%[ftmp0], %[src16], 0x00)
            "mtc1 %[tmp0], %[ftmp6] \n\t"
            MMI_ULDC1(%[ftmp1], %[src16], 0x08)
            MMI_ULDC1(%[ftmp2], %[src16], 0x30)
            MMI_ULDC1(%[ftmp3], %[src16], 0x38)
            "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
            "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
            "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
            "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
            /* narrow to bytes and average with the two src8 rows */
            "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
            MMI_LDC1(%[ftmp4], %[src8], 0x00)
            MMI_LDXC1(%[ftmp5], %[src8], %[src8Stride], 0x00)
            "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
            "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
            "pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
            /* first row: average with the existing dst pixels, store */
            MMI_LDC1(%[ftmp7], %[dst], 0x00)
            "pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
            MMI_SDC1(%[ftmp0], %[dst], 0x00)
            /* second row (offset dstStride): same treatment */
            MMI_LDXC1(%[ftmp7], %[dst], %[dstStride], 0x00)
            "pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
            MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
            : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
              [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
              [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
              [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
              RESTRICT_ASM_ALL64
              RESTRICT_ASM_ADDRT
              [tmp0]"=&r"(tmp0)
            : [src8]"r"(src8), [src16]"r"(src16),
              [dst]"r"(dst),
              [src8Stride]"r"((mips_reg)src8Stride),
              [dstStride]"r"((mips_reg)dstStride)
            : "memory"
        );

        /* Two rows were consumed: 48 int16 == 2 * 0x30 bytes of src16. */
        src8 += 2 * src8Stride;
        src16 += 48;
        dst += 2 * dstStride;
    } while (b -= 2);
}
2140
/*
 * 16-pixel-wide variant of avg_pixels8_l2_shift5_mmi: processes the left
 * 8 columns, then the right 8 columns, each over all b rows.
 */
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
        const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        avg_pixels8_l2_shift5_mmi(dst + col, src16 + col, src8 + col,
                                  dstStride, src8Stride, b);
    }
}
2148
2149 //DEF_H264_MC_MMI(put_, 4)
/*
 * 4x4 "put", position mc00: no filtering, the block is forwarded to
 * ff_put_pixels4_8_mmi with a height of 4 rows and the same stride for
 * source and destination.
 */
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { ROWS = 4 };

    ff_put_pixels4_8_mmi(dst, src, stride, ROWS);
}
2155
/*
 * 4x4 "put", position mc10: the horizontal lowpass of src is written to a
 * packed 4x4 scratch block (pitch 4), then ff_put_pixels4_l2_8_mmi merges
 * the unfiltered src rows with that scratch block into dst.
 */
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4 };
    uint8_t hfilt[BLK * BLK];

    put_h264_qpel4_h_lowpass_mmi(hfilt, src, BLK, stride);
    ff_put_pixels4_l2_8_mmi(dst, src, hfilt, stride, stride, BLK, BLK);
}
2163
/*
 * 4x4 "put", position mc20: the horizontal lowpass of src is written
 * straight to dst; both buffers use the same stride.
 */
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    put_h264_qpel4_h_lowpass_mmi(dst, src, pitch, pitch);
}
2169
/*
 * 4x4 "put", position mc30: like mc10, but the unfiltered operand is taken
 * one pixel to the right (src + 1) before ff_put_pixels4_l2_8_mmi merges it
 * with the horizontally filtered scratch block.
 */
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4 };
    uint8_t hfilt[BLK * BLK];
    const uint8_t *const right_neighbour = src + 1;

    put_h264_qpel4_h_lowpass_mmi(hfilt, src, BLK, stride);
    ff_put_pixels4_l2_8_mmi(dst, right_neighbour, hfilt,
                            stride, stride, BLK, BLK);
}
2177
/*
 * 4x4 "put", position mc01: nine source rows, starting two rows above src,
 * are copied into a packed 4-byte-pitch buffer. The vertical lowpass runs
 * on that copy (centred on its third row) into a 4x4 scratch block, and
 * ff_put_pixels4_l2_8_mmi merges the copied centre rows with the scratch
 * block into dst.
 */
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t vfilt[BLK * BLK];
    uint8_t *const centre = column + LEAD_ROWS * BLK;

    copy_block4_mmi(column, src - LEAD_ROWS * stride, BLK, stride, CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(vfilt, centre, BLK, BLK);
    ff_put_pixels4_l2_8_mmi(dst, centre, vfilt, stride, BLK, BLK, BLK);
}
2188
/*
 * 4x4 "put", position mc02: nine source rows, starting two rows above src,
 * are copied into a packed 4-byte-pitch buffer and the vertical lowpass of
 * that copy (centred on its third row) is written straight to dst.
 */
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t *const centre = column + LEAD_ROWS * BLK;

    copy_block4_mmi(column, src - LEAD_ROWS * stride, BLK, stride, CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(dst, centre, stride, BLK);
}
2197
/*
 * 4x4 "put", position mc03: like mc01, but the unfiltered operand handed to
 * ff_put_pixels4_l2_8_mmi starts one row lower in the copied column (centre
 * row + 4 bytes) before being merged with the vertically filtered block.
 */
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t vfilt[BLK * BLK];
    uint8_t *const centre = column + LEAD_ROWS * BLK;
    uint8_t *const below  = centre + BLK;

    copy_block4_mmi(column, src - LEAD_ROWS * stride, BLK, stride, CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(vfilt, centre, BLK, BLK);
    ff_put_pixels4_l2_8_mmi(dst, below, vfilt, stride, BLK, BLK, BLK);
}
2208
/*
 * 4x4 "put", position mc11: ff_put_pixels4_l2_8_mmi merges two filtered
 * 4x4 blocks into dst - the horizontal lowpass of src and the vertical
 * lowpass of the 9-row column copied from two rows above src.
 */
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t hfilt[BLK * BLK];
    uint8_t vfilt[BLK * BLK];
    uint8_t *const centre = column + LEAD_ROWS * BLK;

    /* horizontal half-sample block */
    put_h264_qpel4_h_lowpass_mmi(hfilt, src, BLK, stride);

    /* vertical half-sample block */
    copy_block4_mmi(column, src - LEAD_ROWS * stride, BLK, stride, CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(vfilt, centre, BLK, BLK);

    ff_put_pixels4_l2_8_mmi(dst, hfilt, vfilt, stride, BLK, BLK, BLK);
}
2221
/*
 * 4x4 "put", position mc31: like mc11, but the column used for the vertical
 * lowpass is copied from one pixel to the right (src + 1).
 */
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t hfilt[BLK * BLK];
    uint8_t vfilt[BLK * BLK];
    uint8_t *const centre = column + LEAD_ROWS * BLK;

    /* horizontal half-sample block */
    put_h264_qpel4_h_lowpass_mmi(hfilt, src, BLK, stride);

    /* vertical half-sample block, taken one column to the right */
    copy_block4_mmi(column, src - LEAD_ROWS * stride + 1, BLK, stride,
                    CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(vfilt, centre, BLK, BLK);

    ff_put_pixels4_l2_8_mmi(dst, hfilt, vfilt, stride, BLK, BLK, BLK);
}
2234
/*
 * 4x4 "put", position mc13: like mc11, but the horizontal lowpass is taken
 * one row lower (src + stride).
 */
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t hfilt[BLK * BLK];
    uint8_t vfilt[BLK * BLK];
    uint8_t *const centre = column + LEAD_ROWS * BLK;

    /* horizontal half-sample block, taken one row down */
    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, BLK, stride);

    /* vertical half-sample block */
    copy_block4_mmi(column, src - LEAD_ROWS * stride, BLK, stride, CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(vfilt, centre, BLK, BLK);

    ff_put_pixels4_l2_8_mmi(dst, hfilt, vfilt, stride, BLK, BLK, BLK);
}
2247
/*
 * 4x4 "put", position mc33: like mc11, but the horizontal lowpass is taken
 * one row lower (src + stride) and the column for the vertical lowpass is
 * copied from one pixel to the right (src + 1).
 */
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4, CTX_ROWS = 9, LEAD_ROWS = 2 };
    uint8_t column[BLK * CTX_ROWS];
    uint8_t hfilt[BLK * BLK];
    uint8_t vfilt[BLK * BLK];
    uint8_t *const centre = column + LEAD_ROWS * BLK;

    /* horizontal half-sample block, taken one row down */
    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, BLK, stride);

    /* vertical half-sample block, taken one column to the right */
    copy_block4_mmi(column, src - LEAD_ROWS * stride + 1, BLK, stride,
                    CTX_ROWS);
    put_h264_qpel4_v_lowpass_mmi(vfilt, centre, BLK, BLK);

    ff_put_pixels4_l2_8_mmi(dst, hfilt, vfilt, stride, BLK, BLK, BLK);
}
2260
/*
 * 4x4 "put", position mc22: the 2D (hv) lowpass of src is written straight
 * to dst; both buffers use the same stride.
 */
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    put_h264_qpel4_hv_lowpass_mmi(dst, src, pitch, pitch);
}
2266
/*
 * 4x4 "put", position mc21: ff_put_pixels4_l2_8_mmi merges the horizontal
 * lowpass of src with the 2D (hv) lowpass of src, both computed into packed
 * 4x4 scratch blocks.
 */
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4 };
    uint8_t hfilt[BLK * BLK];
    uint8_t hvfilt[BLK * BLK];

    put_h264_qpel4_h_lowpass_mmi(hfilt, src, BLK, stride);
    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, BLK, stride);

    ff_put_pixels4_l2_8_mmi(dst, hfilt, hvfilt, stride, BLK, BLK, BLK);
}
2276
/*
 * 4x4 "put", position mc23: like mc21, but the horizontal lowpass is taken
 * one row lower (src + stride); the 2D (hv) lowpass still starts at src.
 */
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    enum { BLK = 4 };
    uint8_t hfilt[BLK * BLK];
    uint8_t hvfilt[BLK * BLK];
    const uint8_t *const next_row = src + stride;

    put_h264_qpel4_h_lowpass_mmi(hfilt, next_row, BLK, stride);
    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, BLK, stride);

    ff_put_pixels4_l2_8_mmi(dst, hfilt, hvfilt, stride, BLK, BLK, BLK);
}
2286
/*
 * put, 4x4, mc12.
 * 9 rows of 4 bytes starting two rows above src are copied into a packed
 * buffer and filtered by the vertical lowpass helper; the hv lowpass helper
 * is run on src.  The two 4x4 blocks feed ff_put_pixels4_l2_8_mmi().
 */
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t v_out[4 * 4];
    uint8_t hv_out[4 * 4];

    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_hv_lowpass_mmi(hv_out, src, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, v_out, hv_out, stride, 4, 4, 4);
}
2299
/*
 * put, 4x4, mc32: same pipeline as mc12, but the 9x4 window that feeds the
 * vertical lowpass helper is taken one byte to the right of src.
 */
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t v_out[4 * 4];
    uint8_t hv_out[4 * 4];

    copy_block4_mmi(ext, src - 2 * stride + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_hv_lowpass_mmi(hv_out, src, 4, stride);

    ff_put_pixels4_l2_8_mmi(dst, v_out, hv_out, stride, 4, 4, 4);
}
2312
2313 //DEF_H264_MC_MMI(avg_, 4)
/*
 * avg, 4x4, mc00: no lowpass helper is involved; the request is forwarded to
 * ff_avg_pixels4_8_mmi() with 4 as the final argument (presumably the row
 * count).
 */
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int h = 4;

    ff_avg_pixels4_8_mmi(dst, src, stride, h);
}
2319
/*
 * avg, 4x4, mc10: the horizontal lowpass helper filters src into a 16-byte
 * scratch block; the unfiltered src and that block are then the two inputs
 * of ff_avg_pixels4_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(filt, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, src, filt, stride, stride, 4, 4);
}
2327
/*
 * avg, 4x4, mc20: delegates to avg_h264_qpel4_h_lowpass_mmi(), using the
 * same stride for both of the helper's stride arguments.
 */
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    avg_h264_qpel4_h_lowpass_mmi(dst, src, pitch, pitch);
}
2333
/*
 * avg, 4x4, mc30: same as the mc10 path, except that the unfiltered input of
 * ff_avg_pixels4_l2_8_mmi() is taken one byte to the right of src.  The
 * horizontal lowpass helper still reads from src itself.
 */
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[4 * 4];
    const uint8_t *const right = src + 1;

    put_h264_qpel4_h_lowpass_mmi(filt, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, right, filt, stride, stride, 4, 4);
}
2341
/*
 * avg, 4x4, mc01.
 * 9 rows of 4 bytes starting two rows above src are copied into a packed
 * buffer.  The vertical lowpass helper filters from the third copied row
 * (ext_mid), and ff_avg_pixels4_l2_8_mmi() receives ext_mid and the filtered
 * block as its two inputs.
 */
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t filt[4 * 4];

    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(filt, ext_mid, 4, 4);

    ff_avg_pixels4_l2_8_mmi(dst, ext_mid, filt, stride, 4, 4, 4);
}
2352
/*
 * avg, 4x4, mc02: copies 9 rows of 4 bytes (starting two rows above src)
 * into a packed buffer and lets avg_h264_qpel4_v_lowpass_mmi() work from the
 * third copied row directly into dst.
 */
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];

    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);

    avg_h264_qpel4_v_lowpass_mmi(dst, ext_mid, stride, 4);
}
2361
/*
 * avg, 4x4, mc03: same as the mc01 path, except that the unfiltered input of
 * ff_avg_pixels4_l2_8_mmi() starts one packed row (4 bytes) after ext_mid.
 */
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t filt[4 * 4];

    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(filt, ext_mid, 4, 4);

    ff_avg_pixels4_l2_8_mmi(dst, ext_mid + 4, filt, stride, 4, 4, 4);
}
2372
/*
 * avg, 4x4, mc11.
 * Vertical input: 9x4 window starting two rows above src, copied to a packed
 * buffer and run through the vertical lowpass helper.
 * Horizontal input: horizontal lowpass helper applied at src.
 * Both 4x4 blocks feed ff_avg_pixels4_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t h_out[4 * 4];
    uint8_t v_out[4 * 4];

    /* The vertical branch is independent of the horizontal one. */
    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_out, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_out, v_out, stride, 4, 4, 4);
}
2385
/*
 * avg, 4x4, mc31: as mc11, but the 9x4 window feeding the vertical lowpass
 * helper is taken one byte to the right of src.
 */
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t h_out[4 * 4];
    uint8_t v_out[4 * 4];

    /* The vertical branch is independent of the horizontal one. */
    copy_block4_mmi(ext, src - 2 * stride + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_out, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_out, v_out, stride, 4, 4, 4);
}
2398
/*
 * avg, 4x4, mc13: as mc11, but the horizontal lowpass helper is applied one
 * row lower (src + stride).
 */
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t h_out[4 * 4];
    uint8_t v_out[4 * 4];

    /* The vertical branch is independent of the horizontal one. */
    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_out, src + stride, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_out, v_out, stride, 4, 4, 4);
}
2411
/*
 * avg, 4x4, mc33: as mc11, but the horizontal lowpass helper is applied at
 * src + stride and the 9x4 window feeding the vertical lowpass helper is
 * taken one byte to the right of src.
 */
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t h_out[4 * 4];
    uint8_t v_out[4 * 4];

    /* The vertical branch is independent of the horizontal one. */
    copy_block4_mmi(ext, src - 2 * stride + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_h_lowpass_mmi(h_out, src + stride, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_out, v_out, stride, 4, 4, 4);
}
2424
/*
 * avg, 4x4, mc22: delegates entirely to avg_h264_qpel4_hv_lowpass_mmi(),
 * passing the same stride for both of the helper's stride arguments.
 */
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    avg_h264_qpel4_hv_lowpass_mmi(dst, src, pitch, pitch);
}
2430
/*
 * avg, 4x4, mc21: the horizontal lowpass and the hv lowpass helpers are both
 * run on src into packed 4x4 scratch blocks; the two blocks are then the
 * inputs of ff_avg_pixels4_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t h_out[4 * 4];
    uint8_t hv_out[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(h_out, src, 4, stride);
    put_h264_qpel4_hv_lowpass_mmi(hv_out, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_out, hv_out, stride, 4, 4, 4);
}
2440
/*
 * avg, 4x4, mc23: like the mc21 path, except that the horizontal lowpass
 * helper is applied one row lower (src + stride).  The hv lowpass helper
 * still reads from src.
 */
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const uint8_t *const below = src + stride;
    uint8_t h_out[4 * 4];
    uint8_t hv_out[4 * 4];

    put_h264_qpel4_h_lowpass_mmi(h_out, below, 4, stride);
    put_h264_qpel4_hv_lowpass_mmi(hv_out, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, h_out, hv_out, stride, 4, 4, 4);
}
2450
/*
 * avg, 4x4, mc12.
 * 9 rows of 4 bytes starting two rows above src are copied into a packed
 * buffer and filtered by the vertical lowpass helper; the hv lowpass helper
 * is run on src.  The two 4x4 blocks feed ff_avg_pixels4_l2_8_mmi().
 */
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t v_out[4 * 4];
    uint8_t hv_out[4 * 4];

    copy_block4_mmi(ext, src - 2 * stride, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_hv_lowpass_mmi(hv_out, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, v_out, hv_out, stride, 4, 4, 4);
}
2463
/*
 * avg, 4x4, mc32: same pipeline as mc12, but the 9x4 window that feeds the
 * vertical lowpass helper is taken one byte to the right of src.
 */
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[4 * 9];
    uint8_t *const ext_mid = &ext[2 * 4];
    uint8_t v_out[4 * 4];
    uint8_t hv_out[4 * 4];

    copy_block4_mmi(ext, src - 2 * stride + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(v_out, ext_mid, 4, 4);

    put_h264_qpel4_hv_lowpass_mmi(hv_out, src, 4, stride);

    ff_avg_pixels4_l2_8_mmi(dst, v_out, hv_out, stride, 4, 4, 4);
}
2476
2477 //DEF_H264_MC_MMI(put_, 8)
/*
 * put, 8x8, mc00: no lowpass helper is involved; the request is forwarded to
 * ff_put_pixels8_8_mmi() with 8 as the final argument (presumably the row
 * count).
 */
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int h = 8;

    ff_put_pixels8_8_mmi(dst, src, stride, h);
}
2483
/*
 * put, 8x8, mc10: the horizontal lowpass helper filters src into a 64-byte
 * scratch block; the unfiltered src and that block are then the two inputs
 * of ff_put_pixels8_l2_8_mmi(), which writes dst.
 */
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[8 * 8];

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, src, filt, stride, stride, 8, 8);
}
2491
/*
 * put, 8x8, mc20: delegates to put_h264_qpel8_h_lowpass_mmi(), using the
 * same stride for both of the helper's stride arguments.
 */
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    put_h264_qpel8_h_lowpass_mmi(dst, src, pitch, pitch);
}
2497
/*
 * put, 8x8, mc30: same as the mc10 path, except that the unfiltered input of
 * ff_put_pixels8_l2_8_mmi() is taken one byte to the right of src.  The
 * horizontal lowpass helper still reads from src itself.
 */
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[8 * 8];
    const uint8_t *const right = src + 1;

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, right, filt, stride, stride, 8, 8);
}
2505
/*
 * put, 8x8, mc01.
 * copy_block8_mmi() is asked for 13 rows starting two rows above src, with a
 * destination pitch of 8, into a 104-byte buffer.  The vertical lowpass
 * helper filters from offset 16 of that buffer (ext_mid), and
 * ff_put_pixels8_l2_8_mmi() receives ext_mid and the filtered block as its
 * two inputs.
 */
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t filt[8 * 8];

    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, ext_mid, 8, 8);

    ff_put_pixels8_l2_8_mmi(dst, ext_mid, filt, stride, 8, 8, 8);
}
2516
/*
 * put, 8x8, mc02: copy_block8_mmi() fills a 104-byte buffer (13 rows
 * requested, destination pitch 8, starting two rows above src), then
 * put_h264_qpel8_v_lowpass_mmi() works from offset 16 of that buffer
 * directly into dst.
 */
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];

    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);

    put_h264_qpel8_v_lowpass_mmi(dst, ext_mid, stride, 8);
}
2525
/*
 * put, 8x8, mc03: same as the mc01 path, except that the unfiltered input of
 * ff_put_pixels8_l2_8_mmi() starts 8 bytes (one packed row) after ext_mid.
 */
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t filt[8 * 8];

    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, ext_mid, 8, 8);

    ff_put_pixels8_l2_8_mmi(dst, ext_mid + 8, filt, stride, 8, 8, 8);
}
2536
/*
 * put, 8x8, mc11.
 * Vertical input: copy_block8_mmi() fills a 104-byte buffer starting two
 * rows above src; the vertical lowpass helper filters from offset 16.
 * Horizontal input: horizontal lowpass helper applied at src.
 * Both 8x8 blocks feed ff_put_pixels8_l2_8_mmi(), which writes dst.
 */
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2549
/*
 * put, 8x8, mc31: as mc11, but the window feeding the vertical lowpass
 * helper is taken one byte to the right of src.
 */
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2562
/*
 * put, 8x8, mc13: as mc11, but the horizontal lowpass helper is applied one
 * row lower (src + stride).
 */
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src + stride, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2575
/*
 * put, 8x8, mc33: as mc11, but the horizontal lowpass helper is applied at
 * src + stride and the window feeding the vertical lowpass helper is taken
 * one byte to the right of src.
 */
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src + stride, 8, stride);

    ff_put_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2588
/*
 * put, 8x8, mc22: the whole job is delegated to
 * put_h264_qpel8_hv_lowpass_mmi(), which is given dst, a 192-element 16-bit
 * scratch array, src, and (stride, 8, stride) as its three stride arguments.
 *
 * The scratch array is declared int16_t so that its pointer type matches the
 * int16_t * that ff_put_h264_qpel8_mc21/mc23/mc12/mc32_mmi pass in the same
 * argument slot of this helper (avoids a pointer-signedness mismatch).
 * Element size, element count and alignment are unchanged.
 */
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    int16_t __attribute__ ((aligned(8))) temp[192];

    put_h264_qpel8_hv_lowpass_mmi(dst, temp, src, stride, 8, stride);
}
2596
/*
 * put, 8x8, mc21.
 * One 8-byte-aligned 448-byte scratch area is split in two: its first 64
 * bytes are the 8-bit destination of the hv lowpass helper, and the rest is
 * passed to that helper as its int16_t work area.  The 8-bit hv block and
 * src are then given to put_h264_qpel8_h_lowpass_l2_mmi(), which writes dst.
 */
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, hv_out, stride, 8);
}
2607
/*
 * put, 8x8, mc23: same as the mc21 path, except that
 * put_h264_qpel8_h_lowpass_l2_mmi() is given src + stride instead of src.
 * The hv lowpass helper still reads from src.
 */
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    put_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, hv_out, stride, 8);
}
2618
/*
 * put, 8x8, mc12.
 * The hv lowpass helper writes its 8-bit result to the first 64 bytes of an
 * aligned scratch area and uses the remainder as its int16_t work area.
 * put_pixels8_l2_shift5_mmi() then receives that work area offset by 2
 * int16_t elements, plus the 8-bit hv block, and writes dst.
 */
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    put_pixels8_l2_shift5_mmi(dst, v_tmp + 2, hv_out, stride, 8, 8);
}
2629
/*
 * put, 8x8, mc32: same as the mc12 path, except that the int16_t work area
 * is offset by 3 elements (instead of 2) when handed to
 * put_pixels8_l2_shift5_mmi().
 */
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    put_pixels8_l2_shift5_mmi(dst, v_tmp + 3, hv_out, stride, 8, 8);
}
2640
2641 //DEF_H264_MC_MMI(avg_, 8)
/*
 * avg, 8x8, mc00: no lowpass helper is involved; the request is forwarded to
 * ff_avg_pixels8_8_mmi() with 8 as the final argument (presumably the row
 * count).
 */
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int h = 8;

    ff_avg_pixels8_8_mmi(dst, src, stride, h);
}
2647
/*
 * avg, 8x8, mc10: the horizontal lowpass helper filters src into a 64-byte
 * scratch block; the unfiltered src and that block are then the two inputs
 * of ff_avg_pixels8_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[8 * 8];

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);

    ff_avg_pixels8_l2_8_mmi(dst, src, filt, stride, stride, 8, 8);
}
2655
/*
 * avg, 8x8, mc20: delegates to avg_h264_qpel8_h_lowpass_mmi(), using the
 * same stride for both of the helper's stride arguments.
 */
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    avg_h264_qpel8_h_lowpass_mmi(dst, src, pitch, pitch);
}
2661
/*
 * avg, 8x8, mc30: same as the mc10 path, except that the unfiltered input of
 * ff_avg_pixels8_l2_8_mmi() is taken one byte to the right of src.  The
 * horizontal lowpass helper still reads from src itself.
 */
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[8 * 8];
    const uint8_t *const right = src + 1;

    put_h264_qpel8_h_lowpass_mmi(filt, src, 8, stride);

    ff_avg_pixels8_l2_8_mmi(dst, right, filt, stride, stride, 8, 8);
}
2669
/*
 * avg, 8x8, mc01.
 * copy_block8_mmi() is asked for 13 rows starting two rows above src, with a
 * destination pitch of 8, into a 104-byte buffer.  The vertical lowpass
 * helper filters from offset 16 of that buffer (ext_mid), and
 * ff_avg_pixels8_l2_8_mmi() receives ext_mid and the filtered block as its
 * two inputs.
 */
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t filt[8 * 8];

    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, ext_mid, 8, 8);

    ff_avg_pixels8_l2_8_mmi(dst, ext_mid, filt, stride, 8, 8, 8);
}
2680
/*
 * avg, 8x8, mc02: copy_block8_mmi() fills a 104-byte buffer (13 rows
 * requested, destination pitch 8, starting two rows above src), then
 * avg_h264_qpel8_v_lowpass_mmi() works from offset 16 of that buffer
 * directly into dst.
 */
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];

    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);

    avg_h264_qpel8_v_lowpass_mmi(dst, ext_mid, stride, 8);
}
2689
/*
 * avg, 8x8, mc03: same as the mc01 path, except that the unfiltered input of
 * ff_avg_pixels8_l2_8_mmi() starts 8 bytes (one packed row) after ext_mid.
 */
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t filt[8 * 8];

    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(filt, ext_mid, 8, 8);

    ff_avg_pixels8_l2_8_mmi(dst, ext_mid + 8, filt, stride, 8, 8, 8);
}
2700
/*
 * avg, 8x8, mc11.
 * Vertical input: copy_block8_mmi() fills a 104-byte buffer starting two
 * rows above src; the vertical lowpass helper filters from offset 16.
 * Horizontal input: horizontal lowpass helper applied at src.
 * Both 8x8 blocks feed ff_avg_pixels8_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src, 8, stride);

    ff_avg_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2713
/*
 * avg, 8x8, mc31: as mc11, but the window feeding the vertical lowpass
 * helper is taken one byte to the right of src.
 */
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src, 8, stride);

    ff_avg_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2726
/*
 * avg, 8x8, mc13: as mc11, but the horizontal lowpass helper is applied one
 * row lower (src + stride).
 */
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src + stride, 8, stride);

    ff_avg_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2739
/*
 * avg, 8x8, mc33: as mc11, but the horizontal lowpass helper is applied at
 * src + stride and the window feeding the vertical lowpass helper is taken
 * one byte to the right of src.
 */
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[8 * 13];
    uint8_t *const ext_mid = &ext[2 * 8];
    uint8_t h_out[8 * 8];
    uint8_t v_out[8 * 8];

    /* The vertical branch is independent of the horizontal one. */
    copy_block8_mmi(ext, src - 2 * stride + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(v_out, ext_mid, 8, 8);

    put_h264_qpel8_h_lowpass_mmi(h_out, src + stride, 8, stride);

    ff_avg_pixels8_l2_8_mmi(dst, h_out, v_out, stride, 8, 8, 8);
}
2752
/*
 * avg, 8x8, mc22: the whole job is delegated to
 * avg_h264_qpel8_hv_lowpass_mmi(), which is given dst, an 8-byte-aligned
 * 192-element 16-bit scratch array, src, and (stride, 8, stride) as its
 * three stride arguments.
 */
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) scratch[192];
    const ptrdiff_t pitch = stride;

    avg_h264_qpel8_hv_lowpass_mmi(dst, scratch, src, pitch, 8, pitch);
}
2760
/*
 * avg, 8x8, mc21.
 * One 8-byte-aligned 448-byte scratch area is split in two: its first 64
 * bytes are the 8-bit destination of the (put) hv lowpass helper, and the
 * rest is passed to that helper as its int16_t work area.  The 8-bit hv
 * block and src are then given to avg_h264_qpel8_h_lowpass_l2_mmi(), which
 * updates dst.
 */
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, hv_out, stride, 8);
}
2771
/*
 * avg, 8x8, mc23: same as the mc21 path, except that
 * avg_h264_qpel8_h_lowpass_l2_mmi() is given src + stride instead of src.
 * The hv lowpass helper still reads from src.
 */
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, hv_out, stride, 8);
}
2782
/*
 * avg, 8x8, mc12.
 * The (put) hv lowpass helper writes its 8-bit result to the first 64 bytes
 * of an aligned scratch area and uses the remainder as its int16_t work
 * area.  avg_pixels8_l2_shift5_mmi() then receives that work area offset by
 * 2 int16_t elements, plus the 8-bit hv block, and updates dst.
 */
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    avg_pixels8_l2_shift5_mmi(dst, v_tmp + 2, hv_out, stride, 8, 8);
}
2793
/*
 * avg, 8x8, mc32: same as the mc12 path, except that the int16_t work area
 * is offset by 3 elements (instead of 2) when handed to
 * avg_pixels8_l2_shift5_mmi().
 */
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[64 + 384];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv_out, v_tmp, src, 8, 8, stride);

    avg_pixels8_l2_shift5_mmi(dst, v_tmp + 3, hv_out, stride, 8, 8);
}
2804
2805 //DEF_H264_MC_MMI(put_, 16)
/*
 * put, 16x16, mc00: no lowpass helper is involved; the request is forwarded
 * to ff_put_pixels16_8_mmi() with 16 as the final argument (presumably the
 * row count).
 */
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int h = 16;

    ff_put_pixels16_8_mmi(dst, src, stride, h);
}
2811
/*
 * put, 16x16, mc10: the horizontal lowpass helper filters src into a
 * 256-byte scratch block; the unfiltered src and that block are then the two
 * inputs of ff_put_pixels16_l2_8_mmi(), which writes dst.
 */
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[16 * 16];

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);

    ff_put_pixels16_l2_8_mmi(dst, src, filt, stride, stride, 16, 16);
}
2819
/*
 * put, 16x16, mc20: delegates to put_h264_qpel16_h_lowpass_mmi(), using the
 * same stride for both of the helper's stride arguments.
 */
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    put_h264_qpel16_h_lowpass_mmi(dst, src, pitch, pitch);
}
2825
/*
 * put, 16x16, mc30: same as the mc10 path, except that the unfiltered input
 * of ff_put_pixels16_l2_8_mmi() is taken one byte to the right of src.  The
 * horizontal lowpass helper still reads from src itself.
 */
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    const uint8_t *const right = src + 1;

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);

    ff_put_pixels16_l2_8_mmi(dst, right, filt, stride, stride, 16, 16);
}
2833
/*
 * put, 16x16, mc01.
 * copy_block16_mmi() is called with a 336-byte buffer, the address two rows
 * above src, and the values 16, stride, 21 (presumably destination pitch,
 * source pitch and row count, as in copy_block4_mmi()).  The vertical
 * lowpass helper filters from offset 32 of that buffer (ext_mid), and
 * ff_put_pixels16_l2_8_mmi() receives ext_mid and the filtered block as its
 * two inputs.
 */
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t filt[16 * 16];

    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, ext_mid, 16, 16);

    ff_put_pixels16_l2_8_mmi(dst, ext_mid, filt, stride, 16, 16, 16);
}
2844
/*
 * put, 16x16, mc02: copy_block16_mmi() fills a 336-byte buffer from two rows
 * above src, then put_h264_qpel16_v_lowpass_mmi() works from offset 32 of
 * that buffer directly into dst.
 */
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];

    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);

    put_h264_qpel16_v_lowpass_mmi(dst, ext_mid, stride, 16);
}
2853
/*
 * put, 16x16, mc03: same as the mc01 path, except that the unfiltered input
 * of ff_put_pixels16_l2_8_mmi() starts 16 bytes (one packed row) after
 * ext_mid.
 */
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t filt[16 * 16];

    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, ext_mid, 16, 16);

    ff_put_pixels16_l2_8_mmi(dst, ext_mid + 16, filt, stride, 16, 16, 16);
}
2864
/*
 * put, 16x16, mc11.
 * Vertical input: copy_block16_mmi() fills a 336-byte buffer from two rows
 * above src; the vertical lowpass helper filters from offset 32.
 * Horizontal input: horizontal lowpass helper applied at src.
 * Both 16x16 blocks feed ff_put_pixels16_l2_8_mmi(), which writes dst.
 */
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t h_out[16 * 16];
    uint8_t v_out[16 * 16];

    /* The vertical branch is independent of the horizontal one. */
    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_out, ext_mid, 16, 16);

    put_h264_qpel16_h_lowpass_mmi(h_out, src, 16, stride);

    ff_put_pixels16_l2_8_mmi(dst, h_out, v_out, stride, 16, 16, 16);
}
2877
/*
 * put, 16x16, mc31: as mc11, but the window feeding the vertical lowpass
 * helper is taken one byte to the right of src.
 */
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t h_out[16 * 16];
    uint8_t v_out[16 * 16];

    /* The vertical branch is independent of the horizontal one. */
    copy_block16_mmi(ext, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_out, ext_mid, 16, 16);

    put_h264_qpel16_h_lowpass_mmi(h_out, src, 16, stride);

    ff_put_pixels16_l2_8_mmi(dst, h_out, v_out, stride, 16, 16, 16);
}
2890
/*
 * put, 16x16, mc13: as mc11, but the horizontal lowpass helper is applied
 * one row lower (src + stride).
 */
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t h_out[16 * 16];
    uint8_t v_out[16 * 16];

    /* The vertical branch is independent of the horizontal one. */
    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_out, ext_mid, 16, 16);

    put_h264_qpel16_h_lowpass_mmi(h_out, src + stride, 16, stride);

    ff_put_pixels16_l2_8_mmi(dst, h_out, v_out, stride, 16, 16, 16);
}
2903
/*
 * put, 16x16, mc33: as mc11, but the horizontal lowpass helper is applied at
 * src + stride and the window feeding the vertical lowpass helper is taken
 * one byte to the right of src.
 */
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t h_out[16 * 16];
    uint8_t v_out[16 * 16];

    /* The vertical branch is independent of the horizontal one. */
    copy_block16_mmi(ext, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_out, ext_mid, 16, 16);

    put_h264_qpel16_h_lowpass_mmi(h_out, src + stride, 16, stride);

    ff_put_pixels16_l2_8_mmi(dst, h_out, v_out, stride, 16, 16, 16);
}
2916
/*
 * put, 16x16, mc22: the whole job is delegated to
 * put_h264_qpel16_hv_lowpass_mmi(), which is given dst, a 384-element 16-bit
 * scratch array, src, and (stride, 16, stride) as its three stride
 * arguments.
 *
 * The scratch array is declared int16_t so that its pointer type matches the
 * int16_t * that ff_put_h264_qpel16_mc21/mc23/mc12/mc32_mmi pass in the same
 * argument slot of this helper (avoids a pointer-signedness mismatch).
 * Element size, element count and alignment are unchanged.
 */
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    int16_t __attribute__ ((aligned(8))) temp[384];

    put_h264_qpel16_hv_lowpass_mmi(dst, temp, src, stride, 16, stride);
}
2924
/*
 * put, 16x16, mc21.
 * One 8-byte-aligned 1024-byte scratch area is split in two: its first 256
 * bytes are the 8-bit destination of the hv lowpass helper, and the rest is
 * passed to that helper as its int16_t work area.  The 8-bit hv block and
 * src are then given to put_h264_qpel16_h_lowpass_l2_mmi(), which writes
 * dst.
 */
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v_tmp, src, 16, 16, stride);

    put_h264_qpel16_h_lowpass_l2_mmi(dst, src, hv_out, stride, 16);
}
2935
/*
 * put, 16x16, mc23: same as the mc21 path, except that
 * put_h264_qpel16_h_lowpass_l2_mmi() is given src + stride instead of src.
 * The hv lowpass helper still reads from src.
 */
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v_tmp, src, 16, 16, stride);

    put_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, hv_out, stride, 16);
}
2946
/*
 * put, 16x16, mc12.
 * The hv lowpass helper writes its 8-bit result to the first 256 bytes of an
 * aligned scratch area and uses the remainder as its int16_t work area.
 * put_pixels16_l2_shift5_mmi() then receives that work area offset by 2
 * int16_t elements, plus the 8-bit hv block, and writes dst.
 */
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v_tmp, src, 16, 16, stride);

    put_pixels16_l2_shift5_mmi(dst, v_tmp + 2, hv_out, stride, 16, 16);
}
2957
/*
 * put, 16x16, mc32: same as the mc12 path, except that the int16_t work area
 * is offset by 3 elements (instead of 2) when handed to
 * put_pixels16_l2_shift5_mmi().
 */
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[256 + 768];
    uint8_t *const hv_out = scratch;
    int16_t *const v_tmp  = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv_out, v_tmp, src, 16, 16, stride);

    put_pixels16_l2_shift5_mmi(dst, v_tmp + 3, hv_out, stride, 16, 16);
}
2968
2969 //DEF_H264_MC_MMI(avg_, 16)
/*
 * avg, 16x16, mc00: no lowpass helper is involved; the request is forwarded
 * to ff_avg_pixels16_8_mmi() with 16 as the final argument (presumably the
 * row count).
 */
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const int h = 16;

    ff_avg_pixels16_8_mmi(dst, src, stride, h);
}
2975
/*
 * avg, 16x16, mc10: the horizontal lowpass helper filters src into a
 * 256-byte scratch block; the unfiltered src and that block are then the two
 * inputs of ff_avg_pixels16_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[16 * 16];

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);

    ff_avg_pixels16_l2_8_mmi(dst, src, filt, stride, stride, 16, 16);
}
2983
/*
 * avg, 16x16, mc20: delegates to avg_h264_qpel16_h_lowpass_mmi(), using the
 * same stride for both of the helper's stride arguments.
 */
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    const ptrdiff_t pitch = stride;

    avg_h264_qpel16_h_lowpass_mmi(dst, src, pitch, pitch);
}
2989
/*
 * avg, 16x16, mc30: same as the mc10 path, except that the unfiltered input
 * of ff_avg_pixels16_l2_8_mmi() is taken one byte to the right of src.  The
 * horizontal lowpass helper still reads from src itself.
 */
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t filt[16 * 16];
    const uint8_t *const right = src + 1;

    put_h264_qpel16_h_lowpass_mmi(filt, src, 16, stride);

    ff_avg_pixels16_l2_8_mmi(dst, right, filt, stride, stride, 16, 16);
}
2997
/*
 * avg, 16x16, mc01.
 * copy_block16_mmi() is called with a 336-byte buffer, the address two rows
 * above src, and the values 16, stride, 21 (presumably destination pitch,
 * source pitch and row count, as in copy_block4_mmi()).  The vertical
 * lowpass helper filters from offset 32 of that buffer (ext_mid), and
 * ff_avg_pixels16_l2_8_mmi() receives ext_mid and the filtered block as its
 * two inputs.
 */
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t filt[16 * 16];

    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, ext_mid, 16, 16);

    ff_avg_pixels16_l2_8_mmi(dst, ext_mid, filt, stride, 16, 16, 16);
}
3008
/*
 * avg, 16x16, mc02: copy_block16_mmi() fills a 336-byte buffer from two rows
 * above src, then avg_h264_qpel16_v_lowpass_mmi() works from offset 32 of
 * that buffer directly into dst.
 */
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];

    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);

    avg_h264_qpel16_v_lowpass_mmi(dst, ext_mid, stride, 16);
}
3017
/*
 * avg, 16x16, mc03: same as the mc01 path, except that the unfiltered input
 * of ff_avg_pixels16_l2_8_mmi() starts 16 bytes (one packed row) after
 * ext_mid.
 */
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t filt[16 * 16];

    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(filt, ext_mid, 16, 16);

    ff_avg_pixels16_l2_8_mmi(dst, ext_mid + 16, filt, stride, 16, 16, 16);
}
3028
/*
 * avg, 16x16, mc11.
 * Vertical input: copy_block16_mmi() fills a 336-byte buffer from two rows
 * above src; the vertical lowpass helper filters from offset 32.
 * Horizontal input: horizontal lowpass helper applied at src.
 * Both 16x16 blocks feed ff_avg_pixels16_l2_8_mmi(), which updates dst.
 */
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t ext[16 * 21];
    uint8_t *const ext_mid = &ext[2 * 16];
    uint8_t h_out[16 * 16];
    uint8_t v_out[16 * 16];

    /* The vertical branch is independent of the horizontal one. */
    copy_block16_mmi(ext, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_out, ext_mid, 16, 16);

    put_h264_qpel16_h_lowpass_mmi(h_out, src, 16, stride);

    ff_avg_pixels16_l2_8_mmi(dst, h_out, v_out, stride, 16, 16, 16);
}
3041
/*
 * 16x16 "avg" qpel entry point, variant mc31.
 *
 * Like the mc11 variant, but the 21-row local copy used for the vertical
 * pass is taken from one byte to the right (src - 2*stride + 1).
 */
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t h_filtered[16 * 16];
    uint8_t v_filtered[16 * 16];
    /* Skip the two leading rows of the local copy. */
    uint8_t * const rows_mid = rows + 2 * 16;

    put_h264_qpel16_h_lowpass_mmi(h_filtered, src, 16, stride);

    /* Column offset +1 applies to the vertical-pass input only. */
    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_filtered, rows_mid, 16, 16);

    ff_avg_pixels16_l2_8_mmi(dst, h_filtered, v_filtered,
                             stride, 16, 16, 16);
}
3054
/*
 * 16x16 "avg" qpel entry point, variant mc13.
 *
 * Like the mc11 variant, but the horizontal lowpass helper reads from
 * src + stride (one row down) instead of src.
 */
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t h_filtered[16 * 16];
    uint8_t v_filtered[16 * 16];
    /* Skip the two leading rows of the local copy. */
    uint8_t * const rows_mid = rows + 2 * 16;

    /* Row offset +1 applies to the horizontal-pass input only. */
    put_h264_qpel16_h_lowpass_mmi(h_filtered, src + stride, 16, stride);

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_filtered, rows_mid, 16, 16);

    ff_avg_pixels16_l2_8_mmi(dst, h_filtered, v_filtered,
                             stride, 16, 16, 16);
}
3067
/*
 * 16x16 "avg" qpel entry point, variant mc33.
 *
 * Like the mc11 variant, but with both offsets applied: the horizontal pass
 * reads from src + stride and the local copy for the vertical pass starts at
 * src - 2*stride + 1.
 */
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t h_filtered[16 * 16];
    uint8_t v_filtered[16 * 16];
    /* Skip the two leading rows of the local copy. */
    uint8_t * const rows_mid = rows + 2 * 16;

    /* Horizontal pass: one row down. */
    put_h264_qpel16_h_lowpass_mmi(h_filtered, src + stride, 16, stride);

    /* Vertical pass: one byte to the right. */
    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(v_filtered, rows_mid, 16, 16);

    ff_avg_pixels16_l2_8_mmi(dst, h_filtered, v_filtered,
                             stride, 16, 16, 16);
}
3080
/*
 * 16x16 "avg" qpel entry point, variant mc22.
 *
 * Thin wrapper: supplies an 8-byte-aligned scratch array of 384 16-bit
 * elements to avg_h264_qpel16_hv_lowpass_mmi and forwards dst, src and
 * stride (stride is passed for both stride arguments, with 16 in between).
 */
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) scratch16[384];

    avg_h264_qpel16_hv_lowpass_mmi(dst, scratch16, src,
                                   stride, 16, stride);
}
3088
/*
 * 16x16 "avg" qpel entry point, variant mc21.
 *
 * One 1024-byte, 8-byte-aligned scratch area is split in two: the first 256
 * bytes are used as a byte block, the remainder is viewed as int16_t. Both
 * parts are given to put_h264_qpel16_hv_lowpass_mmi; the byte block is then
 * passed, with src and dst, to avg_h264_qpel16_h_lowpass_l2_mmi.
 */
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_block = scratch;
    /* 16-bit work area begins right after the 16x16 byte block. */
    int16_t *const v_work = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_block, v_work, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src, hv_block, stride, 16);
}
3099
/*
 * 16x16 "avg" qpel entry point, variant mc23.
 *
 * Same sequence as the mc21 variant, except that
 * avg_h264_qpel16_h_lowpass_l2_mmi is given src + stride (one row down)
 * rather than src.
 */
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_block = scratch;
    /* 16-bit work area begins right after the 16x16 byte block. */
    int16_t *const v_work = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_block, v_work, src, 16, 16, stride);
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, hv_block,
                                     stride, 16);
}
3110
/*
 * 16x16 "avg" qpel entry point, variant mc12.
 *
 * Splits a 1024-byte, 8-byte-aligned scratch area into a 256-byte block and
 * an int16_t work area, runs put_h264_qpel16_hv_lowpass_mmi over both, then
 * calls avg_pixels16_l2_shift5_mmi with the 16-bit area advanced by two
 * elements and the byte block.
 */
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_block = scratch;
    /* 16-bit work area begins right after the 16x16 byte block. */
    int16_t *const v_work = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_block, v_work, src, 16, 16, stride);

    /* Offset is in int16_t elements, not bytes. */
    avg_pixels16_l2_shift5_mmi(dst, v_work + 2, hv_block, stride, 16, 16);
}
3121
/*
 * 16x16 "avg" qpel entry point, variant mc32.
 *
 * Same sequence as the mc12 variant, except that the 16-bit work area handed
 * to avg_pixels16_l2_shift5_mmi is advanced by three elements instead of two.
 */
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv_block = scratch;
    /* 16-bit work area begins right after the 16x16 byte block. */
    int16_t *const v_work = (int16_t *) &scratch[16 * 16];

    put_h264_qpel16_hv_lowpass_mmi(hv_block, v_work, src, 16, 16, stride);

    /* Offset is in int16_t elements, not bytes. */
    avg_pixels16_l2_shift5_mmi(dst, v_work + 3, hv_block, stride, 16, 16);
}
3132
3133 #undef op2_avg
3134 #undef op2_put
3135