• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <stdio.h>
13 
14 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vpx/vpx_integer.h"
18 #include "vpx_ports/mem.h"
19 #include "vp9/common/vp9_convolve.h"
20 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
21 
22 #if HAVE_DSPR2
convolve_bi_avg_horiz_4_dspr2(const uint8_t * src,int32_t src_stride,uint8_t * dst,int32_t dst_stride,const int16_t * filter_x0,int32_t h)23 static void convolve_bi_avg_horiz_4_dspr2(const uint8_t *src,
24                                           int32_t src_stride,
25                                           uint8_t *dst,
26                                           int32_t dst_stride,
27                                           const int16_t *filter_x0,
28                                           int32_t h) {
29   int32_t y;
30   uint8_t *cm = vp9_ff_cropTbl;
31   int32_t  Temp1, Temp2, Temp3, Temp4;
32   uint32_t vector4a = 64;
33   uint32_t tp1, tp2;
34   uint32_t p1, p2, p3;
35   uint32_t tn1, tn2;
36   const int16_t *filter = &filter_x0[3];
37   uint32_t      filter45;
38 
39   filter45 = ((const int32_t *)filter)[0];
40 
41   for (y = h; y--;) {
42     /* prefetch data to cache memory */
43     vp9_prefetch_load(src + src_stride);
44     vp9_prefetch_load(src + src_stride + 32);
45     vp9_prefetch_store(dst + dst_stride);
46 
47     __asm__ __volatile__ (
48         "ulw              %[tp1],         0(%[src])                      \n\t"
49         "ulw              %[tp2],         4(%[src])                      \n\t"
50 
51         /* even 1. pixel */
52         "mtlo             %[vector4a],    $ac3                           \n\t"
53         "mthi             $zero,          $ac3                           \n\t"
54         "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
55         "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
56         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
57         "extp             %[Temp1],       $ac3,           31             \n\t"
58 
59         /* even 2. pixel */
60         "mtlo             %[vector4a],    $ac2                           \n\t"
61         "mthi             $zero,          $ac2                           \n\t"
62         "balign           %[tp2],         %[tp1],         3              \n\t"
63         "dpa.w.ph         $ac2,           %[p2],          %[filter45]    \n\t"
64         "extp             %[Temp3],       $ac2,           31             \n\t"
65 
66         "lbu              %[p2],          3(%[dst])                      \n\t"  /* load odd 2 */
67 
68         /* odd 1. pixel */
69         "lbux             %[tp1],         %[Temp1](%[cm])                \n\t"  /* even 1 */
70         "mtlo             %[vector4a],    $ac3                           \n\t"
71         "mthi             $zero,          $ac3                           \n\t"
72         "lbu              %[Temp1],       1(%[dst])                      \n\t"  /* load odd 1 */
73         "preceu.ph.qbr    %[p1],          %[tp2]                         \n\t"
74         "preceu.ph.qbl    %[p3],          %[tp2]                         \n\t"
75         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
76         "extp             %[Temp2],       $ac3,           31             \n\t"
77 
78         "lbu              %[tn2],         0(%[dst])                      \n\t"  /* load even 1 */
79 
80         /* odd 2. pixel */
81         "lbux             %[tp2],         %[Temp3](%[cm])                \n\t"  /* even 2 */
82         "mtlo             %[vector4a],    $ac2                           \n\t"
83         "mthi             $zero,          $ac2                           \n\t"
84         "lbux             %[tn1],         %[Temp2](%[cm])                \n\t"  /* odd 1 */
85         "addqh_r.w        %[tn2],         %[tn2],         %[tp1]         \n\t"  /* average even 1 */
86         "dpa.w.ph         $ac2,           %[p3],          %[filter45]    \n\t"
87         "extp             %[Temp4],       $ac2,           31             \n\t"
88 
89         "lbu              %[tp1],         2(%[dst])                      \n\t"  /* load even 2 */
90         "sb               %[tn2],         0(%[dst])                      \n\t"  /* store even 1 */
91 
92         /* clamp */
93         "addqh_r.w        %[Temp1],       %[Temp1],       %[tn1]         \n\t"  /* average odd 1 */
94         "lbux             %[p3],          %[Temp4](%[cm])                \n\t"  /* odd 2 */
95         "sb               %[Temp1],       1(%[dst])                      \n\t"  /* store odd 1 */
96 
97         "addqh_r.w        %[tp1],         %[tp1],         %[tp2]         \n\t"  /* average even 2 */
98         "sb               %[tp1],         2(%[dst])                      \n\t"  /* store even 2 */
99 
100         "addqh_r.w        %[p2],          %[p2],          %[p3]          \n\t"  /* average odd 2 */
101         "sb               %[p2],          3(%[dst])                      \n\t"  /* store odd 2 */
102 
103         : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2),
104           [tn1] "=&r" (tn1), [tn2] "=&r" (tn2),
105           [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
106           [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
107           [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
108         : [filter45] "r" (filter45), [vector4a] "r" (vector4a),
109           [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
110     );
111 
112     /* Next row... */
113     src += src_stride;
114     dst += dst_stride;
115   }
116 }
117 
convolve_bi_avg_horiz_8_dspr2(const uint8_t * src,int32_t src_stride,uint8_t * dst,int32_t dst_stride,const int16_t * filter_x0,int32_t h)118 static void convolve_bi_avg_horiz_8_dspr2(const uint8_t *src,
119                                          int32_t src_stride,
120                                          uint8_t *dst,
121                                          int32_t dst_stride,
122                                          const int16_t *filter_x0,
123                                          int32_t h) {
124   int32_t y;
125   uint8_t *cm = vp9_ff_cropTbl;
126   uint32_t vector4a = 64;
127   int32_t Temp1, Temp2, Temp3;
128   uint32_t tp1, tp2, tp3, tp4;
129   uint32_t p1, p2, p3, p4, n1;
130   uint32_t st0, st1;
131   const int16_t *filter = &filter_x0[3];
132   uint32_t filter45;;
133 
134   filter45 = ((const int32_t *)filter)[0];
135 
136   for (y = h; y--;) {
137     /* prefetch data to cache memory */
138     vp9_prefetch_load(src + src_stride);
139     vp9_prefetch_load(src + src_stride + 32);
140     vp9_prefetch_store(dst + dst_stride);
141 
142     __asm__ __volatile__ (
143         "ulw              %[tp1],         0(%[src])                      \n\t"
144         "ulw              %[tp2],         4(%[src])                      \n\t"
145 
146         /* even 1. pixel */
147         "mtlo             %[vector4a],    $ac3                           \n\t"
148         "mthi             $zero,          $ac3                           \n\t"
149         "mtlo             %[vector4a],    $ac2                           \n\t"
150         "mthi             $zero,          $ac2                           \n\t"
151         "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
152         "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
153         "preceu.ph.qbr    %[p3],          %[tp2]                         \n\t"
154         "preceu.ph.qbl    %[p4],          %[tp2]                         \n\t"
155         "ulw              %[tp3],         8(%[src])                      \n\t"
156         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
157         "extp             %[Temp1],       $ac3,           31             \n\t"
158         "lbu              %[Temp2],       0(%[dst])                      \n\t"
159         "lbu              %[tp4],         2(%[dst])                      \n\t"
160 
161         /* even 2. pixel */
162         "dpa.w.ph         $ac2,           %[p2],          %[filter45]    \n\t"
163         "extp             %[Temp3],       $ac2,           31             \n\t"
164 
165         /* even 3. pixel */
166         "lbux             %[st0],         %[Temp1](%[cm])                \n\t"
167         "mtlo             %[vector4a],    $ac1                           \n\t"
168         "mthi             $zero,          $ac1                           \n\t"
169         "lbux             %[st1],         %[Temp3](%[cm])                \n\t"
170         "dpa.w.ph         $ac1,           %[p3],          %[filter45]    \n\t"
171         "extp             %[Temp1],       $ac1,           31             \n\t"
172 
173         "addqh_r.w        %[Temp2],       %[Temp2],       %[st0]         \n\t"
174         "addqh_r.w        %[tp4],         %[tp4],         %[st1]         \n\t"
175         "sb               %[Temp2],       0(%[dst])                      \n\t"
176         "sb               %[tp4],         2(%[dst])                      \n\t"
177 
178         /* even 4. pixel */
179         "mtlo             %[vector4a],    $ac2                           \n\t"
180         "mthi             $zero,          $ac2                           \n\t"
181         "mtlo             %[vector4a],    $ac3                           \n\t"
182         "mthi             $zero,          $ac3                           \n\t"
183 
184         "balign           %[tp3],         %[tp2],         3              \n\t"
185         "balign           %[tp2],         %[tp1],         3              \n\t"
186 
187         "lbux             %[st0],         %[Temp1](%[cm])                \n\t"
188         "lbu              %[Temp2],       4(%[dst])                      \n\t"
189         "addqh_r.w        %[Temp2],       %[Temp2],       %[st0]         \n\t"
190 
191         "dpa.w.ph         $ac2,           %[p4],          %[filter45]    \n\t"
192         "extp             %[Temp3],       $ac2,           31             \n\t"
193 
194         /* odd 1. pixel */
195         "mtlo             %[vector4a],    $ac1                           \n\t"
196         "mthi             $zero,          $ac1                           \n\t"
197         "sb               %[Temp2],       4(%[dst])                      \n\t"
198         "preceu.ph.qbr    %[p1],          %[tp2]                         \n\t"
199         "preceu.ph.qbl    %[p2],          %[tp2]                         \n\t"
200         "preceu.ph.qbr    %[p3],          %[tp3]                         \n\t"
201         "preceu.ph.qbl    %[p4],          %[tp3]                         \n\t"
202         "dpa.w.ph         $ac3,           %[p1],          %[filter45]    \n\t"
203         "extp             %[Temp2],       $ac3,           31             \n\t"
204 
205         "lbu              %[tp1],         6(%[dst])                      \n\t"
206 
207         /* odd 2. pixel */
208         "mtlo             %[vector4a],    $ac3                           \n\t"
209         "mthi             $zero,          $ac3                           \n\t"
210         "mtlo             %[vector4a],    $ac2                           \n\t"
211         "mthi             $zero,          $ac2                           \n\t"
212         "lbux             %[st0],         %[Temp3](%[cm])                \n\t"
213         "dpa.w.ph         $ac1,           %[p2],          %[filter45]    \n\t"
214         "extp             %[Temp3],       $ac1,           31             \n\t"
215 
216         "lbu              %[tp2],         1(%[dst])                      \n\t"
217         "lbu              %[tp3],         3(%[dst])                      \n\t"
218         "addqh_r.w        %[tp1],         %[tp1],         %[st0]         \n\t"
219 
220         /* odd 3. pixel */
221         "lbux             %[st1],         %[Temp2](%[cm])                \n\t"
222         "dpa.w.ph         $ac3,           %[p3],          %[filter45]    \n\t"
223         "addqh_r.w        %[tp2],         %[tp2],         %[st1]         \n\t"
224         "extp             %[Temp2],       $ac3,           31             \n\t"
225 
226         "lbu              %[tp4],         5(%[dst])                      \n\t"
227 
228         /* odd 4. pixel */
229         "sb               %[tp2],         1(%[dst])                      \n\t"
230         "sb               %[tp1],         6(%[dst])                      \n\t"
231         "dpa.w.ph         $ac2,           %[p4],          %[filter45]    \n\t"
232         "extp             %[Temp1],       $ac2,           31             \n\t"
233 
234         "lbu              %[tp1],         7(%[dst])                      \n\t"
235 
236         /* clamp */
237         "lbux             %[p4],          %[Temp3](%[cm])                \n\t"
238         "addqh_r.w        %[tp3],         %[tp3],         %[p4]          \n\t"
239 
240         "lbux             %[p2],          %[Temp2](%[cm])                \n\t"
241         "addqh_r.w        %[tp4],         %[tp4],         %[p2]          \n\t"
242 
243         "lbux             %[p1],          %[Temp1](%[cm])                \n\t"
244         "addqh_r.w        %[tp1],         %[tp1],         %[p1]          \n\t"
245 
246         /* store bytes */
247         "sb               %[tp3],         3(%[dst])                      \n\t"
248         "sb               %[tp4],         5(%[dst])                      \n\t"
249         "sb               %[tp1],         7(%[dst])                      \n\t"
250 
251         : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2),
252           [tp3] "=&r" (tp3), [tp4] "=&r" (tp4),
253           [st0] "=&r" (st0), [st1] "=&r" (st1),
254           [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
255           [n1] "=&r" (n1),
256           [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3)
257         : [filter45] "r" (filter45), [vector4a] "r" (vector4a),
258           [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
259     );
260 
261     /* Next row... */
262     src += src_stride;
263     dst += dst_stride;
264   }
265 }
266 
convolve_bi_avg_horiz_16_dspr2(const uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,const int16_t * filter_x0,int32_t h,int32_t count)267 static void convolve_bi_avg_horiz_16_dspr2(const uint8_t *src_ptr,
268                                           int32_t src_stride,
269                                           uint8_t *dst_ptr,
270                                           int32_t dst_stride,
271                                           const int16_t *filter_x0,
272                                           int32_t h,
273                                           int32_t count) {
274   int32_t y, c;
275   const uint8_t *src;
276   uint8_t *dst;
277   uint8_t *cm = vp9_ff_cropTbl;
278   uint32_t vector_64 = 64;
279   int32_t Temp1, Temp2, Temp3;
280   uint32_t qload1, qload2, qload3;
281   uint32_t p1, p2, p3, p4, p5;
282   uint32_t st1, st2, st3;
283   const int16_t *filter = &filter_x0[3];
284   uint32_t filter45;;
285 
286   filter45 = ((const int32_t *)filter)[0];
287 
288   for (y = h; y--;) {
289     src = src_ptr;
290     dst = dst_ptr;
291 
292     /* prefetch data to cache memory */
293     vp9_prefetch_load(src_ptr + src_stride);
294     vp9_prefetch_load(src_ptr + src_stride + 32);
295     vp9_prefetch_store(dst_ptr + dst_stride);
296 
297     for (c = 0; c < count; c++) {
298       __asm__ __volatile__ (
299           "ulw              %[qload1],    0(%[src])                    \n\t"
300           "ulw              %[qload2],    4(%[src])                    \n\t"
301 
302           /* even 1. pixel */
303           "mtlo             %[vector_64], $ac1                         \n\t" /* even 1 */
304           "mthi             $zero,        $ac1                         \n\t"
305           "mtlo             %[vector_64], $ac2                         \n\t" /* even 2 */
306           "mthi             $zero,        $ac2                         \n\t"
307           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
308           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
309           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
310           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
311           "ulw              %[qload3],    8(%[src])                    \n\t"
312           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* even 1 */
313           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 1 */
314           "lbu              %[st2],       0(%[dst])                    \n\t" /* load even 1 from dst */
315 
316           /* even 2. pixel */
317           "mtlo             %[vector_64], $ac3                         \n\t" /* even 3 */
318           "mthi             $zero,        $ac3                         \n\t"
319           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
320           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
321           "ulw              %[qload1],    12(%[src])                   \n\t"
322           "dpa.w.ph         $ac2,         %[p2],          %[filter45]  \n\t" /* even 1 */
323           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 1 */
324           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 1 */
325 
326           "lbu              %[qload3],    2(%[dst])                    \n\t" /* load even 2 from dst */
327 
328           /* even 3. pixel */
329           "mtlo             %[vector_64], $ac1                         \n\t" /* even 4 */
330           "mthi             $zero,        $ac1                         \n\t"
331           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 1 */
332           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
333           "sb               %[st2],       0(%[dst])                    \n\t" /* store even 1 to dst */
334           "dpa.w.ph         $ac3,         %[p3],          %[filter45]  \n\t" /* even 3 */
335           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 3 */
336           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 1 */
337 
338           /* even 4. pixel */
339           "mtlo             %[vector_64], $ac2                         \n\t" /* even 5 */
340           "mthi             $zero,        $ac2                         \n\t"
341           "addqh_r.w        %[qload3],    %[qload3],      %[st2]       \n\t" /* average even 2 */
342           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
343           "sb               %[qload3],    2(%[dst])                    \n\t" /* store even 2 to dst */
344           "lbu              %[qload3],    4(%[dst])                    \n\t" /* load even 3 from dst */
345           "lbu              %[qload1],    6(%[dst])                    \n\t" /* load even 4 from dst */
346           "dpa.w.ph         $ac1,         %[p4],          %[filter45]  \n\t" /* even 4 */
347           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 4 */
348           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 3 */
349 
350           /* even 5. pixel */
351           "mtlo             %[vector_64], $ac3                         \n\t" /* even 6 */
352           "mthi             $zero,        $ac3                         \n\t"
353           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 3 */
354           "sb               %[qload3],    4(%[dst])                    \n\t" /* store even 3 to dst */
355           "dpa.w.ph         $ac2,         %[p1],          %[filter45]  \n\t" /* even 5 */
356           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 5 */
357           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 4 */
358 
359           /* even 6. pixel */
360           "mtlo             %[vector_64], $ac1                         \n\t" /* even 7 */
361           "mthi             $zero,        $ac1                         \n\t"
362           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average even 4 */
363           "sb               %[qload1],    6(%[dst])                    \n\t" /* store even 4 to dst */
364           "dpa.w.ph         $ac3,         %[p5],          %[filter45]  \n\t" /* even 6 */
365           "lbu              %[qload2],    8(%[dst])                    \n\t" /* load even 5 from dst */
366           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 6 */
367           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 5 */
368 
369           /* even 7. pixel */
370           "mtlo             %[vector_64], $ac2                         \n\t" /* even 8 */
371           "mthi             $zero,        $ac2                         \n\t"
372           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 5 */
373           "sb               %[qload2],    8(%[dst])                    \n\t" /* store even 5 to dst */
374           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* even 7 */
375           "lbu              %[qload3],    10(%[dst])                   \n\t" /* load even 6 from dst */
376           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 7 */
377           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 6 */
378 
379           "lbu              %[st2],       12(%[dst])                   \n\t" /* load even 7 from dst */
380 
381           /* even 8. pixel */
382           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 1 */
383           "mthi             $zero,        $ac3                         \n\t"
384           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 6 */
385           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* even 8 */
386           "sb               %[qload3],    10(%[dst])                   \n\t" /* store even 6 to dst */
387           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 8 */
388           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 7 */
389 
390           /* ODD pixels */
391           "ulw              %[qload1],    1(%[src])                   \n\t"
392           "ulw              %[qload2],    5(%[src])                    \n\t"
393 
394           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 7 */
395 
396           /* odd 1. pixel */
397           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 2 */
398           "mthi             $zero,        $ac1                         \n\t"
399           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
400           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
401           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
402           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
403           "sb               %[st2],       12(%[dst])                   \n\t" /* store even 7 to dst */
404           "ulw              %[qload3],    9(%[src])                    \n\t"
405           "dpa.w.ph         $ac3,         %[p1],          %[filter45]  \n\t" /* odd 1 */
406           "lbu              %[qload2],    14(%[dst])                   \n\t" /* load even 8 from dst */
407           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 1 */
408           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 8 */
409 
410           "lbu              %[st1],       1(%[dst])                    \n\t" /* load odd 1 from dst */
411 
412           /* odd 2. pixel */
413           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 3 */
414           "mthi             $zero,        $ac2                         \n\t"
415           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 8 */
416           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
417           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
418           "sb               %[qload2],    14(%[dst])                   \n\t" /* store even 8 to dst */
419           "ulw              %[qload1],    13(%[src])                   \n\t"
420           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* odd 2 */
421           "lbu              %[qload3],    3(%[dst])                    \n\t" /* load odd 2 from dst */
422           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 2 */
423           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 1 */
424 
425           /* odd 3. pixel */
426           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 4 */
427           "mthi             $zero,        $ac3                         \n\t"
428           "addqh_r.w        %[st3],       %[st3],         %[st1]       \n\t" /* average odd 1 */
429           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
430           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* odd 3 */
431           "sb               %[st3],       1(%[dst])                    \n\t" /* store odd 1 to dst */
432           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 3 */
433           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 2 */
434 
435           /* odd 4. pixel */
436           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 5 */
437           "mthi             $zero,        $ac1                         \n\t"
438           "addqh_r.w        %[qload3],    %[qload3],      %[st1]       \n\t" /* average odd 2 */
439           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
440           "sb               %[qload3],    3(%[dst])                    \n\t" /* store odd 2 to dst */
441           "lbu              %[qload1],    5(%[dst])                    \n\t" /* load odd 3 from dst */
442           "dpa.w.ph         $ac3,         %[p4],          %[filter45]  \n\t" /* odd 4 */
443           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 4 */
444           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 3 */
445 
446           "lbu              %[st1],       7(%[dst])                    \n\t" /* load odd 4 from dst */
447 
448           /* odd 5. pixel */
449           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 6 */
450           "mthi             $zero,        $ac2                         \n\t"
451           "addqh_r.w        %[qload1],    %[qload1],      %[st2]       \n\t" /* average odd 3 */
452           "sb               %[qload1],    5(%[dst])                    \n\t" /* store odd 3 to dst */
453           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* odd 5 */
454           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 5 */
455           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 4 */
456 
457           "lbu              %[qload1],    9(%[dst])                    \n\t" /* load odd 5 from dst */
458 
459           /* odd 6. pixel */
460           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 7 */
461           "mthi             $zero,        $ac3                         \n\t"
462           "addqh_r.w        %[st1],       %[st1],         %[st3]       \n\t" /* average odd 4 */
463           "sb               %[st1],       7(%[dst])                    \n\t" /* store odd 4 to dst */
464           "dpa.w.ph         $ac2,         %[p5],          %[filter45]  \n\t" /* odd 6 */
465           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 6 */
466           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 5 */
467 
468           /* odd 7. pixel */
469           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 8 */
470           "mthi             $zero,        $ac1                         \n\t"
471           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 5 */
472           "sb               %[qload1],    9(%[dst])                    \n\t" /* store odd 5 to dst */
473           "lbu              %[qload2],    11(%[dst])                   \n\t" /* load odd 6 from dst */
474           "dpa.w.ph         $ac3,         %[p2],          %[filter45]  \n\t" /* odd 7 */
475           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 7 */
476 
477           "lbu              %[qload3],    13(%[dst])                   \n\t" /* load odd 7 from dst */
478 
479           /* odd 8. pixel */
480           "dpa.w.ph         $ac1,         %[p3],          %[filter45]  \n\t" /* odd 8 */
481           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 8 */
482 
483           "lbu              %[qload1],    15(%[dst])                   \n\t" /* load odd 8 from dst */
484 
485           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 6 */
486           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average odd 6 */
487 
488           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 7 */
489           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average odd 7 */
490 
491           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 8 */
492           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 8 */
493 
494           "sb               %[qload2],    11(%[dst])                   \n\t" /* store odd 6 to dst */
495           "sb               %[qload3],    13(%[dst])                   \n\t" /* store odd 7 to dst */
496           "sb               %[qload1],    15(%[dst])                   \n\t" /* store odd 8 to dst */
497 
498           : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2),
499             [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3),
500             [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
501             [qload3] "=&r" (qload3), [p5] "=&r" (p5),
502             [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3)
503           : [filter45] "r" (filter45), [vector_64] "r" (vector_64),
504             [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
505       );
506 
507       src += 16;
508       dst += 16;
509     }
510 
511     /* Next row... */
512     src_ptr += src_stride;
513     dst_ptr += dst_stride;
514   }
515 }
516 
convolve_bi_avg_horiz_64_dspr2(const uint8_t * src_ptr,int32_t src_stride,uint8_t * dst_ptr,int32_t dst_stride,const int16_t * filter_x0,int32_t h)517 static void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr,
518                                           int32_t src_stride,
519                                           uint8_t *dst_ptr,
520                                           int32_t dst_stride,
521                                           const int16_t *filter_x0,
522                                           int32_t h) {
523   int32_t y, c;
524   const uint8_t *src;
525   uint8_t *dst;
526   uint8_t *cm = vp9_ff_cropTbl;
527   uint32_t vector_64 = 64;
528   int32_t Temp1, Temp2, Temp3;
529   uint32_t qload1, qload2, qload3;
530   uint32_t p1, p2, p3, p4, p5;
531   uint32_t st1, st2, st3;
532   const int16_t *filter = &filter_x0[3];
533   uint32_t filter45;;
534 
535   filter45 = ((const int32_t *)filter)[0];
536 
537   for (y = h; y--;) {
538     src = src_ptr;
539     dst = dst_ptr;
540 
541     /* prefetch data to cache memory */
542     vp9_prefetch_load(src_ptr + src_stride);
543     vp9_prefetch_load(src_ptr + src_stride + 32);
544     vp9_prefetch_load(src_ptr + src_stride + 64);
545     vp9_prefetch_store(dst_ptr + dst_stride);
546     vp9_prefetch_store(dst_ptr + dst_stride + 32);
547 
548     for (c = 0; c < 4; c++) {
549       __asm__ __volatile__ (
550           "ulw              %[qload1],    0(%[src])                    \n\t"
551           "ulw              %[qload2],    4(%[src])                    \n\t"
552 
553           /* even 1. pixel */
554           "mtlo             %[vector_64], $ac1                         \n\t" /* even 1 */
555           "mthi             $zero,        $ac1                         \n\t"
556           "mtlo             %[vector_64], $ac2                         \n\t" /* even 2 */
557           "mthi             $zero,        $ac2                         \n\t"
558           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
559           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
560           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
561           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
562           "ulw              %[qload3],    8(%[src])                    \n\t"
563           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* even 1 */
564           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 1 */
565           "lbu              %[st2],       0(%[dst])                    \n\t" /* load even 1 from dst */
566 
567           /* even 2. pixel */
568           "mtlo             %[vector_64], $ac3                         \n\t" /* even 3 */
569           "mthi             $zero,        $ac3                         \n\t"
570           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
571           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
572           "ulw              %[qload1],    12(%[src])                   \n\t"
573           "dpa.w.ph         $ac2,         %[p2],          %[filter45]  \n\t" /* even 1 */
574           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 1 */
575           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 1 */
576 
577           "lbu              %[qload3],    2(%[dst])                    \n\t" /* load even 2 from dst */
578 
579           /* even 3. pixel */
580           "mtlo             %[vector_64], $ac1                         \n\t" /* even 4 */
581           "mthi             $zero,        $ac1                         \n\t"
582           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 1 */
583           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
584           "sb               %[st2],       0(%[dst])                    \n\t" /* store even 1 to dst */
585           "dpa.w.ph         $ac3,         %[p3],          %[filter45]  \n\t" /* even 3 */
586           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 3 */
587           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 1 */
588 
589           /* even 4. pixel */
590           "mtlo             %[vector_64], $ac2                         \n\t" /* even 5 */
591           "mthi             $zero,        $ac2                         \n\t"
592           "addqh_r.w        %[qload3],    %[qload3],      %[st2]       \n\t" /* average even 2 */
593           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
594           "sb               %[qload3],    2(%[dst])                    \n\t" /* store even 2 to dst */
595           "lbu              %[qload3],    4(%[dst])                    \n\t" /* load even 3 from dst */
596           "lbu              %[qload1],    6(%[dst])                    \n\t" /* load even 4 from dst */
597           "dpa.w.ph         $ac1,         %[p4],          %[filter45]  \n\t" /* even 4 */
598           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 4 */
599           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 3 */
600 
601           /* even 5. pixel */
602           "mtlo             %[vector_64], $ac3                         \n\t" /* even 6 */
603           "mthi             $zero,        $ac3                         \n\t"
604           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 3 */
605           "sb               %[qload3],    4(%[dst])                    \n\t" /* store even 3 to dst */
606           "dpa.w.ph         $ac2,         %[p1],          %[filter45]  \n\t" /* even 5 */
607           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 5 */
608           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 4 */
609 
610           /* even 6. pixel */
611           "mtlo             %[vector_64], $ac1                         \n\t" /* even 7 */
612           "mthi             $zero,        $ac1                         \n\t"
613           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average even 4 */
614           "sb               %[qload1],    6(%[dst])                    \n\t" /* store even 4 to dst */
615           "dpa.w.ph         $ac3,         %[p5],          %[filter45]  \n\t" /* even 6 */
616           "lbu              %[qload2],    8(%[dst])                    \n\t" /* load even 5 from dst */
617           "extp             %[Temp3],     $ac3,           31           \n\t" /* even 6 */
618           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 5 */
619 
620           /* even 7. pixel */
621           "mtlo             %[vector_64], $ac2                         \n\t" /* even 8 */
622           "mthi             $zero,        $ac2                         \n\t"
623           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 5 */
624           "sb               %[qload2],    8(%[dst])                    \n\t" /* store even 5 to dst */
625           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* even 7 */
626           "lbu              %[qload3],    10(%[dst])                   \n\t" /* load even 6 from dst */
627           "extp             %[Temp1],     $ac1,           31           \n\t" /* even 7 */
628           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 6 */
629 
630           "lbu              %[st2],       12(%[dst])                   \n\t" /* load even 7 from dst */
631 
632           /* even 8. pixel */
633           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 1 */
634           "mthi             $zero,        $ac3                         \n\t"
635           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 6 */
636           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* even 8 */
637           "sb               %[qload3],    10(%[dst])                   \n\t" /* store even 6 to dst */
638           "extp             %[Temp2],     $ac2,           31           \n\t" /* even 8 */
639           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 7 */
640 
641           /* ODD pixels */
642           "ulw              %[qload1],    1(%[src])                   \n\t"
643           "ulw              %[qload2],    5(%[src])                    \n\t"
644 
645           "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 7 */
646 
647           /* odd 1. pixel */
648           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 2 */
649           "mthi             $zero,        $ac1                         \n\t"
650           "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
651           "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
652           "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
653           "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
654           "sb               %[st2],       12(%[dst])                   \n\t" /* store even 7 to dst */
655           "ulw              %[qload3],    9(%[src])                    \n\t"
656           "dpa.w.ph         $ac3,         %[p1],          %[filter45]  \n\t" /* odd 1 */
657           "lbu              %[qload2],    14(%[dst])                   \n\t" /* load even 8 from dst */
658           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 1 */
659           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 8 */
660 
661           "lbu              %[st1],       1(%[dst])                    \n\t" /* load odd 1 from dst */
662 
663           /* odd 2. pixel */
664           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 3 */
665           "mthi             $zero,        $ac2                         \n\t"
666           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 8 */
667           "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
668           "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
669           "sb               %[qload2],    14(%[dst])                   \n\t" /* store even 8 to dst */
670           "ulw              %[qload1],    13(%[src])                   \n\t"
671           "dpa.w.ph         $ac1,         %[p2],          %[filter45]  \n\t" /* odd 2 */
672           "lbu              %[qload3],    3(%[dst])                    \n\t" /* load odd 2 from dst */
673           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 2 */
674           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 1 */
675 
676           /* odd 3. pixel */
677           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 4 */
678           "mthi             $zero,        $ac3                         \n\t"
679           "addqh_r.w        %[st3],       %[st3],         %[st1]       \n\t" /* average odd 1 */
680           "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
681           "dpa.w.ph         $ac2,         %[p3],          %[filter45]  \n\t" /* odd 3 */
682           "sb               %[st3],       1(%[dst])                    \n\t" /* store odd 1 to dst */
683           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 3 */
684           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 2 */
685 
686           /* odd 4. pixel */
687           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 5 */
688           "mthi             $zero,        $ac1                         \n\t"
689           "addqh_r.w        %[qload3],    %[qload3],      %[st1]       \n\t" /* average odd 2 */
690           "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
691           "sb               %[qload3],    3(%[dst])                    \n\t" /* store odd 2 to dst */
692           "lbu              %[qload1],    5(%[dst])                    \n\t" /* load odd 3 from dst */
693           "dpa.w.ph         $ac3,         %[p4],          %[filter45]  \n\t" /* odd 4 */
694           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 4 */
695           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 3 */
696 
697           "lbu              %[st1],       7(%[dst])                    \n\t" /* load odd 4 from dst */
698 
699           /* odd 5. pixel */
700           "mtlo             %[vector_64], $ac2                         \n\t" /* odd 6 */
701           "mthi             $zero,        $ac2                         \n\t"
702           "addqh_r.w        %[qload1],    %[qload1],      %[st2]       \n\t" /* average odd 3 */
703           "sb               %[qload1],    5(%[dst])                    \n\t" /* store odd 3 to dst */
704           "dpa.w.ph         $ac1,         %[p1],          %[filter45]  \n\t" /* odd 5 */
705           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 5 */
706           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 4 */
707 
708           "lbu              %[qload1],    9(%[dst])                    \n\t" /* load odd 5 from dst */
709 
710           /* odd 6. pixel */
711           "mtlo             %[vector_64], $ac3                         \n\t" /* odd 7 */
712           "mthi             $zero,        $ac3                         \n\t"
713           "addqh_r.w        %[st1],       %[st1],         %[st3]       \n\t" /* average odd 4 */
714           "sb               %[st1],       7(%[dst])                    \n\t" /* store odd 4 to dst */
715           "dpa.w.ph         $ac2,         %[p5],          %[filter45]  \n\t" /* odd 6 */
716           "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 6 */
717           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 5 */
718 
719           /* odd 7. pixel */
720           "mtlo             %[vector_64], $ac1                         \n\t" /* odd 8 */
721           "mthi             $zero,        $ac1                         \n\t"
722           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 5 */
723           "sb               %[qload1],    9(%[dst])                    \n\t" /* store odd 5 to dst */
724           "lbu              %[qload2],    11(%[dst])                   \n\t" /* load odd 6 from dst */
725           "dpa.w.ph         $ac3,         %[p2],          %[filter45]  \n\t" /* odd 7 */
726           "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 7 */
727 
728           "lbu              %[qload3],    13(%[dst])                   \n\t" /* load odd 7 from dst */
729 
730           /* odd 8. pixel */
731           "dpa.w.ph         $ac1,         %[p3],          %[filter45]  \n\t" /* odd 8 */
732           "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 8 */
733 
734           "lbu              %[qload1],    15(%[dst])                   \n\t" /* load odd 8 from dst */
735 
736           "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 6 */
737           "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average odd 6 */
738 
739           "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 7 */
740           "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average odd 7 */
741 
742           "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 8 */
743           "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 8 */
744 
745           "sb               %[qload2],    11(%[dst])                   \n\t" /* store odd 6 to dst */
746           "sb               %[qload3],    13(%[dst])                   \n\t" /* store odd 7 to dst */
747           "sb               %[qload1],    15(%[dst])                   \n\t" /* store odd 8 to dst */
748 
749           : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2),
750             [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3),
751             [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
752             [qload3] "=&r" (qload3), [p5] "=&r" (p5),
753             [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3)
754           : [filter45] "r" (filter45), [vector_64] "r" (vector_64),
755             [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
756       );
757 
758       src += 16;
759       dst += 16;
760     }
761 
762     /* Next row... */
763     src_ptr += src_stride;
764     dst_ptr += dst_stride;
765   }
766 }
767 
/*
 * Entry point for the horizontal 2-tap "convolve and average" on DSPr2.
 *
 * The DSPr2 kernels are used only when x_step_q4 is 16 and w is one of
 * 4, 8, 16, 32 or 64; every other combination is forwarded unchanged to
 * vp9_convolve8_avg_horiz_c.  filter_y and y_step_q4 are not used by the
 * DSPr2 kernels and are only passed on to that C fallback.
 */
void vp9_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
  /* value written to the DSPControl pos field; the kernels' extp
   * instructions take their extraction position from it */
  uint32_t extract_pos = 38;

  /* Any step other than 16 goes straight to the C version. */
  if (x_step_q4 != 16) {
    vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                              filter_x, x_step_q4, filter_y, y_step_q4,
                              w, h);
    return;
  }

  /* bit position for extract from acc */
  __asm__ __volatile__ (
    "wrdsp      %[pos],     1           \n\t"
    :
    : [pos] "r" (extract_pos)
  );

  /* warm the cache with the start of the first source/destination rows */
  vp9_prefetch_load(src);
  vp9_prefetch_load(src + 32);
  vp9_prefetch_store(dst);

  switch (w) {
    case 4:
      convolve_bi_avg_horiz_4_dspr2(src, src_stride, dst, dst_stride,
                                    filter_x, h);
      break;

    case 8:
      convolve_bi_avg_horiz_8_dspr2(src, src_stride, dst, dst_stride,
                                    filter_x, h);
      break;

    case 16:
    case 32:
      /* last argument is 1 for w == 16 and 2 for w == 32 */
      convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride,
                                     filter_x, h, w / 16);
      break;

    case 64:
      /* the wide kernel gets two extra prefetches for the first row */
      vp9_prefetch_load(src + 64);
      vp9_prefetch_store(dst + 32);
      convolve_bi_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride,
                                     filter_x, h);
      break;

    default:
      /* width without a DSPr2 kernel */
      vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
                                filter_x, x_step_q4, filter_y, y_step_q4,
                                w, h);
      break;
  }
}
833 #endif
834