/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include "vpx_dsp/mips/common_dspr2.h"
12
13 #if HAVE_DSPR2
vpx_h_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)14 void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
15 const uint8_t *above, const uint8_t *left) {
16 int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
17 (void)above;
18
19 __asm__ __volatile__(
20 "lb %[tmp1], (%[left]) \n\t"
21 "lb %[tmp2], 1(%[left]) \n\t"
22 "lb %[tmp3], 2(%[left]) \n\t"
23 "lb %[tmp4], 3(%[left]) \n\t"
24 "lb %[tmp5], 4(%[left]) \n\t"
25 "lb %[tmp6], 5(%[left]) \n\t"
26 "lb %[tmp7], 6(%[left]) \n\t"
27 "lb %[tmp8], 7(%[left]) \n\t"
28
29 "replv.qb %[tmp1], %[tmp1] \n\t"
30 "replv.qb %[tmp2], %[tmp2] \n\t"
31 "replv.qb %[tmp3], %[tmp3] \n\t"
32 "replv.qb %[tmp4], %[tmp4] \n\t"
33 "replv.qb %[tmp5], %[tmp5] \n\t"
34 "replv.qb %[tmp6], %[tmp6] \n\t"
35 "replv.qb %[tmp7], %[tmp7] \n\t"
36 "replv.qb %[tmp8], %[tmp8] \n\t"
37
38 "sw %[tmp1], (%[dst]) \n\t"
39 "sw %[tmp1], 4(%[dst]) \n\t"
40 "add %[dst], %[dst], %[stride] \n\t"
41 "sw %[tmp2], (%[dst]) \n\t"
42 "sw %[tmp2], 4(%[dst]) \n\t"
43 "add %[dst], %[dst], %[stride] \n\t"
44 "sw %[tmp3], (%[dst]) \n\t"
45 "sw %[tmp3], 4(%[dst]) \n\t"
46 "add %[dst], %[dst], %[stride] \n\t"
47 "sw %[tmp4], (%[dst]) \n\t"
48 "sw %[tmp4], 4(%[dst]) \n\t"
49 "add %[dst], %[dst], %[stride] \n\t"
50 "sw %[tmp5], (%[dst]) \n\t"
51 "sw %[tmp5], 4(%[dst]) \n\t"
52 "add %[dst], %[dst], %[stride] \n\t"
53 "sw %[tmp6], (%[dst]) \n\t"
54 "sw %[tmp6], 4(%[dst]) \n\t"
55 "add %[dst], %[dst], %[stride] \n\t"
56 "sw %[tmp7], (%[dst]) \n\t"
57 "sw %[tmp7], 4(%[dst]) \n\t"
58 "add %[dst], %[dst], %[stride] \n\t"
59 "sw %[tmp8], (%[dst]) \n\t"
60 "sw %[tmp8], 4(%[dst]) \n\t"
61
62 : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
63 [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
64 [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8)
65 : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
66 }
67
vpx_dc_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)68 void vpx_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
69 const uint8_t *above, const uint8_t *left) {
70 int32_t expected_dc;
71 int32_t average;
72 int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
73 int32_t above2, above_l2, above_r2, left2, left_r2, left_l2;
74
75 __asm__ __volatile__(
76 "lw %[above1], (%[above]) \n\t"
77 "lw %[above2], 4(%[above]) \n\t"
78 "lw %[left1], (%[left]) \n\t"
79 "lw %[left2], 4(%[left]) \n\t"
80
81 "preceu.ph.qbl %[above_l1], %[above1] \n\t"
82 "preceu.ph.qbr %[above_r1], %[above1] \n\t"
83 "preceu.ph.qbl %[left_l1], %[left1] \n\t"
84 "preceu.ph.qbr %[left_r1], %[left1] \n\t"
85
86 "preceu.ph.qbl %[above_l2], %[above2] \n\t"
87 "preceu.ph.qbr %[above_r2], %[above2] \n\t"
88 "preceu.ph.qbl %[left_l2], %[left2] \n\t"
89 "preceu.ph.qbr %[left_r2], %[left2] \n\t"
90
91 "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
92 "addu.ph %[average], %[average], %[left_l1] \n\t"
93 "addu.ph %[average], %[average], %[left_r1] \n\t"
94
95 "addu.ph %[average], %[average], %[above_l2] \n\t"
96 "addu.ph %[average], %[average], %[above_r2] \n\t"
97 "addu.ph %[average], %[average], %[left_l2] \n\t"
98 "addu.ph %[average], %[average], %[left_r2] \n\t"
99
100 "addiu %[average], %[average], 8 \n\t"
101
102 "srl %[tmp], %[average], 16 \n\t"
103 "addu.ph %[average], %[tmp], %[average] \n\t"
104 "srl %[expected_dc], %[average], 4 \n\t"
105 "replv.qb %[expected_dc], %[expected_dc] \n\t"
106
107 "sw %[expected_dc], (%[dst]) \n\t"
108 "sw %[expected_dc], 4(%[dst]) \n\t"
109
110 "add %[dst], %[dst], %[stride] \n\t"
111 "sw %[expected_dc], (%[dst]) \n\t"
112 "sw %[expected_dc], 4(%[dst]) \n\t"
113
114 "add %[dst], %[dst], %[stride] \n\t"
115 "sw %[expected_dc], (%[dst]) \n\t"
116 "sw %[expected_dc], 4(%[dst]) \n\t"
117
118 "add %[dst], %[dst], %[stride] \n\t"
119 "sw %[expected_dc], (%[dst]) \n\t"
120 "sw %[expected_dc], 4(%[dst]) \n\t"
121
122 "add %[dst], %[dst], %[stride] \n\t"
123 "sw %[expected_dc], (%[dst]) \n\t"
124 "sw %[expected_dc], 4(%[dst]) \n\t"
125
126 "add %[dst], %[dst], %[stride] \n\t"
127 "sw %[expected_dc], (%[dst]) \n\t"
128 "sw %[expected_dc], 4(%[dst]) \n\t"
129
130 "add %[dst], %[dst], %[stride] \n\t"
131 "sw %[expected_dc], (%[dst]) \n\t"
132 "sw %[expected_dc], 4(%[dst]) \n\t"
133
134 "add %[dst], %[dst], %[stride] \n\t"
135 "sw %[expected_dc], (%[dst]) \n\t"
136 "sw %[expected_dc], 4(%[dst]) \n\t"
137
138 : [above1] "=&r"(above1), [above_l1] "=&r"(above_l1),
139 [above_r1] "=&r"(above_r1), [left1] "=&r"(left1),
140 [left_l1] "=&r"(left_l1), [left_r1] "=&r"(left_r1),
141 [above2] "=&r"(above2), [above_l2] "=&r"(above_l2),
142 [above_r2] "=&r"(above_r2), [left2] "=&r"(left2),
143 [left_l2] "=&r"(left_l2), [left_r2] "=&r"(left_r2),
144 [average] "=&r"(average), [tmp] "=&r"(tmp),
145 [expected_dc] "=&r"(expected_dc)
146 : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
147 [stride] "r"(stride));
148 }
149
vpx_tm_predictor_8x8_dspr2(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)150 void vpx_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
151 const uint8_t *above, const uint8_t *left) {
152 int32_t abovel, abover;
153 int32_t abovel_1, abover_1;
154 int32_t left0;
155 int32_t res0, res1, res2, res3;
156 int32_t reshw;
157 int32_t top_left;
158 uint8_t *cm = vpx_ff_cropTbl;
159
160 __asm__ __volatile__(
161 "ulw %[reshw], (%[above]) \n\t"
162 "ulw %[top_left], 4(%[above]) \n\t"
163
164 "lbu %[left0], (%[left]) \n\t"
165
166 "preceu.ph.qbl %[abovel], %[reshw] \n\t"
167 "preceu.ph.qbr %[abover], %[reshw] \n\t"
168 "preceu.ph.qbl %[abovel_1], %[top_left] \n\t"
169 "preceu.ph.qbr %[abover_1], %[top_left] \n\t"
170
171 "lbu %[top_left], -1(%[above]) \n\t"
172 "replv.ph %[left0], %[left0] \n\t"
173
174 "replv.ph %[top_left], %[top_left] \n\t"
175
176 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
177 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
178
179 "sll %[res2], %[reshw], 16 \n\t"
180 "sra %[res2], %[res2], 16 \n\t"
181 "sra %[res3], %[reshw], 16 \n\t"
182
183 "addu.ph %[reshw], %[abover], %[left0] \n\t"
184 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
185
186 "sll %[res0], %[reshw], 16 \n\t"
187 "sra %[res0], %[res0], 16 \n\t"
188 "sra %[res1], %[reshw], 16 \n\t"
189
190 "lbux %[res0], %[res0](%[cm]) \n\t"
191 "lbux %[res1], %[res1](%[cm]) \n\t"
192 "lbux %[res2], %[res2](%[cm]) \n\t"
193 "lbux %[res3], %[res3](%[cm]) \n\t"
194
195 "sb %[res0], (%[dst]) \n\t"
196 "sb %[res1], 1(%[dst]) \n\t"
197 "sb %[res2], 2(%[dst]) \n\t"
198 "sb %[res3], 3(%[dst]) \n\t"
199
200 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
201 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
202
203 "sll %[res2], %[reshw], 16 \n\t"
204 "sra %[res2], %[res2], 16 \n\t"
205 "sra %[res3], %[reshw], 16 \n\t"
206
207 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
208 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
209
210 "sll %[res0], %[reshw], 16 \n\t"
211 "sra %[res0], %[res0], 16 \n\t"
212 "sra %[res1], %[reshw], 16 \n\t"
213
214 "lbu %[left0], 1(%[left]) \n\t"
215
216 "lbux %[res0], %[res0](%[cm]) \n\t"
217 "lbux %[res1], %[res1](%[cm]) \n\t"
218 "lbux %[res2], %[res2](%[cm]) \n\t"
219 "lbux %[res3], %[res3](%[cm]) \n\t"
220
221 "sb %[res0], 4(%[dst]) \n\t"
222 "sb %[res1], 5(%[dst]) \n\t"
223 "sb %[res2], 6(%[dst]) \n\t"
224 "sb %[res3], 7(%[dst]) \n\t"
225
226 "replv.ph %[left0], %[left0] \n\t"
227 "add %[dst], %[dst], %[stride] \n\t"
228
229 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
230 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
231
232 "sll %[res2], %[reshw], 16 \n\t"
233 "sra %[res2], %[res2], 16 \n\t"
234 "sra %[res3], %[reshw], 16 \n\t"
235
236 "addu.ph %[reshw], %[abover], %[left0] \n\t"
237 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
238
239 "sll %[res0], %[reshw], 16 \n\t"
240 "sra %[res0], %[res0], 16 \n\t"
241 "sra %[res1], %[reshw], 16 \n\t"
242
243 "lbux %[res0], %[res0](%[cm]) \n\t"
244 "lbux %[res1], %[res1](%[cm]) \n\t"
245 "lbux %[res2], %[res2](%[cm]) \n\t"
246 "lbux %[res3], %[res3](%[cm]) \n\t"
247
248 "sb %[res0], (%[dst]) \n\t"
249 "sb %[res1], 1(%[dst]) \n\t"
250 "sb %[res2], 2(%[dst]) \n\t"
251 "sb %[res3], 3(%[dst]) \n\t"
252
253 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
254 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
255
256 "sll %[res2], %[reshw], 16 \n\t"
257 "sra %[res2], %[res2], 16 \n\t"
258 "sra %[res3], %[reshw], 16 \n\t"
259
260 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
261 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
262
263 "sll %[res0], %[reshw], 16 \n\t"
264 "sra %[res0], %[res0], 16 \n\t"
265 "sra %[res1], %[reshw], 16 \n\t"
266
267 "lbu %[left0], 2(%[left]) \n\t"
268
269 "lbux %[res0], %[res0](%[cm]) \n\t"
270 "lbux %[res1], %[res1](%[cm]) \n\t"
271 "lbux %[res2], %[res2](%[cm]) \n\t"
272 "lbux %[res3], %[res3](%[cm]) \n\t"
273
274 "sb %[res0], 4(%[dst]) \n\t"
275 "sb %[res1], 5(%[dst]) \n\t"
276 "sb %[res2], 6(%[dst]) \n\t"
277 "sb %[res3], 7(%[dst]) \n\t"
278
279 "replv.ph %[left0], %[left0] \n\t"
280 "add %[dst], %[dst], %[stride] \n\t"
281
282 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
283 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
284
285 "sll %[res2], %[reshw], 16 \n\t"
286 "sra %[res2], %[res2], 16 \n\t"
287 "sra %[res3], %[reshw], 16 \n\t"
288
289 "addu.ph %[reshw], %[abover], %[left0] \n\t"
290 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
291
292 "sll %[res0], %[reshw], 16 \n\t"
293 "sra %[res0], %[res0], 16 \n\t"
294 "sra %[res1], %[reshw], 16 \n\t"
295
296 "lbux %[res0], %[res0](%[cm]) \n\t"
297 "lbux %[res1], %[res1](%[cm]) \n\t"
298 "lbux %[res2], %[res2](%[cm]) \n\t"
299 "lbux %[res3], %[res3](%[cm]) \n\t"
300
301 "sb %[res0], (%[dst]) \n\t"
302 "sb %[res1], 1(%[dst]) \n\t"
303 "sb %[res2], 2(%[dst]) \n\t"
304 "sb %[res3], 3(%[dst]) \n\t"
305
306 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
307 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
308
309 "sll %[res2], %[reshw], 16 \n\t"
310 "sra %[res2], %[res2], 16 \n\t"
311 "sra %[res3], %[reshw], 16 \n\t"
312
313 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
314 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
315
316 "sll %[res0], %[reshw], 16 \n\t"
317 "sra %[res0], %[res0], 16 \n\t"
318 "sra %[res1], %[reshw], 16 \n\t"
319
320 "lbu %[left0], 3(%[left]) \n\t"
321
322 "lbux %[res0], %[res0](%[cm]) \n\t"
323 "lbux %[res1], %[res1](%[cm]) \n\t"
324 "lbux %[res2], %[res2](%[cm]) \n\t"
325 "lbux %[res3], %[res3](%[cm]) \n\t"
326
327 "sb %[res0], 4(%[dst]) \n\t"
328 "sb %[res1], 5(%[dst]) \n\t"
329 "sb %[res2], 6(%[dst]) \n\t"
330 "sb %[res3], 7(%[dst]) \n\t"
331
332 "replv.ph %[left0], %[left0] \n\t"
333 "add %[dst], %[dst], %[stride] \n\t"
334
335 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
336 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
337
338 "sll %[res2], %[reshw], 16 \n\t"
339 "sra %[res2], %[res2], 16 \n\t"
340 "sra %[res3], %[reshw], 16 \n\t"
341
342 "addu.ph %[reshw], %[abover], %[left0] \n\t"
343 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
344
345 "sll %[res0], %[reshw], 16 \n\t"
346 "sra %[res0], %[res0], 16 \n\t"
347 "sra %[res1], %[reshw], 16 \n\t"
348
349 "lbux %[res0], %[res0](%[cm]) \n\t"
350 "lbux %[res1], %[res1](%[cm]) \n\t"
351 "lbux %[res2], %[res2](%[cm]) \n\t"
352 "lbux %[res3], %[res3](%[cm]) \n\t"
353
354 "sb %[res0], (%[dst]) \n\t"
355 "sb %[res1], 1(%[dst]) \n\t"
356 "sb %[res2], 2(%[dst]) \n\t"
357 "sb %[res3], 3(%[dst]) \n\t"
358
359 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
360 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
361
362 "sll %[res2], %[reshw], 16 \n\t"
363 "sra %[res2], %[res2], 16 \n\t"
364 "sra %[res3], %[reshw], 16 \n\t"
365
366 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
367 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
368
369 "sll %[res0], %[reshw], 16 \n\t"
370 "sra %[res0], %[res0], 16 \n\t"
371 "sra %[res1], %[reshw], 16 \n\t"
372
373 "lbu %[left0], 4(%[left]) \n\t"
374
375 "lbux %[res0], %[res0](%[cm]) \n\t"
376 "lbux %[res1], %[res1](%[cm]) \n\t"
377 "lbux %[res2], %[res2](%[cm]) \n\t"
378 "lbux %[res3], %[res3](%[cm]) \n\t"
379
380 "sb %[res0], 4(%[dst]) \n\t"
381 "sb %[res1], 5(%[dst]) \n\t"
382 "sb %[res2], 6(%[dst]) \n\t"
383 "sb %[res3], 7(%[dst]) \n\t"
384
385 "replv.ph %[left0], %[left0] \n\t"
386 "add %[dst], %[dst], %[stride] \n\t"
387
388 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
389 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
390
391 "sll %[res2], %[reshw], 16 \n\t"
392 "sra %[res2], %[res2], 16 \n\t"
393 "sra %[res3], %[reshw], 16 \n\t"
394
395 "addu.ph %[reshw], %[abover], %[left0] \n\t"
396 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
397
398 "sll %[res0], %[reshw], 16 \n\t"
399 "sra %[res0], %[res0], 16 \n\t"
400 "sra %[res1], %[reshw], 16 \n\t"
401
402 "lbux %[res0], %[res0](%[cm]) \n\t"
403 "lbux %[res1], %[res1](%[cm]) \n\t"
404 "lbux %[res2], %[res2](%[cm]) \n\t"
405 "lbux %[res3], %[res3](%[cm]) \n\t"
406
407 "sb %[res0], (%[dst]) \n\t"
408 "sb %[res1], 1(%[dst]) \n\t"
409 "sb %[res2], 2(%[dst]) \n\t"
410 "sb %[res3], 3(%[dst]) \n\t"
411
412 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
413 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
414
415 "sll %[res2], %[reshw], 16 \n\t"
416 "sra %[res2], %[res2], 16 \n\t"
417 "sra %[res3], %[reshw], 16 \n\t"
418
419 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
420 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
421
422 "sll %[res0], %[reshw], 16 \n\t"
423 "sra %[res0], %[res0], 16 \n\t"
424 "sra %[res1], %[reshw], 16 \n\t"
425
426 "lbu %[left0], 5(%[left]) \n\t"
427
428 "lbux %[res0], %[res0](%[cm]) \n\t"
429 "lbux %[res1], %[res1](%[cm]) \n\t"
430 "lbux %[res2], %[res2](%[cm]) \n\t"
431 "lbux %[res3], %[res3](%[cm]) \n\t"
432
433 "sb %[res0], 4(%[dst]) \n\t"
434 "sb %[res1], 5(%[dst]) \n\t"
435 "sb %[res2], 6(%[dst]) \n\t"
436 "sb %[res3], 7(%[dst]) \n\t"
437
438 "replv.ph %[left0], %[left0] \n\t"
439 "add %[dst], %[dst], %[stride] \n\t"
440
441 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
442 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
443
444 "sll %[res2], %[reshw], 16 \n\t"
445 "sra %[res2], %[res2], 16 \n\t"
446 "sra %[res3], %[reshw], 16 \n\t"
447
448 "addu.ph %[reshw], %[abover], %[left0] \n\t"
449 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
450
451 "sll %[res0], %[reshw], 16 \n\t"
452 "sra %[res0], %[res0], 16 \n\t"
453 "sra %[res1], %[reshw], 16 \n\t"
454
455 "lbux %[res0], %[res0](%[cm]) \n\t"
456 "lbux %[res1], %[res1](%[cm]) \n\t"
457 "lbux %[res2], %[res2](%[cm]) \n\t"
458 "lbux %[res3], %[res3](%[cm]) \n\t"
459
460 "sb %[res0], (%[dst]) \n\t"
461 "sb %[res1], 1(%[dst]) \n\t"
462 "sb %[res2], 2(%[dst]) \n\t"
463 "sb %[res3], 3(%[dst]) \n\t"
464
465 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
466 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
467
468 "sll %[res2], %[reshw], 16 \n\t"
469 "sra %[res2], %[res2], 16 \n\t"
470 "sra %[res3], %[reshw], 16 \n\t"
471
472 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
473 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
474
475 "sll %[res0], %[reshw], 16 \n\t"
476 "sra %[res0], %[res0], 16 \n\t"
477 "sra %[res1], %[reshw], 16 \n\t"
478
479 "lbu %[left0], 6(%[left]) \n\t"
480
481 "lbux %[res0], %[res0](%[cm]) \n\t"
482 "lbux %[res1], %[res1](%[cm]) \n\t"
483 "lbux %[res2], %[res2](%[cm]) \n\t"
484 "lbux %[res3], %[res3](%[cm]) \n\t"
485
486 "sb %[res0], 4(%[dst]) \n\t"
487 "sb %[res1], 5(%[dst]) \n\t"
488 "sb %[res2], 6(%[dst]) \n\t"
489 "sb %[res3], 7(%[dst]) \n\t"
490
491 "replv.ph %[left0], %[left0] \n\t"
492 "add %[dst], %[dst], %[stride] \n\t"
493
494 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
495 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
496
497 "sll %[res2], %[reshw], 16 \n\t"
498 "sra %[res2], %[res2], 16 \n\t"
499 "sra %[res3], %[reshw], 16 \n\t"
500
501 "addu.ph %[reshw], %[abover], %[left0] \n\t"
502 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
503
504 "sll %[res0], %[reshw], 16 \n\t"
505 "sra %[res0], %[res0], 16 \n\t"
506 "sra %[res1], %[reshw], 16 \n\t"
507
508 "lbux %[res0], %[res0](%[cm]) \n\t"
509 "lbux %[res1], %[res1](%[cm]) \n\t"
510 "lbux %[res2], %[res2](%[cm]) \n\t"
511 "lbux %[res3], %[res3](%[cm]) \n\t"
512
513 "sb %[res0], (%[dst]) \n\t"
514 "sb %[res1], 1(%[dst]) \n\t"
515 "sb %[res2], 2(%[dst]) \n\t"
516 "sb %[res3], 3(%[dst]) \n\t"
517
518 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
519 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
520
521 "sll %[res2], %[reshw], 16 \n\t"
522 "sra %[res2], %[res2], 16 \n\t"
523 "sra %[res3], %[reshw], 16 \n\t"
524
525 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
526 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
527
528 "sll %[res0], %[reshw], 16 \n\t"
529 "sra %[res0], %[res0], 16 \n\t"
530 "sra %[res1], %[reshw], 16 \n\t"
531
532 "lbu %[left0], 7(%[left]) \n\t"
533
534 "lbux %[res0], %[res0](%[cm]) \n\t"
535 "lbux %[res1], %[res1](%[cm]) \n\t"
536 "lbux %[res2], %[res2](%[cm]) \n\t"
537 "lbux %[res3], %[res3](%[cm]) \n\t"
538
539 "sb %[res0], 4(%[dst]) \n\t"
540 "sb %[res1], 5(%[dst]) \n\t"
541 "sb %[res2], 6(%[dst]) \n\t"
542 "sb %[res3], 7(%[dst]) \n\t"
543
544 "replv.ph %[left0], %[left0] \n\t"
545 "add %[dst], %[dst], %[stride] \n\t"
546
547 "addu.ph %[reshw], %[abovel], %[left0] \n\t"
548 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
549
550 "sll %[res2], %[reshw], 16 \n\t"
551 "sra %[res2], %[res2], 16 \n\t"
552 "sra %[res3], %[reshw], 16 \n\t"
553
554 "addu.ph %[reshw], %[abover], %[left0] \n\t"
555 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
556
557 "sll %[res0], %[reshw], 16 \n\t"
558 "sra %[res0], %[res0], 16 \n\t"
559 "sra %[res1], %[reshw], 16 \n\t"
560
561 "lbux %[res0], %[res0](%[cm]) \n\t"
562 "lbux %[res1], %[res1](%[cm]) \n\t"
563 "lbux %[res2], %[res2](%[cm]) \n\t"
564 "lbux %[res3], %[res3](%[cm]) \n\t"
565
566 "sb %[res0], (%[dst]) \n\t"
567 "sb %[res1], 1(%[dst]) \n\t"
568 "sb %[res2], 2(%[dst]) \n\t"
569 "sb %[res3], 3(%[dst]) \n\t"
570
571 "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
572 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
573
574 "sll %[res2], %[reshw], 16 \n\t"
575 "sra %[res2], %[res2], 16 \n\t"
576 "sra %[res3], %[reshw], 16 \n\t"
577
578 "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
579 "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
580
581 "sll %[res0], %[reshw], 16 \n\t"
582 "sra %[res0], %[res0], 16 \n\t"
583 "sra %[res1], %[reshw], 16 \n\t"
584
585 "lbux %[res0], %[res0](%[cm]) \n\t"
586 "lbux %[res1], %[res1](%[cm]) \n\t"
587 "lbux %[res2], %[res2](%[cm]) \n\t"
588 "lbux %[res3], %[res3](%[cm]) \n\t"
589
590 "sb %[res0], 4(%[dst]) \n\t"
591 "sb %[res1], 5(%[dst]) \n\t"
592 "sb %[res2], 6(%[dst]) \n\t"
593 "sb %[res3], 7(%[dst]) \n\t"
594
595 : [abovel] "=&r"(abovel), [abover] "=&r"(abover),
596 [abovel_1] "=&r"(abovel_1), [abover_1] "=&r"(abover_1),
597 [left0] "=&r"(left0), [res2] "=&r"(res2), [res3] "=&r"(res3),
598 [res0] "=&r"(res0), [res1] "=&r"(res1), [reshw] "=&r"(reshw),
599 [top_left] "=&r"(top_left)
600 : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
601 [stride] "r"(stride), [cm] "r"(cm));
602 }
603 #endif // #if HAVE_DSPR2
604