/*!
 * \copy
 *     Copyright (c) 2009-2018, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * \file    expand_picture_mmi.c
 *
 * \brief   Loongson optimization
 *
 * \date    24/07/2018 Created
 *
 *************************************************************************************
 */
#include <stdint.h>
#include "asmdefs_mmi.h"

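/*
 * Border expansion (padding) of a picture plane: the routines below replicate
 * the outermost picture pixels into the surrounding border so that later
 * stages may safely read beyond the frame edges.  Judging from the constants
 * used (a stride shifted by 0x5 and offsets of +/-0x20 for luma, 0x4 and
 * +/-0x10 for chroma), the border is 32 pixels wide for the luma plane and
 * 16 pixels wide for the chroma planes.
 *
 * A plain-C sketch of the same operation, for orientation only (the helper
 * and its names are illustrative, not part of this file):
 *
 *   static void ExpandPlaneSketch (uint8_t* pPlane, int32_t iStride,
 *                                  int32_t iWidth, int32_t iHeight,
 *                                  int32_t iPad) {
 *     for (int32_t y = 0; y < iHeight; y++) {       // left/right columns
 *       uint8_t* pRow = pPlane + y * iStride;
 *       memset (pRow - iPad, pRow[0], iPad);
 *       memset (pRow + iWidth, pRow[iWidth - 1], iPad);
 *     }
 *     uint8_t* pTop    = pPlane - iPad;             // padded first row
 *     uint8_t* pBottom = pPlane + (iHeight - 1) * iStride - iPad;
 *     for (int32_t y = 1; y <= iPad; y++) {         // top/bottom rows (and corners)
 *       memcpy (pTop    - y * iStride, pTop,    iWidth + 2 * iPad);
 *       memcpy (pBottom + y * iStride, pBottom, iWidth + 2 * iPad);
 *     }
 *   }
 *
 * The mov_line_* helpers below store one register worth of border pixels to
 * four consecutive rows, stepping the destination pointer by the stride
 * (which the callers negate to walk upwards).  The mov_line_end* variants of
 * the 16- and 32-byte forms omit the final pointer step; in the top/bottom
 * loops this lets the caller hop to the next 16 columns and reverse the
 * vertical direction.
 */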
#define mov_line_8x4_mmi_aligned(r0, r1, f0) \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t"

#define mov_line_8x4_mmi_unaligned(r0, r1, f0) \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t"

#define mov_line_end8x4_mmi_aligned(r0, r1, f0) \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdxc1 "#f0", 0x0("#r0", $0) \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t"

#define mov_line_end8x4_mmi_unaligned(r0, r1, f0) \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t"

#define mov_line_16x4_mmi_aligned(r0, r1, f0, f2) \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t"

#define mov_line_16x4_mmi_unaligned(r0, r1, f0, f2) \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t"

#define mov_line_end16x4_mmi_aligned(r0, r1, f0, f2) \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t"

#define mov_line_end16x4_mmi_unaligned(r0, r1, f0, f2) \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssdlc1 "#f0", 0x7("#r0") \n\t" \
  "gssdlc1 "#f2", 0xF("#r0") \n\t" \
  "gssdrc1 "#f0", 0x0("#r0") \n\t" \
  "gssdrc1 "#f2", 0x8("#r0") \n\t"

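/*
 * exp_top_bottom_mmi_32: replicate the first picture row into the 32 rows
 * above it and the last picture row into the 32 rows below it, 16 columns
 * per loop iteration.  On entry %[pDst] points at the first picture row,
 * %[iHeight] at the last one, $9 at the top border, $11 at the bottom
 * border, and %[iStride] is negative.  The dnegu inside the loop flips the
 * stride sign each iteration, so successive column groups are traversed in
 * opposite vertical directions without having to reset $9/$11.
 */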
#define exp_top_bottom_mmi_32 \
  "dsra %[iWidth], %[iWidth], 0x4 \n\t" \
  "1: \n\t" \
  "gslqc1 $f2, $f0, 0x0(%[pDst]) \n\t" \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_end16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  "gslqc1 $f6, $f4, 0x0(%[iHeight]) \n\t" \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_end16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  PTR_ADDIU "%[pDst], %[pDst], 0x10 \n\t" \
  PTR_ADDIU "$9, $9, 0x10 \n\t" \
  PTR_ADDIU "%[iHeight], %[iHeight], 0x10 \n\t" \
  PTR_ADDIU "$11, $11, 0x10 \n\t" \
  "dnegu %[iStride], %[iStride] \n\t" \
  PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" \
  "bnez %[iWidth], 1b \n\t" \
  "nop \n\t"

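/*
 * exp_left_right_mmi_32: for every picture row, splat the left-most pixel
 * across the 32-byte left border ($9) and the right-most pixel across the
 * 32-byte right border ($11).  %[pDst] walks the left picture edge,
 * %[iHeight] the right edge, and $8 holds the number of rows left to do.
 */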
#define exp_left_right_mmi_32 \
  "2: \n\t" \
  "lbu %[iWidth], 0x0(%[pDst]) \n\t" \
  MMI_Copy16Times($f0, $f2, $f28, %[iWidth]) \
  "gssqc1 $f2, $f0, 0x0($9) \n\t" \
  "gssqc1 $f2, $f0, 0x10($9) \n\t" \
  "lbu %[iWidth], 0x0(%[iHeight]) \n\t" \
  MMI_Copy16Times($f4, $f6, $f28, %[iWidth]) \
  "gssqc1 $f6, $f4, 0x0($11) \n\t" \
  "gssqc1 $f6, $f4, 0x10($11) \n\t" \
  PTR_ADDU "%[pDst], %[pDst], %[iStride] \n\t" \
  PTR_ADDU "$9, $9, %[iStride] \n\t" \
  PTR_ADDU "%[iHeight], %[iHeight], %[iStride] \n\t" \
  PTR_ADDU "$11, $11, %[iStride] \n\t" \
  PTR_ADDIU "$8, $8, -0x1 \n\t" \
  "bnez $8, 2b \n\t" \
  "nop \n\t"

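/* Store a 32-byte row (two quadword stores) to four consecutive rows; used
 * for the 32x32 luma corner blocks.  The *_end variant again omits the last
 * pointer step. */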
#define mov_line_32x4_mmi(r0, r1, f0, f2) \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t"

#define mov_line_end32x4_mmi(r0, r1, f0, f2) \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t" \
  PTR_ADDU ""#r0", "#r0", "#r1" \n\t" \
  "gssqc1 "#f2", "#f0", 0x0("#r0") \n\t" \
  "gssqc1 "#f2", "#f0", 0x10("#r0") \n\t"

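/*
 * exp_cross_mmi_32: fill the four 32x32 corner blocks with the corner pixels
 * splatted earlier: %[iHeight] addresses the top-left corner ($f12/$f14),
 * $11 the top-right ($f16/$f18), $9 the bottom-left ($f20/$f22) and $8 the
 * bottom-right ($f24/$f26).  %[iStride] is negative here, so each corner is
 * written starting from its bottom row.
 */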
#define exp_cross_mmi_32 \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_end32x4_mmi(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_end32x4_mmi($11, %[iStride], $f16, $f18) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_end32x4_mmi($9, %[iStride], $f20, $f22) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26) \
  mov_line_end32x4_mmi($8, %[iStride], $f24, $f26)

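/*
 * exp_top_bottom_mmi_16_{aligned,unaligned}: 16-row top/bottom expansion used
 * for the chroma planes.  The main loop handles 16 columns at a time; the
 * remainder test (and $8, 0x0F) falls back to the 8-byte helpers for a final
 * 8-pixel column group.  The unaligned variant uses gssdlc1/gssdrc1 pairs so
 * the destination need not be 16-byte aligned.
 */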
#define exp_top_bottom_mmi_16_aligned \
  "move $8, %[iWidth] \n\t" \
  "dsra %[iWidth], %[iWidth], 0x4 \n\t" \
  "1: \n\t" \
  "gslqc1 $f2, $f0, 0x0(%[pDst]) \n\t" \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  mov_line_end16x4_mmi_aligned($9, %[iStride], $f0, $f2) \
  "gslqc1 $f6, $f4, 0x0(%[iHeight]) \n\t" \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  mov_line_end16x4_mmi_aligned($11, %[iStride], $f4, $f6) \
  PTR_ADDIU "%[pDst], %[pDst], 0x10 \n\t" \
  PTR_ADDIU "$9, $9, 0x10 \n\t" \
  PTR_ADDIU "%[iHeight], %[iHeight], 0x10 \n\t" \
  PTR_ADDIU "$11, $11, 0x10 \n\t" \
  "dnegu %[iStride], %[iStride] \n\t" \
  PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" \
  "bnez %[iWidth], 1b \n\t" \
  "nop \n\t" \
  "and $8, 0x0F \n\t" \
  "beqz $8, 2f \n\t" \
  "nop \n\t" \
  "gsldxc1 $f0, 0x0(%[pDst], $0) \n\t" \
  mov_line_8x4_mmi_aligned($9, %[iStride], $f0) \
  mov_line_8x4_mmi_aligned($9, %[iStride], $f0) \
  mov_line_8x4_mmi_aligned($9, %[iStride], $f0) \
  mov_line_end8x4_mmi_aligned($9, %[iStride], $f0) \
  "gsldxc1 $f4, 0x0(%[iHeight], $0) \n\t" \
  mov_line_8x4_mmi_aligned($11, %[iStride], $f4) \
  mov_line_8x4_mmi_aligned($11, %[iStride], $f4) \
  mov_line_8x4_mmi_aligned($11, %[iStride], $f4) \
  mov_line_end8x4_mmi_aligned($11, %[iStride], $f4) \
  "2: \n\t"

#define exp_top_bottom_mmi_16_unaligned \
  "move $8, %[iWidth] \n\t" \
  "dsra %[iWidth], %[iWidth], 0x4 \n\t" \
  "1: \n\t" \
  "gsldlc1 $f0, 0x7(%[pDst]) \n\t" \
  "gsldlc1 $f2, 0xF(%[pDst]) \n\t" \
  "gsldrc1 $f0, 0x0(%[pDst]) \n\t" \
  "gsldrc1 $f2, 0x8(%[pDst]) \n\t" \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f0, $f2) \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f0, $f2) \
  mov_line_end16x4_mmi_unaligned($9, %[iStride], $f0, $f2) \
  "gsldlc1 $f4, 0x7(%[iHeight]) \n\t" \
  "gsldlc1 $f6, 0xF(%[iHeight]) \n\t" \
  "gsldrc1 $f4, 0x0(%[iHeight]) \n\t" \
  "gsldrc1 $f6, 0x8(%[iHeight]) \n\t" \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f4, $f6) \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f4, $f6) \
  mov_line_end16x4_mmi_unaligned($11, %[iStride], $f4, $f6) \
  PTR_ADDIU "%[pDst], %[pDst], 0x10 \n\t" \
  PTR_ADDIU "$9, $9, 0x10 \n\t" \
  PTR_ADDIU "%[iHeight], %[iHeight], 0x10 \n\t" \
  PTR_ADDIU "$11, $11, 0x10 \n\t" \
  "dnegu %[iStride], %[iStride] \n\t" \
  PTR_ADDIU "%[iWidth], %[iWidth], -0x1 \n\t" \
  "bnez %[iWidth], 1b \n\t" \
  "nop \n\t" \
  "and $8, 0x0F \n\t" \
  "beqz $8, 2f \n\t" \
  "nop \n\t" \
  "gsldlc1 $f0, 0x7(%[pDst]) \n\t" \
  "gsldrc1 $f0, 0x0(%[pDst]) \n\t" \
  mov_line_8x4_mmi_unaligned($9, %[iStride], $f0) \
  mov_line_8x4_mmi_unaligned($9, %[iStride], $f0) \
  mov_line_8x4_mmi_unaligned($9, %[iStride], $f0) \
  mov_line_end8x4_mmi_unaligned($9, %[iStride], $f0) \
  "gsldlc1 $f4, 0x7(%[iHeight]) \n\t" \
  "gsldrc1 $f4, 0x0(%[iHeight]) \n\t" \
  mov_line_8x4_mmi_unaligned($11, %[iStride], $f4) \
  mov_line_8x4_mmi_unaligned($11, %[iStride], $f4) \
  mov_line_8x4_mmi_unaligned($11, %[iStride], $f4) \
  mov_line_end8x4_mmi_unaligned($11, %[iStride], $f4) \
  "2: \n\t"

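/*
 * exp_left_right_mmi_16_{aligned,unaligned}: per-row replication of the
 * left-most and right-most chroma pixels into the 16-byte left and right
 * borders, analogous to exp_left_right_mmi_32.
 */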
#define exp_left_right_mmi_16_aligned \
  "3: \n\t" \
  "lbu %[iWidth], 0x0(%[pDst]) \n\t" \
  MMI_Copy16Times($f0, $f2, $f28, %[iWidth]) \
  "gssqc1 $f2, $f0, 0x0($9) \n\t" \
  "lbu %[iWidth], 0x0(%[iHeight]) \n\t" \
  MMI_Copy16Times($f4, $f6, $f28, %[iWidth]) \
  "gssqc1 $f6, $f4, 0x0($11) \n\t" \
  PTR_ADDU "%[pDst], %[pDst], %[iStride] \n\t" \
  PTR_ADDU "$9, $9, %[iStride] \n\t" \
  PTR_ADDU "%[iHeight], %[iHeight], %[iStride] \n\t" \
  PTR_ADDU "$11, $11, %[iStride] \n\t" \
  PTR_ADDIU "$8, $8, -0x1 \n\t" \
  "bnez $8, 3b \n\t" \
  "nop \n\t"

#define exp_left_right_mmi_16_unaligned \
  "3: \n\t" \
  "lbu %[iWidth], 0x0(%[pDst]) \n\t" \
  MMI_Copy16Times($f0, $f2, $f28, %[iWidth]) \
  "gssdlc1 $f0, 0x7($9) \n\t" \
  "gssdlc1 $f2, 0xF($9) \n\t" \
  "gssdrc1 $f0, 0x0($9) \n\t" \
  "gssdrc1 $f2, 0x8($9) \n\t" \
  "lbu %[iWidth], 0x0(%[iHeight]) \n\t" \
  MMI_Copy16Times($f4, $f6, $f28, %[iWidth]) \
  "gssdlc1 $f4, 0x7($11) \n\t" \
  "gssdlc1 $f6, 0xF($11) \n\t" \
  "gssdrc1 $f4, 0x0($11) \n\t" \
  "gssdrc1 $f6, 0x8($11) \n\t" \
  PTR_ADDU "%[pDst], %[pDst], %[iStride] \n\t" \
  PTR_ADDU "$9, $9, %[iStride] \n\t" \
  PTR_ADDU "%[iHeight], %[iHeight], %[iStride] \n\t" \
  PTR_ADDU "$11, $11, %[iStride] \n\t" \
  PTR_ADDIU "$8, $8, -0x1 \n\t" \
  "bnez $8, 3b \n\t" \
  "nop \n\t"

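/*
 * exp_cross_mmi_16_{aligned,unaligned}: fill the four 16x16 chroma corner
 * blocks with the splatted corner pixels, using the same pointer/register
 * assignment as exp_cross_mmi_32.
 */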
#define exp_cross_mmi_16_aligned \
  mov_line_16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_end16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f16, $f18) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f16, $f18) \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f16, $f18) \
  mov_line_end16x4_mmi_aligned($11, %[iStride], $f16, $f18) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f20, $f22) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f20, $f22) \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f20, $f22) \
  mov_line_end16x4_mmi_aligned($9, %[iStride], $f20, $f22) \
  mov_line_16x4_mmi_aligned($8, %[iStride], $f24, $f26) \
  mov_line_16x4_mmi_aligned($8, %[iStride], $f24, $f26) \
  mov_line_16x4_mmi_aligned($8, %[iStride], $f24, $f26) \
  mov_line_end16x4_mmi_aligned($8, %[iStride], $f24, $f26)

#define exp_cross_mmi_16_unaligned \
  mov_line_16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_end16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14) \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f16, $f18) \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f16, $f18) \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f16, $f18) \
  mov_line_end16x4_mmi_unaligned($11, %[iStride], $f16, $f18) \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f20, $f22) \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f20, $f22) \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f20, $f22) \
  mov_line_end16x4_mmi_unaligned($9, %[iStride], $f20, $f22) \
  mov_line_16x4_mmi_unaligned($8, %[iStride], $f24, $f26) \
  mov_line_16x4_mmi_unaligned($8, %[iStride], $f24, $f26) \
  mov_line_16x4_mmi_unaligned($8, %[iStride], $f24, $f26) \
  mov_line_end16x4_mmi_unaligned($8, %[iStride], $f24, $f26)

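/*
 * ExpandPictureLuma_mmi: expand the luma plane pointed to by pDst
 * (iWidth x iHeight, row pitch iStride) by a 32-pixel border on every side.
 * The caller's buffer must already reserve that border around the plane.
 * A hedged usage sketch (variable names are illustrative only):
 *
 *   ExpandPictureLuma_mmi (pRefLuma, iLumaStride, kiPicWidth, kiPicHeight);
 *
 * Register roles inside the asm block: $f12/$f14, $f16/$f18, $f20/$f22 and
 * $f24/$f26 hold the splatted top-left, top-right, bottom-left and
 * bottom-right corner pixels; $9/$11 address the borders being written, and
 * $10 and $12-$14 preserve the C operands across the helper macros.
 */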
void ExpandPictureLuma_mmi(uint8_t *pDst, int32_t iStride, int32_t iWidth,
                           int32_t iHeight) {
  BACKUP_REG;
  __asm__ volatile (
    ".set arch=loongson3a \n\t"
    "xor $f28, $f28, $f28 \n\t"
    "lbu $8, 0x0(%[pDst]) \n\t"

    MMI_Copy16Times($f12, $f14, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDU "$9, %[pDst], %[iStride] \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    "move $10, %[iHeight] \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], -0x1 \n\t"
    "dmul %[iHeight], %[iHeight], %[iStride] \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], %[pDst] \n\t"

    "move $8, %[iStride] \n\t"
    "dsll $8, 0x5 \n\t"
    PTR_ADDU "$11, %[iHeight], $8 \n\t"

    "lbu $8, 0x0(%[iHeight]) \n\t"
    MMI_Copy16Times($f20, $f22, $f28, $8)
    PTR_ADDU "$8, %[iHeight], %[iWidth] \n\t"
    PTR_ADDIU "$8, -0x1 \n\t"
    "lbu $8, 0x0($8) \n\t"
    "dmtc1 $8, $f24 \n\t"
    "pshufh $f24, $f24, $f28 \n\t"
    "packushb $f24, $f24, $f24 \n\t"
    "mov.d $f26, $f24 \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    "move $12, %[pDst] \n\t"
    "move $13, %[iStride] \n\t"
    "move $14, %[iWidth] \n\t"
    exp_top_bottom_mmi_32
    "move %[iWidth], $14 \n\t"
    "move %[iStride], $13 \n\t"
    "move %[pDst], $12 \n\t"
    PTR_ADDIU "$9, %[pDst], -0x20 \n\t"
    PTR_ADDU "%[iHeight], %[pDst], %[iWidth] \n\t"
    PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t"
    PTR_ADDIU "$11, %[iHeight], 0x1 \n\t"
    "lbu $8, 0x0(%[iHeight]) \n\t"
    MMI_Copy16Times($f16, $f18, $f28, $8)
    "dnegu %[iStride], %[iStride] \n\t"
    "move $8, $10 \n\t"
    "move $10, %[pDst] \n\t"
    "move $12, %[iStride] \n\t"
    "move $13, %[iWidth] \n\t"
    "move $14, $8 \n\t"

    exp_left_right_mmi_32

    "move $8, $14 \n\t"
    "move %[iWidth], $13 \n\t"
    "move %[iStride], $12 \n\t"
    "move %[pDst], $10 \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDIU "%[iHeight], %[pDst], -0x20 \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], %[iStride] \n\t"
    PTR_ADDU "$11, %[pDst], %[iWidth] \n\t"
    PTR_ADDU "$11, $11, %[iStride] \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDIU "$8, $8, 0x20 \n\t"
    "dmul $8, $8, %[iStride] \n\t"
    PTR_ADDU "$9, %[iHeight], $8 \n\t"
    PTR_ADDU "$8, $11, $8 \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    exp_cross_mmi_32
    : [pDst]"+&r"((unsigned char *)pDst), [iStride]"+&r"((int)iStride),
      [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight)
    :
    : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2",
      "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20",
      "$f22", "$f24", "$f26", "$f28"
  );
  RECOVER_REG;
}

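/*
 * ExpandPictureChromaUnalign_mmi: the same expansion for a chroma plane with
 * a 16-pixel border, using the unaligned store helpers so that pDst and
 * iStride do not have to be 16-byte aligned (presumably the caller picks
 * this variant or the aligned one below based on the plane's alignment).
 */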
void ExpandPictureChromaUnalign_mmi(uint8_t *pDst, int32_t iStride, int32_t iWidth,
                                    int32_t iHeight) {
  BACKUP_REG;
  __asm__ volatile (
    ".set arch=loongson3a \n\t"
    "xor $f28, $f28, $f28 \n\t"
    "lbu $8, 0x0(%[pDst]) \n\t"

    MMI_Copy16Times($f12, $f14, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDU "$9, %[pDst], %[iStride] \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    "move $10, %[iHeight] \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], -0x1 \n\t"
    "dmul %[iHeight], %[iHeight], %[iStride] \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], %[pDst] \n\t"
    "move $8, %[iStride] \n\t"
    "dsll $8, 0x4 \n\t"
    PTR_ADDU "$11, %[iHeight], $8 \n\t"
    "lbu $8, 0x0(%[iHeight]) \n\t"

    MMI_Copy16Times($f20, $f22, $f28, $8)

    PTR_ADDU "$8, %[iHeight], %[iWidth] \n\t"
    PTR_ADDIU "$8, -0x1 \n\t"
    "lbu $8, 0x0($8) \n\t"

    MMI_Copy16Times($f24, $f26, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"
    "move $12, %[pDst] \n\t"
    "move $13, %[iStride] \n\t"
    "move $14, %[iWidth] \n\t"

    exp_top_bottom_mmi_16_unaligned

    "move %[iWidth], $14 \n\t"
    "move %[iStride], $13 \n\t"
    "move %[pDst], $12 \n\t"
    PTR_ADDIU "$9, %[pDst], -0x10 \n\t"
    PTR_ADDU "%[iHeight], %[pDst], %[iWidth] \n\t"
    PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t"
    PTR_ADDIU "$11, %[iHeight], 0x1 \n\t"
    "lbu $8, 0x0(%[iHeight]) \n\t"
    MMI_Copy16Times($f16, $f18, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"
    "move $8, $10 \n\t"

    "move $10, %[pDst] \n\t"
    "move $12, %[iStride] \n\t"
    "move $13, %[iWidth] \n\t"
    "move $14, $8 \n\t"

    exp_left_right_mmi_16_unaligned

    "move $8, $14 \n\t"
    "move %[iWidth], $13 \n\t"
    "move %[iStride], $12 \n\t"
    "move %[pDst], $10 \n\t"

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDIU "%[iHeight], %[pDst], -0x10 \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], %[iStride] \n\t"
    PTR_ADDU "$11, %[pDst], %[iWidth] \n\t"
    PTR_ADDU "$11, $11, %[iStride] \n\t"

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDIU "$8, $8, 0x10 \n\t"
    "dmul $8, $8, %[iStride] \n\t"

    PTR_ADDU "$9, %[iHeight], $8 \n\t"
    PTR_ADDU "$8, $11, $8 \n\t"
    "dnegu %[iStride], %[iStride] \n\t"

    exp_cross_mmi_16_unaligned
    : [pDst]"+&r"((unsigned char *)pDst), [iStride]"+&r"((int)iStride),
      [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight)
    :
    : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2",
      "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20",
      "$f22", "$f24", "$f26", "$f28"
  );
  RECOVER_REG;
}

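/*
 * ExpandPictureChromaAlign_mmi: identical to the unaligned variant above
 * except that it uses the aligned top/bottom, left/right and cross helpers,
 * whose gslqc1/gssqc1 quadword accesses rely on 16-byte alignment of the
 * rows being read and written.
 */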
void ExpandPictureChromaAlign_mmi(uint8_t *pDst, int32_t iStride, int32_t iWidth,
                                  int32_t iHeight) {
  BACKUP_REG;
  __asm__ volatile (
    ".set arch=loongson3a \n\t"
    "xor $f28, $f28, $f28 \n\t"
    "lbu $8, 0x0(%[pDst]) \n\t"

    MMI_Copy16Times($f12, $f14, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDU "$9, %[pDst], %[iStride] \n\t"
    "dnegu %[iStride], %[iStride] \n\t"
    "move $10, %[iHeight] \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], -0x1 \n\t"
    "dmul %[iHeight], %[iHeight], %[iStride] \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], %[pDst] \n\t"
    "move $8, %[iStride] \n\t"
    "dsll $8, 0x4 \n\t"
    PTR_ADDU "$11, %[iHeight], $8 \n\t"
    "lbu $8, 0x0(%[iHeight]) \n\t"

    MMI_Copy16Times($f20, $f22, $f28, $8)

    PTR_ADDU "$8, %[iHeight], %[iWidth] \n\t"
    PTR_ADDIU "$8, -0x1 \n\t"
    "lbu $8, 0x0($8) \n\t"

    MMI_Copy16Times($f24, $f26, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"

    "move $12, %[pDst] \n\t"
    "move $13, %[iStride] \n\t"
    "move $14, %[iWidth] \n\t"
    exp_top_bottom_mmi_16_aligned

    "move %[iWidth], $14 \n\t"
    "move %[iStride], $13 \n\t"
    "move %[pDst], $12 \n\t"

    PTR_ADDIU "$9, %[pDst], -0x10 \n\t"

    PTR_ADDU "%[iHeight], %[pDst], %[iWidth] \n\t"
    PTR_ADDIU "%[iHeight], %[iHeight], -0x1 \n\t"
    PTR_ADDIU "$11, %[iHeight], 0x1 \n\t"

    "lbu $8, 0x0(%[iHeight]) \n\t"

    MMI_Copy16Times($f16, $f18, $f28, $8)

    "dnegu %[iStride], %[iStride] \n\t"
    "move $8, $10 \n\t"

    "move $10, %[pDst] \n\t"
    "move $12, %[iStride] \n\t"
    "move $13, %[iWidth] \n\t"
    "move $14, $8 \n\t"

    exp_left_right_mmi_16_aligned

    "move $8, $14 \n\t"
    "move %[iWidth], $13 \n\t"
    "move %[iStride], $12 \n\t"
    "move %[pDst], $10 \n\t"

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDIU "%[iHeight], %[pDst], -0x10 \n\t"
    PTR_ADDU "%[iHeight], %[iHeight], %[iStride] \n\t"
    PTR_ADDU "$11, %[pDst], %[iWidth] \n\t"
    PTR_ADDU "$11, $11, %[iStride] \n\t"

    "dnegu %[iStride], %[iStride] \n\t"
    PTR_ADDIU "$8, $8, 0x10 \n\t"
    "dmul $8, $8, %[iStride] \n\t"

    PTR_ADDU "$9, %[iHeight], $8 \n\t"
    PTR_ADDU "$8, $11, $8 \n\t"
    "dnegu %[iStride], %[iStride] \n\t"

    exp_cross_mmi_16_aligned
    : [pDst]"+&r"((unsigned char *)pDst), [iStride]"+&r"((int)iStride),
      [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight)
    :
    : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2",
      "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20",
      "$f22", "$f24", "$f26", "$f28"
  );
  RECOVER_REG;
}
