• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2009-2018, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file    expand_picture_mmi.c
33  *
34  * \brief   Loongson optimization
35  *
36  * \date    24/07/2018 Created
37  *
38  *************************************************************************************
39  */
40 #include <stdint.h>
41 #include "asmdefs_mmi.h"
42 
/*
 * Assembly fragment: store the 8-byte FP register f0 to the address in r0
 * four times, adding r1 to r0 after every store (r0 ends four steps on).
 * Uses the indexed store gssdxc1 with $0 as index; presumably r0 must be
 * 8-byte aligned -- confirm against the Loongson manual.
 */
#define mov_line_8x4_mmi_aligned(r0, r1, f0) \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t"
52 
/*
 * Assembly fragment: store the 8-byte FP register f0 to the address in r0
 * four times, adding r1 to r0 after every store.  Each store is issued as a
 * gssdlc1/gssdrc1 pair (offsets 0x7 and 0x0), so r0 need not be aligned.
 */
#define mov_line_8x4_mmi_unaligned(r0, r1, f0) \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t"
66 
/*
 * Assembly fragment: final group of four aligned 8-byte stores of f0 to r0,
 * stepping r0 by r1.
 *
 * NOTE(review): unlike the other mov_line_end* macros in this file, this one
 * still adds r1 to r0 after the fourth store, making it textually identical
 * to mov_line_8x4_mmi_aligned.  The visible callers recompute the pointer
 * afterwards, so the extra add looks harmless; it is kept to avoid changing
 * behaviour for code outside this view.
 */
#define mov_line_end8x4_mmi_aligned(r0, r1, f0) \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdxc1    "#f0", 0x0("#r0", $0)           \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t"
76 
/*
 * Assembly fragment: final group of four unaligned 8-byte stores of f0 to r0
 * (gssdlc1/gssdrc1 pairs).  r0 is advanced by r1 between stores but NOT after
 * the last one, so it is left pointing at the last row written.
 *
 * The definition ends without a trailing line-continuation so that it cannot
 * swallow whatever line follows it.
 */
#define mov_line_end8x4_mmi_unaligned(r0, r1, f0) \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t"

/*
 * Assembly fragment: store the 16-byte register pair f2:f0 to the address in
 * r0 four times with gssqc1, adding r1 to r0 after every store.  Presumably
 * r0 must be 16-byte aligned -- confirm against the Loongson manual.
 */
#define mov_line_16x4_mmi_aligned(r0, r1, f0, f2) \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t"
99 
/*
 * Assembly fragment: store 16 bytes (f0 at offset 0, f2 at offset 8) to the
 * address in r0 four times, adding r1 to r0 after every store.  Each half is
 * written with a gssdlc1/gssdrc1 pair so r0 need not be aligned.
 */
#define mov_line_16x4_mmi_unaligned(r0, r1, f0, f2) \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t"
121 
/*
 * Assembly fragment: final group of four aligned 16-byte stores (gssqc1) of
 * f2:f0 to r0.  r0 is advanced by r1 between stores but not after the last
 * one, so it is left pointing at the last row written.
 */
#define mov_line_end16x4_mmi_aligned(r0, r1, f0, f2) \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")        \n\t"
130 
/*
 * Assembly fragment: final group of four unaligned 16-byte stores (f0 at
 * offset 0, f2 at offset 8) to r0.  r0 is advanced by r1 between stores but
 * not after the last one, so it is left pointing at the last row written.
 *
 * The definition ends without a trailing line-continuation so that it cannot
 * swallow whatever line follows it.
 */
#define mov_line_end16x4_mmi_unaligned(r0, r1, f0, f2) \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"             \n\t" \
  "gssdlc1    "#f0", 0x7("#r0")               \n\t" \
  "gssdlc1    "#f2", 0xF("#r0")               \n\t" \
  "gssdrc1    "#f0", 0x0("#r0")               \n\t" \
  "gssdrc1    "#f2", 0x8("#r0")               \n\t"

/*
 * Assembly fragment: replicate one source row into 32 rows above and one
 * source row into 32 rows below, 16 bytes (one column strip) per iteration.
 *
 * Per iteration of the loop at label 1:
 *   - 16 bytes are loaded from %[pDst] and stored to 32 rows through $9;
 *   - 16 bytes are loaded from the address held in %[iHeight] and stored to
 *     32 rows through $11;
 *   - %[pDst], $9, %[iHeight] and $11 each move 16 bytes to the right;
 *   - %[iStride] is negated.  The mov_line_end* variants leave $9/$11 on the
 *     last row written, so the next strip is filled walking back the other
 *     way instead of rewinding the pointers.
 *
 * The loop runs %[iWidth] >> 4 times; the counter is decremented before it
 * is tested, so that quotient must be non-zero on entry.  %[pDst],
 * %[iStride], %[iWidth], %[iHeight], $9 and $11 are all modified.
 */
#define exp_top_bottom_mmi_32 \
  "dsra       %[iWidth], %[iWidth], 0x4              \n\t" \
  "1:                                                \n\t" \
  "gslqc1     $f2, $f0, 0x0(%[pDst])                 \n\t" \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)      \
  mov_line_end16x4_mmi_aligned($9, %[iStride], $f0, $f2)   \
  "gslqc1     $f6, $f4, 0x0(%[iHeight])              \n\t" \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)     \
  mov_line_end16x4_mmi_aligned($11, %[iStride], $f4, $f6)  \
  PTR_ADDIU  "%[pDst], %[pDst], 0x10                 \n\t" \
  PTR_ADDIU  "$9, $9, 0x10                           \n\t" \
  PTR_ADDIU  "%[iHeight], %[iHeight], 0x10           \n\t" \
  PTR_ADDIU  "$11, $11, 0x10                         \n\t" \
  "dnegu      %[iStride], %[iStride]                 \n\t" \
  PTR_ADDIU  "%[iWidth], %[iWidth], -0x1             \n\t" \
  "bnez       %[iWidth], 1b                          \n\t" \
  "nop                                               \n\t"
181 
/*
 * Assembly fragment: for each of $8 rows (loop at label 2), read one byte at
 * %[pDst] and one byte at the address held in %[iHeight], expand each with
 * MMI_Copy16Times (presumably a 16-byte broadcast -- defined elsewhere), and
 * store the results as 32 bytes at $9 and 32 bytes at $11 respectively.
 * All four pointers then advance by %[iStride].
 *
 * %[iWidth] is used as a scratch register for the loaded byte.  $8 is
 * decremented before it is tested, so it must be non-zero on entry.
 */
#define exp_left_right_mmi_32 \
  "2:                                             \n\t" \
  "lbu        %[iWidth], 0x0(%[pDst])             \n\t" \
  MMI_Copy16Times($f0, $f2, $f28, %[iWidth])            \
  "gssqc1     $f2, $f0, 0x0($9)                   \n\t" \
  "gssqc1     $f2, $f0, 0x10($9)                  \n\t" \
  "lbu        %[iWidth], 0x0(%[iHeight])          \n\t" \
  MMI_Copy16Times($f4, $f6, $f28, %[iWidth])            \
  "gssqc1     $f6, $f4, 0x0($11)                  \n\t" \
  "gssqc1     $f6, $f4, 0x10($11)                 \n\t" \
  PTR_ADDU   "%[pDst], %[pDst], %[iStride]        \n\t" \
  PTR_ADDU   "$9, $9, %[iStride]                  \n\t" \
  PTR_ADDU   "%[iHeight], %[iHeight], %[iStride]  \n\t" \
  PTR_ADDU   "$11, $11, %[iStride]                \n\t" \
  PTR_ADDIU  "$8, $8, -0x1                        \n\t" \
  "bnez       $8, 2b                              \n\t" \
  "nop                                            \n\t"
199 
/*
 * Assembly fragment: write 32 bytes (the pair f2:f0 stored at offsets 0x0
 * and 0x10) to the address in r0 four times with gssqc1, adding r1 to r0
 * after every row.
 */
#define mov_line_32x4_mmi(r0, r1, f0, f2) \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t"
213 
/*
 * Assembly fragment: final group of four 32-byte rows (f2:f0 stored at
 * offsets 0x0 and 0x10) written to r0.  r0 is advanced by r1 between rows
 * but not after the last one.
 */
#define mov_line_end32x4_mmi(r0, r1, f0, f2) \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t" \
  PTR_ADDU   ""#r0", "#r0", "#r1"              \n\t" \
  "gssqc1     "#f2", "#f0", 0x0("#r0")         \n\t" \
  "gssqc1     "#f2", "#f0", 0x10("#r0")        \n\t"
226 
/*
 * Assembly fragment: fill four 32-byte-wide, 32-row blocks, one per pointer,
 * stepping each pointer by %[iStride] between rows (31 steps per block):
 *   %[iHeight] <- $f14:$f12      $11 <- $f18:$f16
 *   $9         <- $f22:$f20      $8  <- $f26:$f24
 * Each pointer is left on the last row it wrote.
 */
#define  exp_cross_mmi_32 \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_32x4_mmi(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_end32x4_mmi(%[iHeight], %[iStride], $f12, $f14)     \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_32x4_mmi($11, %[iStride], $f16, $f18)               \
  mov_line_end32x4_mmi($11, %[iStride], $f16, $f18)            \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_32x4_mmi($9, %[iStride], $f20, $f22)                \
  mov_line_end32x4_mmi($9, %[iStride], $f20, $f22)             \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_32x4_mmi($8, %[iStride], $f24, $f26)                \
  mov_line_end32x4_mmi($8, %[iStride], $f24, $f26)
260 
/*
 * Assembly fragment: replicate one source row into 16 rows through $9 and
 * another source row into 16 rows through $11, using aligned accesses.
 *
 * Main loop (label 1): %[iWidth] >> 4 iterations, each handling a 16-byte
 * column strip loaded from %[pDst] and from the address in %[iHeight], then
 * moving all four pointers 16 bytes right and negating %[iStride] so the
 * next strip is written walking back the other way.  The counter is
 * decremented before it is tested, so %[iWidth] >> 4 must be non-zero.
 *
 * Tail: the original width is kept in $8; if its low four bits are non-zero
 * one further 8-byte strip is copied the same way.  Label 2 marks the end.
 */
#define exp_top_bottom_mmi_16_aligned \
  "move       $8, %[iWidth]                              \n\t" \
  "dsra       %[iWidth], %[iWidth], 0x4                  \n\t" \
  "1:                                                    \n\t" \
  "gslqc1     $f2, $f0, 0x0(%[pDst])                     \n\t" \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)          \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)          \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f0, $f2)          \
  mov_line_end16x4_mmi_aligned($9, %[iStride], $f0, $f2)       \
  "gslqc1     $f6, $f4, 0x0(%[iHeight])                  \n\t" \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)         \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)         \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f4, $f6)         \
  mov_line_end16x4_mmi_aligned($11, %[iStride], $f4, $f6)      \
  PTR_ADDIU  "%[pDst], %[pDst], 0x10                     \n\t" \
  PTR_ADDIU  "$9, $9, 0x10                               \n\t" \
  PTR_ADDIU  "%[iHeight], %[iHeight], 0x10               \n\t" \
  PTR_ADDIU  "$11, $11, 0x10                             \n\t" \
  "dnegu      %[iStride], %[iStride]                     \n\t" \
  PTR_ADDIU  "%[iWidth], %[iWidth], -0x1                 \n\t" \
  "bnez       %[iWidth], 1b                              \n\t" \
  "nop                                                   \n\t" \
  "and        $8, 0x0F                                   \n\t" \
  "beqz       $8, 2f                                     \n\t" \
  "nop                                                   \n\t" \
  "gsldxc1    $f0, 0x0(%[pDst], $0)                      \n\t" \
  mov_line_8x4_mmi_aligned($9, %[iStride], $f0)                \
  mov_line_8x4_mmi_aligned($9, %[iStride], $f0)                \
  mov_line_8x4_mmi_aligned($9, %[iStride], $f0)                \
  mov_line_end8x4_mmi_aligned($9, %[iStride], $f0)             \
  "gsldxc1    $f4, 0x0(%[iHeight], $0)                   \n\t" \
  mov_line_8x4_mmi_aligned($11, %[iStride], $f4)               \
  mov_line_8x4_mmi_aligned($11, %[iStride], $f4)               \
  mov_line_8x4_mmi_aligned($11, %[iStride], $f4)               \
  mov_line_end8x4_mmi_aligned($11, %[iStride], $f4)            \
  "2:                                                    \n\t"
297 
/*
 * Assembly fragment: unaligned counterpart of exp_top_bottom_mmi_16_aligned.
 * Replicates one source row into 16 rows through $9 and another into 16 rows
 * through $11, using left/right load and store pairs throughout.
 *
 * Main loop (label 1): %[iWidth] >> 4 iterations of one 16-byte column strip
 * each; all four pointers then move 16 bytes right and %[iStride] is negated
 * so the next strip is written walking back the other way.  The counter is
 * decremented before it is tested, so %[iWidth] >> 4 must be non-zero.
 *
 * Tail: the original width is kept in $8; if its low four bits are non-zero
 * one further 8-byte strip is copied the same way.  Label 2 marks the end.
 */
#define exp_top_bottom_mmi_16_unaligned \
  "move       $8, %[iWidth]                              \n\t" \
  "dsra       %[iWidth], %[iWidth], 0x4                  \n\t" \
  "1:                                                    \n\t" \
  "gsldlc1    $f0, 0x7(%[pDst])                          \n\t" \
  "gsldlc1    $f2, 0xF(%[pDst])                          \n\t" \
  "gsldrc1    $f0, 0x0(%[pDst])                          \n\t" \
  "gsldrc1    $f2, 0x8(%[pDst])                          \n\t" \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f0, $f2)        \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f0, $f2)        \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f0, $f2)        \
  mov_line_end16x4_mmi_unaligned($9, %[iStride], $f0, $f2)     \
  "gsldlc1    $f4, 0x7(%[iHeight])                       \n\t" \
  "gsldlc1    $f6, 0xF(%[iHeight])                       \n\t" \
  "gsldrc1    $f4, 0x0(%[iHeight])                       \n\t" \
  "gsldrc1    $f6, 0x8(%[iHeight])                       \n\t" \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f4, $f6)       \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f4, $f6)       \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f4, $f6)       \
  mov_line_end16x4_mmi_unaligned($11, %[iStride], $f4, $f6)    \
  PTR_ADDIU  "%[pDst], %[pDst], 0x10                     \n\t" \
  PTR_ADDIU  "$9, $9, 0x10                               \n\t" \
  PTR_ADDIU  "%[iHeight], %[iHeight], 0x10               \n\t" \
  PTR_ADDIU  "$11, $11, 0x10                             \n\t" \
  "dnegu      %[iStride], %[iStride]                     \n\t" \
  PTR_ADDIU  "%[iWidth], %[iWidth], -0x1                 \n\t" \
  "bnez       %[iWidth], 1b                              \n\t" \
  "nop                                                   \n\t" \
  "and        $8, 0x0F                                   \n\t" \
  "beqz       $8, 2f                                     \n\t" \
  "nop                                                   \n\t" \
  "gsldlc1    $f0, 0x7(%[pDst])                          \n\t" \
  "gsldrc1    $f0, 0x0(%[pDst])                          \n\t" \
  mov_line_8x4_mmi_unaligned($9, %[iStride], $f0)              \
  mov_line_8x4_mmi_unaligned($9, %[iStride], $f0)              \
  mov_line_8x4_mmi_unaligned($9, %[iStride], $f0)              \
  mov_line_end8x4_mmi_unaligned($9, %[iStride], $f0)           \
  "gsldlc1    $f4, 0x7(%[iHeight])                       \n\t" \
  "gsldrc1    $f4, 0x0(%[iHeight])                       \n\t" \
  mov_line_8x4_mmi_unaligned($11, %[iStride], $f4)             \
  mov_line_8x4_mmi_unaligned($11, %[iStride], $f4)             \
  mov_line_8x4_mmi_unaligned($11, %[iStride], $f4)             \
  mov_line_end8x4_mmi_unaligned($11, %[iStride], $f4)          \
  "2:                                                    \n\t"
342 
/*
 * Assembly fragment: for each of $8 rows (loop at label 3), read one byte at
 * %[pDst] and one byte at the address held in %[iHeight], expand each with
 * MMI_Copy16Times (presumably a 16-byte broadcast -- defined elsewhere), and
 * store 16 bytes at $9 and 16 bytes at $11 with aligned gssqc1 stores.
 * All four pointers then advance by %[iStride].
 *
 * %[iWidth] is used as a scratch register for the loaded byte.  $8 is
 * decremented before it is tested, so it must be non-zero on entry.
 */
#define exp_left_right_mmi_16_aligned \
  "3:                                             \n\t" \
  "lbu        %[iWidth], 0x0(%[pDst])             \n\t" \
  MMI_Copy16Times($f0, $f2, $f28, %[iWidth])            \
  "gssqc1     $f2, $f0, 0x0($9)                   \n\t" \
  "lbu        %[iWidth], 0x0(%[iHeight])          \n\t" \
  MMI_Copy16Times($f4, $f6, $f28, %[iWidth])            \
  "gssqc1     $f6, $f4, 0x0($11)                  \n\t" \
  PTR_ADDU   "%[pDst], %[pDst], %[iStride]        \n\t" \
  PTR_ADDU   "$9, $9, %[iStride]                  \n\t" \
  PTR_ADDU   "%[iHeight], %[iHeight], %[iStride]  \n\t" \
  PTR_ADDU   "$11, $11, %[iStride]                \n\t" \
  PTR_ADDIU  "$8, $8, -0x1                        \n\t" \
  "bnez       $8, 3b                              \n\t" \
  "nop                                            \n\t"
358 
/*
 * Assembly fragment: unaligned counterpart of exp_left_right_mmi_16_aligned.
 * For each of $8 rows (loop at label 3), read one byte at %[pDst] and one at
 * the address held in %[iHeight], expand each with MMI_Copy16Times
 * (presumably a 16-byte broadcast -- defined elsewhere), and store 16 bytes
 * at $9 and at $11 using gssdlc1/gssdrc1 pairs.  All four pointers then
 * advance by %[iStride].
 *
 * %[iWidth] is used as a scratch register for the loaded byte.  $8 is
 * decremented before it is tested, so it must be non-zero on entry.
 */
#define exp_left_right_mmi_16_unaligned \
  "3:                                             \n\t" \
  "lbu        %[iWidth], 0x0(%[pDst])             \n\t" \
  MMI_Copy16Times($f0, $f2, $f28, %[iWidth])            \
  "gssdlc1    $f0, 0x7($9)                        \n\t" \
  "gssdlc1    $f2, 0xF($9)                        \n\t" \
  "gssdrc1    $f0, 0x0($9)                        \n\t" \
  "gssdrc1    $f2, 0x8($9)                        \n\t" \
  "lbu        %[iWidth], 0x0(%[iHeight])          \n\t" \
  MMI_Copy16Times($f4, $f6, $f28, %[iWidth])            \
  "gssdlc1    $f4, 0x7($11)                       \n\t" \
  "gssdlc1    $f6, 0xF($11)                       \n\t" \
  "gssdrc1    $f4, 0x0($11)                       \n\t" \
  "gssdrc1    $f6, 0x8($11)                       \n\t" \
  PTR_ADDU   "%[pDst], %[pDst], %[iStride]        \n\t" \
  PTR_ADDU   "$9, $9, %[iStride]                  \n\t" \
  PTR_ADDU   "%[iHeight], %[iHeight], %[iStride]  \n\t" \
  PTR_ADDU   "$11, $11, %[iStride]                \n\t" \
  PTR_ADDIU  "$8, $8, -0x1                        \n\t" \
  "bnez       $8, 3b                              \n\t" \
  "nop                                            \n\t"
380 
/*
 * Assembly fragment: fill four 16-byte-wide, 16-row blocks with aligned
 * stores, stepping each pointer by %[iStride] between rows (15 steps each):
 *   %[iHeight] <- $f14:$f12      $11 <- $f18:$f16
 *   $9         <- $f22:$f20      $8  <- $f26:$f24
 * Each pointer is left on the last row it wrote.
 */
#define exp_cross_mmi_16_aligned \
  mov_line_16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14)        \
  mov_line_end16x4_mmi_aligned(%[iHeight], %[iStride], $f12, $f14)     \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f16, $f18)               \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f16, $f18)               \
  mov_line_16x4_mmi_aligned($11, %[iStride], $f16, $f18)               \
  mov_line_end16x4_mmi_aligned($11, %[iStride], $f16, $f18)            \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f20, $f22)                \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f20, $f22)                \
  mov_line_16x4_mmi_aligned($9, %[iStride], $f20, $f22)                \
  mov_line_end16x4_mmi_aligned($9, %[iStride], $f20, $f22)             \
  mov_line_16x4_mmi_aligned($8, %[iStride], $f24, $f26)                \
  mov_line_16x4_mmi_aligned($8, %[iStride], $f24, $f26)                \
  mov_line_16x4_mmi_aligned($8, %[iStride], $f24, $f26)                \
  mov_line_end16x4_mmi_aligned($8, %[iStride], $f24, $f26)
398 
/*
 * Assembly fragment: unaligned counterpart of exp_cross_mmi_16_aligned.
 * Fills four 16-byte-wide, 16-row blocks, stepping each pointer by
 * %[iStride] between rows (15 steps each):
 *   %[iHeight] <- $f12/$f14      $11 <- $f16/$f18
 *   $9         <- $f20/$f22      $8  <- $f24/$f26
 * Each pointer is left on the last row it wrote.
 */
#define exp_cross_mmi_16_unaligned \
  mov_line_16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14)      \
  mov_line_16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14)      \
  mov_line_16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14)      \
  mov_line_end16x4_mmi_unaligned(%[iHeight], %[iStride], $f12, $f14)   \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f16, $f18)             \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f16, $f18)             \
  mov_line_16x4_mmi_unaligned($11, %[iStride], $f16, $f18)             \
  mov_line_end16x4_mmi_unaligned($11, %[iStride], $f16, $f18)          \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f20, $f22)              \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f20, $f22)              \
  mov_line_16x4_mmi_unaligned($9, %[iStride], $f20, $f22)              \
  mov_line_end16x4_mmi_unaligned($9, %[iStride], $f20, $f22)           \
  mov_line_16x4_mmi_unaligned($8, %[iStride], $f24, $f26)              \
  mov_line_16x4_mmi_unaligned($8, %[iStride], $f24, $f26)              \
  mov_line_16x4_mmi_unaligned($8, %[iStride], $f24, $f26)              \
  mov_line_end16x4_mmi_unaligned($8, %[iStride], $f24, $f26)
416 
ExpandPictureLuma_mmi(uint8_t * pDst,int32_t iStride,int32_t iWidth,int32_t iHeight)417 void ExpandPictureLuma_mmi(uint8_t *pDst, int32_t iStride, int32_t iWidth,
418                            int32_t iHeight) {
419   BACKUP_REG;
420   __asm__ volatile (
421     ".set       arch=loongson3a                     \n\t"
422     "xor        $f28, $f28, $f28                    \n\t"
423     "lbu        $8, 0x0(%[pDst])                    \n\t"
424 
425     MMI_Copy16Times($f12, $f14, $f28, $8)
426 
427     "dnegu      %[iStride], %[iStride]              \n\t"
428     PTR_ADDU   "$9, %[pDst], %[iStride]             \n\t"
429     "dnegu      %[iStride], %[iStride]              \n\t"
430     "move       $10, %[iHeight]                     \n\t"
431     PTR_ADDU   "%[iHeight], %[iHeight], -0x1        \n\t"
432     "dmul       %[iHeight], %[iHeight], %[iStride]  \n\t"
433     PTR_ADDU   "%[iHeight], %[iHeight], %[pDst]     \n\t"
434 
435     "move       $8, %[iStride]                      \n\t"
436     "dsll       $8, 0x5                             \n\t"
437     PTR_ADDU   "$11, %[iHeight], $8                 \n\t"
438 
439     "lbu        $8, 0x0(%[iHeight])                 \n\t"
440     MMI_Copy16Times($f20, $f22, $f28, $8)
441     PTR_ADDU   "$8, %[iHeight], %[iWidth]           \n\t"
442     PTR_ADDIU  "$8, -0x1                            \n\t"
443     "lbu        $8, 0x0($8)                         \n\t"
444     "dmtc1      $8, $f24                            \n\t"
445     "pshufh     $f24, $f24, $f28                    \n\t"
446     "packushb   $f24, $f24, $f24                    \n\t"
447     "mov.d      $f26, $f24                          \n\t"
448     "dnegu      %[iStride], %[iStride]              \n\t"
449     "move       $12, %[pDst]                        \n\t"
450     "move       $13, %[iStride]                     \n\t"
451     "move       $14, %[iWidth]                      \n\t"
452     exp_top_bottom_mmi_32
453     "move       %[iWidth], $14                      \n\t"
454     "move       %[iStride], $13                     \n\t"
455     "move       %[pDst], $12                        \n\t"
456     PTR_ADDIU  "$9, %[pDst], -0x20                  \n\t"
457     PTR_ADDU   "%[iHeight], %[pDst], %[iWidth]      \n\t"
458     PTR_ADDIU  "%[iHeight], %[iHeight], -0x1        \n\t"
459     PTR_ADDIU  "$11, %[iHeight], 0x1                \n\t"
460     "lbu        $8, 0x0(%[iHeight])                 \n\t"
461     MMI_Copy16Times($f16, $f18, $f28, $8)
462     "dnegu      %[iStride], %[iStride]              \n\t"
463     "move       $8, $10                             \n\t"
464     "move       $10, %[pDst]                        \n\t"
465     "move       $12, %[iStride]                     \n\t"
466     "move       $13, %[iWidth]                      \n\t"
467     "move       $14, $8                             \n\t"
468 
469     exp_left_right_mmi_32
470 
471     "move       $8, $14                             \n\t"
472     "move       %[iWidth], $13                      \n\t"
473     "move       %[iStride], $12                     \n\t"
474     "move       %[pDst], $10                        \n\t"
475     "dnegu      %[iStride], %[iStride]              \n\t"
476     PTR_ADDIU  "%[iHeight], %[pDst], -0x20          \n\t"
477     PTR_ADDU   "%[iHeight], %[iHeight], %[iStride]  \n\t"
478     PTR_ADDU   "$11, %[pDst], %[iWidth]             \n\t"
479     PTR_ADDU   "$11, $11, %[iStride]                \n\t"
480     "dnegu      %[iStride], %[iStride]              \n\t"
481     PTR_ADDIU  "$8, $8, 0x20                        \n\t"
482     "dmul       $8, $8, %[iStride]                  \n\t"
483     PTR_ADDU   "$9, %[iHeight], $8                  \n\t"
484     PTR_ADDU   "$8, $11, $8                         \n\t"
485     "dnegu      %[iStride], %[iStride]              \n\t"
486     exp_cross_mmi_32
487     : [pDst]"+&r"((unsigned char *)pDst), [iStride]"+&r"((int)iStride),
488       [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight)
489     :
490     : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2",
491       "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20",
492       "$f22", "$f24", "$f26", "$f28"
493   );
494   RECOVER_REG;
495 }
496 
ExpandPictureChromaUnalign_mmi(uint8_t * pDst,int32_t iStride,int32_t iWidth,int32_t iHeight)497 void ExpandPictureChromaUnalign_mmi(uint8_t *pDst, int32_t iStride, int32_t iWidth,
498                                     int32_t iHeight) {
499   BACKUP_REG;
500   __asm__ volatile (
501     ".set       arch=loongson3a                     \n\t"
502     "xor        $f28, $f28, $f28                    \n\t"
503     "lbu        $8, 0x0(%[pDst])                    \n\t"
504 
505     MMI_Copy16Times($f12, $f14, $f28, $8)
506 
507     "dnegu      %[iStride], %[iStride]              \n\t"
508     PTR_ADDU   "$9, %[pDst], %[iStride]             \n\t"
509     "dnegu      %[iStride], %[iStride]              \n\t"
510     "move       $10, %[iHeight]                     \n\t"
511     PTR_ADDU   "%[iHeight], %[iHeight], -0x1        \n\t"
512     "dmul       %[iHeight], %[iHeight], %[iStride]  \n\t"
513     PTR_ADDU   "%[iHeight], %[iHeight], %[pDst]     \n\t"
514     "move       $8, %[iStride]                      \n\t"
515     "dsll       $8, 0x4                             \n\t"
516     PTR_ADDU   "$11, %[iHeight], $8                 \n\t"
517     "lbu        $8, 0x0(%[iHeight])                 \n\t"
518 
519     MMI_Copy16Times($f20, $f22, $f28, $8)
520 
521     PTR_ADDU   "$8, %[iHeight], %[iWidth]           \n\t"
522     PTR_ADDIU  "$8, -0x1                            \n\t"
523     "lbu        $8, 0x0($8)                         \n\t"
524 
525     MMI_Copy16Times($f24, $f26, $f28, $8)
526 
527     "dnegu      %[iStride], %[iStride]              \n\t"
528     "move       $12, %[pDst]                        \n\t"
529     "move       $13, %[iStride]                     \n\t"
530     "move       $14, %[iWidth]                      \n\t"
531 
532     exp_top_bottom_mmi_16_unaligned
533 
534     "move       %[iWidth], $14                      \n\t"
535     "move       %[iStride], $13                     \n\t"
536     "move       %[pDst], $12                        \n\t"
537     PTR_ADDIU  "$9, %[pDst], -0x10                  \n\t"
538     PTR_ADDU   "%[iHeight], %[pDst], %[iWidth]      \n\t"
539     PTR_ADDIU  "%[iHeight], %[iHeight], -0x1        \n\t"
540     PTR_ADDIU  "$11, %[iHeight], 0x1                \n\t"
541     "lbu        $8, 0x0(%[iHeight])                 \n\t"
542     MMI_Copy16Times($f16, $f18, $f28, $8)
543 
544     "dnegu      %[iStride], %[iStride]              \n\t"
545     "move       $8, $10                             \n\t"
546 
547     "move       $10, %[pDst]                        \n\t"
548     "move       $12, %[iStride]                     \n\t"
549     "move       $13, %[iWidth]                      \n\t"
550     "move       $14, $8                             \n\t"
551 
552     exp_left_right_mmi_16_unaligned
553 
554     "move       $8, $14                             \n\t"
555     "move       %[iWidth], $13                      \n\t"
556     "move       %[iStride], $12                     \n\t"
557     "move       %[pDst], $10                        \n\t"
558 
559     "dnegu      %[iStride], %[iStride]              \n\t"
560     PTR_ADDIU  "%[iHeight], %[pDst], -0x10          \n\t"
561     PTR_ADDU   "%[iHeight], %[iHeight], %[iStride]  \n\t"
562     PTR_ADDU   "$11, %[pDst], %[iWidth]             \n\t"
563     PTR_ADDU   "$11, $11, %[iStride]                \n\t"
564 
565     "dnegu      %[iStride], %[iStride]              \n\t"
566     PTR_ADDIU  "$8, $8, 0x10                        \n\t"
567     "dmul       $8, $8, %[iStride]                  \n\t"
568 
569     PTR_ADDU   "$9, %[iHeight], $8                  \n\t"
570     PTR_ADDU   "$8, $11, $8                         \n\t"
571     "dnegu      %[iStride], %[iStride]              \n\t"
572 
573     exp_cross_mmi_16_unaligned
574     : [pDst]"+&r"((unsigned char *)pDst), [iStride]"+&r"((int)iStride),
575       [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight)
576     :
577     : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2",
578       "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20",
579       "$f22", "$f24", "$f26", "$f28"
580   );
581   RECOVER_REG;
582 }
583 
/*!
 * \brief   Border expansion of a picture plane using the "_16_aligned"
 *          Loongson MMI helper macros.
 *
 * \param   pDst     address of the first byte of the plane; pDst[0] is read
 *                   as the first corner sample
 * \param   iStride  distance in bytes between consecutive rows (it is
 *                   multiplied by row counts below)
 * \param   iWidth   row width in bytes (pDst + iWidth - 1 is read as the last
 *                   byte of the first row)
 * \param   iHeight  number of rows ((iHeight - 1) * iStride is used as the
 *                   offset of the last row)
 *
 * The asm block runs three stages, each implemented by a macro that is
 * defined outside this function:
 *   1. exp_top_bottom_mmi_16_aligned
 *   2. exp_left_right_mmi_16_aligned
 *   3. exp_cross_mmi_16_aligned
 * Before the stages, the four corner bytes are loaded and passed through
 * MMI_Copy16Times into $f12/$f14, $f16/$f18, $f20/$f22 and $f24/$f26.
 * Presumably each pair then holds 16 copies of the byte and the stages fill a
 * 16-sample border (top/bottom rows, left/right columns, corner squares) --
 * TODO confirm against the macro definitions.
 *
 * All four arguments are bound as read-write early-clobber operands and are
 * used as scratch registers; their values after the asm block are not
 * meaningful. %[iStride] is negated in place several times, so the statement
 * order must be preserved.
 *
 * NOTE(review): this copy of the source carries code-browser residue (a
 * leading number on every line and a duplicated signature in front of
 * `void`); it has to be stripped before the file can be compiled.
 */
ExpandPictureChromaAlign_mmi(uint8_t * pDst,int32_t iStride,int32_t iWidth,int32_t iHeight)584 void ExpandPictureChromaAlign_mmi(uint8_t *pDst, int32_t iStride, int32_t iWidth,
585                                   int32_t iHeight) {
    /* BACKUP_REG / RECOVER_REG are defined elsewhere; presumably they save and
       restore callee-saved registers touched by the asm -- TODO confirm. */
586   BACKUP_REG;
587   __asm__ volatile (
588     ".set       arch=loongson3a                     \n\t"
    /* $f28 = 0; it is handed to every MMI_Copy16Times invocation below. */
589     "xor        $f28, $f28, $f28                    \n\t"
    /* $8 = pDst[0], the first byte of the first row. */
590     "lbu        $8, 0x0(%[pDst])                    \n\t"
591 
    /* Corner byte #1 -> $f12/$f14 (presumably replicated 16 times). */
592     MMI_Copy16Times($f12, $f14, $f28, $8)
593 
    /* $9 = pDst - iStride, computed by temporarily negating iStride. */
594     "dnegu      %[iStride], %[iStride]              \n\t"
595     PTR_ADDU   "$9, %[pDst], %[iStride]             \n\t"
596     "dnegu      %[iStride], %[iStride]              \n\t"
    /* Keep the original height in $10; %[iHeight] is reused as a pointer. */
597     "move       $10, %[iHeight]                     \n\t"
    /* %[iHeight] = pDst + (iHeight - 1) * iStride, i.e. start of the last row.
       NOTE(review): PTR_ADDU is given an immediate here, while the equivalent
       "-0x1" adjustment further down uses PTR_ADDIU -- confirm the assembler
       accepts this form for every PTR_ADDU expansion. */
598     PTR_ADDU   "%[iHeight], %[iHeight], -0x1        \n\t"
599     "dmul       %[iHeight], %[iHeight], %[iStride]  \n\t"
600     PTR_ADDU   "%[iHeight], %[iHeight], %[pDst]     \n\t"
    /* $11 = start of last row + 16 * iStride. */
601     "move       $8, %[iStride]                      \n\t"
602     "dsll       $8, 0x4                             \n\t"
603     PTR_ADDU   "$11, %[iHeight], $8                 \n\t"
    /* $8 = first byte of the last row. */
604     "lbu        $8, 0x0(%[iHeight])                 \n\t"
605 
    /* Corner byte #2 (last row, first column) -> $f20/$f22. */
606     MMI_Copy16Times($f20, $f22, $f28, $8)
607 
    /* $8 = byte at (start of last row) + iWidth - 1, the last byte of the last row. */
608     PTR_ADDU   "$8, %[iHeight], %[iWidth]           \n\t"
609     PTR_ADDIU  "$8, -0x1                            \n\t"
610     "lbu        $8, 0x0($8)                         \n\t"
611 
    /* Corner byte #3 (last row, last column) -> $f24/$f26. */
612     MMI_Copy16Times($f24, $f26, $f28, $8)
613 
    /* The top/bottom stage is entered with %[iStride] negated. */
614     "dnegu      %[iStride], %[iStride]              \n\t"
615 
    /* Stash pDst, the negated stride and the width in $12-$14 so they can be
       restored after the macro (which presumably modifies the operands). */
616     "move       $12, %[pDst]                        \n\t"
617     "move       $13, %[iStride]                     \n\t"
618     "move       $14, %[iWidth]                      \n\t"
619     exp_top_bottom_mmi_16_aligned
620 
    /* Restore the operands saved before the top/bottom stage. */
621     "move       %[iWidth], $14                      \n\t"
622     "move       %[iStride], $13                     \n\t"
623     "move       %[pDst], $12                        \n\t"
624 
    /* $9 = pDst - 16. */
625     PTR_ADDIU  "$9, %[pDst], -0x10                  \n\t"
626 
    /* %[iHeight] = pDst + iWidth - 1 (last byte of the first row);
       $11 = pDst + iWidth. */
627     PTR_ADDU   "%[iHeight], %[pDst], %[iWidth]      \n\t"
628     PTR_ADDIU  "%[iHeight], %[iHeight], -0x1        \n\t"
629     PTR_ADDIU  "$11, %[iHeight], 0x1                \n\t"
630 
    /* $8 = last byte of the first row. */
631     "lbu        $8, 0x0(%[iHeight])                 \n\t"
632 
    /* Corner byte #4 (first row, last column) -> $f16/$f18. */
633     MMI_Copy16Times($f16, $f18, $f28, $8)
634 
    /* Undo the negation restored from $13, so the left/right stage sees the
       stride with its original sign. */
635     "dnegu      %[iStride], %[iStride]              \n\t"
    /* Fetch the height saved in $10 (relies on $10 having survived the
       top/bottom macro -- TODO confirm). */
636     "move       $8, $10                             \n\t"
637 
    /* Stash pDst, stride, width and height in $10, $12, $13, $14. */
638     "move       $10, %[pDst]                        \n\t"
639     "move       $12, %[iStride]                     \n\t"
640     "move       $13, %[iWidth]                      \n\t"
641     "move       $14, $8                             \n\t"
642 
643     exp_left_right_mmi_16_aligned
644 
    /* Restore height into $8 and the three operands from their stash. */
645     "move       $8, $14                             \n\t"
646     "move       %[iWidth], $13                      \n\t"
647     "move       %[iStride], $12                     \n\t"
648     "move       %[pDst], $10                        \n\t"
649 
    /* With iStride negated:
         %[iHeight] = pDst - 16 - iStride
         $11        = pDst + iWidth - iStride */
650     "dnegu      %[iStride], %[iStride]              \n\t"
651     PTR_ADDIU  "%[iHeight], %[pDst], -0x10          \n\t"
652     PTR_ADDU   "%[iHeight], %[iHeight], %[iStride]  \n\t"
653     PTR_ADDU   "$11, %[pDst], %[iWidth]             \n\t"
654     PTR_ADDU   "$11, $11, %[iStride]                \n\t"
655 
    /* Back to the original sign: $8 = (height + 16) * iStride. */
656     "dnegu      %[iStride], %[iStride]              \n\t"
657     PTR_ADDIU  "$8, $8, 0x10                        \n\t"
658     "dmul       $8, $8, %[iStride]                  \n\t"
659 
    /* $9 and $8 are the two addresses above shifted down by (height + 16)
       rows; the cross stage is entered with %[iStride] negated once more. */
660     PTR_ADDU   "$9, %[iHeight], $8                  \n\t"
661     PTR_ADDU   "$8, $11, $8                         \n\t"
662     "dnegu      %[iStride], %[iStride]              \n\t"
663 
664     exp_cross_mmi_16_aligned
    /* Every argument is a read-write, early-clobber register operand. */
665     : [pDst]"+&r"((unsigned char *)pDst), [iStride]"+&r"((int)iStride),
666       [iWidth]"+&r"((int)iWidth), [iHeight]"+&r"((int)iHeight)
667     :
    /* Clobbers: memory, integer scratch $8-$14 and the even-numbered FP
       registers $f0-$f28. */
668     : "memory", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$f0", "$f2",
669       "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", "$f16", "$f18", "$f20",
670       "$f22", "$f24", "$f26", "$f28"
671   );
672   RECOVER_REG;
673 }
674