• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*!
 * \copy
 *     Copyright (c)  2013, Loongson Technology Co.,Ltd.
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

33 #ifndef ASMDEFS_MMI_H_
34 #define ASMDEFS_MMI_H_
35 
36 #define CACHE_LINE_SIZE 32
37 
/*
 * Pointer-width abstraction for the inline-asm templates below.
 * On a 64-bit MIPS ABI (n64) pointer arithmetic and loads/stores use the
 * 64-bit "d"-prefixed instructions; 32-bit ABIs use the plain forms.
 * Each mnemonic string keeps a trailing space so it can be concatenated
 * directly in front of its operand list inside an asm template.
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
# define mips_reg       int64_t
# define PTRSIZE        " 8 "
# define PTRLOG         " 3 "
# define PTR_ADDU       "daddu "
# define PTR_ADDIU      "daddiu "
# define PTR_ADDI       "daddi "
# define PTR_SUBU       "dsubu "
# define PTR_L          "ld "
# define PTR_S          "sd "
# define PTR_SRA        "dsra "
# define PTR_SRL        "dsrl "
# define PTR_SLL        "dsll "
#else
# define mips_reg       int32_t
# define PTRSIZE        " 4 "
# define PTRLOG         " 2 "
# define PTR_ADDU       "addu "
# define PTR_ADDIU      "addiu "
# define PTR_ADDI       "addi "
# define PTR_SUBU       "subu "
# define PTR_L          "lw "
# define PTR_S          "sw "
# define PTR_SRA        "sra "
# define PTR_SRL        "srl "
# define PTR_SLL        "sll "
#endif

/*
 * Byte-interleave step of a transpose: treats (f0,f2) and (f4,f6) as two
 * 128-bit values split across 64-bit MMI register pairs.
 * Out: (f0,f2) = byte interleave of f0 with f4 (low/high halves),
 *      (f8,f10) = byte interleave of the original f2 with f6.
 * f8/f10 double as scratch; their previous contents are lost.
 */
#define MMI_XSawp_BH(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "punpckhbh  "#f2", "#f0", "#f4"         \n\t" \
  "punpcklbh  "#f0", "#f0", "#f4"         \n\t" \
  "punpckhbh  "#f10", "#f8", "#f6"        \n\t" \
  "punpcklbh  "#f8", "#f8", "#f6"         \n\t"

/*
 * Halfword-interleave step of a transpose (see MMI_XSawp_BH for the
 * pairing convention).
 * Out: (f0,f2) = halfword interleave of f0 with f4,
 *      (f8,f10) = halfword interleave of the original f2 with f6.
 */
#define MMI_XSawp_HW(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "punpckhhw  "#f2", "#f0", "#f4"         \n\t" \
  "punpcklhw  "#f0", "#f0", "#f4"         \n\t" \
  "punpckhhw  "#f10", "#f8", "#f6"        \n\t" \
  "punpcklhw  "#f8", "#f8", "#f6"         \n\t"

/*
 * 32-bit-word interleave step of a transpose (see MMI_XSawp_BH).
 * Out: (f0,f2) = word interleave of f0 with f4,
 *      (f8,f10) = word interleave of the original f2 with f6.
 */
#define MMI_XSawp_WD(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "punpckhwd  "#f2", "#f0", "#f4"         \n\t" \
  "punpcklwd  "#f0", "#f0", "#f4"         \n\t" \
  "punpckhwd  "#f10", "#f8", "#f6"        \n\t" \
  "punpcklwd  "#f8", "#f8", "#f6"         \n\t"

/*
 * 64-bit ("double-quadword half") swap step of a transpose: exchanges the
 * high double of the first 128-bit value with the low double of the second.
 * Out: (f0,f2) = (f0, f4), (f8,f10) = (original f2, f6).
 */
#define MMI_XSawp_DQ(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f2"                \n\t" \
  "mov.d      "#f2", "#f4"                \n\t" \
  "mov.d      "#f10", "#f6"               \n\t"

/*
 * Per-halfword absolute value: (f0,f2) = |(f4,f6)| computed as
 * max(x, 0 - x) with pmaxsh.  f8/f10 are clobbered as scratch
 * (f8 is zeroed first, then holds the negated values).
 * NOTE(review): psubh wraps, so the halfword -32768 stays -32768.
 */
#define WELS_AbsH(f0, f2, f4, f6, f8, f10) \
  "xor        "#f8", "#f8", "#f8"         \n\t" \
  "psubh      "#f10", "#f8", "#f6"        \n\t" \
  "psubh      "#f8", "#f8", "#f4"         \n\t" \
  "pmaxsh     "#f0", "#f4", "#f8"         \n\t" \
  "pmaxsh     "#f2", "#f6", "#f10"        \n\t"

/*
 * Halfword butterfly on two 128-bit values a=(f0,f2), b=(f4,f6):
 *   (f4,f6) = a + b
 *   (f0,f2) = a - b
 * f8/f10 are scratch (preserve the original b during the update).
 */
#define MMI_SumSub(f0, f2, f4, f6, f8, f10) \
  "mov.d      "#f8", "#f4"                    \n\t" \
  "mov.d      "#f10", "#f6"                   \n\t" \
  "paddh      "#f4", "#f4", "#f0"             \n\t" \
  "paddh      "#f6", "#f6", "#f2"             \n\t" \
  "psubh      "#f0", "#f0", "#f8"             \n\t" \
  "psubh      "#f2", "#f2", "#f10"            \n\t"

/*
 * Load 8 pixels from each of two (possibly unaligned) addresses r0 and r1
 * via the gsldlc1/gsldrc1 pair, widen the bytes to halfwords, and compute
 * the difference:  (f0,f2) = pixels[r0] - pixels[r1]   (as halfwords).
 * f4/f6 are scratch (hold the widened r1 pixels).
 * NOTE(review): f8 is used as the unpack source, so it must hold 0 for the
 * widening to be a zero-extension -- the macro does not zero it itself.
 */
#define MMI_LoadDiff8P(f0, f2, f4, f6, f8, r0, r1) \
  "gsldlc1    "#f0", 0x7("#r0")               \n\t" \
  "gsldlc1    "#f4", 0x7("#r1")               \n\t" \
  "gsldrc1    "#f0", 0x0("#r0")               \n\t" \
  "gsldrc1    "#f4", 0x0("#r1")               \n\t" \
  "punpckhbh  "#f2", "#f0", "#f8"             \n\t" \
  "punpcklbh  "#f0", "#f0", "#f8"             \n\t" \
  "punpckhbh  "#f6", "#f4", "#f8"             \n\t" \
  "punpcklbh  "#f4", "#f4", "#f8"             \n\t" \
  "psubh      "#f0", "#f0", "#f4"             \n\t" \
  "psubh      "#f2", "#f2", "#f6"             \n\t"

/*
 * Transpose two 4x4 halfword matrices held across the register pairs
 * f0..f14, built from the XSawp HW/WD/DQ steps above.  f16/f18 are
 * scratch.  NOTE(review): the transposed rows come out permuted across
 * the pairs (see the final DQ steps) -- callers are written against that
 * exact placement; do not reorder these steps.
 */
#define MMI_TransTwo4x4H(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18) \
  MMI_XSawp_HW(f0, f2, f4, f6, f16, f18)  \
  MMI_XSawp_HW(f8, f10, f12, f14, f4, f6) \
  MMI_XSawp_WD(f0, f2, f8, f10, f12, f14) \
  MMI_XSawp_WD(f16, f18, f4, f6, f8, f10) \
  MMI_XSawp_DQ(f0, f2, f16, f18, f4, f6)  \
  MMI_XSawp_DQ(f12, f14, f8, f10, f16, f18)

/*
 * Transpose an 8x8 byte matrix spread across all eight MMI register pairs
 * (f0..f30).  Because every FP register is live, two general-purpose
 * registers r0/r1 are used as a spill slot for one pair via dmfc1/dmtc1
 * around each XSawp stage.  Statement order is load-bearing; keep the
 * spill/restore pairs exactly as written.
 */
#define MMI_TransTwo8x8B(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26, f28, f30, r0, r1) \
  "dmfc1      "#r0", "#f28"                   \n\t" \
  "dmfc1      "#r1", "#f30"                   \n\t" \
  MMI_XSawp_BH(f0, f2, f4, f6, f28, f30)            \
  MMI_XSawp_BH(f8, f10, f12, f14, f4, f6)           \
  MMI_XSawp_BH(f16, f18, f20, f22, f12, f14)        \
  "dmtc1      "#r0", "#f20"                   \n\t" \
  "dmtc1      "#r1", "#f22"                   \n\t" \
  "dmfc1      "#r0", "#f12"                   \n\t" \
  "dmfc1      "#r1", "#f14"                   \n\t" \
  MMI_XSawp_BH(f24, f26, f20, f22, f12, f14)        \
  MMI_XSawp_HW(f0, f2, f8, f10, f20, f22)           \
  MMI_XSawp_HW(f28, f30, f4, f6, f8, f10)           \
  MMI_XSawp_HW(f16, f18, f24, f26, f4, f6)          \
  "dmtc1      "#r0", "#f24"                   \n\t" \
  "dmtc1      "#r1", "#f26"                   \n\t" \
  "dmfc1      "#r0", "#f8"                    \n\t" \
  "dmfc1      "#r1", "#f10"                   \n\t" \
  MMI_XSawp_HW(f24, f26, f12, f14, f8, f10)         \
  MMI_XSawp_WD(f0, f2, f16, f18, f12, f14)          \
  MMI_XSawp_WD(f20, f22, f4, f6, f16, f18)          \
  MMI_XSawp_WD(f28, f30, f24, f26, f4, f6)          \
  "dmtc1      "#r0", "#f24"                   \n\t" \
  "dmtc1      "#r1", "#f26"                   \n\t" \
  "dmfc1      "#r0", "#f16"                   \n\t" \
  "dmfc1      "#r1", "#f18"                   \n\t" \
  MMI_XSawp_WD(f24, f26, f8, f10, f16, f18)         \
  MMI_XSawp_DQ(f0, f2, f28, f30, f8, f10)           \
  MMI_XSawp_DQ(f12, f14, f4, f6, f28, f30)          \
  MMI_XSawp_DQ(f20, f22, f24, f26, f4, f6)          \
  "dmtc1      "#r0", "#f24"                   \n\t" \
  "dmtc1      "#r1", "#f26"                   \n\t" \
  "dmfc1      "#r0", "#f0"                    \n\t" \
  "dmfc1      "#r1", "#f2"                    \n\t" \
  MMI_XSawp_DQ(f24, f26, f16, f18, f0, f2)          \
  "dmtc1      "#r0", "#f16"                   \n\t" \
  "dmtc1      "#r1", "#f18"                   \n\t"

/*
 * Single-register halfword interleave:
 *   f4 = high-halfword interleave of (f0, f2), f0 = low-halfword interleave.
 */
#define MMI_XSwap_HW_SINGLE(f0, f2, f4) \
  "punpckhhw  "#f4", "#f0", "#f2"             \n\t" \
  "punpcklhw  "#f0", "#f0", "#f2"             \n\t"

/*
 * Single-register 32-bit-word interleave:
 *   f4 = high-word interleave of (f0, f2), f0 = low-word interleave.
 */
#define MMI_XSwap_WD_SINGLE(f0, f2, f4) \
  "punpckhwd  "#f4", "#f0", "#f2"             \n\t" \
  "punpcklwd  "#f0", "#f0", "#f2"             \n\t"

/*
 * Transpose a 4x4 halfword matrix whose rows are in f0, f2, f4, f6,
 * using f8 as scratch.  Tracing the interleaves, the transposed rows
 * end up in f0, f6, f8, f4 (permuted order) -- callers rely on that
 * placement.
 */
#define MMI_Trans4x4H_SINGLE(f0, f2, f4, f6, f8) \
  MMI_XSwap_HW_SINGLE(f0, f2, f8)              \
  MMI_XSwap_HW_SINGLE(f4, f6, f2)              \
  MMI_XSwap_WD_SINGLE(f0, f4, f6)              \
  MMI_XSwap_WD_SINGLE(f8, f2, f4)

/*
 * Halfword butterfly on a single register pair (a = f0, b = f2):
 *   f0 = a + b,  f2 = b - a.   f4 is scratch (keeps the original b).
 */
#define MMI_SumSub_SINGLE(f0, f2, f4) \
  "mov.d      "#f4", "#f2"                    \n\t" \
  "psubh      "#f2", "#f2", "#f0"             \n\t" \
  "paddh      "#f0", "#f0", "#f4"             \n\t"

/*
 * Scaled butterfly (a = f0, b = f2, shift amount in f6):
 *   f0 = (a << s) + b
 *   f4 = a - (b << s)
 * f2 is clobbered (left holding b << s).  With s = 1 this is the
 * *2 butterfly used by the forward H.264 transform.
 */
#define MMI_SumSubMul2_SINGLE(f0, f2, f4, f6) \
  "mov.d      "#f4", "#f0"                    \n\t" \
  "psllh      "#f0", "#f0", "#f6"             \n\t" \
  "paddh      "#f0", "#f0", "#f2"             \n\t" \
  "psllh      "#f2", "#f2", "#f6"             \n\t" \
  "psubh      "#f4", "#f4", "#f2"             \n\t"

/*
 * Broadcast the low 16 bits of GPR r0 into all eight halfwords of the
 * pair (f0, f2).  f4 must hold 0x0: it is the pshufh selector, and an
 * all-zero selector replicates halfword 0.
 */
#define MMI_Copy8Times(f0, f2, f4, r0) \
  "dmtc1      "#r0", "#f0"                    \n\t" \
  "pshufh     "#f0", "#f0", "#f4"             \n\t" \
  "mov.d      "#f2", "#f0"                    \n\t"

/*
 * Broadcast the low 8 bits of GPR r0 into all sixteen bytes of the pair
 * (f0, f2): the byte is first doubled into a halfword via punpcklbh, then
 * replicated with pshufh.  f4 must hold 0x0 (pshufh selector).
 */
#define MMI_Copy16Times(f0, f2, f4, r0) \
  "dmtc1      "#r0", "#f0"                    \n\t" \
  "punpcklbh  "#f0", "#f0", "#f0"             \n\t" \
  "pshufh     "#f0", "#f0", "#f4"             \n\t" \
  "mov.d      "#f2", "#f0"                    \n\t"

/*
 * Half-scaled butterfly (a = f0, b = f2, arithmetic shift amount in f6):
 *   f4 = a + (b >> s)
 *   f0 = (a >> s) - b
 * With s = 1 this is the /2 butterfly of the H.264 inverse transform.
 */
#define MMI_SumSubDiv2_SINGLE(f0, f2, f4, f6) \
  "psrah      "#f4", "#f2", "#f6"             \n\t" \
  "paddh      "#f4", "#f4", "#f0"             \n\t" \
  "psrah      "#f0", "#f0", "#f6"             \n\t" \
  "psubh      "#f0", "#f0", "#f2"             \n\t"

/*
 * One 4-point pass of the H.264 4x4 inverse transform, expressed as a
 * butterfly network over single registers: a plain sum/difference stage,
 * a half-scaled stage (shift amount supplied in f12 by the caller,
 * presumably 1 -- confirm at call sites), then two recombining stages.
 * f10 is scratch.
 */
#define MMI_IDCT_SINGLE(f0, f2, f4, f6, f8, f10, f12) \
  MMI_SumSub_SINGLE(f6, f8, f10)             \
  MMI_SumSubDiv2_SINGLE(f4, f2, f0, f12)     \
  MMI_SumSub_SINGLE(f0, f6, f10)             \
  MMI_SumSub_SINGLE(f4, f8, f10)

/*
 * Reconstruct and store 4 pixels:
 *   residual f0 += rounding constant f4, arithmetic-shift right by f8,
 *   add the predictor bytes loaded (unaligned) from r1 widened to
 *   halfwords, saturate-pack to unsigned bytes, and store the low 4
 *   bytes (unaligned) to r0.
 * NOTE(review): f6 is the unpack source and must hold 0 for the widening
 * to be a zero-extension; f2 is clobbered with the predictor halfwords.
 */
#define MMI_StoreDiff4P_SINGLE(f0, f2, f4, f6, r0, r1, f8) \
  "gsldlc1    "#f2", 0x7("#r1")               \n\t" \
  "gsldrc1    "#f2", 0x0("#r1")               \n\t" \
  "punpcklbh  "#f2", "#f2", "#f6"             \n\t" \
  "paddh      "#f0", "#f0", "#f4"             \n\t" \
  "psrah      "#f0", "#f0", "#f8"             \n\t" \
  "paddsh     "#f0", "#f0", "#f2"             \n\t" \
  "packushb   "#f0", "#f0", "#f2"             \n\t" \
  "gsswlc1    "#f0", 0x3("#r0")               \n\t" \
  "gsswrc1    "#f0", 0x0("#r0")               \n\t"

/*
 * Horizontal reduction: sums the eight halfwords of the pair (f0, f2)
 * into a single 32-bit value left in the low word of f0.  f8 must hold 0
 * (unpack source used to widen halfwords to words); f2 is clobbered.
 * NOTE(review): the first paddh adds before widening, so the halfword
 * partial sums must fit in 16 bits -- presumed guaranteed by callers.
 */
#define SUMH_HORIZON(f0, f2, f4, f6, f8) \
  "paddh      "#f0", "#f0", "#f2"                       \n\t" \
  "punpckhhw  "#f2", "#f0", "#f8"                       \n\t" \
  "punpcklhw  "#f0", "#f0", "#f8"                       \n\t" \
  "paddw      "#f0", "#f0", "#f2"                       \n\t" \
  "punpckhwd  "#f2", "#f0", "#f0"                       \n\t" \
  "paddw      "#f0", "#f0", "#f2"                       \n\t"

/*
 * Gather a column of pixels from 8 consecutive rows: starting at address
 * r0 with row stride r1, loads 8 unaligned 8-byte rows and interleaves
 * them at byte, halfword and finally word granularity so that the column
 * data ends up in the pair (f0, f2).  r2 is an address scratch; on exit
 * r0 has been advanced past the rows read.  f4, f8, f12, f14 are data
 * scratch.  NOTE(review): parameters f6 and f10 are never referenced --
 * presumably kept so all LOAD_* macros share one parameter shape.
 */
#define LOAD_COLUMN(f0, f2, f4, f6, f8, f10, f12, f14, r0, r1, r2) \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f0", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f0", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f0", "#f0", "#f4"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f8", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f8", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f8", "#f8", "#f4"                       \n\t" \
  "punpckhhw  "#f2", "#f0", "#f8"                       \n\t" \
  "punpcklhw  "#f0", "#f0", "#f8"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f12", 0x7("#r0")                        \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f12", 0x0("#r0")                        \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f12", "#f12", "#f4"                     \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f8", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f4", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f8", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f8", "#f8", "#f4"                       \n\t" \
  "punpckhhw  "#f14", "#f12", "#f8"                     \n\t" \
  "punpcklhw  "#f12", "#f12", "#f8"                     \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "punpcklwd  "#f0", "#f2", "#f14"                      \n\t" \
  "punpckhwd  "#f2", "#f2", "#f14"                      \n\t"

/*
 * Shorter (4-row) variant of LOAD_COLUMN, presumably for chroma: loads
 * 4 unaligned rows starting at r0 with stride r1 and interleaves them so
 * the column data ends up in f0.  r2 is an address scratch; r0 is
 * advanced past the rows read.  f2/f4 are data scratch.
 * NOTE(review): parameter f6 is never referenced -- kept for a uniform
 * parameter shape with LOAD_COLUMN.
 */
#define LOAD_COLUMN_C(f0, f2, f4, f6, r0, r1, r2) \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f0", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f2", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f0", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f2", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f0", "#f0", "#f2"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t" \
  "daddu      "#r2", "#r0", "#r1"                       \n\t" \
  "gsldlc1    "#f4", 0x7("#r0")                         \n\t" \
  "gsldlc1    "#f2", 0x7("#r2")                         \n\t" \
  "gsldrc1    "#f4", 0x0("#r0")                         \n\t" \
  "gsldrc1    "#f2", 0x0("#r2")                         \n\t" \
  "punpcklbh  "#f4", "#f4", "#f2"                       \n\t" \
  "punpckhhw  "#f0", "#f0", "#f4"                       \n\t" \
  "daddu      "#r0", "#r2", "#r1"                       \n\t"

/**
 * Back up floating-point/MMI registers into a 16-byte-aligned on-stack
 * buffer (__back_temp) so hand-written MMI code may clobber them freely.
 * Declares __back_temp in the current scope; must be paired with
 * RECOVER_REG (which reads the same buffer) before returning.
 * NOTE(review): the saved set differs per ABI -- $f24..$f31 on the
 * 64-bit ABI, the even registers $f20..$f30 otherwise -- presumably the
 * respective callee-saved FP sets; confirm against the MIPS ABI docs.
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
#define BACKUP_REG \
   double __attribute__((aligned(16))) __back_temp[8];         \
   __asm__ volatile (                                          \
     "gssqc1       $f25,      $f24,       0x00(%[temp])  \n\t" \
     "gssqc1       $f27,      $f26,       0x10(%[temp])  \n\t" \
     "gssqc1       $f29,      $f28,       0x20(%[temp])  \n\t" \
     "gssqc1       $f31,      $f30,       0x30(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );
#else
#define BACKUP_REG \
   double __attribute__((aligned(16))) __back_temp[8];         \
   __asm__ volatile (                                          \
     "gssqc1       $f22,      $f20,       0x00(%[temp])  \n\t" \
     "gssqc1       $f26,      $f24,       0x10(%[temp])  \n\t" \
     "gssqc1       $f30,      $f28,       0x20(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );
#endif

/**
 * Restore the floating-point/MMI registers saved by BACKUP_REG from the
 * __back_temp buffer that BACKUP_REG declared in the enclosing scope.
 * The register lists must stay exactly symmetric with BACKUP_REG's
 * (64-bit ABI: $f24..$f31; otherwise the even registers $f20..$f30).
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
#define RECOVER_REG \
   __asm__ volatile (                                          \
     "gslqc1       $f25,      $f24,       0x00(%[temp])  \n\t" \
     "gslqc1       $f27,      $f26,       0x10(%[temp])  \n\t" \
     "gslqc1       $f29,      $f28,       0x20(%[temp])  \n\t" \
     "gslqc1       $f31,      $f30,       0x30(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );
#else
#define RECOVER_REG \
   __asm__ volatile (                                          \
     "gslqc1       $f22,      $f20,       0x00(%[temp])  \n\t" \
     "gslqc1       $f26,      $f24,       0x10(%[temp])  \n\t" \
     "gslqc1       $f30,      $f28,       0x20(%[temp])  \n\t" \
     :                                                         \
     : [temp]"r"(__back_temp)                                  \
     : "memory"                                                \
   );
#endif

342 # define OK             1
343 # define NOTOK          0
344 
345 #endif  /* ASMDEFS_MMI_H_ */
346