• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Loongson SIMD utils
3  *
4  * Copyright (c) 2016 Loongson Technology Corporation Limited
5  * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #ifndef AVUTIL_MIPS_MMIUTILS_H
25 #define AVUTIL_MIPS_MMIUTILS_H
26 
27 #include "config.h"
28 
29 #include "libavutil/mem_internal.h"
30 #include "libavutil/mips/asmdefs.h"
31 
/*
 * These were used to define temporary registers for MMI macros;
 * the macros in this file now use $at as their scratch register instead.
 * They are theoretically unnecessary and expand to nothing, but are
 * left here to avoid a mess (presumably so existing users of these
 * names keep compiling -- confirm against callers).
 */
#define DECLARE_VAR_LOW32
#define RESTRICT_ASM_LOW32
#define DECLARE_VAR_ALL64
#define RESTRICT_ASM_ALL64
#define DECLARE_VAR_ADDRT
#define RESTRICT_ASM_ADDRT
43 
44 #if HAVE_LOONGSON2
45 
/*
 * Indexed GPR load/store helpers for the HAVE_LOONGSON2 path.
 *
 * Each macro adds `index` to `base` with PTR_ADDU (defined outside this
 * file; presumably a pointer-width add), leaves the sum in $at and then
 * accesses disp($at). $at is used explicitly, so the sequence is wrapped
 * in ".set noat" / ".set at".
 *
 *   gpr:   general-purpose register that is loaded or stored
 *   base:  register holding the base address
 *   index: register added to base
 *   disp:  constant displacement applied to the sum
 */

/* lw gpr, disp(base + index) */
#define MMI_LWX(gpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    "lw         "#gpr",     "#disp"($at)                       \n\t" \
    ".set at                                                   \n\t"

/* sw gpr, disp(base + index) */
#define MMI_SWX(gpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    "sw         "#gpr",     "#disp"($at)                       \n\t" \
    ".set at                                                   \n\t"

/* ld gpr, disp(base + index) */
#define MMI_LDX(gpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    "ld         "#gpr",     "#disp"($at)                       \n\t" \
    ".set at                                                   \n\t"

/* sd gpr, disp(base + index) */
#define MMI_SDX(gpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    "sd         "#gpr",     "#disp"($at)                       \n\t" \
    ".set at                                                   \n\t"
69 
/*
 * 32-bit FPR load/store helpers for the HAVE_LOONGSON2 path.
 *
 *   fpr:   floating-point register that is loaded or stored
 *   base:  register holding the address
 *   index: register added to base (indexed forms only)
 *   disp:  constant displacement
 *
 * The U* forms move the word through $at with ulw/usw plus mtc1/mfc1;
 * the *X* forms form base + index in $at with PTR_ADDU (defined outside
 * this file). Both therefore bracket their code with ".set noat" /
 * ".set at".
 */

/* lwc1 fpr, disp(base) */
#define MMI_LWC1(fpr, base, disp) \
    "lwc1       "#fpr",      "#disp"("#base")                        \n\t"

/* ulw into $at, then mtc1 $at -> fpr */
#define MMI_ULWC1(fpr, base, disp) \
    ".set noat                                                      \n\t" \
    "ulw        $at,   "#disp"("#base")                             \n\t" \
    "mtc1       $at,   "#fpr"                                        \n\t" \
    ".set at                                                        \n\t"

/* $at = base + index, then MMI_LWC1 through $at */
#define MMI_LWXC1(fpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    MMI_LWC1(fpr, $at, disp) \
    ".set at                                                   \n\t"

/* swc1 fpr, disp(base) */
#define MMI_SWC1(fpr, base, disp) \
    "swc1       "#fpr",      "#disp"("#base")                        \n\t"

/* mfc1 fpr -> $at, then usw from $at */
#define MMI_USWC1(fpr, base, disp) \
    ".set noat                                                      \n\t" \
    "mfc1       $at,   "#fpr"                                        \n\t" \
    "usw        $at,   "#disp"("#base")                             \n\t" \
    ".set at                                                        \n\t"

/* $at = base + index, then MMI_SWC1 through $at */
#define MMI_SWXC1(fpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    MMI_SWC1(fpr, $at, disp) \
    ".set at                                                   \n\t"
99 
/*
 * 64-bit FPR load/store helpers for the HAVE_LOONGSON2 path.
 *
 *   fpr:   floating-point register that is loaded or stored
 *   base:  register holding the address
 *   index: register added to base (indexed forms only)
 *   disp:  constant displacement
 *
 * The U* forms move the doubleword through $at with uld/usd plus
 * dmtc1/dmfc1; the *X* forms form base + index in $at with PTR_ADDU
 * (defined outside this file). Both bracket their code with
 * ".set noat" / ".set at".
 */

/* ldc1 fpr, disp(base) */
#define MMI_LDC1(fpr, base, disp) \
    "ldc1       "#fpr",      "#disp"("#base")                        \n\t"

/* uld into $at, then dmtc1 $at -> fpr */
#define MMI_ULDC1(fpr, base, disp) \
    ".set noat                                                      \n\t" \
    "uld        $at,   "#disp"("#base")                             \n\t" \
    "dmtc1      $at,   "#fpr"                                        \n\t" \
    ".set at                                                        \n\t"

/* $at = base + index, then MMI_LDC1 through $at */
#define MMI_LDXC1(fpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    MMI_LDC1(fpr, $at, disp) \
    ".set at                                                   \n\t"

/* sdc1 fpr, disp(base) */
#define MMI_SDC1(fpr, base, disp) \
    "sdc1       "#fpr",      "#disp"("#base")                        \n\t"

/* dmfc1 fpr -> $at, then usd from $at */
#define MMI_USDC1(fpr, base, disp) \
    ".set noat                                                      \n\t" \
    "dmfc1      $at,   "#fpr"                                        \n\t" \
    "usd        $at,   "#disp"("#base")                             \n\t" \
    ".set at                                                        \n\t"

/* $at = base + index, then MMI_SDC1 through $at */
#define MMI_SDXC1(fpr, base, index, disp) \
    ".set noat                                                 \n\t" \
    PTR_ADDU    "$at,  "#base",    "#index"                   \n\t" \
    MMI_SDC1(fpr, $at, disp) \
    ".set at                                                   \n\t"
129 
/*
 * Quad-word (2 x 64-bit) helpers for the HAVE_LOONGSON2 path, built from
 * two doubleword accesses: the first register uses disp(base), the second
 * uses 8+disp(base).
 *
 *   lo / hi:  pair of registers (GPRs for LQ/SQ, FPRs for LQC1/SQC1)
 *   base:     register holding the address
 *   disp:     constant displacement of the first doubleword
 */

/* ld lo, disp(base); ld hi, 8+disp(base) */
#define MMI_LQ(lo, hi, base, disp) \
    "ld         "#lo",    "#disp"("#base")                        \n\t" \
    "ld         "#hi",  8+"#disp"("#base")                        \n\t"

/* sd lo, disp(base); sd hi, 8+disp(base) */
#define MMI_SQ(lo, hi, base, disp) \
    "sd         "#lo",    "#disp"("#base")                        \n\t" \
    "sd         "#hi",  8+"#disp"("#base")                        \n\t"

/* ldc1 lo, disp(base); ldc1 hi, 8+disp(base) */
#define MMI_LQC1(lo, hi, base, disp) \
    "ldc1       "#lo",     "#disp"("#base")                        \n\t" \
    "ldc1       "#hi",   8+"#disp"("#base")                        \n\t"

/* sdc1 lo, disp(base); sdc1 hi, 8+disp(base) */
#define MMI_SQC1(lo, hi, base, disp) \
    "sdc1       "#lo",     "#disp"("#base")                        \n\t" \
    "sdc1       "#hi",   8+"#disp"("#base")                        \n\t"
145 
146 #elif HAVE_LOONGSON3 /* !HAVE_LOONGSON2 */
147 
/*
 * Indexed GPR load/store helpers for the HAVE_LOONGSON3 path. Each one is
 * a single gs*x instruction taking disp(base, index) as its operand, so
 * no scratch register is involved.
 *
 *   gpr:   general-purpose register that is loaded or stored
 *   base:  register holding the base address
 *   index: index register
 *   disp:  constant displacement
 */

/* gslwx gpr, disp(base, index) */
#define MMI_LWX(gpr, base, index, disp) \
    "gslwx      "#gpr",     "#disp"("#base", "#index")             \n\t"

/* gsswx gpr, disp(base, index) */
#define MMI_SWX(gpr, base, index, disp) \
    "gsswx      "#gpr",     "#disp"("#base", "#index")             \n\t"

/* gsldx gpr, disp(base, index) */
#define MMI_LDX(gpr, base, index, disp) \
    "gsldx      "#gpr",     "#disp"("#base", "#index")             \n\t"

/* gssdx gpr, disp(base, index) */
#define MMI_SDX(gpr, base, index, disp) \
    "gssdx      "#gpr",     "#disp"("#base", "#index")             \n\t"

/* lwc1 fpr, disp(base) */
#define MMI_LWC1(fpr, base, disp) \
    "lwc1       "#fpr",      "#disp"("#base")                        \n\t"
162 
/*
 * Unaligned 32-bit load into an FPR for the HAVE_LOONGSON3 path.
 *
 *   MMI_ULWC1(fp, addr, bias)       - load the word at bias(addr)
 *   MMI_LWLRC1(fp, addr, bias, off) - same, with the offset of the
 *                                     "left" access given explicitly
 *
 * Both macros are provided for both ABIs so that callers do not depend on
 * _MIPS_SIM: previously the O32 branch only defined MMI_LWLRC1 and the
 * other branch only defined MMI_ULWC1, leaving MMI_ULWC1 undefined on O32
 * although every other configuration in this file provides it.
 */
#if _MIPS_SIM == _ABIO32 /* workaround for 3A2000 gslwlc1 bug */

/* Avoid gslwlc1: assemble the word in $at with lwl/lwr, then mtc1. */
#define MMI_LWLRC1(fp, addr, bias, off)                                     \
    ".set noat                                                 \n\t"   \
    "lwl        $at,   "#bias"+"#off"("#addr")                 \n\t"   \
    "lwr        $at,   "#bias"("#addr")                        \n\t"   \
    "mtc1       $at,   "#fp"                                   \n\t"   \
    ".set at                                                   \n\t"

/* Same sequence with the left-access offset fixed at 3 (one word). */
#define MMI_ULWC1(fp, addr, bias)                                           \
    ".set noat                                                 \n\t"   \
    "lwl        $at,   "#bias"+3("#addr")                      \n\t"   \
    "lwr        $at,   "#bias"("#addr")                        \n\t"   \
    "mtc1       $at,   "#fp"                                   \n\t"   \
    ".set at                                                   \n\t"

#else /* _MIPS_SIM != _ABIO32 */

/* Identical to the empty definitions at the top of the file. */
#define DECLARE_VAR_LOW32
#define RESTRICT_ASM_LOW32

#define MMI_LWLRC1(fp, addr, bias, off)                                     \
    "gslwlc1    "#fp",    "#off"+"#bias"("#addr")                   \n\t"   \
    "gslwrc1    "#fp",      "#bias"("#addr")                        \n\t"

#define MMI_ULWC1(fp, addr, bias)                                           \
    "gslwlc1    "#fp",    3+"#bias"("#addr")                        \n\t"   \
    "gslwrc1    "#fp",      "#bias"("#addr")                        \n\t"

#endif /* _MIPS_SIM != _ABIO32 */
182 
/*
 * FPR load/store helpers for the HAVE_LOONGSON3 path.
 *
 *   fpr:   floating-point register that is loaded or stored
 *   base:  register holding the address
 *   index: index register (indexed *X* forms only)
 *   disp:  constant displacement
 *
 * The U* forms are left/right instruction pairs; the "left" access is
 * issued at 3+disp (word) or 7+disp (doubleword), the "right" one at disp.
 */

/* ---- 32-bit ---- */

/* gslwxc1 fpr, disp(base, index) */
#define MMI_LWXC1(fpr, base, index, disp) \
    "gslwxc1    "#fpr",      "#disp"("#base", "#index")             \n\t"

/* swc1 fpr, disp(base) */
#define MMI_SWC1(fpr, base, disp) \
    "swc1       "#fpr",      "#disp"("#base")                        \n\t"

/* gsswlc1 at 3+disp, gsswrc1 at disp */
#define MMI_USWC1(fpr, base, disp) \
    "gsswlc1    "#fpr",    3+"#disp"("#base")                        \n\t" \
    "gsswrc1    "#fpr",      "#disp"("#base")                        \n\t"

/* gsswxc1 fpr, disp(base, index) */
#define MMI_SWXC1(fpr, base, index, disp) \
    "gsswxc1    "#fpr",      "#disp"("#base", "#index")             \n\t"

/* ---- 64-bit ---- */

/* ldc1 fpr, disp(base) */
#define MMI_LDC1(fpr, base, disp) \
    "ldc1       "#fpr",      "#disp"("#base")                        \n\t"

/* gsldlc1 at 7+disp, gsldrc1 at disp */
#define MMI_ULDC1(fpr, base, disp) \
    "gsldlc1    "#fpr",    7+"#disp"("#base")                        \n\t" \
    "gsldrc1    "#fpr",      "#disp"("#base")                        \n\t"

/* gsldxc1 fpr, disp(base, index) */
#define MMI_LDXC1(fpr, base, index, disp) \
    "gsldxc1    "#fpr",      "#disp"("#base", "#index")             \n\t"

/* sdc1 fpr, disp(base) */
#define MMI_SDC1(fpr, base, disp) \
    "sdc1       "#fpr",      "#disp"("#base")                        \n\t"

/* gssdlc1 at 7+disp, gssdrc1 at disp */
#define MMI_USDC1(fpr, base, disp) \
    "gssdlc1    "#fpr",    7+"#disp"("#base")                        \n\t" \
    "gssdrc1    "#fpr",      "#disp"("#base")                        \n\t"

/* gssdxc1 fpr, disp(base, index) */
#define MMI_SDXC1(fpr, base, index, disp) \
    "gssdxc1    "#fpr",      "#disp"("#base", "#index")             \n\t"
215 
/*
 * Quad-word helpers for the HAVE_LOONGSON3 path: one gs*q instruction
 * that names both registers of the pair and a single disp(base) operand.
 *
 *   first / second: register pair, emitted in that order
 *                   (GPRs for LQ/SQ, FPRs for LQC1/SQC1)
 *   base:           register holding the address
 *   disp:           constant displacement
 */

/* gslq first, second, disp(base) */
#define MMI_LQ(first, second, base, disp) \
    "gslq       "#first",    "#second",     "#disp"("#base")           \n\t"

/* gssq first, second, disp(base) */
#define MMI_SQ(first, second, base, disp) \
    "gssq       "#first",    "#second",     "#disp"("#base")           \n\t"

/* gslqc1 first, second, disp(base) */
#define MMI_LQC1(first, second, base, disp) \
    "gslqc1     "#first",     "#second",     "#disp"("#base")            \n\t"

/* gssqc1 first, second, disp(base) */
#define MMI_SQC1(first, second, base, disp) \
    "gssqc1     "#first",     "#second",     "#disp"("#base")            \n\t"
227 
228 #endif /* HAVE_LOONGSON2 */
229 
/**
 * Backup saved registers
 * We're not using compiler's clobber list as it's not smart enough
 * to take advantage of quad word load/store.
 *
 * Declares temp_backup_reg (8 doubles, via LOCAL_ALIGNED_16) in the
 * calling scope and stores FPR pairs into it with MMI_SQC1, one 16-byte
 * slot per pair:
 *   _MIPS_SIM == _ABI64: $f24..$f31           at 0x00, 0x10, 0x20, 0x30
 *   otherwise:           $f20, $f22, .. $f30  at 0x00, 0x10, 0x20
 *
 * Every pair must get its own slot. The non-ABI64 branch used to store
 * both ($f22,$f20) and ($f26,$f24) at 0x10, so the first pair was
 * overwritten and later restored with the values of the second.
 *
 * Expands to a declaration followed by an if/else statement, so it must
 * be used where a declaration is allowed and paired with RECOVER_REG in
 * the same scope.
 */
#define BACKUP_REG \
  LOCAL_ALIGNED_16(double, temp_backup_reg, [8]);               \
  if (_MIPS_SIM == _ABI64)                                      \
    __asm__ volatile (                                          \
      MMI_SQC1($f25, $f24, %[temp], 0x00)                       \
      MMI_SQC1($f27, $f26, %[temp], 0x10)                       \
      MMI_SQC1($f29, $f28, %[temp], 0x20)                       \
      MMI_SQC1($f31, $f30, %[temp], 0x30)                       \
      :                                                         \
      : [temp]"r"(temp_backup_reg)                              \
      : "memory"                                                \
    );                                                          \
  else                                                          \
    __asm__ volatile (                                          \
      MMI_SQC1($f22, $f20, %[temp], 0x00)                       \
      MMI_SQC1($f26, $f24, %[temp], 0x10)                       \
      MMI_SQC1($f30, $f28, %[temp], 0x20)                       \
      :                                                         \
      : [temp]"r"(temp_backup_reg)                              \
      : "memory"                                                \
    );

/**
 * recover register
 *
 * Reloads the FPR pairs saved by BACKUP_REG from temp_backup_reg with
 * MMI_LQC1, using exactly the same slot layout as BACKUP_REG (0x00,
 * 0x10, 0x20 and, for _ABI64, 0x30).
 */
#define RECOVER_REG \
  if (_MIPS_SIM == _ABI64)                                      \
    __asm__ volatile (                                          \
      MMI_LQC1($f25, $f24, %[temp], 0x00)                       \
      MMI_LQC1($f27, $f26, %[temp], 0x10)                       \
      MMI_LQC1($f29, $f28, %[temp], 0x20)                       \
      MMI_LQC1($f31, $f30, %[temp], 0x30)                       \
      :                                                         \
      : [temp]"r"(temp_backup_reg)                              \
      : "memory"                                                \
    );                                                          \
  else                                                          \
    __asm__ volatile (                                          \
      MMI_LQC1($f22, $f20, %[temp], 0x00)                       \
      MMI_LQC1($f26, $f24, %[temp], 0x10)                       \
      MMI_LQC1($f30, $f28, %[temp], 0x20)                       \
      :                                                         \
      : [temp]"r"(temp_backup_reg)                              \
      : "memory"                                                \
    );
280 
/**
 * brief: Transpose 2X2 word packaged data.
 * in0, in1:   source registers (left unmodified unless they alias a dst)
 * out0, out1: destination registers; out0 takes the punpcklwd result of
 *             (in0, in1), out1 the punpckhwd result
 */
#define TRANSPOSE_2W(in0, in1, out0, out1) \
        "punpcklwd  "#out0",   "#in0",   "#in1"                \n\t" \
        "punpckhwd  "#out1",   "#in0",   "#in1"                \n\t"
289 
/**
 * brief: Transpose 4X4 half word packaged data.
 * row0..row3: source rows, overwritten with the result
 * tmp0..tmp3: scratch registers, clobbered
 *
 * Two passes: halfword interleave of row pairs into the scratch
 * registers, then word interleave of the scratch registers back into
 * the rows.
 */
#define TRANSPOSE_4H(row0, row1, row2, row3, tmp0, tmp1, tmp2, tmp3) \
        /* pass 1: halfword interleave, rows -> scratch */ \
        "punpcklhw  "#tmp0",   "#row0",   "#row1"                \n\t" \
        "punpckhhw  "#tmp1",   "#row0",   "#row1"                \n\t" \
        "punpcklhw  "#tmp2",   "#row2",   "#row3"                \n\t" \
        "punpckhhw  "#tmp3",   "#row2",   "#row3"                \n\t" \
        /* pass 2: word interleave, scratch -> rows */ \
        "punpcklwd  "#row0",   "#tmp0",   "#tmp2"                \n\t" \
        "punpckhwd  "#row1",   "#tmp0",   "#tmp2"                \n\t" \
        "punpcklwd  "#row2",   "#tmp1",   "#tmp3"                \n\t" \
        "punpckhwd  "#row3",   "#tmp1",   "#tmp3"                \n\t"
305 
/**
 * brief: Transpose 8x8 byte packaged data.
 * row0~row7: source rows, overwritten with the result
 * tmp0~tmp3: scratch registers, clobbered
 *
 * Three interleave passes (byte, halfword, word). The instruction order
 * matters: registers are reused as destinations as soon as their old
 * contents have been consumed.
 */
#define TRANSPOSE_8B(row0, row1, row2, row3, row4, row5, row6, row7, \
                     tmp0, tmp1, tmp2, tmp3) \
        /* pass 1: byte interleave of rows 0-3 into tmp0-3 */ \
        "punpcklbh  "#tmp0",   "#row0",   "#row1"                \n\t" \
        "punpckhbh  "#tmp1",   "#row0",   "#row1"                \n\t" \
        "punpcklbh  "#tmp2",   "#row2",   "#row3"                \n\t" \
        "punpckhbh  "#tmp3",   "#row2",   "#row3"                \n\t" \
        /* pass 1: byte interleave of rows 4-7 into rows 0-3 */ \
        "punpcklbh  "#row0",   "#row4",   "#row5"                \n\t" \
        "punpckhbh  "#row1",   "#row4",   "#row5"                \n\t" \
        "punpcklbh  "#row2",   "#row6",   "#row7"                \n\t" \
        "punpckhbh  "#row3",   "#row6",   "#row7"                \n\t" \
        /* pass 2: halfword interleave of tmp0-3 into rows 4-7 */ \
        "punpcklhw  "#row4",   "#tmp0",   "#tmp2"                \n\t" \
        "punpckhhw  "#row5",   "#tmp0",   "#tmp2"                \n\t" \
        "punpcklhw  "#row6",   "#tmp1",   "#tmp3"                \n\t" \
        "punpckhhw  "#row7",   "#tmp1",   "#tmp3"                \n\t" \
        /* pass 2: halfword interleave of rows 0-3 into tmp0-3 */ \
        "punpcklhw  "#tmp0",   "#row0",   "#row2"                \n\t" \
        "punpckhhw  "#tmp1",   "#row0",   "#row2"                \n\t" \
        "punpcklhw  "#tmp2",   "#row1",   "#row3"                \n\t" \
        "punpckhhw  "#tmp3",   "#row1",   "#row3"                \n\t" \
        /* pass 3: word interleave of rows 4-7 with tmp0-3 */ \
        "punpcklwd  "#row0",   "#row4",   "#tmp0"                \n\t" \
        "punpckhwd  "#row1",   "#row4",   "#tmp0"                \n\t" \
        "punpcklwd  "#row2",   "#row5",   "#tmp1"                \n\t" \
        "punpckhwd  "#row3",   "#row5",   "#tmp1"                \n\t" \
        "punpcklwd  "#row4",   "#row6",   "#tmp2"                \n\t" \
        "punpckhwd  "#row5",   "#row6",   "#tmp2"                \n\t" \
        "punpcklwd  "#row6",   "#row7",   "#tmp3"                \n\t" \
        "punpckhwd  "#row7",   "#row7",   "#tmp3"                \n\t"
337 
/**
 * brief: Parallel SRA for 8 byte packaged data.
 * src:        source register
 * count:      SRA number (SRAB number + 8), i.e. the wanted per-byte
 *             shift plus 8 -- presumably because the unpack step places
 *             each source byte in the upper half of a 16-bit lane
 * tmp0, tmp1: scratch registers, clobbered; their previous contents are
 *             also read by the unpack step
 * dst:        destination register
 *
 * Sequence: unpack low/high bytes to halfwords, psrah both halves by
 * count, then packsshb the two halves into dst.
 */
#define PSRAB_MMI(src, count, tmp0, tmp1, dst) \
        "punpcklbh    "#tmp0",   "#tmp0",   "#src"              \n\t" \
        "punpckhbh    "#tmp1",   "#tmp1",   "#src"              \n\t" \
        "psrah        "#tmp0",   "#tmp0",   "#count"              \n\t" \
        "psrah        "#tmp1",   "#tmp1",   "#count"              \n\t" \
        "packsshb     "#dst",   "#tmp0",   "#tmp1"              \n\t"
351 
/**
 * brief: Parallel SRL for 8 byte packaged data.
 * src:        source register
 * count:      SRL number (SRLB number + 8), i.e. the wanted per-byte
 *             shift plus 8
 * tmp0, tmp1: scratch registers, clobbered; their previous contents are
 *             also read by the unpack step
 * dst:        destination register
 *
 * Sequence: unpack low/high bytes to halfwords, psrlh both halves by
 * count, then packsshb the two halves into dst.
 *
 * NOTE(review): the final pack is the signed-saturating packsshb, same as
 * in PSRAB_MMI. With count == 8 (byte shift 0) results above 127 would
 * presumably be clamped -- confirm whether that case is ever used or
 * whether an unsigned pack was intended.
 */
#define PSRLB_MMI(src, count, tmp0, tmp1, dst) \
        "punpcklbh    "#tmp0",   "#tmp0",   "#src"              \n\t" \
        "punpckhbh    "#tmp1",   "#tmp1",   "#src"              \n\t" \
        "psrlh        "#tmp0",   "#tmp0",   "#count"              \n\t" \
        "psrlh        "#tmp1",   "#tmp1",   "#count"              \n\t" \
        "packsshb     "#dst",   "#tmp0",   "#tmp1"              \n\t"
365 
/**
 * brief: In-place psrah of four registers by the same count.
 * r1..r4: registers shifted in place
 * count:  register holding the shift count
 */
#define PSRAH_4_MMI(r1, r2, r3, r4, count) \
        "psrah      "#r1",     "#r1",     "#count"                \n\t" \
        "psrah      "#r2",     "#r2",     "#count"                \n\t" \
        "psrah      "#r3",     "#r3",     "#count"                \n\t" \
        "psrah      "#r4",     "#r4",     "#count"                \n\t"

/**
 * brief: In-place psrah of eight registers by the same count,
 *        expressed as two PSRAH_4_MMI groups (r1..r4, then r5..r8).
 */
#define PSRAH_8_MMI(r1, r2, r3, r4, r5, r6, r7, r8, count) \
        PSRAH_4_MMI(r1, r2, r3, r4, count) \
        PSRAH_4_MMI(r5, r6, r7, r8, count)
375 
/**
 * brief: (((value) + (1 << ((n) - 1))) >> (n))
 * val:        src & dst
 * n:          Operand number (the shift count n of the formula)
 * tmp0, tmp1: temporary FPR, clobbered
 * gtmp:       temporary GPR, clobbered
 */
#define ROUND_POWER_OF_TWO_MMI(val, n, tmp0, tmp1, gtmp) \
        /* tmp0 = constant 1, duplicated into both words */ \
        "li         "#gtmp",     0x01                              \n\t" \
        "dmtc1      "#gtmp",     "#tmp0"                          \n\t" \
        "punpcklwd  "#tmp0",     "#tmp0",    "#tmp0"             \n\t" \
        /* tmp1 = 1 << (n - 1) */ \
        "psubw      "#tmp1",     "#n",    "#tmp0"             \n\t" \
        "psllw      "#tmp1",     "#tmp0",    "#tmp1"             \n\t" \
        /* val = (val + tmp1) >> n, arithmetic shift */ \
        "paddw      "#val",     "#val",    "#tmp1"             \n\t" \
        "psraw      "#val",     "#val",    "#n"             \n\t"
391 
#endif /* AVUTIL_MIPS_MMIUTILS_H */
393