/*
 * Loongson SIMD optimized idctdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23
24 #include "idctdsp_mips.h"
25 #include "constants.h"
26 #include "libavutil/mips/mmiutils.h"
27
/**
 * Write an 8x8 block of 16-bit coefficients as 8-bit pixels, clamping each
 * value to the unsigned byte range (Loongson MMI implementation).
 *
 * The block is consumed as 128 contiguous bytes (64 int16_t values). Every
 * 16 bytes of input are packed into one 8-byte output row, and the
 * destination pointer is advanced by line_size between rows.
 *
 * @param block     source coefficients, 64 contiguous int16_t values
 * @param pixels    address of the first destination row; 8 bytes are stored
 *                  per row, 8 rows in total
 * @param line_size byte offset from one destination row to the next
 *
 * NOTE(review): MMI_LDC1/MMI_SDC1 are defined in mmiutils.h; any alignment
 * requirement they place on block/pixels is not visible here -- confirm.
 */
void ff_put_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    /* Backing objects for the eight floating-point register operands. */
    double ftmp[8];

    __asm__ volatile (
        /* Rows 0-3: fetch 32 coefficients (block bytes 0x00-0x3f). */
        MMI_LDC1(%[ftmp0], %[block], 0x00)
        MMI_LDC1(%[ftmp1], %[block], 0x08)
        MMI_LDC1(%[ftmp2], %[block], 0x10)
        MMI_LDC1(%[ftmp3], %[block], 0x18)
        MMI_LDC1(%[ftmp4], %[block], 0x20)
        MMI_LDC1(%[ftmp5], %[block], 0x28)
        MMI_LDC1(%[ftmp6], %[block], 0x30)
        MMI_LDC1(%[ftmp7], %[block], 0x38)
        /* Pack each pair of 4x16-bit registers into 8 bytes; packushb
         * saturates the signed halfwords to unsigned bytes. */
        "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
        /* Store one row, then step to the next destination line. */
        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        /* Rows 4-7: same sequence for block bytes 0x40-0x7f. */
        MMI_LDC1(%[ftmp0], %[block], 0x40)
        MMI_LDC1(%[ftmp1], %[block], 0x48)
        MMI_LDC1(%[ftmp2], %[block], 0x50)
        MMI_LDC1(%[ftmp3], %[block], 0x58)
        MMI_LDC1(%[ftmp4], %[block], 0x60)
        MMI_LDC1(%[ftmp5], %[block], 0x68)
        MMI_LDC1(%[ftmp6], %[block], 0x70)
        MMI_LDC1(%[ftmp7], %[block], 0x78)
        "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
        "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
        "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
        "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        /* Last row: no pointer advance is needed afterwards. */
        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
        /* Outputs are early-clobber; pixels is read and modified. */
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [pixels]"+&r"(pixels)
        : [line_size]"r"((mips_reg)line_size),
          [block]"r"(block)
        /* The asm reads block and writes through pixels. */
        : "memory"
    );
}
84
/**
 * Write an 8x8 block of signed 16-bit coefficients as 8-bit pixels
 * (Loongson MMI implementation).
 *
 * Each coefficient is packed to a signed byte with saturation, then the
 * ff_pb_80 constant is added per byte before the row is stored. The block
 * is consumed as 128 contiguous bytes (64 int16_t values), 16 bytes per
 * 8-byte output row.
 *
 * @param block     source coefficients, 64 contiguous int16_t values
 * @param pixels    address of the first destination row; 8 bytes are stored
 *                  per row, 8 rows in total
 * @param line_size byte offset from one destination row to the next
 *
 * NOTE(review): ff_pb_80 is declared in constants.h; from its name it is
 * presumably 0x80 replicated in every byte (i.e. a +128 bias mapping the
 * signed range onto 0..255) -- confirm against its definition.
 */
void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    /* ftmp[0] is a scratch for the second load of each row; ftmp[1..4]
     * hold the four packed rows of the current half. */
    double ftmp[5];

    __asm__ volatile (
        /* Rows 0-3: load both 8-byte halves of each row and pack them to
         * eight signed bytes with saturation (packsshb). */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp0], %[block], 0x08)
        "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp2], %[block], 0x10)
        MMI_LDC1(%[ftmp0], %[block], 0x18)
        "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x20)
        MMI_LDC1(%[ftmp0], %[block], 0x28)
        "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp4], %[block], 0x30)
        MMI_LDC1(%[ftmp0], %[block], 0x38)
        "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        /* Add the ff_pb_80 constant to every byte of each row. */
        "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
        "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
        "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
        "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
        /* Store one row, then step to the next destination line. */
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"

        /* Rows 4-7: same sequence for block bytes 0x40-0x7f. */
        MMI_LDC1(%[ftmp1], %[block], 0x40)
        MMI_LDC1(%[ftmp0], %[block], 0x48)
        "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp2], %[block], 0x50)
        MMI_LDC1(%[ftmp0], %[block], 0x58)
        "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp3], %[block], 0x60)
        MMI_LDC1(%[ftmp0], %[block], 0x68)
        "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
        MMI_LDC1(%[ftmp4], %[block], 0x70)
        MMI_LDC1(%[ftmp0], %[block], 0x78)
        "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
        "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
        "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
        "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
        "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        /* Last row: no pointer advance is needed afterwards. */
        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
        /* Outputs are early-clobber; pixels is read and modified. */
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]),
          [pixels]"+&r"(pixels)
        : [block]"r"(block),
          [line_size]"r"((mips_reg)line_size),
          [ff_pb_80]"f"(ff_pb_80.f)
        /* The asm reads block and writes through pixels. */
        : "memory"
    );
}
149
/**
 * Add an 8x8 block of 16-bit coefficients to existing 8-bit pixels and
 * store the sums back, clamped to the unsigned byte range (Loongson MMI
 * implementation).
 *
 * The loop runs four times and handles two destination rows per pass:
 * 32 bytes of block (16 int16_t values) and two 8-byte pixel rows. Over
 * the whole call 128 bytes of block are read and 8 rows are updated.
 *
 * @param block     coefficients to add, 64 contiguous int16_t values
 * @param pixels    address of the first row to update; 8 bytes are read
 *                  and rewritten per row, 8 rows in total
 * @param line_size byte offset from one row to the next
 */
void ff_add_pixels_clamped_mmi(const int16_t *block,
        uint8_t *av_restrict pixels, ptrdiff_t line_size)
{
    /* ftmp[0] holds zero for byte->halfword widening; the rest are scratch. */
    double ftmp[9];
    /* tmp[0] is the loop counter (number of two-row passes remaining). */
    uint64_t tmp[1];
    __asm__ volatile (
        "li %[tmp0], 0x04 \n\t"
        /* ftmp0 = 0 (register xor-ed with itself). */
        "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
        "1: \n\t"
        /* Load the current pixel row and the one below it, then restore
         * the pointer to the first of the two rows. */
        MMI_LDC1(%[ftmp5], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_LDC1(%[ftmp6], %[pixels], 0x00)
        PTR_SUBU "%[pixels], %[pixels], %[line_size] \n\t"
        /* Load the 16 coefficients for these two rows and advance block. */
        MMI_LDC1(%[ftmp1], %[block], 0x00)
        MMI_LDC1(%[ftmp2], %[block], 0x08)
        MMI_LDC1(%[ftmp3], %[block], 0x10)
        MMI_LDC1(%[ftmp4], %[block], 0x18)
        PTR_ADDIU "%[block], %[block], 0x20 \n\t"
        /* Widen pixel bytes to 16 bits by interleaving with zero: the
         * high halves go to ftmp7/ftmp8, the low halves stay in place. */
        "punpckhbh %[ftmp7], %[ftmp5], %[ftmp0] \n\t"
        "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
        "punpckhbh %[ftmp8], %[ftmp6], %[ftmp0] \n\t"
        "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
        /* coefficient + pixel, per 16-bit lane. */
        "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
        "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
        "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
        "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
        /* Pack the sums back to bytes; packushb saturates the signed
         * halfwords to unsigned bytes. */
        "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
        "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
        /* Store both rows, leaving pixels two lines further down. */
        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
        "addi %[tmp0], %[tmp0], -0x01 \n\t"
        PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
        "bnez %[tmp0], 1b \n\t"
        /* Outputs are early-clobber; pixels and block are read and
         * modified by the asm. */
        : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
          [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
          [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
          [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
          [ftmp8]"=&f"(ftmp[8]), [tmp0]"=&r"(tmp[0]),
          [pixels]"+&r"(pixels), [block]"+&r"(block)
        : [line_size]"r"((mips_reg)line_size)
        /* The asm reads block and reads/writes through pixels. */
        : "memory"
    );
}
194