/*
 * Loongson optimized cabac
 *
 * Copyright (c) 2020 Loongson Technology Corporation Limited
 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
 *                Gu Xiwei(guxiwei-hf@loongson.cn)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24
25 #ifndef AVCODEC_LOONGARCH_CABAC_H
26 #define AVCODEC_LOONGARCH_CABAC_H
27
28 #include "libavcodec/cabac.h"
29 #include "config.h"
30
/*
 * Assembly template that decodes one context-coded CABAC bin; this is the
 * variant selected when UNCHECKED_BITSTREAM_READER is set. Its refill step
 * advances c_bytestream by two bytes unconditionally (no end-of-buffer test).
 *
 * The macro expands to a single string literal and refers to these named
 * asm operands:
 *   written:  bit, tmp0, tmp1, tmp2, c_range, c_low, c_bytestream
 *   read:     state (the byte it points to is loaded and stored back),
 *             tables, cabac_mask
 *   offsets:  lps_off, mlps_off, norm_off (used as load displacements)
 *
 * The local label "1" is defined twice; each branch targets the next
 * definition ("1f"). The last line deliberately carries no line
 * continuation so the macro cannot absorb whatever follows it.
 */
#define GET_CABAC_LOONGARCH_UNCBSR \
    /* bit: *state; tmp0: tables + 2 * (range & 0xC0) + bit */ \
    "ld.bu %[bit], %[state], 0x0 \n\t" \
    "andi %[tmp0], %[c_range], 0xC0 \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "add.d %[tmp0], %[tmp0], %[tables] \n\t" \
    "add.d %[tmp0], %[tmp0], %[bit] \n\t" \
    /* tmp1: RangeLPS */ \
    "ld.bu %[tmp1], %[tmp0], %[lps_off] \n\t" \
    /* range -= RangeLPS; tmp0: range << 17, compared with low */ \
    "sub.d %[c_range], %[c_range], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[c_range], 0x11 \n\t" \
    "bge %[tmp0], %[c_low], 1f \n\t" \
    /* low > tmp0: range = RangeLPS, bit = ~bit, low -= tmp0 */ \
    "move %[c_range], %[tmp1] \n\t" \
    "nor %[bit], %[bit], %[bit] \n\t" \
    "sub.d %[c_low], %[c_low], %[tmp0] \n\t" \
    "1: \n\t" \
    /* tmp1: next *state value, looked up with bit as index */ \
    "add.d %[tmp0], %[tables], %[bit] \n\t" \
    "ld.bu %[tmp1], %[tmp0], %[mlps_off] \n\t" \
    /* tmp2: normalization shift, looked up with range as index */ \
    "add.d %[tmp0], %[tables], %[c_range] \n\t" \
    "ld.bu %[tmp2], %[tmp0], %[norm_off] \n\t" \
    /* keep bit 0 as the result, store the state, renormalize */ \
    "andi %[bit], %[bit], 0x01 \n\t" \
    "st.b %[tmp1], %[state], 0x0 \n\t" \
    "sll.d %[c_range], %[c_range], %[tmp2] \n\t" \
    "sll.d %[c_low], %[c_low], %[tmp2] \n\t" \
    /* refill only when no bit of low under cabac_mask is set */ \
    "and %[tmp1], %[c_low], %[cabac_mask] \n\t" \
    "bnez %[tmp1], 1f \n\t" \
    /* tmp1: next two stream bytes; tmp2: ctz(low) - 16 */ \
    "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t" \
    "ctz.d %[tmp0], %[c_low] \n\t" \
    "addi.d %[tmp2], %[tmp0], -16 \n\t" \
    /* low += ((byteswapped halfword << 1) - cabac_mask) << tmp2 */ \
    "revb.2h %[tmp0], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "sub.d %[tmp0], %[tmp0], %[cabac_mask] \n\t" \
    "sll.d %[tmp0], %[tmp0], %[tmp2] \n\t" \
    "add.d %[c_low], %[c_low], %[tmp0] \n\t" \
    /* unchecked reader: always step over the two bytes just read */ \
    "addi.d %[c_bytestream], %[c_bytestream], 0x02 \n\t" \
    "1: \n\t"
72
/*
 * Assembly template that decodes one context-coded CABAC bin; this is the
 * variant selected when UNCHECKED_BITSTREAM_READER is not set. Its refill
 * step advances c_bytestream by two bytes only while c_bytestream compares
 * below c_bytestream_end; otherwise the pointer is left where it is.
 *
 * The macro expands to a single string literal and refers to these named
 * asm operands:
 *   written:  bit, tmp0, tmp1, tmp2, c_range, c_low, c_bytestream
 *   read:     state (the byte it points to is loaded and stored back),
 *             tables, cabac_mask, c_bytestream_end
 *   offsets:  lps_off, mlps_off, norm_off (used as load displacements)
 *
 * The local label "1" is defined twice; each branch targets the next
 * definition ("1f"). The last line deliberately carries no line
 * continuation so the macro cannot absorb whatever follows it.
 */
#define GET_CABAC_LOONGARCH \
    /* bit: *state; tmp0: tables + 2 * (range & 0xC0) + bit */ \
    "ld.bu %[bit], %[state], 0x0 \n\t" \
    "andi %[tmp0], %[c_range], 0xC0 \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "add.d %[tmp0], %[tmp0], %[tables] \n\t" \
    "add.d %[tmp0], %[tmp0], %[bit] \n\t" \
    /* tmp1: RangeLPS */ \
    "ld.bu %[tmp1], %[tmp0], %[lps_off] \n\t" \
    /* range -= RangeLPS; tmp0: range << 17, compared with low */ \
    "sub.d %[c_range], %[c_range], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[c_range], 0x11 \n\t" \
    "bge %[tmp0], %[c_low], 1f \n\t" \
    /* low > tmp0: range = RangeLPS, bit = ~bit, low -= tmp0 */ \
    "move %[c_range], %[tmp1] \n\t" \
    "nor %[bit], %[bit], %[bit] \n\t" \
    "sub.d %[c_low], %[c_low], %[tmp0] \n\t" \
    "1: \n\t" \
    /* tmp1: next *state value, looked up with bit as index */ \
    "add.d %[tmp0], %[tables], %[bit] \n\t" \
    "ld.bu %[tmp1], %[tmp0], %[mlps_off] \n\t" \
    /* tmp2: normalization shift, looked up with range as index */ \
    "add.d %[tmp0], %[tables], %[c_range] \n\t" \
    "ld.bu %[tmp2], %[tmp0], %[norm_off] \n\t" \
    /* keep bit 0 as the result, store the state, renormalize */ \
    "andi %[bit], %[bit], 0x01 \n\t" \
    "st.b %[tmp1], %[state], 0x0 \n\t" \
    "sll.d %[c_range], %[c_range], %[tmp2] \n\t" \
    "sll.d %[c_low], %[c_low], %[tmp2] \n\t" \
    /* refill only when no bit of low under cabac_mask is set */ \
    "and %[tmp1], %[c_low], %[cabac_mask] \n\t" \
    "bnez %[tmp1], 1f \n\t" \
    /* tmp1: next two stream bytes; tmp2: ctz(low) - 16 */ \
    "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t" \
    "ctz.d %[tmp0], %[c_low] \n\t" \
    "addi.d %[tmp2], %[tmp0], -16 \n\t" \
    /* low += ((byteswapped halfword << 1) - cabac_mask) << tmp2 */ \
    "revb.2h %[tmp0], %[tmp1] \n\t" \
    "slli.d %[tmp0], %[tmp0], 0x01 \n\t" \
    "sub.d %[tmp0], %[tmp0], %[cabac_mask] \n\t" \
    "sll.d %[tmp0], %[tmp0], %[tmp2] \n\t" \
    "add.d %[c_low], %[c_low], %[tmp0] \n\t" \
    /* tmp0 = (bytestream < bytestream_end); adding it twice gives +2 or +0 */ \
    "slt %[tmp0], %[c_bytestream], %[c_bytestream_end] \n\t" \
    "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t" \
    "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t" \
    "1: \n\t"
118
119 #define get_cabac_inline get_cabac_inline_loongarch
120 static av_always_inline
get_cabac_inline_loongarch(CABACContext * c,uint8_t * const state)121 int get_cabac_inline_loongarch(CABACContext *c, uint8_t * const state)
122 {
123 int64_t tmp0, tmp1, tmp2, bit;
124
125 __asm__ volatile (
126 #if UNCHECKED_BITSTREAM_READER
127 GET_CABAC_LOONGARCH_UNCBSR
128 #else
129 GET_CABAC_LOONGARCH
130 #endif
131 : [bit]"=&r"(bit), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
132 [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
133 [c_bytestream]"+&r"(c->bytestream)
134 : [state]"r"(state), [tables]"r"(ff_h264_cabac_tables),
135 #if !UNCHECKED_BITSTREAM_READER
136 [c_bytestream_end]"r"(c->bytestream_end),
137 #endif
138 [lps_off]"i"(H264_LPS_RANGE_OFFSET),
139 [mlps_off]"i"(H264_MLPS_STATE_OFFSET + 128),
140 [norm_off]"i"(H264_NORM_SHIFT_OFFSET),
141 [cabac_mask]"r"(CABAC_MASK)
142 : "memory"
143 );
144
145 return bit;
146 }
147
148 #define get_cabac_bypass get_cabac_bypass_loongarch
get_cabac_bypass_loongarch(CABACContext * c)149 static av_always_inline int get_cabac_bypass_loongarch(CABACContext *c)
150 {
151 int64_t tmp0, tmp1, tmp2;
152 int res = 0;
153 __asm__ volatile(
154 "slli.d %[c_low], %[c_low], 0x01 \n\t"
155 "and %[tmp0], %[c_low], %[cabac_mask] \n\t"
156 "bnez %[tmp0], 1f \n\t"
157 "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t"
158 #if UNCHECKED_BITSTREAM_READER
159 "addi.d %[c_bytestream], %[c_bytestream], 0x02 \n\t"
160 #else
161 "slt %[tmp0], %[c_bytestream], %[c_bytestream_end] \n\t"
162 "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
163 "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
164 #endif
165 "revb.2h %[tmp1], %[tmp1] \n\t"
166 "slli.d %[tmp1], %[tmp1], 0x01 \n\t"
167 "sub.d %[tmp1], %[tmp1], %[cabac_mask] \n\t"
168 "add.d %[c_low], %[c_low], %[tmp1] \n\t"
169 "1: \n\t"
170 "slli.d %[tmp1], %[c_range], 0x11 \n\t"
171 "slt %[tmp0], %[c_low], %[tmp1] \n\t"
172 "sub.d %[tmp1], %[c_low], %[tmp1] \n\t"
173 "masknez %[tmp2], %[one], %[tmp0] \n\t"
174 "maskeqz %[res], %[res], %[tmp0] \n\t"
175 "or %[res], %[res], %[tmp2] \n\t"
176 "masknez %[tmp2], %[tmp1], %[tmp0] \n\t"
177 "maskeqz %[c_low], %[c_low], %[tmp0] \n\t"
178 "or %[c_low], %[c_low], %[tmp2] \n\t"
179 : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
180 [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
181 [c_bytestream]"+&r"(c->bytestream), [res]"+&r"(res)
182 : [cabac_mask]"r"(CABAC_MASK),
183 #if !UNCHECKED_BITSTREAM_READER
184 [c_bytestream_end]"r"(c->bytestream_end),
185 #endif
186 [one]"r"(0x01)
187 : "memory"
188 );
189 return res;
190 }
191
192 #define get_cabac_bypass_sign get_cabac_bypass_sign_loongarch
193 static av_always_inline
get_cabac_bypass_sign_loongarch(CABACContext * c,int val)194 int get_cabac_bypass_sign_loongarch(CABACContext *c, int val)
195 {
196 int64_t tmp0, tmp1;
197 int res = val;
198 __asm__ volatile(
199 "slli.d %[c_low], %[c_low], 0x01 \n\t"
200 "and %[tmp0], %[c_low], %[cabac_mask] \n\t"
201 "bnez %[tmp0], 1f \n\t"
202 "ld.hu %[tmp1], %[c_bytestream], 0x0 \n\t"
203 #if UNCHECKED_BITSTREAM_READER
204 "addi.d %[c_bytestream], %[c_bytestream], 0x02 \n\t"
205 #else
206 "slt %[tmp0], %[c_bytestream], %[c_bytestream_end] \n\t"
207 "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
208 "add.d %[c_bytestream], %[c_bytestream], %[tmp0] \n\t"
209 #endif
210 "revb.2h %[tmp1], %[tmp1] \n\t"
211 "slli.d %[tmp1], %[tmp1], 0x01 \n\t"
212 "sub.d %[tmp1], %[tmp1], %[cabac_mask] \n\t"
213 "add.d %[c_low], %[c_low], %[tmp1] \n\t"
214 "1: \n\t"
215 "slli.d %[tmp1], %[c_range], 0x11 \n\t"
216 "slt %[tmp0], %[c_low], %[tmp1] \n\t"
217 "sub.d %[tmp1], %[c_low], %[tmp1] \n\t"
218 "masknez %[tmp1], %[tmp1], %[tmp0] \n\t"
219 "maskeqz %[c_low], %[c_low], %[tmp0] \n\t"
220 "or %[c_low], %[c_low], %[tmp1] \n\t"
221 "sub.d %[tmp1], %[zero], %[res] \n\t"
222 "maskeqz %[tmp1], %[tmp1], %[tmp0] \n\t"
223 "masknez %[res], %[res], %[tmp0] \n\t"
224 "or %[res], %[res], %[tmp1] \n\t"
225 : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [res]"+&r"(res),
226 [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
227 [c_bytestream]"+&r"(c->bytestream)
228 : [cabac_mask]"r"(CABAC_MASK),
229 #if !UNCHECKED_BITSTREAM_READER
230 [c_bytestream_end]"r"(c->bytestream_end),
231 #endif
232 [zero]"r"(0x0)
233 : "memory"
234 );
235
236 return res;
237 }
238 #endif /* AVCODEC_LOONGARCH_CABAC_H */
239