/*
 * Loongson optimized cabac
 *
 * Copyright (c) 2020 Loongson Technology Corporation Limited
 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
 *                Gu Xiwei(guxiwei-hf@loongson.cn)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24 
25 #ifndef AVCODEC_LOONGARCH_CABAC_H
26 #define AVCODEC_LOONGARCH_CABAC_H
27 
28 #include "libavcodec/cabac.h"
29 #include "config.h"
30 
/*
 * Inline-asm template that decodes one CABAC bin and refills c_low without
 * comparing the read pointer against an end-of-buffer pointer.
 *
 * The macro expands to adjacent string literals and is meant to be used as
 * the template of an __asm__ statement that provides these named operands:
 *   bit, tmp0, tmp1, tmp2              - written by the template
 *   c_range, c_low, c_bytestream       - read and written
 *   state                              - address of the state byte
 *                                        (loaded at entry, stored back later)
 *   tables, cabac_mask                 - read-only registers
 *   lps_off, mlps_off, norm_off        - offsets used in the ld.bu loads
 *                                        from tables
 *
 * After the template has run, bit holds 0 or 1.
 */
#define GET_CABAC_LOONGARCH_UNCBSR                                      \
    /* bit = *state; tmp0 = tables + ((c_range & 0xC0) << 1) + bit */   \
    "ld.bu        %[bit],        %[state],       0x0           \n\t"    \
    "andi         %[tmp0],       %[c_range],     0xC0          \n\t"    \
    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
    "add.d        %[tmp0],       %[tmp0],        %[tables]     \n\t"    \
    "add.d        %[tmp0],       %[tmp0],        %[bit]        \n\t"    \
    /* tmp1: RangeLPS */                                                \
    "ld.bu        %[tmp1],       %[tmp0],        %[lps_off]    \n\t"    \
                                                                        \
    /* c_range -= RangeLPS; tmp0 = c_range << 17 */                     \
    "sub.d        %[c_range],    %[c_range],     %[tmp1]       \n\t"    \
    "slli.d       %[tmp0],       %[c_range],     0x11          \n\t"    \
    /* if tmp0 >= c_low (signed) skip the next three instructions */    \
    "bge          %[tmp0],       %[c_low],       1f            \n\t"    \
    /* else: c_range = RangeLPS, bit = ~bit, c_low -= tmp0 */           \
    "move         %[c_range],    %[tmp1]                       \n\t"    \
    "nor          %[bit],        %[bit],         %[bit]        \n\t"    \
    "sub.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
                                                                        \
    "1:                                                        \n\t"    \
    /* tmp1: *state */                                                  \
    "add.d        %[tmp0],       %[tables],      %[bit]        \n\t"    \
    "ld.bu        %[tmp1],       %[tmp0],        %[mlps_off]   \n\t"    \
    /* tmp2: shift count, read at tables + c_range + norm_off */        \
    "add.d        %[tmp0],       %[tables],      %[c_range]    \n\t"    \
    "ld.bu        %[tmp2],       %[tmp0],        %[norm_off]   \n\t"    \
                                                                        \
    /* bit &= 1; store the new state; shift c_range and c_low left */   \
    "andi         %[bit],        %[bit],         0x01          \n\t"    \
    "st.b         %[tmp1],       %[state],       0x0           \n\t"    \
    "sll.d        %[c_range],    %[c_range],     %[tmp2]       \n\t"    \
    "sll.d        %[c_low],      %[c_low],       %[tmp2]       \n\t"    \
                                                                        \
    /* refill only when (c_low & cabac_mask) == 0 */                    \
    "and          %[tmp1],       %[c_low],       %[cabac_mask] \n\t"    \
    "bnez         %[tmp1],       1f                            \n\t"    \
    /* tmp1 = 16 bits from the stream; tmp2 = ctz(c_low) - 16 */        \
    "ld.hu        %[tmp1],       %[c_bytestream], 0x0          \n\t"    \
    "ctz.d        %[tmp0],       %[c_low]                      \n\t"    \
    "addi.d       %[tmp2],       %[tmp0],        -16           \n\t"    \
    /* tmp0 = ((byte-swapped tmp1 << 1) - cabac_mask) << tmp2 */        \
    "revb.2h      %[tmp0],       %[tmp1]                       \n\t"    \
    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
    "sub.d        %[tmp0],       %[tmp0],        %[cabac_mask] \n\t"    \
    "sll.d        %[tmp0],       %[tmp0],        %[tmp2]       \n\t"    \
    /* c_low += tmp0; c_bytestream += 2 (no end-of-buffer check) */     \
    "add.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
    "addi.d       %[c_bytestream], %[c_bytestream],     0x02   \n\t"    \
    "1:                                                        \n\t"

/*
 * Inline-asm template that decodes one CABAC bin and, when refilling c_low,
 * advances the read pointer only while it is below c_bytestream_end.
 *
 * The macro expands to adjacent string literals and is meant to be used as
 * the template of an __asm__ statement that provides these named operands:
 *   bit, tmp0, tmp1, tmp2              - written by the template
 *   c_range, c_low, c_bytestream       - read and written
 *   state                              - address of the state byte
 *                                        (loaded at entry, stored back later)
 *   tables, cabac_mask,
 *   c_bytestream_end                   - read-only registers
 *   lps_off, mlps_off, norm_off        - offsets used in the ld.bu loads
 *                                        from tables
 *
 * After the template has run, bit holds 0 or 1.
 */
#define GET_CABAC_LOONGARCH                                             \
    /* bit = *state; tmp0 = tables + ((c_range & 0xC0) << 1) + bit */   \
    "ld.bu        %[bit],        %[state],       0x0           \n\t"    \
    "andi         %[tmp0],       %[c_range],     0xC0          \n\t"    \
    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
    "add.d        %[tmp0],       %[tmp0],        %[tables]     \n\t"    \
    "add.d        %[tmp0],       %[tmp0],        %[bit]        \n\t"    \
    /* tmp1: RangeLPS */                                                \
    "ld.bu        %[tmp1],       %[tmp0],        %[lps_off]    \n\t"    \
                                                                        \
    /* c_range -= RangeLPS; tmp0 = c_range << 17 */                     \
    "sub.d        %[c_range],    %[c_range],     %[tmp1]       \n\t"    \
    "slli.d       %[tmp0],       %[c_range],     0x11          \n\t"    \
    /* if tmp0 >= c_low (signed) skip the next three instructions */    \
    "bge          %[tmp0],       %[c_low],       1f            \n\t"    \
    /* else: c_range = RangeLPS, bit = ~bit, c_low -= tmp0 */           \
    "move         %[c_range],    %[tmp1]                       \n\t"    \
    "nor          %[bit],        %[bit],         %[bit]        \n\t"    \
    "sub.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
                                                                        \
    "1:                                                        \n\t"    \
    /* tmp1: *state */                                                  \
    "add.d        %[tmp0],       %[tables],      %[bit]        \n\t"    \
    "ld.bu        %[tmp1],       %[tmp0],        %[mlps_off]   \n\t"    \
    /* tmp2: shift count, read at tables + c_range + norm_off */        \
    "add.d        %[tmp0],       %[tables],      %[c_range]    \n\t"    \
    "ld.bu        %[tmp2],       %[tmp0],        %[norm_off]   \n\t"    \
                                                                        \
    /* bit &= 1; store the new state; shift c_range and c_low left */   \
    "andi         %[bit],        %[bit],         0x01          \n\t"    \
    "st.b         %[tmp1],       %[state],       0x0           \n\t"    \
    "sll.d        %[c_range],    %[c_range],     %[tmp2]       \n\t"    \
    "sll.d        %[c_low],      %[c_low],       %[tmp2]       \n\t"    \
                                                                        \
    /* refill only when (c_low & cabac_mask) == 0 */                    \
    "and          %[tmp1],       %[c_low],       %[cabac_mask] \n\t"    \
    "bnez         %[tmp1],       1f                            \n\t"    \
    /* tmp1 = 16 bits from the stream; tmp2 = ctz(c_low) - 16 */        \
    "ld.hu        %[tmp1],       %[c_bytestream], 0x0          \n\t"    \
    "ctz.d        %[tmp0],       %[c_low]                      \n\t"    \
    "addi.d       %[tmp2],       %[tmp0],        -16           \n\t"    \
    /* tmp0 = ((byte-swapped tmp1 << 1) - cabac_mask) << tmp2 */        \
    "revb.2h      %[tmp0],       %[tmp1]                       \n\t"    \
    "slli.d       %[tmp0],       %[tmp0],        0x01          \n\t"    \
    "sub.d        %[tmp0],       %[tmp0],        %[cabac_mask] \n\t"    \
    "sll.d        %[tmp0],       %[tmp0],        %[tmp2]       \n\t"    \
                                                                        \
    "add.d        %[c_low],      %[c_low],       %[tmp0]       \n\t"    \
                                                                        \
    /* tmp0 = (c_bytestream < c_bytestream_end) ? 1 : 0, added twice: */ \
    /* the pointer moves by 2 only while it is below the end pointer  */ \
    "slt      %[tmp0],  %[c_bytestream],  %[c_bytestream_end]  \n\t"    \
    "add.d    %[c_bytestream], %[c_bytestream],     %[tmp0]    \n\t"    \
    "add.d    %[c_bytestream], %[c_bytestream],     %[tmp0]    \n\t"    \
    "1:                                                        \n\t"

119 #define get_cabac_inline get_cabac_inline_loongarch
120 static av_always_inline
get_cabac_inline_loongarch(CABACContext * c,uint8_t * const state)121 int get_cabac_inline_loongarch(CABACContext *c, uint8_t * const state)
122 {
123     int64_t tmp0, tmp1, tmp2, bit;
124 
125     __asm__ volatile (
126 #if UNCHECKED_BITSTREAM_READER
127         GET_CABAC_LOONGARCH_UNCBSR
128 #else
129         GET_CABAC_LOONGARCH
130 #endif
131     : [bit]"=&r"(bit), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
132       [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
133       [c_bytestream]"+&r"(c->bytestream)
134     : [state]"r"(state), [tables]"r"(ff_h264_cabac_tables),
135 #if !UNCHECKED_BITSTREAM_READER
136       [c_bytestream_end]"r"(c->bytestream_end),
137 #endif
138       [lps_off]"i"(H264_LPS_RANGE_OFFSET),
139       [mlps_off]"i"(H264_MLPS_STATE_OFFSET + 128),
140       [norm_off]"i"(H264_NORM_SHIFT_OFFSET),
141       [cabac_mask]"r"(CABAC_MASK)
142     : "memory"
143     );
144 
145     return bit;
146 }
147 
148 #define get_cabac_bypass get_cabac_bypass_loongarch
get_cabac_bypass_loongarch(CABACContext * c)149 static av_always_inline int get_cabac_bypass_loongarch(CABACContext *c)
150 {
151     int64_t tmp0, tmp1, tmp2;
152     int res = 0;
153     __asm__ volatile(
154         "slli.d     %[c_low],        %[c_low],        0x01                \n\t"
155         "and        %[tmp0],         %[c_low],        %[cabac_mask]       \n\t"
156         "bnez       %[tmp0],         1f                                   \n\t"
157         "ld.hu      %[tmp1],         %[c_bytestream], 0x0                 \n\t"
158 #if UNCHECKED_BITSTREAM_READER
159         "addi.d     %[c_bytestream], %[c_bytestream], 0x02                \n\t"
160 #else
161         "slt        %[tmp0],         %[c_bytestream], %[c_bytestream_end] \n\t"
162         "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
163         "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
164 #endif
165         "revb.2h    %[tmp1],         %[tmp1]                              \n\t"
166         "slli.d     %[tmp1],         %[tmp1],         0x01                \n\t"
167         "sub.d      %[tmp1],         %[tmp1],         %[cabac_mask]       \n\t"
168         "add.d      %[c_low],        %[c_low],        %[tmp1]             \n\t"
169         "1:                                                               \n\t"
170         "slli.d     %[tmp1],         %[c_range],      0x11                \n\t"
171         "slt        %[tmp0],         %[c_low],        %[tmp1]             \n\t"
172         "sub.d      %[tmp1],         %[c_low],        %[tmp1]             \n\t"
173         "masknez    %[tmp2],         %[one],          %[tmp0]             \n\t"
174         "maskeqz    %[res],          %[res],          %[tmp0]             \n\t"
175         "or         %[res],          %[res],          %[tmp2]             \n\t"
176         "masknez    %[tmp2],         %[tmp1],         %[tmp0]             \n\t"
177         "maskeqz    %[c_low],        %[c_low],        %[tmp0]             \n\t"
178         "or         %[c_low],        %[c_low],        %[tmp2]             \n\t"
179         : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
180           [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
181           [c_bytestream]"+&r"(c->bytestream), [res]"+&r"(res)
182         : [cabac_mask]"r"(CABAC_MASK),
183 #if !UNCHECKED_BITSTREAM_READER
184           [c_bytestream_end]"r"(c->bytestream_end),
185 #endif
186           [one]"r"(0x01)
187         : "memory"
188     );
189     return res;
190 }
191 
192 #define get_cabac_bypass_sign get_cabac_bypass_sign_loongarch
193 static av_always_inline
get_cabac_bypass_sign_loongarch(CABACContext * c,int val)194 int get_cabac_bypass_sign_loongarch(CABACContext *c, int val)
195 {
196     int64_t tmp0, tmp1;
197     int res = val;
198     __asm__ volatile(
199         "slli.d     %[c_low],        %[c_low],        0x01                \n\t"
200         "and        %[tmp0],         %[c_low],        %[cabac_mask]       \n\t"
201         "bnez       %[tmp0],         1f                                   \n\t"
202         "ld.hu      %[tmp1],         %[c_bytestream], 0x0                 \n\t"
203 #if UNCHECKED_BITSTREAM_READER
204         "addi.d     %[c_bytestream], %[c_bytestream], 0x02                \n\t"
205 #else
206         "slt        %[tmp0],         %[c_bytestream], %[c_bytestream_end] \n\t"
207         "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
208         "add.d      %[c_bytestream], %[c_bytestream], %[tmp0]             \n\t"
209 #endif
210         "revb.2h    %[tmp1],         %[tmp1]                              \n\t"
211         "slli.d     %[tmp1],         %[tmp1],         0x01                \n\t"
212         "sub.d      %[tmp1],         %[tmp1],         %[cabac_mask]       \n\t"
213         "add.d      %[c_low],        %[c_low],        %[tmp1]             \n\t"
214         "1:                                                               \n\t"
215         "slli.d     %[tmp1],         %[c_range],      0x11                \n\t"
216         "slt        %[tmp0],         %[c_low],        %[tmp1]             \n\t"
217         "sub.d      %[tmp1],         %[c_low],        %[tmp1]             \n\t"
218         "masknez    %[tmp1],         %[tmp1],         %[tmp0]             \n\t"
219         "maskeqz    %[c_low],        %[c_low],        %[tmp0]             \n\t"
220         "or         %[c_low],        %[c_low],        %[tmp1]             \n\t"
221         "sub.d      %[tmp1],         %[zero],         %[res]              \n\t"
222         "maskeqz    %[tmp1],         %[tmp1],         %[tmp0]             \n\t"
223         "masknez    %[res],          %[res],          %[tmp0]             \n\t"
224         "or         %[res],          %[res],          %[tmp1]             \n\t"
225         : [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [res]"+&r"(res),
226           [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
227           [c_bytestream]"+&r"(c->bytestream)
228         : [cabac_mask]"r"(CABAC_MASK),
229 #if !UNCHECKED_BITSTREAM_READER
230           [c_bytestream_end]"r"(c->bytestream_end),
231 #endif
232           [zero]"r"(0x0)
233         : "memory"
234     );
235 
236     return res;
237 }
238 #endif /* AVCODEC_LOONGARCH_CABAC_H */
239