• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2 2023-03-01 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 #include "Bcj2.h"
7 #include "CpuArch.h"
8 
9 #define kTopValue ((UInt32)1 << 24)
10 #define kNumBitModelTotalBits 11
11 #define kBitModelTotal (1 << kNumBitModelTotalBits)
12 #define kNumMoveBits 5
13 
14 // UInt32 bcj2_stats[256 + 2][2];
15 
Bcj2Dec_Init(CBcj2Dec * p)16 void Bcj2Dec_Init(CBcj2Dec *p)
17 {
18   unsigned i;
19   p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
20   p->ip = 0;
21   p->temp = 0;
22   p->range = 0;
23   p->code = 0;
24   for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
25     p->probs[i] = kBitModelTotal >> 1;
26 }
27 
Bcj2Dec_Decode(CBcj2Dec * p)28 SRes Bcj2Dec_Decode(CBcj2Dec *p)
29 {
30   UInt32 v = p->temp;
31   // const Byte *src;
32   if (p->range <= 5)
33   {
34     UInt32 code = p->code;
35     p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
36     for (; p->range != 5; p->range++)
37     {
38       if (p->range == 1 && code != 0)
39         return SZ_ERROR_DATA;
40       if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
41       {
42         p->state = BCJ2_STREAM_RC;
43         return SZ_OK;
44       }
45       code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
46       p->code = code;
47     }
48     if (code == 0xffffffff)
49       return SZ_ERROR_DATA;
50     p->range = 0xffffffff;
51   }
52   // else
53   {
54     unsigned state = p->state;
55     // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
56     if (BCJ2_IS_32BIT_STREAM(state))
57     {
58       const Byte *cur = p->bufs[state];
59       if (cur == p->lims[state])
60         return SZ_OK;
61       p->bufs[state] = cur + 4;
62       {
63         const UInt32 ip = p->ip + 4;
64         v = GetBe32a(cur) - ip;
65         p->ip = ip;
66       }
67       state = BCJ2_DEC_STATE_ORIG_0;
68     }
69     if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
70     {
71       Byte *dest = p->dest;
72       for (;;)
73       {
74         if (dest == p->destLim)
75         {
76           p->state = state;
77           p->temp = v;
78           return SZ_OK;
79         }
80         *dest++ = (Byte)v;
81         p->dest = dest;
82         if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
83           break;
84         v >>= 8;
85       }
86     }
87   }
88 
89   // src = p->bufs[BCJ2_STREAM_MAIN];
90   for (;;)
91   {
92     /*
93     if (BCJ2_IS_32BIT_STREAM(p->state))
94       p->state = BCJ2_DEC_STATE_OK;
95     else
96     */
97     {
98       if (p->range < kTopValue)
99       {
100         if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
101         {
102           p->state = BCJ2_STREAM_RC;
103           p->temp = v;
104           return SZ_OK;
105         }
106         p->range <<= 8;
107         p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
108       }
109       {
110         const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
111         const Byte *srcLim;
112         Byte *dest = p->dest;
113         {
114           const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
115           SizeT num = (SizeT)(p->destLim - dest);
116           if (num >= rem)
117             num = rem;
118         #define NUM_ITERS 4
119         #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
120           num &= ~((SizeT)NUM_ITERS - 1);   // if (NUM_ITERS == (1 << x))
121         #else
122           num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
123         #endif
124           srcLim = src + num;
125         }
126 
127         #define NUM_SHIFT_BITS  24
128         #define ONE_ITER(indx) { \
129           const unsigned b = src[indx]; \
130           *dest++ = (Byte)b; \
131           v = (v << NUM_SHIFT_BITS) | b; \
132           if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
133           if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
134               ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
135             /* ++dest */; /* v = b; */ }
136 
137         if (src != srcLim)
138         for (;;)
139         {
140             /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
141                But we can remove dependency chain with v = b in the end of loop. */
142           ONE_ITER(0)
143           #if (NUM_ITERS > 1)
144             ONE_ITER(1)
145           #if (NUM_ITERS > 2)
146             ONE_ITER(2)
147           #if (NUM_ITERS > 3)
148             ONE_ITER(3)
149           #if (NUM_ITERS > 4)
150             ONE_ITER(4)
151           #if (NUM_ITERS > 5)
152             ONE_ITER(5)
153           #if (NUM_ITERS > 6)
154             ONE_ITER(6)
155           #if (NUM_ITERS > 7)
156             ONE_ITER(7)
157           #endif
158           #endif
159           #endif
160           #endif
161           #endif
162           #endif
163           #endif
164 
165           src += NUM_ITERS;
166           if (src == srcLim)
167             break;
168         }
169 
170         if (src == srcLim)
171       #if (NUM_ITERS > 1)
172         for (;;)
173       #endif
174         {
175         #if (NUM_ITERS > 1)
176           if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
177         #endif
178           {
179             const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
180             p->bufs[BCJ2_STREAM_MAIN] = src;
181             p->dest = dest;
182             p->ip += (UInt32)num;
183             /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
184             p->state =
185               src == p->lims[BCJ2_STREAM_MAIN] ?
186                 (unsigned)BCJ2_STREAM_MAIN :
187                 (unsigned)BCJ2_DEC_STATE_ORIG;
188             p->temp = v;
189             return SZ_OK;
190           }
191         #if (NUM_ITERS > 1)
192           ONE_ITER(0)
193           src++;
194         #endif
195         }
196 
197         {
198           const SizeT num = (SizeT)(dest - p->dest);
199           p->dest = dest; // p->dest += num;
200           p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
201           p->ip += (UInt32)num;
202         }
203         {
204           UInt32 bound, ttt;
205           CBcj2Prob *prob; // unsigned index;
206           /*
207           prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
208               2 + (Byte)(v >> 8) :
209               ((v >> 5) & 1));  // ((Byte)v < 0xe8 ? 0 : 1));
210           */
211           {
212             const unsigned c = ((v + 0x17) >> 6) & 1;
213             prob = p->probs + (unsigned)
214                 (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
215                 // (Byte)
216                 // 8x->0     : e9->1     : xxe8->xx+2
217                 // 8x->0x100 : e9->0x101 : xxe8->xx
218                 // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
219                 // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
220           }
221           ttt = *prob;
222           bound = (p->range >> kNumBitModelTotalBits) * ttt;
223           if (p->code < bound)
224           {
225             // bcj2_stats[prob - p->probs][0]++;
226             p->range = bound;
227             *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
228             continue;
229           }
230           {
231             // bcj2_stats[prob - p->probs][1]++;
232             p->range -= bound;
233             p->code -= bound;
234             *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
235           }
236         }
237       }
238     }
239     {
240       /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
241       // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
242       const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
243       const Byte *cur = p->bufs[cj];
244       Byte *dest;
245       SizeT rem;
246       if (cur == p->lims[cj])
247       {
248         p->state = cj;
249         break;
250       }
251       v = GetBe32a(cur);
252       p->bufs[cj] = cur + 4;
253       {
254         const UInt32 ip = p->ip + 4;
255         v -= ip;
256         p->ip = ip;
257       }
258       dest = p->dest;
259       rem = (SizeT)(p->destLim - dest);
260       if (rem < 4)
261       {
262         if ((unsigned)rem > 0) { dest[0] = (Byte)v;  v >>= 8;
263         if ((unsigned)rem > 1) { dest[1] = (Byte)v;  v >>= 8;
264         if ((unsigned)rem > 2) { dest[2] = (Byte)v;  v >>= 8; }}}
265         p->temp = v;
266         p->dest = dest + rem;
267         p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
268         break;
269       }
270       SetUi32(dest, v)
271       v >>= 24;
272       p->dest = dest + 4;
273     }
274   }
275 
276   if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
277   {
278     p->range <<= 8;
279     p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
280   }
281   return SZ_OK;
282 }
283 
284 #undef NUM_ITERS
285 #undef ONE_ITER
286 #undef NUM_SHIFT_BITS
287 #undef kTopValue
288 #undef kNumBitModelTotalBits
289 #undef kBitModelTotal
290 #undef kNumMoveBits
291