1 /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) 2 2023-03-01 : Igor Pavlov : Public domain */ 3 4 #include "Precomp.h" 5 6 #include "Bcj2.h" 7 #include "CpuArch.h" 8 9 #define kTopValue ((UInt32)1 << 24) 10 #define kNumBitModelTotalBits 11 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) 12 #define kNumMoveBits 5 13 14 // UInt32 bcj2_stats[256 + 2][2]; 15 Bcj2Dec_Init(CBcj2Dec * p)16void Bcj2Dec_Init(CBcj2Dec *p) 17 { 18 unsigned i; 19 p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK; 20 p->ip = 0; 21 p->temp = 0; 22 p->range = 0; 23 p->code = 0; 24 for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) 25 p->probs[i] = kBitModelTotal >> 1; 26 } 27 Bcj2Dec_Decode(CBcj2Dec * p)28SRes Bcj2Dec_Decode(CBcj2Dec *p) 29 { 30 UInt32 v = p->temp; 31 // const Byte *src; 32 if (p->range <= 5) 33 { 34 UInt32 code = p->code; 35 p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */ 36 for (; p->range != 5; p->range++) 37 { 38 if (p->range == 1 && code != 0) 39 return SZ_ERROR_DATA; 40 if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) 41 { 42 p->state = BCJ2_STREAM_RC; 43 return SZ_OK; 44 } 45 code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 46 p->code = code; 47 } 48 if (code == 0xffffffff) 49 return SZ_ERROR_DATA; 50 p->range = 0xffffffff; 51 } 52 // else 53 { 54 unsigned state = p->state; 55 // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop 56 if (BCJ2_IS_32BIT_STREAM(state)) 57 { 58 const Byte *cur = p->bufs[state]; 59 if (cur == p->lims[state]) 60 return SZ_OK; 61 p->bufs[state] = cur + 4; 62 { 63 const UInt32 ip = p->ip + 4; 64 v = GetBe32a(cur) - ip; 65 p->ip = ip; 66 } 67 state = BCJ2_DEC_STATE_ORIG_0; 68 } 69 if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4) 70 { 71 Byte *dest = p->dest; 72 for (;;) 73 { 74 if (dest == p->destLim) 75 { 76 p->state = state; 77 p->temp = v; 78 return SZ_OK; 79 } 80 *dest++ = (Byte)v; 81 p->dest = dest; 82 if (++state == BCJ2_DEC_STATE_ORIG_3 + 1) 83 break; 84 v >>= 8; 85 } 86 } 87 } 88 89 // src = p->bufs[BCJ2_STREAM_MAIN]; 90 for (;;) 91 { 92 /* 93 if (BCJ2_IS_32BIT_STREAM(p->state)) 94 p->state = BCJ2_DEC_STATE_OK; 95 else 96 */ 97 { 98 if (p->range < kTopValue) 99 { 100 if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) 101 { 102 p->state = BCJ2_STREAM_RC; 103 p->temp = v; 104 return SZ_OK; 105 } 106 p->range <<= 8; 107 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 108 } 109 { 110 const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; 111 const Byte *srcLim; 112 Byte *dest = p->dest; 113 { 114 const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); 115 SizeT num = (SizeT)(p->destLim - dest); 116 if (num >= rem) 117 num = rem; 118 #define NUM_ITERS 4 119 #if (NUM_ITERS & (NUM_ITERS - 1)) == 0 120 num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x)) 121 #else 122 num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x)) 123 #endif 124 srcLim = src + num; 125 } 126 127 #define NUM_SHIFT_BITS 24 128 #define ONE_ITER(indx) { \ 129 const unsigned b = src[indx]; \ 130 *dest++ = (Byte)b; \ 131 v = (v << NUM_SHIFT_BITS) | b; \ 132 if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \ 133 if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \ 134 ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \ 135 /* ++dest */; /* v = b; */ } 136 137 if (src != srcLim) 138 for (;;) 139 { 140 /* The dependency chain of 2-cycle for (v) calculation is not big problem here. 141 But we can remove dependency chain with v = b in the end of loop. */ 142 ONE_ITER(0) 143 #if (NUM_ITERS > 1) 144 ONE_ITER(1) 145 #if (NUM_ITERS > 2) 146 ONE_ITER(2) 147 #if (NUM_ITERS > 3) 148 ONE_ITER(3) 149 #if (NUM_ITERS > 4) 150 ONE_ITER(4) 151 #if (NUM_ITERS > 5) 152 ONE_ITER(5) 153 #if (NUM_ITERS > 6) 154 ONE_ITER(6) 155 #if (NUM_ITERS > 7) 156 ONE_ITER(7) 157 #endif 158 #endif 159 #endif 160 #endif 161 #endif 162 #endif 163 #endif 164 165 src += NUM_ITERS; 166 if (src == srcLim) 167 break; 168 } 169 170 if (src == srcLim) 171 #if (NUM_ITERS > 1) 172 for (;;) 173 #endif 174 { 175 #if (NUM_ITERS > 1) 176 if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim) 177 #endif 178 { 179 const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); 180 p->bufs[BCJ2_STREAM_MAIN] = src; 181 p->dest = dest; 182 p->ip += (UInt32)num; 183 /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ 184 p->state = 185 src == p->lims[BCJ2_STREAM_MAIN] ? 186 (unsigned)BCJ2_STREAM_MAIN : 187 (unsigned)BCJ2_DEC_STATE_ORIG; 188 p->temp = v; 189 return SZ_OK; 190 } 191 #if (NUM_ITERS > 1) 192 ONE_ITER(0) 193 src++; 194 #endif 195 } 196 197 { 198 const SizeT num = (SizeT)(dest - p->dest); 199 p->dest = dest; // p->dest += num; 200 p->bufs[BCJ2_STREAM_MAIN] += num; // = src; 201 p->ip += (UInt32)num; 202 } 203 { 204 UInt32 bound, ttt; 205 CBcj2Prob *prob; // unsigned index; 206 /* 207 prob = p->probs + (unsigned)((Byte)v == 0xe8 ? 208 2 + (Byte)(v >> 8) : 209 ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1)); 210 */ 211 { 212 const unsigned c = ((v + 0x17) >> 6) & 1; 213 prob = p->probs + (unsigned) 214 (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1)); 215 // (Byte) 216 // 8x->0 : e9->1 : xxe8->xx+2 217 // 8x->0x100 : e9->0x101 : xxe8->xx 218 // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v)); 219 // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v)); 220 } 221 ttt = *prob; 222 bound = (p->range >> kNumBitModelTotalBits) * ttt; 223 if (p->code < bound) 224 { 225 // bcj2_stats[prob - p->probs][0]++; 226 p->range = bound; 227 *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); 228 continue; 229 } 230 { 231 // bcj2_stats[prob - p->probs][1]++; 232 p->range -= bound; 233 p->code -= bound; 234 *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits)); 235 } 236 } 237 } 238 } 239 { 240 /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */ 241 // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; 242 const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL; 243 const Byte *cur = p->bufs[cj]; 244 Byte *dest; 245 SizeT rem; 246 if (cur == p->lims[cj]) 247 { 248 p->state = cj; 249 break; 250 } 251 v = GetBe32a(cur); 252 p->bufs[cj] = cur + 4; 253 { 254 const UInt32 ip = p->ip + 4; 255 v -= ip; 256 p->ip = ip; 257 } 258 dest = p->dest; 259 rem = (SizeT)(p->destLim - dest); 260 if (rem < 4) 261 { 262 if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8; 263 if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8; 264 if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}} 265 p->temp = v; 266 p->dest = dest + rem; 267 p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; 268 break; 269 } 270 SetUi32(dest, v) 271 v >>= 24; 272 p->dest = dest + 4; 273 } 274 } 275 276 if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC]) 277 { 278 p->range <<= 8; 279 p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; 280 } 281 return SZ_OK; 282 } 283 284 #undef NUM_ITERS 285 #undef ONE_ITER 286 #undef NUM_SHIFT_BITS 287 #undef kTopValue 288 #undef kNumBitModelTotalBits 289 #undef kBitModelTotal 290 #undef kNumMoveBits 291