• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Ppmd7.c -- PPMdH codec
2 2023-04-02 : Igor Pavlov : Public domain
3 This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
4 
5 #include "Precomp.h"
6 
7 #include <string.h>
8 
9 #include "Ppmd7.h"
10 
11 /* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. code */
12 // #define PPMD7_ORDER_0_SUPPPORT
13 
14 MY_ALIGN(16)
15 static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
16 MY_ALIGN(16)
17 static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
18 
19 #define MAX_FREQ 124
20 #define UNIT_SIZE 12
21 
22 #define U2B(nu) ((UInt32)(nu) * UNIT_SIZE)
23 #define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1])
24 #define I2U(indx) ((unsigned)p->Indx2Units[indx])
25 #define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx])
26 
27 #define REF(ptr) Ppmd_GetRef(p, ptr)
28 
29 #define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr))
30 
31 #define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
32 #define STATS(ctx) Ppmd7_GetStats(p, ctx)
33 #define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
34 #define SUFFIX(ctx) CTX((ctx)->Suffix)
35 
36 typedef CPpmd7_Context * PPMD7_CTX_PTR;
37 
38 struct CPpmd7_Node_;
39 
40 typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref;
41 
42 typedef struct CPpmd7_Node_
43 {
44   UInt16 Stamp; /* must be at offset 0 as CPpmd7_Context::NumStats. Stamp=0 means free */
45   UInt16 NU;
46   CPpmd7_Node_Ref Next; /* must be at offset >= 4 */
47   CPpmd7_Node_Ref Prev;
48 } CPpmd7_Node;
49 
50 #define NODE(r)  Ppmd_GetPtr_Type(p, r, CPpmd7_Node)
51 
Ppmd7_Construct(CPpmd7 * p)52 void Ppmd7_Construct(CPpmd7 *p)
53 {
54   unsigned i, k, m;
55 
56   p->Base = NULL;
57 
58   for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++)
59   {
60     unsigned step = (i >= 12 ? 4 : (i >> 2) + 1);
61     do { p->Units2Indx[k++] = (Byte)i; } while (--step);
62     p->Indx2Units[i] = (Byte)k;
63   }
64 
65   p->NS2BSIndx[0] = (0 << 1);
66   p->NS2BSIndx[1] = (1 << 1);
67   memset(p->NS2BSIndx + 2, (2 << 1), 9);
68   memset(p->NS2BSIndx + 11, (3 << 1), 256 - 11);
69 
70   for (i = 0; i < 3; i++)
71     p->NS2Indx[i] = (Byte)i;
72 
73   for (m = i, k = 1; i < 256; i++)
74   {
75     p->NS2Indx[i] = (Byte)m;
76     if (--k == 0)
77       k = (++m) - 2;
78   }
79 
80   memcpy(p->ExpEscape, PPMD7_kExpEscape, 16);
81 }
82 
83 
Ppmd7_Free(CPpmd7 * p,ISzAllocPtr alloc)84 void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc)
85 {
86   ISzAlloc_Free(alloc, p->Base);
87   p->Size = 0;
88   p->Base = NULL;
89 }
90 
91 
Ppmd7_Alloc(CPpmd7 * p,UInt32 size,ISzAllocPtr alloc)92 BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
93 {
94   if (!p->Base || p->Size != size)
95   {
96     Ppmd7_Free(p, alloc);
97     p->AlignOffset = (4 - size) & 3;
98     if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL)
99       return False;
100     p->Size = size;
101   }
102   return True;
103 }
104 
105 
106 
107 // ---------- Internal Memory Allocator ----------
108 
109 /* We can use CPpmd7_Node in list of free units (as in Ppmd8)
110    But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks().
111    So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode()
112 */
113 
114 #define EMPTY_NODE 0
115 
116 
Ppmd7_InsertNode(CPpmd7 * p,void * node,unsigned indx)117 static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx)
118 {
119   *((CPpmd_Void_Ref *)node) = p->FreeList[indx];
120   // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
121 
122   p->FreeList[indx] = REF(node);
123 
124 }
125 
126 
Ppmd7_RemoveNode(CPpmd7 * p,unsigned indx)127 static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx)
128 {
129   CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
130   p->FreeList[indx] = *node;
131   // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]);
132   // p->FreeList[indx] = node->Next;
133   return node;
134 }
135 
136 
Ppmd7_SplitBlock(CPpmd7 * p,void * ptr,unsigned oldIndx,unsigned newIndx)137 static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
138 {
139   unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
140   ptr = (Byte *)ptr + U2B(I2U(newIndx));
141   if (I2U(i = U2I(nu)) != nu)
142   {
143     unsigned k = I2U(--i);
144     Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
145   }
146   Ppmd7_InsertNode(p, ptr, i);
147 }
148 
149 
150 /* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
151 
152 typedef union
153 {
154   CPpmd7_Node     Node;
155   CPpmd7_Node_Ref NextRef;
156 } CPpmd7_Node_Union;
157 
158 /* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks()
159    we use single linked list similar to Ppmd8 code */
160 
161 
Ppmd7_GlueFreeBlocks(CPpmd7 * p)162 static void Ppmd7_GlueFreeBlocks(CPpmd7 *p)
163 {
164   /*
165   we use first UInt16 field of 12-bytes UNITs as record type stamp
166     CPpmd_State    { Byte Symbol; Byte Freq; : Freq != 0
167     CPpmd7_Context { UInt16 NumStats;        : NumStats != 0
168     CPpmd7_Node    { UInt16 Stamp            : Stamp == 0 for free record
169                                              : Stamp == 1 for head record and guard
170     Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record.
171   */
172   CPpmd7_Node_Ref head, n = 0;
173 
174   p->GlueCount = 255;
175 
176 
177   /* we set guard NODE at LoUnit */
178   if (p->LoUnit != p->HiUnit)
179     ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1;
180 
181   {
182     /* Create list of free blocks.
183        We still need one additional list walk pass before Glue. */
184     unsigned i;
185     for (i = 0; i < PPMD_NUM_INDEXES; i++)
186     {
187       const UInt16 nu = I2U_UInt16(i);
188       CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i];
189       p->FreeList[i] = 0;
190       while (next != 0)
191       {
192         /* Don't change the order of the following commands: */
193         CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next);
194         const CPpmd7_Node_Ref tmp = next;
195         next = un->NextRef;
196         un->Node.Stamp = EMPTY_NODE;
197         un->Node.NU = nu;
198         un->Node.Next = n;
199         n = tmp;
200       }
201     }
202   }
203 
204   head = n;
205   /* Glue and Fill must walk the list in same direction */
206   {
207     /* Glue free blocks */
208     CPpmd7_Node_Ref *prev = &head;
209     while (n)
210     {
211       CPpmd7_Node *node = NODE(n);
212       UInt32 nu = node->NU;
213       n = node->Next;
214       if (nu == 0)
215       {
216         *prev = n;
217         continue;
218       }
219       prev = &node->Next;
220       for (;;)
221       {
222         CPpmd7_Node *node2 = node + nu;
223         nu += node2->NU;
224         if (node2->Stamp != EMPTY_NODE || nu >= 0x10000)
225           break;
226         node->NU = (UInt16)nu;
227         node2->NU = 0;
228       }
229     }
230   }
231 
232   /* Fill lists of free blocks */
233   for (n = head; n != 0;)
234   {
235     CPpmd7_Node *node = NODE(n);
236     UInt32 nu = node->NU;
237     unsigned i;
238     n = node->Next;
239     if (nu == 0)
240       continue;
241     for (; nu > 128; nu -= 128, node += 128)
242       Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1);
243     if (I2U(i = U2I(nu)) != nu)
244     {
245       unsigned k = I2U(--i);
246       Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1);
247     }
248     Ppmd7_InsertNode(p, node, i);
249   }
250 }
251 
252 
253 Z7_NO_INLINE
Ppmd7_AllocUnitsRare(CPpmd7 * p,unsigned indx)254 static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx)
255 {
256   unsigned i;
257 
258   if (p->GlueCount == 0)
259   {
260     Ppmd7_GlueFreeBlocks(p);
261     if (p->FreeList[indx] != 0)
262       return Ppmd7_RemoveNode(p, indx);
263   }
264 
265   i = indx;
266 
267   do
268   {
269     if (++i == PPMD_NUM_INDEXES)
270     {
271       UInt32 numBytes = U2B(I2U(indx));
272       Byte *us = p->UnitsStart;
273       p->GlueCount--;
274       return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL;
275     }
276   }
277   while (p->FreeList[i] == 0);
278 
279   {
280     void *block = Ppmd7_RemoveNode(p, i);
281     Ppmd7_SplitBlock(p, block, i, indx);
282     return block;
283   }
284 }
285 
286 
Ppmd7_AllocUnits(CPpmd7 * p,unsigned indx)287 static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx)
288 {
289   if (p->FreeList[indx] != 0)
290     return Ppmd7_RemoveNode(p, indx);
291   {
292     UInt32 numBytes = U2B(I2U(indx));
293     Byte *lo = p->LoUnit;
294     if ((UInt32)(p->HiUnit - lo) >= numBytes)
295     {
296       p->LoUnit = lo + numBytes;
297       return lo;
298     }
299   }
300   return Ppmd7_AllocUnitsRare(p, indx);
301 }
302 
303 
304 #define MEM_12_CPY(dest, src, num) \
305   { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
306     do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
307 
308 
309 /*
310 static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU)
311 {
312   unsigned i0 = U2I(oldNU);
313   unsigned i1 = U2I(newNU);
314   if (i0 == i1)
315     return oldPtr;
316   if (p->FreeList[i1] != 0)
317   {
318     void *ptr = Ppmd7_RemoveNode(p, i1);
319     MEM_12_CPY(ptr, oldPtr, newNU)
320     Ppmd7_InsertNode(p, oldPtr, i0);
321     return ptr;
322   }
323   Ppmd7_SplitBlock(p, oldPtr, i0, i1);
324   return oldPtr;
325 }
326 */
327 
328 
329 #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
SetSuccessor(CPpmd_State * p,CPpmd_Void_Ref v)330 static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
331 {
332   Ppmd_SET_SUCCESSOR(p, v)
333 }
334 
335 
336 
337 Z7_NO_INLINE
338 static
Ppmd7_RestartModel(CPpmd7 * p)339 void Ppmd7_RestartModel(CPpmd7 *p)
340 {
341   unsigned i, k;
342 
343   memset(p->FreeList, 0, sizeof(p->FreeList));
344 
345   p->Text = p->Base + p->AlignOffset;
346   p->HiUnit = p->Text + p->Size;
347   p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
348   p->GlueCount = 0;
349 
350   p->OrderFall = p->MaxOrder;
351   p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1;
352   p->PrevSuccess = 0;
353 
354   {
355     CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
356     CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */
357 
358     p->LoUnit += U2B(256 / 2);
359     p->MaxContext = p->MinContext = mc;
360     p->FoundState = s;
361 
362     mc->NumStats = 256;
363     mc->Union2.SummFreq = 256 + 1;
364     mc->Union4.Stats = REF(s);
365     mc->Suffix = 0;
366 
367     for (i = 0; i < 256; i++, s++)
368     {
369       s->Symbol = (Byte)i;
370       s->Freq = 1;
371       SetSuccessor(s, 0);
372     }
373 
374     #ifdef PPMD7_ORDER_0_SUPPPORT
375     if (p->MaxOrder == 0)
376     {
377       CPpmd_Void_Ref r = REF(mc);
378       s = p->FoundState;
379       for (i = 0; i < 256; i++, s++)
380         SetSuccessor(s, r);
381       return;
382     }
383     #endif
384   }
385 
386   for (i = 0; i < 128; i++)
387 
388 
389 
390     for (k = 0; k < 8; k++)
391     {
392       unsigned m;
393       UInt16 *dest = p->BinSumm[i] + k;
394       const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2));
395       for (m = 0; m < 64; m += 8)
396         dest[m] = val;
397     }
398 
399 
400   for (i = 0; i < 25; i++)
401   {
402 
403     CPpmd_See *s = p->See[i];
404 
405 
406 
407     unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4));
408     for (k = 0; k < 16; k++, s++)
409     {
410       s->Summ = (UInt16)summ;
411       s->Shift = (PPMD_PERIOD_BITS - 4);
412       s->Count = 4;
413     }
414   }
415 
416   p->DummySee.Summ = 0; /* unused */
417   p->DummySee.Shift = PPMD_PERIOD_BITS;
418   p->DummySee.Count = 64; /* unused */
419 }
420 
421 
Ppmd7_Init(CPpmd7 * p,unsigned maxOrder)422 void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
423 {
424   p->MaxOrder = maxOrder;
425 
426   Ppmd7_RestartModel(p);
427 }
428 
429 
430 
431 /*
432   Ppmd7_CreateSuccessors()
433   It's called when (FoundState->Successor) is RAW-Successor,
434   that is the link to position in Raw text.
435   So we create Context records and write the links to
436   FoundState->Successor and to identical RAW-Successors in suffix
437   contexts of MinContex.
438 
439   The function returns:
440   if (OrderFall == 0) then MinContext is already at MAX order,
441     { return pointer to new or existing context of same MAX order }
442   else
443     { return pointer to new real context that will be (Order+1) in comparison with MinContext
444 
445   also it can return pointer to real context of same order,
446 */
447 
448 Z7_NO_INLINE
Ppmd7_CreateSuccessors(CPpmd7 * p)449 static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p)
450 {
451   PPMD7_CTX_PTR c = p->MinContext;
452   CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
453   Byte newSym, newFreq;
454   unsigned numPs = 0;
455   CPpmd_State *ps[PPMD7_MAX_ORDER];
456 
457   if (p->OrderFall != 0)
458     ps[numPs++] = p->FoundState;
459 
460   while (c->Suffix)
461   {
462     CPpmd_Void_Ref successor;
463     CPpmd_State *s;
464     c = SUFFIX(c);
465 
466 
467     if (c->NumStats != 1)
468     {
469       Byte sym = p->FoundState->Symbol;
470       for (s = STATS(c); s->Symbol != sym; s++);
471 
472     }
473     else
474     {
475       s = ONE_STATE(c);
476 
477     }
478     successor = SUCCESSOR(s);
479     if (successor != upBranch)
480     {
481       // (c) is real record Context here,
482       c = CTX(successor);
483       if (numPs == 0)
484       {
485         // (c) is real record MAX Order Context here,
486         // So we don't need to create any new contexts.
487         return c;
488       }
489       break;
490     }
491     ps[numPs++] = s;
492   }
493 
494   // All created contexts will have single-symbol with new RAW-Successor
495   // All new RAW-Successors will point to next position in RAW text
496   // after FoundState->Successor
497 
498   newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch);
499   upBranch++;
500 
501 
502   if (c->NumStats == 1)
503     newFreq = ONE_STATE(c)->Freq;
504   else
505   {
506     UInt32 cf, s0;
507     CPpmd_State *s;
508     for (s = STATS(c); s->Symbol != newSym; s++);
509     cf = (UInt32)s->Freq - 1;
510     s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf;
511     /*
512       cf - is frequency of symbol that will be Successor in new context records.
513       s0 - is commulative frequency sum of another symbols from parent context.
514       max(newFreq)= (s->Freq + 1), when (s0 == 1)
515       we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[]
516       so (s->Freq < 128) - is requirement for multi-symbol contexts
517     */
518     newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1));
519   }
520 
521   // Create new single-symbol contexts from low order to high order in loop
522 
523   do
524   {
525     PPMD7_CTX_PTR c1;
526     /* = AllocContext(p); */
527     if (p->HiUnit != p->LoUnit)
528       c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
529     else if (p->FreeList[0] != 0)
530       c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0);
531     else
532     {
533       c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0);
534       if (!c1)
535         return NULL;
536     }
537 
538     c1->NumStats = 1;
539     ONE_STATE(c1)->Symbol = newSym;
540     ONE_STATE(c1)->Freq = newFreq;
541     SetSuccessor(ONE_STATE(c1), upBranch);
542     c1->Suffix = REF(c);
543     SetSuccessor(ps[--numPs], REF(c1));
544     c = c1;
545   }
546   while (numPs != 0);
547 
548   return c;
549 }
550 
551 
552 
553 #define SWAP_STATES(s) \
554   { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
555 
556 
557 void Ppmd7_UpdateModel(CPpmd7 *p);
558 Z7_NO_INLINE
Ppmd7_UpdateModel(CPpmd7 * p)559 void Ppmd7_UpdateModel(CPpmd7 *p)
560 {
561   CPpmd_Void_Ref maxSuccessor, minSuccessor;
562   PPMD7_CTX_PTR c, mc;
563   unsigned s0, ns;
564 
565 
566 
567   if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0)
568   {
569     /* Update Freqs in Suffix Context */
570 
571     c = SUFFIX(p->MinContext);
572 
573     if (c->NumStats == 1)
574     {
575       CPpmd_State *s = ONE_STATE(c);
576       if (s->Freq < 32)
577         s->Freq++;
578     }
579     else
580     {
581       CPpmd_State *s = STATS(c);
582       Byte sym = p->FoundState->Symbol;
583 
584       if (s->Symbol != sym)
585       {
586         do
587         {
588           // s++; if (s->Symbol == sym) break;
589           s++;
590         }
591         while (s->Symbol != sym);
592 
593         if (s[0].Freq >= s[-1].Freq)
594         {
595           SWAP_STATES(s)
596           s--;
597         }
598       }
599 
600       if (s->Freq < MAX_FREQ - 9)
601       {
602         s->Freq = (Byte)(s->Freq + 2);
603         c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2);
604       }
605     }
606   }
607 
608 
609   if (p->OrderFall == 0)
610   {
611     /* MAX ORDER context */
612     /* (FoundState->Successor) is RAW-Successor. */
613     p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p);
614     if (!p->MinContext)
615     {
616       Ppmd7_RestartModel(p);
617       return;
618     }
619     SetSuccessor(p->FoundState, REF(p->MinContext));
620     return;
621   }
622 
623 
624   /* NON-MAX ORDER context */
625 
626   {
627     Byte *text = p->Text;
628     *text++ = p->FoundState->Symbol;
629     p->Text = text;
630     if (text >= p->UnitsStart)
631     {
632       Ppmd7_RestartModel(p);
633       return;
634     }
635     maxSuccessor = REF(text);
636   }
637 
638   minSuccessor = SUCCESSOR(p->FoundState);
639 
640   if (minSuccessor)
641   {
642     // there is Successor for FoundState in MinContext.
643     // So the next context will be one order higher than MinContext.
644 
645     if (minSuccessor <= maxSuccessor)
646     {
647       // minSuccessor is RAW-Successor. So we will create real contexts records:
648       PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p);
649       if (!cs)
650       {
651         Ppmd7_RestartModel(p);
652         return;
653       }
654       minSuccessor = REF(cs);
655     }
656 
657     // minSuccessor now is real Context pointer that points to existing (Order+1) context
658 
659     if (--p->OrderFall == 0)
660     {
661       /*
662       if we move to MaxOrder context, then minSuccessor will be common Succesor for both:
663         MinContext that is (MaxOrder - 1)
664         MaxContext that is (MaxOrder)
665       so we don't need new RAW-Successor, and we can use real minSuccessor
666       as succssors for both MinContext and MaxContext.
667       */
668       maxSuccessor = minSuccessor;
669 
670       /*
671       if (MaxContext != MinContext)
672       {
673         there was order fall from MaxOrder and we don't need current symbol
674         to transfer some RAW-Succesors to real contexts.
675         So we roll back pointer in raw data for one position.
676       }
677       */
678       p->Text -= (p->MaxContext != p->MinContext);
679     }
680   }
681   else
682   {
683     /*
684     FoundState has NULL-Successor here.
685     And only root 0-order context can contain NULL-Successors.
686     We change Successor in FoundState to RAW-Successor,
687     And next context will be same 0-order root Context.
688     */
689     SetSuccessor(p->FoundState, maxSuccessor);
690     minSuccessor = REF(p->MinContext);
691   }
692 
693   mc = p->MinContext;
694   c = p->MaxContext;
695 
696   p->MaxContext = p->MinContext = CTX(minSuccessor);
697 
698   if (c == mc)
699     return;
700 
701   // s0 : is pure Escape Freq
702   s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1);
703 
704   do
705   {
706     unsigned ns1;
707     UInt32 sum;
708 
709     if ((ns1 = c->NumStats) != 1)
710     {
711       if ((ns1 & 1) == 0)
712       {
713         /* Expand for one UNIT */
714         unsigned oldNU = ns1 >> 1;
715         unsigned i = U2I(oldNU);
716         if (i != U2I((size_t)oldNU + 1))
717         {
718           void *ptr = Ppmd7_AllocUnits(p, i + 1);
719           void *oldPtr;
720           if (!ptr)
721           {
722             Ppmd7_RestartModel(p);
723             return;
724           }
725           oldPtr = STATS(c);
726           MEM_12_CPY(ptr, oldPtr, oldNU)
727           Ppmd7_InsertNode(p, oldPtr, i);
728           c->Union4.Stats = STATS_REF(ptr);
729         }
730       }
731       sum = c->Union2.SummFreq;
732       /* max increase of Escape_Freq is 3 here.
733          total increase of Union2.SummFreq for all symbols is less than 256 here */
734       sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
735       /* original PPMdH uses 16-bit variable for (sum) here.
736          But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
737       // sum = (UInt16)sum;
738     }
739     else
740     {
741       // instead of One-symbol context we create 2-symbol context
742       CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0);
743       if (!s)
744       {
745         Ppmd7_RestartModel(p);
746         return;
747       }
748       {
749         unsigned freq = c->Union2.State2.Freq;
750         // s = *ONE_STATE(c);
751         s->Symbol = c->Union2.State2.Symbol;
752         s->Successor_0 = c->Union4.State4.Successor_0;
753         s->Successor_1 = c->Union4.State4.Successor_1;
754         // SetSuccessor(s, c->Union4.Stats);  // call it only for debug purposes to check the order of
755                                               // (Successor_0 and Successor_1) in LE/BE.
756         c->Union4.Stats = REF(s);
757         if (freq < MAX_FREQ / 4 - 1)
758           freq <<= 1;
759         else
760           freq = MAX_FREQ - 4;
761         // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
762         s->Freq = (Byte)freq;
763         // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
764         sum = freq + p->InitEsc + (ns > 3);
765       }
766     }
767 
768     {
769       CPpmd_State *s = STATS(c) + ns1;
770       UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq;
771       UInt32 sf = (UInt32)s0 + sum;
772       s->Symbol = p->FoundState->Symbol;
773       c->NumStats = (UInt16)(ns1 + 1);
774       SetSuccessor(s, maxSuccessor);
775 
776       if (cf < 6 * sf)
777       {
778         cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf);
779         sum += 3;
780         /* It can add (0, 1, 2) to Escape_Freq */
781       }
782       else
783       {
784         cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf);
785         sum += cf;
786       }
787 
788       c->Union2.SummFreq = (UInt16)sum;
789       s->Freq = (Byte)cf;
790     }
791     c = SUFFIX(c);
792   }
793   while (c != mc);
794 }
795 
796 
797 
798 Z7_NO_INLINE
Ppmd7_Rescale(CPpmd7 * p)799 static void Ppmd7_Rescale(CPpmd7 *p)
800 {
801   unsigned i, adder, sumFreq, escFreq;
802   CPpmd_State *stats = STATS(p->MinContext);
803   CPpmd_State *s = p->FoundState;
804 
805   /* Sort the list by Freq */
806   if (s != stats)
807   {
808     CPpmd_State tmp = *s;
809     do
810       s[0] = s[-1];
811     while (--s != stats);
812     *s = tmp;
813   }
814 
815   sumFreq = s->Freq;
816   escFreq = p->MinContext->Union2.SummFreq - sumFreq;
817 
818   /*
819   if (p->OrderFall == 0), adder = 0 : it's     allowed to remove symbol from     MAX Order context
820   if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context
821   */
822 
823   adder = (p->OrderFall != 0);
824 
825   #ifdef PPMD7_ORDER_0_SUPPPORT
826   adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context
827   #endif
828 
829   sumFreq = (sumFreq + 4 + adder) >> 1;
830   i = (unsigned)p->MinContext->NumStats - 1;
831   s->Freq = (Byte)sumFreq;
832 
833   do
834   {
835     unsigned freq = (++s)->Freq;
836     escFreq -= freq;
837     freq = (freq + adder) >> 1;
838     sumFreq += freq;
839     s->Freq = (Byte)freq;
840     if (freq > s[-1].Freq)
841     {
842       CPpmd_State tmp = *s;
843       CPpmd_State *s1 = s;
844       do
845       {
846         s1[0] = s1[-1];
847       }
848       while (--s1 != stats && freq > s1[-1].Freq);
849       *s1 = tmp;
850     }
851   }
852   while (--i);
853 
854   if (s->Freq == 0)
855   {
856     /* Remove all items with Freq == 0 */
857     CPpmd7_Context *mc;
858     unsigned numStats, numStatsNew, n0, n1;
859 
860     i = 0; do { i++; } while ((--s)->Freq == 0);
861 
862     /* We increase (escFreq) for the number of removed symbols.
863        So we will have (0.5) increase for Escape_Freq in avarage per
864        removed symbol after Escape_Freq halving */
865     escFreq += i;
866     mc = p->MinContext;
867     numStats = mc->NumStats;
868     numStatsNew = numStats - i;
869     mc->NumStats = (UInt16)(numStatsNew);
870     n0 = (numStats + 1) >> 1;
871 
872     if (numStatsNew == 1)
873     {
874       /* Create Single-Symbol context */
875       unsigned freq = stats->Freq;
876 
877       do
878       {
879         escFreq >>= 1;
880         freq = (freq + 1) >> 1;
881       }
882       while (escFreq > 1);
883 
884       s = ONE_STATE(mc);
885       *s = *stats;
886       s->Freq = (Byte)freq; // (freq <= 260 / 4)
887       p->FoundState = s;
888       Ppmd7_InsertNode(p, stats, U2I(n0));
889       return;
890     }
891 
892     n1 = (numStatsNew + 1) >> 1;
893     if (n0 != n1)
894     {
895       // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1));
896       unsigned i0 = U2I(n0);
897       unsigned i1 = U2I(n1);
898       if (i0 != i1)
899       {
900         if (p->FreeList[i1] != 0)
901         {
902           void *ptr = Ppmd7_RemoveNode(p, i1);
903           p->MinContext->Union4.Stats = STATS_REF(ptr);
904           MEM_12_CPY(ptr, (const void *)stats, n1)
905           Ppmd7_InsertNode(p, stats, i0);
906         }
907         else
908           Ppmd7_SplitBlock(p, stats, i0, i1);
909       }
910     }
911   }
912   {
913     CPpmd7_Context *mc = p->MinContext;
914     mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1));
915     // Escape_Freq halving here
916     p->FoundState = STATS(mc);
917   }
918 }
919 
920 
Ppmd7_MakeEscFreq(CPpmd7 * p,unsigned numMasked,UInt32 * escFreq)921 CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
922 {
923   CPpmd_See *see;
924   const CPpmd7_Context *mc = p->MinContext;
925   unsigned numStats = mc->NumStats;
926   if (numStats != 256)
927   {
928     unsigned nonMasked = numStats - numMasked;
929     see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
930         + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats)
931         + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats)
932         + 4 * (unsigned)(numMasked > nonMasked) +
933         p->HiBitsFlag;
934     {
935       // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
936       unsigned summ = (UInt16)see->Summ; // & 0xFFFF
937       unsigned r = (summ >> see->Shift);
938       see->Summ = (UInt16)(summ - r);
939       *escFreq = r + (r == 0);
940     }
941   }
942   else
943   {
944     see = &p->DummySee;
945     *escFreq = 1;
946   }
947   return see;
948 }
949 
950 
Ppmd7_NextContext(CPpmd7 * p)951 static void Ppmd7_NextContext(CPpmd7 *p)
952 {
953   PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
954   if (p->OrderFall == 0 && (const Byte *)c > p->Text)
955     p->MaxContext = p->MinContext = c;
956   else
957     Ppmd7_UpdateModel(p);
958 }
959 
960 
Ppmd7_Update1(CPpmd7 * p)961 void Ppmd7_Update1(CPpmd7 *p)
962 {
963   CPpmd_State *s = p->FoundState;
964   unsigned freq = s->Freq;
965   freq += 4;
966   p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
967   s->Freq = (Byte)freq;
968   if (freq > s[-1].Freq)
969   {
970     SWAP_STATES(s)
971     p->FoundState = --s;
972     if (freq > MAX_FREQ)
973       Ppmd7_Rescale(p);
974   }
975   Ppmd7_NextContext(p);
976 }
977 
978 
Ppmd7_Update1_0(CPpmd7 * p)979 void Ppmd7_Update1_0(CPpmd7 *p)
980 {
981   CPpmd_State *s = p->FoundState;
982   CPpmd7_Context *mc = p->MinContext;
983   unsigned freq = s->Freq;
984   unsigned summFreq = mc->Union2.SummFreq;
985   p->PrevSuccess = (2 * freq > summFreq);
986   p->RunLength += (int)p->PrevSuccess;
987   mc->Union2.SummFreq = (UInt16)(summFreq + 4);
988   freq += 4;
989   s->Freq = (Byte)freq;
990   if (freq > MAX_FREQ)
991     Ppmd7_Rescale(p);
992   Ppmd7_NextContext(p);
993 }
994 
995 
996 /*
997 void Ppmd7_UpdateBin(CPpmd7 *p)
998 {
999   unsigned freq = p->FoundState->Freq;
1000   p->FoundState->Freq = (Byte)(freq + (freq < 128));
1001   p->PrevSuccess = 1;
1002   p->RunLength++;
1003   Ppmd7_NextContext(p);
1004 }
1005 */
1006 
Ppmd7_Update2(CPpmd7 * p)1007 void Ppmd7_Update2(CPpmd7 *p)
1008 {
1009   CPpmd_State *s = p->FoundState;
1010   unsigned freq = s->Freq;
1011   freq += 4;
1012   p->RunLength = p->InitRL;
1013   p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
1014   s->Freq = (Byte)freq;
1015   if (freq > MAX_FREQ)
1016     Ppmd7_Rescale(p);
1017   Ppmd7_UpdateModel(p);
1018 }
1019 
1020 
1021 
1022 /*
1023 PPMd Memory Map:
1024 {
1025   [ 0 ]           contains subset of original raw text, that is required to create context
1026                   records, Some symbols are not written, when max order context was reached
1027   [ Text ]        free area
1028   [ UnitsStart ]  CPpmd_State vectors and CPpmd7_Context records
1029   [ LoUnit ]      free  area for CPpmd_State and CPpmd7_Context items
1030 [ HiUnit ]      CPpmd7_Context records
1031   [ Size ]        end of array
1032 }
1033 
1034 These addresses don't cross at any time.
1035 And the following condtions is true for addresses:
1036   (0  <= Text < UnitsStart <= LoUnit <= HiUnit <= Size)
1037 
1038 Raw text is BYTE--aligned.
1039 the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs.
1040 
1041 Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
1042 The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
1043 The code doesn't free UNITs allocated for CPpmd7_Context records.
1044 
1045 The code calls Ppmd7_RestartModel(), when there is no free memory for allocation.
1046 And Ppmd7_RestartModel() changes the state to orignal start state, with full free block.
1047 
1048 
1049 The code allocates UNITs with the following order:
1050 
1051 Allocation of 1 UNIT for Context record
1052   - from free space (HiUnit) down to (LoUnit)
1053   - from FreeList[0]
1054   - Ppmd7_AllocUnitsRare()
1055 
1056 Ppmd7_AllocUnits() for CPpmd_State vectors:
1057   - from FreeList[i]
1058   - from free space (LoUnit) up to (HiUnit)
1059   - Ppmd7_AllocUnitsRare()
1060 
1061 Ppmd7_AllocUnitsRare()
1062   - if (GlueCount == 0)
1063        {  Glue lists, GlueCount = 255, allocate from FreeList[i]] }
1064   - loop for all higher sized FreeList[...] lists
1065   - from (UnitsStart - Text), GlueCount--
1066   - ERROR
1067 
1068 
1069 Each Record with Context contains the CPpmd_State vector, where each
1070 CPpmd_State contains the link to Successor.
1071 There are 3 types of Successor:
1072   1) NULL-Successor   - NULL pointer. NULL-Successor links can be stored
1073                         only in 0-order Root Context Record.
1074                         We use 0 value as NULL-Successor
1075   2) RAW-Successor    - the link to position in raw text,
1076                         that "RAW-Successor" is being created after first
1077                         occurrence of new symbol for some existing context record.
1078                         (RAW-Successor > 0).
1079   3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1),
1080                         that record is being created when we go via RAW-Successor again.
1081 
1082 For any successors at any time: the following condtions are true for Successor links:
1083 (NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor)
1084 
1085 
1086 ---------- Symbol Frequency, SummFreq and Range in Range_Coder ----------
1087 
1088 CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq
1089 
1090 The PPMd code tries to fulfill the condition:
1091   (SummFreq <= (256 * 128 = RC::kBot))
1092 
1093 We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
1094 So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
1095 If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
1096 SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions.
1097 Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for
1098 max-order context.
1099 
1100 When the PPMd code still break (Total <= RC::Range) condition in range coder,
1101 we have two ways to resolve that problem:
1102   1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
1103   2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
1104 */
1105 
1106 #undef MAX_FREQ
1107 #undef UNIT_SIZE
1108 #undef U2B
1109 #undef U2I
1110 #undef I2U
1111 #undef I2U_UInt16
1112 #undef REF
1113 #undef STATS_REF
1114 #undef CTX
1115 #undef STATS
1116 #undef ONE_STATE
1117 #undef SUFFIX
1118 #undef NODE
1119 #undef EMPTY_NODE
1120 #undef MEM_12_CPY
1121 #undef SUCCESSOR
1122 #undef SWAP_STATES
1123