• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 // #include <stdio.h>
6 
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif // _WIN32
11 
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #include <unistd.h>
15 #ifdef USE_POSIX_TIME2
16 #include <sys/time.h>
17 #include <sys/times.h>
18 #endif
19 #endif // USE_POSIX_TIME
20 
21 #ifdef _WIN32
22 #define USE_ALLOCA
23 #endif
24 
25 #ifdef USE_ALLOCA
26 #ifdef _WIN32
27 #include <malloc.h>
28 #else
29 #include <stdlib.h>
30 #endif
31 #define BENCH_ALLOCA_VALUE(index) (((index) * 64 * 21) & 0x7FF)
32 #endif
33 
34 #include "../../../../C/7zCrc.h"
35 #include "../../../../C/RotateDefs.h"
36 #include "../../../../C/CpuArch.h"
37 
38 #ifndef Z7_ST
39 #include "../../../Windows/Synchronization.h"
40 #include "../../../Windows/Thread.h"
41 #endif
42 
43 #include "../../../Windows/FileFind.h"
44 #include "../../../Windows/FileIO.h"
45 #include "../../../Windows/SystemInfo.h"
46 
47 #include "../../../Common/MyBuffer2.h"
48 #include "../../../Common/IntToString.h"
49 #include "../../../Common/StringConvert.h"
50 #include "../../../Common/StringToInt.h"
51 #include "../../../Common/Wildcard.h"
52 
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamObjects.h"
55 #include "../../Common/StreamUtils.h"
56 
57 #include "Bench.h"
58 
59 using namespace NWindows;
60 
#ifndef Z7_ST
// presumably the method id of LZMA; only defined in multithreaded builds - TODO confirm usage
static const UInt32 k_LZMA = 0x030101;
#endif

// Default complexity budget (in "commands") that SetComplexCommandsMs() starts from:
// 2^31 when UNDER_CE is defined, 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// NOTE(review): looks like the default time budget in milliseconds; not used in the visible part of the file
static const UInt32 kComplexInMs = 4000;
73 
SetComplexCommandsMs(UInt32 complexInMs,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)74 static void SetComplexCommandsMs(UInt32 complexInMs,
75     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
76 {
77   complexInCommands = kComplexInCommands;
78   const UInt64 kMinFreq = (UInt64)1000000 * 4;
79   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
80   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
81     cpuFreq = kMinFreq;
82   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
83   {
84     if (complexInMs != 0)
85       complexInCommands = complexInMs * cpuFreq / 1000;
86     else
87       complexInCommands = cpuFreq >> 2;
88   }
89 }
90 
// const UInt64 kBenchmarkUsageMult = 1000000; // for debug
// Usage values are fixed-point numbers: kBenchmarkUsageMult (1 << 16) corresponds to 100%
// (see Benchmark_GetUsage_Percents()).
static const unsigned kBenchmarkUsageMultBits = 16;
static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
94 
Benchmark_GetUsage_Percents(UInt64 usage)95 UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
96 {
97   return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
98 }
99 
// NOTE(review): the next three constants are not used in the visible part of the file;
// their exact role must be confirmed against the callers.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test

static const unsigned kOldLzmaDictBits = 32;

// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
static const size_t kAdditionalSize = (size_t)1 << 16;
// fixed number of extra bytes that GetBenchCompressedSize() adds to the compressed buffer size
static const size_t kCompressedAdditionalSize = 1 << 10;

// size of the buffer (CEncoderInfo::propsData) that receives the coder properties
static const UInt32 kMaxMethodPropSize = 1 << 6;
110 
111 
/* Calls (_buffer_)->Alloc(_size_) and returns E_OUTOFMEMORY from the ENCLOSING function,
   if a non-zero size was requested but the buffer is not allocated after the call.
   Note: (_size_) is evaluated twice, and the macro can be used only in functions
   that return HRESULT. */
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
  if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
115 
116 
117 class CBaseRandomGenerator
118 {
119   UInt32 A1;
120   UInt32 A2;
121   UInt32 Salt;
122 public:
CBaseRandomGenerator(UInt32 salt=0)123   CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()124   void Init() { A1 = 362436069; A2 = 521288629;}
125   Z7_FORCE_INLINE
GetRnd()126   UInt32 GetRnd()
127   {
128 #if 0
129     // for debug:
130     return 0x0c080400;
131     // return 0;
132 #else
133     return Salt ^
134     (
135       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
136       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
137     );
138 #endif
139   }
140 };
141 
142 
143 static const size_t k_RandBuf_AlignMask = 4 - 1;
144 
145 Z7_NO_INLINE
RandGen_BufAfterPad(Byte * buf,size_t size)146 static void RandGen_BufAfterPad(Byte *buf, size_t size)
147 {
148   CBaseRandomGenerator RG;
149   for (size_t i = 0; i < size; i += 4)
150   {
151     const UInt32 v = RG.GetRnd();
152     SetUi32a(buf + i, v)
153   }
154   /*
155   UInt32 v = RG.GetRnd();
156   for (; i < size; i++)
157   {
158     buf[i] = (Byte)v;
159     v >>= 8;
160   }
161   */
162 }
163 
164 
/*
  Buffer (CMidAlignedBuffer) that can fill itself with benchmark input data:
    GenerateSimpleRandom() : plain pseudo-random bytes,
    GenerateLz()           : random literals mixed with copies of earlier data.
  The buffer must be allocated by the caller before the Generate* call:
  both functions fill exactly Size() bytes.
*/
class CBenchRandomGenerator: public CMidAlignedBuffer
{
  // extracts the low (numBits) bits of (res) and removes them from (res)
  static UInt32 GetVal(UInt32 &res, unsigned numBits)
  {
    const UInt32 val = res & (((UInt32)1 << numBits) - 1);
    res >>= numBits;
    return val;
  }

  // reads a 2-bit field (len), then returns the next (1 + len)-bit field of (r)
  static UInt32 GetLen(UInt32 &r)
  {
    const unsigned len = (unsigned)GetVal(r, 2);
    return GetVal(r, 1 + len);
  }

public:

  // fills the buffer with the low bytes of consecutive generator values
  void GenerateSimpleRandom(UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    for (size_t i = 0; i < bufSize; i++)
      buf[i] = (Byte)rg.GetRnd();
  }

  /*
    Fills the buffer with data that contains repeats:
    each step writes either one random literal byte or a copy of (len) bytes
    from the distance (rep0) behind the current position.
      dictBits : limits the number of bits of a new distance
      salt     : salt for the random generator
  */
  void GenerateLz(unsigned dictBits, UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    size_t pos = 0;
    size_t rep0 = 1;
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    unsigned posBits = 1;

    // printf("\n dictBits = %d\n", (UInt32)dictBits);
    // printf("\n bufSize = 0x%p\n", (const void *)bufSize);

    while (pos < bufSize)
    {
      /*
      if (pos >= ((UInt32)1 << 31))
        printf(" %x\n", pos);
      */
      UInt32 r = rg.GetRnd();
      // the first 1024 bytes are always literals
      if (GetVal(r, 1) == 0 || pos < 1024)
        buf[pos++] = (Byte)(r & 0xFF);
      else
      {
        UInt32 len;
        len = 1 + GetLen(r);

        // if the 3-bit field is zero, the previous distance (rep0) is reused
        if (GetVal(r, 3) != 0)
        {
          len += GetLen(r);

          // posBits is increased until ((size_t)1 << posBits) >= pos
          while (((size_t)1 << posBits) < pos)
            posBits++;

          unsigned numBitsMax = dictBits;
          if (numBitsMax > posBits)
            numBitsMax = posBits;

          const unsigned kAddBits = 6;
          unsigned numLogBits = 5;
          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
            numLogBits = 4;

          // rejection sampling: we first select the number of bits (ppp),
          // and then the (ppp)-bit distance that must be smaller than (pos)
          for (;;)
          {
            const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
            r = rg.GetRnd();
            if (ppp > numBitsMax)
              continue;
            // rep0 = GetVal(r, ppp);
            rep0 = r & (((size_t)1 << ppp) - 1);
            if (rep0 < pos)
              break;
            r = rg.GetRnd();
          }
          rep0++;
        }

        // len *= 300; // for debug
        {
          // the copy must not go past the end of the buffer
          const size_t rem = bufSize - pos;
          if (len > rem)
            len = (UInt32)rem;
        }
        Byte *dest = buf + pos;
        const Byte *src = dest - rep0;
        pos += len;
        // byte-by-byte copy: source and destination ranges can overlap
        for (UInt32 i = 0; i < len; i++)
          *dest++ = *src++;
      }
    }
    // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
  }
};
264 
265 
266 Z7_CLASS_IMP_NOQIB_1(
267   CBenchmarkInStream
268   , ISequentialInStream
269 )
270   const Byte *Data;
271   size_t Pos;
272   size_t Size;
273 public:
274   void Init(const Byte *data, size_t size)
275   {
276     Data = data;
277     Size = size;
278     Pos = 0;
279   }
280   bool WasFinished() const { return Pos == Size; }
281 };
282 
283 Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
284 {
285   const UInt32 kMaxBlockSize = (1 << 20);
286   if (size > kMaxBlockSize)
287     size = kMaxBlockSize;
288   const size_t remain = Size - Pos;
289   if (size > remain)
290     size = (UInt32)remain;
291 
292   if (size)
293     memcpy(data, Data + Pos, size);
294 
295   Pos += size;
296   if (processedSize)
297     *processedSize = size;
298   return S_OK;
299 }
300 
301 
302 class CBenchmarkOutStream Z7_final:
303   public ISequentialOutStream,
304   public CMyUnknownImp,
305   public CMidAlignedBuffer
306 {
307   Z7_COM_UNKNOWN_IMP_0
308   Z7_IFACE_COM7_IMP(ISequentialOutStream)
309   // bool _overflow;
310 public:
311   size_t Pos;
312   bool RealCopy;
313   bool CalcCrc;
314   UInt32 Crc;
315 
316   // CBenchmarkOutStream(): _overflow(false) {}
317   void Init(bool realCopy, bool calcCrc)
318   {
319     Crc = CRC_INIT_VAL;
320     RealCopy = realCopy;
321     CalcCrc = calcCrc;
322     // _overflow = false;
323     Pos = 0;
324   }
325 
326   void InitCrc()
327   {
328     Crc = CRC_INIT_VAL;
329   }
330 
331   void Calc(const void *data, size_t size)
332   {
333     Crc = CrcUpdate(Crc, data, size);
334   }
335 
336   size_t GetPos() const { return Pos; }
337 
338   // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
339 };
340 
341 Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
342 {
343   size_t curSize = Size() - Pos;
344   if (curSize > size)
345     curSize = size;
346   if (curSize != 0)
347   {
348     if (RealCopy)
349       memcpy(((Byte *)*this) + Pos, data, curSize);
350     if (CalcCrc)
351       Calc(data, curSize);
352     Pos += curSize;
353   }
354   if (processedSize)
355     *processedSize = (UInt32)curSize;
356   if (curSize != size)
357   {
358     // _overflow = true;
359     return E_FAIL;
360   }
361   return S_OK;
362 }
363 
364 
365 Z7_CLASS_IMP_NOQIB_1(
366   CCrcOutStream
367   , ISequentialOutStream
368 )
369 public:
370   bool CalcCrc;
371   UInt32 Crc;
372   UInt64 Pos;
373 
374   CCrcOutStream(): CalcCrc(true) {}
375   void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
376   void Calc(const void *data, size_t size)
377   {
378     Crc = CrcUpdate(Crc, data, size);
379   }
380 };
381 
382 Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
383 {
384   if (CalcCrc)
385     Calc(data, size);
386   Pos += size;
387   if (processedSize)
388     *processedSize = size;
389   return S_OK;
390 }
391 
392 // #include "../../../../C/My_sys_time.h"
393 
394 static UInt64 GetTimeCount()
395 {
396   #ifdef USE_POSIX_TIME
397   #ifdef USE_POSIX_TIME2
398   timeval v;
399   if (gettimeofday(&v, NULL) == 0)
400     return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
401   return (UInt64)time(NULL) * 1000000;
402   #else
403   return time(NULL);
404   #endif
405   #else
406   LARGE_INTEGER value;
407   if (::QueryPerformanceCounter(&value))
408     return (UInt64)value.QuadPart;
409   return GetTickCount();
410   #endif
411 }
412 
413 static UInt64 GetFreq()
414 {
415   #ifdef USE_POSIX_TIME
416   #ifdef USE_POSIX_TIME2
417   return 1000000;
418   #else
419   return 1;
420   #endif
421   #else
422   LARGE_INTEGER value;
423   if (::QueryPerformanceFrequency(&value))
424     return (UInt64)value.QuadPart;
425   return 1000;
426   #endif
427 }
428 
429 
430 #ifdef USE_POSIX_TIME
431 
432 struct CUserTime
433 {
434   UInt64 Sum;
435   clock_t Prev;
436 
437   void Init()
438   {
439     // Prev = clock();
440     Sum = 0;
441     Prev = 0;
442     Update();
443     Sum = 0;
444   }
445 
446   void Update()
447   {
448     tms t;
449     /* clock_t res = */ times(&t);
450     clock_t newVal = t.tms_utime + t.tms_stime;
451     Sum += (UInt64)(newVal - Prev);
452     Prev = newVal;
453 
454     /*
455     clock_t v = clock();
456     if (v != -1)
457     {
458       Sum += v - Prev;
459       Prev = v;
460     }
461     */
462   }
463   UInt64 GetUserTime()
464   {
465     Update();
466     return Sum;
467   }
468 };
469 
470 #else
471 
472 
473 struct CUserTime
474 {
475   bool UseTick;
476   DWORD Prev_Tick;
477   UInt64 Prev;
478   UInt64 Sum;
479 
480   void Init()
481   {
482     UseTick = false;
483     Prev_Tick = 0;
484     Prev = 0;
485     Sum = 0;
486     Update();
487     Sum = 0;
488   }
489   UInt64 GetUserTime()
490   {
491     Update();
492     return Sum;
493   }
494   void Update();
495 };
496 
497 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
498 
499 void CUserTime::Update()
500 {
501   DWORD new_Tick = GetTickCount();
502   FILETIME creationTime, exitTime, kernelTime, userTime;
503   if (!UseTick &&
504       #ifdef UNDER_CE
505         ::GetThreadTimes(::GetCurrentThread()
506       #else
507         ::GetProcessTimes(::GetCurrentProcess()
508       #endif
509       , &creationTime, &exitTime, &kernelTime, &userTime))
510   {
511     UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
512     Sum += newVal - Prev;
513     Prev = newVal;
514   }
515   else
516   {
517     UseTick = true;
518     Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
519   }
520   Prev_Tick = new_Tick;
521 }
522 
523 
524 #endif
525 
526 static UInt64 GetUserFreq()
527 {
528   #ifdef USE_POSIX_TIME
529   // return CLOCKS_PER_SEC;
530   return (UInt64)sysconf(_SC_CLK_TCK);
531   #else
532   return 10000000;
533   #endif
534 }
535 
536 class CBenchProgressStatus Z7_final
537 {
538   #ifndef Z7_ST
539   NSynchronization::CCriticalSection CS;
540   #endif
541 public:
542   HRESULT Res;
543   bool EncodeMode;
544   void SetResult(HRESULT res)
545   {
546     #ifndef Z7_ST
547     NSynchronization::CCriticalSectionLock lock(CS);
548     #endif
549     Res = res;
550   }
551   HRESULT GetResult()
552   {
553     #ifndef Z7_ST
554     NSynchronization::CCriticalSectionLock lock(CS);
555     #endif
556     return Res;
557   }
558 };
559 
/*
  Measures the time of one benchmark run:
    BenchInfo : start values (frequencies and global start time) set by SetStartTime()
    UserTime  : accumulator of consumed CPU time
*/
struct CBenchInfoCalc
{
  CBenchInfo BenchInfo;
  CUserTime UserTime;

  // stores frequencies and the start time to BenchInfo, and restarts UserTime
  void SetStartTime();
  // copies BenchInfo to (dest) and sets the elapsed global and user times in (dest)
  void SetFinishTime(CBenchInfo &dest);
};
568 
569 void CBenchInfoCalc::SetStartTime()
570 {
571   BenchInfo.GlobalFreq = GetFreq();
572   BenchInfo.UserFreq = GetUserFreq();
573   BenchInfo.GlobalTime = ::GetTimeCount();
574   BenchInfo.UserTime = 0;
575   UserTime.Init();
576 }
577 
578 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
579 {
580   dest = BenchInfo;
581   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
582   dest.UserTime = UserTime.GetUserTime();
583 }
584 
585 class CBenchProgressInfo Z7_final:
586   public ICompressProgressInfo,
587   public CMyUnknownImp,
588   public CBenchInfoCalc
589 {
590   Z7_COM_UNKNOWN_IMP_0
591   Z7_IFACE_COM7_IMP(ICompressProgressInfo)
592 public:
593   CBenchProgressStatus *Status;
594   IBenchCallback *Callback;
595 
596   CBenchProgressInfo(): Callback(NULL) {}
597 };
598 
599 
600 Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
601 {
602   HRESULT res = Status->GetResult();
603   if (res != S_OK)
604     return res;
605   if (!Callback)
606     return res;
607 
608   /*
609   static UInt64 inSizePrev = 0;
610   static UInt64 outSizePrev = 0;
611   UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
612   if (inSize)   { val1 = *inSize;  delta1 = val1 - inSizePrev;  inSizePrev  = val1; }
613   if (outSize)  { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2;  }
614   UInt64 percents = delta2 * 1000;
615   if (delta1 != 0)
616     percents /= delta1;
617   printf("=== %7d %7d     %7d %7d  ratio = %4d\n",
618       (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
619       (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
620       (unsigned)percents);
621   */
622 
623   CBenchInfo info;
624   SetFinishTime(info);
625   if (Status->EncodeMode)
626   {
627     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
628     info.PackSize = BenchInfo.PackSize + *outSize;
629     res = Callback->SetEncodeResult(info, false);
630   }
631   else
632   {
633     info.PackSize = BenchInfo.PackSize + *inSize;
634     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
635     res = Callback->SetDecodeResult(info, false);
636   }
637   if (res != S_OK)
638     Status->SetResult(res);
639   return res;
640 }
641 
642 static const unsigned kSubBits = 8;
643 
644 static unsigned GetLogSize(UInt64 size)
645 {
646   unsigned i = 0;
647   for (;;)
648   {
649     i++;  size >>= 1;  if (size == 0) break;
650   }
651   return i;
652 }
653 
654 
655 static UInt32 GetLogSize_Sub(UInt64 size)
656 {
657   if (size <= 1)
658     return 0;
659   const unsigned i = GetLogSize(size) - 1;
660   UInt32 v;
661   if (i <= kSubBits)
662     v = (UInt32)(size) << (kSubBits - i);
663   else
664     v = (UInt32)(size >> (i - kSubBits));
665   return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
666 }
667 
668 
669 static UInt64 Get_UInt64_from_double(double v)
670 {
671   const UInt64 kMaxVal = (UInt64)1 << 62;
672   if (v > (double)(Int64)kMaxVal)
673     return kMaxVal;
674   return (UInt64)v;
675 }
676 
677 static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
678 {
679   if (d == 0)
680     d = 1;
681   const double v =
682       (double)(Int64)m1 *
683       (double)(Int64)m2 /
684       (double)(Int64)d;
685   return Get_UInt64_from_double(v);
686   /*
687   unsigned n1 = GetLogSize(m1);
688   unsigned n2 = GetLogSize(m2);
689   while (n1 + n2 > 64)
690   {
691     if (n1 >= n2)
692     {
693       m1 >>= 1;
694       n1--;
695     }
696     else
697     {
698       m2 >>= 1;
699       n2--;
700     }
701     d >>= 1;
702   }
703 
704   if (d == 0)
705     d = 1;
706   return m1 * m2 / d;
707   */
708 }
709 
710 
711 UInt64 CBenchInfo::GetUsage() const
712 {
713   UInt64 userTime = UserTime;
714   UInt64 userFreq = UserFreq;
715   UInt64 globalTime = GlobalTime;
716   UInt64 globalFreq = GlobalFreq;
717 
718   if (userFreq == 0)
719     userFreq = 1;
720   if (globalTime == 0)
721     globalTime = 1;
722 
723   const double v =
724         ((double)(Int64)userTime / (double)(Int64)userFreq)
725       * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
726       * (double)(Int64)kBenchmarkUsageMult;
727   return Get_UInt64_from_double(v);
728   /*
729   return MyMultDiv64(
730         MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
731         globalFreq, globalTime);
732   */
733 }
734 
735 
736 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
737 {
738   if (UserTime == 0)
739   {
740     return 0;
741     // userTime = 1;
742   }
743   UInt64 globalFreq = GlobalFreq;
744   if (globalFreq == 0)
745     globalFreq = 1;
746 
747   const double v =
748         ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
749       * ((double)(Int64)UserFreq  / (double)(Int64)UserTime)
750       * (double)(Int64)rating;
751   return Get_UInt64_from_double(v);
752   /*
753   return MyMultDiv64(
754         MyMultDiv64(rating, UserFreq, UserTime),
755         GlobalTime, globalFreq);
756   */
757 }
758 
759 
760 UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
761 {
762   return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
763 }
764 
765 static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
766 {
767   return complexity >= 0 ?
768       size * (UInt32)complexity :
769       size / (UInt32)(-complexity);
770 }
771 
772 struct CBenchProps
773 {
774   bool LzmaRatingMode;
775 
776   Int32 EncComplex;
777   Int32 DecComplexCompr;
778   Int32 DecComplexUnc;
779 
780   unsigned KeySize;
781 
782   CBenchProps():
783       LzmaRatingMode(false),
784       KeySize(0)
785     {}
786 
787   void SetLzmaCompexity();
788 
789   UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
790   {
791     const UInt32 kMinSize = 100;
792     if (unpackSize < kMinSize)
793       unpackSize = kMinSize;
794     return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
795   }
796 
797   UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
798   {
799     return
800         GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
801         GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
802   }
803 
804   UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
805   UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
806 };
807 
808 void CBenchProps::SetLzmaCompexity()
809 {
810   EncComplex = 1200;
811   DecComplexUnc = 4;
812   DecComplexCompr = 190;
813   LzmaRatingMode = true;
814 }
815 
816 UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
817 {
818   if (dictSize < (1 << kBenchMinDicLogSize))
819     dictSize = (1 << kBenchMinDicLogSize);
820   Int32 encComplex = EncComplex;
821   if (LzmaRatingMode)
822   {
823     /*
824     for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
825     {
826       unsigned rr = GetLogSize_Sub(uu);
827       printf("\n%16I64x , log = %4x", uu, rr);
828       uu += 1;
829       uu += uu / 50;
830     }
831     */
832     // throw 1;
833     const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
834     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
835   }
836   const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
837   return MyMultDiv64(numCommands, freq, elapsedTime);
838 }
839 
840 UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
841 {
842   const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
843   return MyMultDiv64(numCommands, freq, elapsedTime);
844 }
845 
846 
847 
848 UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
849 {
850   CBenchProps props;
851   props.SetLzmaCompexity();
852   return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
853 }
854 
855 UInt64 CBenchInfo::GetRating_LzmaDec() const
856 {
857   CBenchProps props;
858   props.SetLzmaCompexity();
859   return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
860 }
861 
862 
863 #ifndef Z7_ST
864 
865 #define NUM_CPU_LEVELS_MAX 3
866 
867 struct CAffinityMode
868 {
869   unsigned NumBundleThreads;
870   unsigned NumLevels;
871   unsigned NumCoreThreads;
872   unsigned NumCores;
873   // unsigned DivideNum;
874   UInt32 Sizes[NUM_CPU_LEVELS_MAX];
875 
876   void SetLevels(unsigned numCores, unsigned numCoreThreads);
877   DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
878   bool NeedAffinity() const { return NumBundleThreads != 0; }
879 
880   WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
881   {
882     if (NeedAffinity())
883     {
884       CCpuSet cpuSet;
885       GetAffinityMask(bundleIndex, &cpuSet);
886       return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
887     }
888     return thread.Create(startAddress, parameter);
889   }
890 
891   CAffinityMode():
892     NumBundleThreads(0),
893     NumLevels(0),
894     NumCoreThreads(1)
895     // DivideNum(1)
896     {}
897 };
898 
899 void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
900 {
901   NumCores = numCores;
902   NumCoreThreads = numCoreThreads;
903   NumLevels = 0;
904   if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
905     return;
906   UInt32 c = numCores / numCoreThreads;
907   UInt32 c2 = 1;
908   while ((c & 1) == 0)
909   {
910     c >>= 1;
911     c2 <<= 1;
912   }
913   if (c2 != 1)
914     Sizes[NumLevels++] = c2;
915   if (c != 1)
916     Sizes[NumLevels++] = c;
917   if (numCoreThreads != 1)
918     Sizes[NumLevels++] = numCoreThreads;
919   if (NumLevels == 0)
920     Sizes[NumLevels++] = 1;
921 
922   /*
923   printf("\n Cores:");
924   for (unsigned i = 0; i < NumLevels; i++)
925   {
926     printf(" %d", Sizes[i]);
927   }
928   printf("\n");
929   */
930 }
931 
932 
/*
  Calculates the set of CPUs for the thread bundle (bundleIndex).
    cpuSet : (out) it is cleared first; then it gets NumBundleThreads
             consecutive CPUs starting from the calculated CPU index (v).
  Returns the same set as a bit mask, or 0 (and empty cpuSet), if NumLevels == 0.

  NOTE(review): the function divides by NumBundleThreads and by
  (NumCores / NumBundleThreads); the visible caller calls it only when
  NumBundleThreads != 0 - other callers must be checked.
*/
DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
{
  CpuSet_Zero(cpuSet);

  if (NumLevels == 0)
    return 0;

  // printf("\n%2d", bundleIndex);

  /*
  UInt32 low = 0;
  if (DivideNum != 1)
  {
    low = bundleIndex % DivideNum;
    bundleIndex /= DivideNum;
  }
  */

  // bundle indexes wrap around the number of groups
  UInt32 numGroups = NumCores / NumBundleThreads;
  UInt32 m = bundleIndex % numGroups;
  UInt32 v = 0;
  // The digits of (m) are extracted level by level (lowest digit first)
  // and appended to (v), so that the digit extracted first becomes
  // the most significant part of (v).
  for (unsigned i = 0; i < NumLevels; i++)
  {
    UInt32 size = Sizes[i];
    // power-of-2 part of the level size is processed bit by bit
    while ((size & 1) == 0)
    {
      v *= 2;
      v |= (m & 1);
      m >>= 1;
      size >>= 1;
    }
    // remaining (odd) part of the level size
    v *= size;
    v += m % size;
    m /= size;
  }

  // UInt32 nb = NumBundleThreads / DivideNum;
  UInt32 nb = NumBundleThreads;

  // (nb) consecutive bits starting from the bit (v)
  DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
  // v += low;
  mask <<= v;

  // printf(" %2d %8x \n ", v, (unsigned)mask);
  #ifdef _WIN32
    *cpuSet = mask;
  #else
  {
    for (unsigned k = 0; k < nb; k++)
      CpuSet_Set(cpuSet, v + k);
  }
  #endif

  return mask;
}
988 
989 
// State shared by benchmark threads (CEncoderInfo::Common points to it).
// NOTE(review): presumably the threads wait for StartEvent and check ExitMode - confirm against the users.
struct CBenchSyncCommon
{
  bool ExitMode;
  NSynchronization::CManualResetEvent StartEvent;

  CBenchSyncCommon(): ExitMode(false) {}
};
997 
998 #endif
999 
1000 
1001 
// Mode of CRC check for decoded data (type of CEncoderInfo::CheckCrcMode_Dec).
// NOTE(review): the meaning below is taken from the names only - confirm in the decoder code.
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never = 0,     // presumably: CRC is never checked
  k_CheckCrcMode_Always = 1,    // presumably: CRC is checked for each pass
  k_CheckCrcMode_FirstPass = 2  // presumably: CRC is checked for the first pass only
};
1008 
1009 class CEncoderInfo;
1010 
1011 class CEncoderInfo Z7_final
1012 {
1013   Z7_CLASS_NO_COPY(CEncoderInfo)
1014 
1015 public:
1016 
1017   #ifndef Z7_ST
1018   NWindows::CThread thread[2];
1019   NSynchronization::CManualResetEvent ReadyEvent;
1020   UInt32 NumDecoderSubThreads;
1021   CBenchSyncCommon *Common;
1022   UInt32 EncoderIndex;
1023   UInt32 NumEncoderInternalThreads;
1024   CAffinityMode AffinityMode;
1025   bool IsGlobalMtMode; // if more than one benchmark encoder threads
1026   #endif
1027 
1028   CMyComPtr<ICompressCoder> _encoder;
1029   CMyComPtr<ICompressFilter> _encoderFilter;
1030   CBenchProgressInfo *progressInfoSpec[2];
1031   CMyComPtr<ICompressProgressInfo> progressInfo[2];
1032   UInt64 NumIterations;
1033 
1034   UInt32 Salt;
1035 
1036   #ifdef USE_ALLOCA
1037   size_t AllocaSize;
1038   #endif
1039 
1040   unsigned KeySize;
1041   Byte _key[32];
1042   Byte _iv[16];
1043 
1044   HRESULT Set_Key_and_IV(ICryptoProperties *cp)
1045   {
1046     RINOK(cp->SetKey(_key, KeySize))
1047     return cp->SetInitVector(_iv, sizeof(_iv));
1048   }
1049 
1050   Byte _psw[16];
1051 
1052   bool CheckCrc_Enc;    /* = 1, if we want to check packed data crcs after each pass
1053                                 used for filter and usual coders */
1054   bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
1055                                 used only for filter */
1056   E_CheckCrcMode CheckCrcMode_Dec;
1057 
1058   struct CDecoderInfo
1059   {
1060     CEncoderInfo *Encoder;
1061     UInt32 DecoderIndex;
1062     bool CallbackMode;
1063 
1064     #ifdef USE_ALLOCA
1065     size_t AllocaSize;
1066     #endif
1067   };
1068   CDecoderInfo decodersInfo[2];
1069 
1070   CMyComPtr<ICompressCoder> _decoders[2];
1071   CMyComPtr<ICompressFilter> _decoderFilter;
1072 
1073   HRESULT Results[2];
1074   CBenchmarkOutStream *outStreamSpec;
1075   CMyComPtr<ISequentialOutStream> outStream;
1076   IBenchCallback *callback;
1077   IBenchPrintCallback *printCallback;
1078   UInt32 crc;
1079   size_t kBufferSize;
1080   size_t compressedSize;
1081   const Byte *uncompressedDataPtr;
1082 
1083   const Byte *fileData;
1084   CBenchRandomGenerator rg;
1085 
1086   CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
1087 
1088   // CBenchmarkOutStream *propStreamSpec;
1089   Byte propsData[kMaxMethodPropSize];
1090   CBufPtrSeqOutStream *propStreamSpec;
1091   CMyComPtr<ISequentialOutStream> propStream;
1092 
1093   unsigned generateDictBits;
1094   COneMethodInfo _method;
1095 
1096   // for decode
1097   size_t _uncompressedDataSize;
1098 
1099   HRESULT Generate();
1100   HRESULT Encode();
1101   HRESULT Decode(UInt32 decoderIndex);
1102 
1103   CEncoderInfo():
1104     #ifndef Z7_ST
1105     Common(NULL),
1106     IsGlobalMtMode(true),
1107     #endif
1108     Salt(0),
1109     KeySize(0),
1110     CheckCrc_Enc(true),
1111     UseRealData_Enc(true),
1112     CheckCrcMode_Dec(k_CheckCrcMode_Always),
1113     outStreamSpec(NULL),
1114     callback(NULL),
1115     printCallback(NULL),
1116     fileData(NULL),
1117     propStreamSpec(NULL)
1118     {}
1119 
1120   #ifndef Z7_ST
1121 
1122   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
1123   {
1124     HRESULT res;
1125     CEncoderInfo *encoder = (CEncoderInfo *)param;
1126     try
1127     {
1128       #ifdef USE_ALLOCA
1129       alloca(encoder->AllocaSize);
1130       #endif
1131 
1132       res = encoder->Encode();
1133     }
1134     catch(...)
1135     {
1136       res = E_FAIL;
1137     }
1138     encoder->Results[0] = res;
1139     if (res != S_OK)
1140       encoder->progressInfoSpec[0]->Status->SetResult(res);
1141     encoder->ReadyEvent.Set();
1142     return THREAD_FUNC_RET_ZERO;
1143   }
1144 
1145   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
1146   {
1147     CDecoderInfo *decoder = (CDecoderInfo *)param;
1148 
1149     #ifdef USE_ALLOCA
1150     alloca(decoder->AllocaSize);
1151     // printf("\nalloca=%d\n", (unsigned)decoder->AllocaSize);
1152     #endif
1153 
1154     CEncoderInfo *encoder = decoder->Encoder;
1155     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
1156     return THREAD_FUNC_RET_ZERO;
1157   }
1158 
1159   HRESULT CreateEncoderThread()
1160   {
1161     WRes res = 0;
1162     if (!ReadyEvent.IsCreated())
1163       res = ReadyEvent.Create();
1164     if (res == 0)
1165       res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
1166           EncoderIndex);
1167     return HRESULT_FROM_WIN32(res);
1168   }
1169 
1170   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
1171       #ifdef USE_ALLOCA
1172       , size_t allocaSize
1173       #endif
1174       )
1175   {
1176     CDecoderInfo &decoder = decodersInfo[index];
1177     decoder.DecoderIndex = index;
1178     decoder.Encoder = this;
1179 
1180     #ifdef USE_ALLOCA
1181     decoder.AllocaSize = allocaSize;
1182     #endif
1183 
1184     decoder.CallbackMode = callbackMode;
1185 
1186     WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
1187         // EncoderIndex * NumEncoderInternalThreads + index
1188         EncoderIndex
1189         );
1190 
1191     return HRESULT_FROM_WIN32(res);
1192   }
1193 
1194   #endif
1195 };
1196 
1197 
1198 
1199 
1200 static size_t GetBenchCompressedSize(size_t bufferSize)
1201 {
1202   return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1203   // kBufferSize / 2;
1204 }
1205 
1206 
/*
  Prepares the data and the coder that are used later by Encode():
    1) selects the uncompressed data (uncompressedDataPtr):
         - fileData, if it is defined (in global MT mode: private copy of fileData in (rg)),
         - or data generated in (rg); (crc) is calculated here only for generated data.
    2) allocates the output buffer (outStreamSpec). For filter mode it also
       copies the uncompressed data to (outStreamSpec) and to (rgCopy).
    3) sends the method properties to the coder, writes the coder properties
       to (propsData) via (propStreamSpec), and sets the password, if the coder
       supports ICryptoSetPassword.
  returns: S_OK, or error code (E_FAIL, E_INVALIDARG, or the error code from
           allocation macro / coder calls).
*/
HRESULT CEncoderInfo::Generate()
{
  const COneMethodInfo &method = _method;

  // we need extra space, if input data is already compressed
  const size_t kCompressedBufferSize = _encoderFilter ?
      kBufferSize :
      GetBenchCompressedSize(kBufferSize);

  // NOTE(review): presumably it catches (size_t) wrap-around in GetBenchCompressedSize()
  if (kCompressedBufferSize < kBufferSize)
    return E_FAIL;

  uncompressedDataPtr = fileData;
  if (fileData)
  {
    #if !defined(Z7_ST)
    if (IsGlobalMtMode)
    {
      /* we copy the data to local buffer of thread to eliminate
         using of shared buffer by different threads */
      // NOTE(review): ALLOC_WITH_HRESULT is defined outside of this function;
      // presumably it returns error code from this function, if allocation fails
      ALLOC_WITH_HRESULT(&rg, kBufferSize)
      memcpy((Byte *)rg, fileData, kBufferSize);
      uncompressedDataPtr = (const Byte *)rg;
    }
    #endif
  }
  else
  {
    // there is no file data: we generate the data in (rg)
    ALLOC_WITH_HRESULT(&rg, kBufferSize)
    // DWORD ttt = GetTickCount();
    if (generateDictBits == 0)
      rg.GenerateSimpleRandom(Salt);
    else
    {
      if (generateDictBits >= sizeof(size_t) * 8
          && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
        return E_INVALIDARG;
      rg.GenerateLz(generateDictBits, Salt);
      // return E_ABORT; // for debug
    }
    // printf("\n%d\n            ", GetTickCount() - ttt);

    // (crc) of generated data is checked later in Decode()
    crc = CrcCalc((const Byte *)rg, rg.Size());
    uncompressedDataPtr = (const Byte *)rg;
  }

  // the output stream object is created only once and it's reused, if Generate() is called again
  if (!outStream)
  {
    outStreamSpec = new CBenchmarkOutStream;
    outStream = outStreamSpec;
  }

  ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)

  if (_encoderFilter)
  {
    /* we try to reduce the number of memcpy() in main encoding loop.
       so we copy data to temp buffers here */
    ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
    memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
    memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
  }

  if (!propStream)
  {
    propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
    propStream = propStreamSpec;
  }
  // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
  // propStreamSpec->Init(true, false);
  // coder properties will be written to fixed buffer (propsData)
  propStreamSpec->Init(propsData, sizeof(propsData));


  // (coder) is the filter or the encoder: optional interfaces are requested from it
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  {
    CMyComPtr<ICompressSetCoderProperties> scp;
    coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      const UInt64 reduceSize = kBufferSize;

      /* in posix new thread uses same affinity as parent thread,
         so we don't need to send affinity to coder in posix */
      UInt64 affMask;
      #if !defined(Z7_ST) && defined(_WIN32)
      {
        CCpuSet cpuSet;
        affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
      }
      #else
        affMask = 0;
      #endif
      // affMask <<= 3; // debug line: to test no affinity in coder;
      // affMask = 0;

      // (affMask == 0) means that affinity is not sent to coder (NULL is passed)
      RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
    }
    else
    {
      // coder can't accept properties: it's error, if there are non-optional properties
      if (method.AreThereNonOptionalProps())
        return E_INVALIDARG;
    }

    CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
    coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
    if (writeCoderProps)
    {
      RINOK(writeCoderProps->WriteCoderProperties(propStream))
    }

    {
      CMyComPtr<ICryptoSetPassword> sp;
      coder.QueryInterface(IID_ICryptoSetPassword, &sp);
      if (sp)
      {
        RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))

        // we must call encoding one time to calculate password key for key cache.
        // it must be after WriteCoderProperties!
        Byte temp[16];
        memset(temp, 0, sizeof(temp));

        if (_encoderFilter)
        {
          // the results of Init() and Filter() are not checked here
          _encoderFilter->Init();
          _encoderFilter->Filter(temp, sizeof(temp));
        }
        else
        {
          // we encode 16 zero bytes; the output goes to CCrcOutStream and it is not used
          CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
          CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
          inStreamSpec->Init(temp, sizeof(temp));

          CCrcOutStream *crcStreamSpec = new CCrcOutStream;
          CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
          crcStreamSpec->Init();

          RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
        }
      }
    }
  }

  return S_OK;
}
1356 
1357 
1358 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1359 {
1360   while (size != 0)
1361   {
1362     UInt32 cur = crc ? 1 << 17 : 1 << 24;
1363     if (cur > size)
1364       cur = (UInt32)size;
1365     UInt32 processed = filter->Filter(data, cur);
1366     /* if (processed > size) (in AES filter), we must fill last block with zeros.
1367        but it is not important for benchmark. So we just copy that data without filtering.
1368        if (processed == 0) then filter can't process more  */
1369     if (processed > size || processed == 0)
1370       processed = (UInt32)size;
1371     if (crc)
1372       *crc = CrcUpdate(*crc, data, processed);
1373     data += processed;
1374     size -= processed;
1375   }
1376 }
1377 
1378 
/*
  Encoding part of benchmark for one encoder object:
    1) calls Generate() to prepare data and coder,
    2) if (Common) is defined (multithreading mode): sets ReadyEvent and waits
       Common->StartEvent; otherwise it sets start time in progressInfoSpec[0],
    3) encodes (or filters) the same data (NumIterations) times,
    4) releases _encoder and _encoderFilter.
  (compressedSize) is set here: kBufferSize for filter, or the size of packed
  stream for encoder. That size must be the same for all iterations.
  returns: S_OK, or error code (E_FAIL for inconsistent results).
*/
HRESULT CEncoderInfo::Encode()
{
  // printf("\nCEncoderInfo::Generate\n");

  RINOK(Generate())

  // printf("\n2222\n");

  #ifndef Z7_ST
  if (Common)
  {
    // we report that this thread is ready, and then we wait the common start signal
    Results[0] = S_OK;
    WRes wres = ReadyEvent.Set();
    if (wres == 0)
      wres = Common->StartEvent.Lock();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    // ExitMode is set, if threads must finish without encoding
    if (Common->ExitMode)
      return S_OK;
  }
  else
  #endif
  {
    CBenchProgressInfo *bpi = progressInfoSpec[0];
    bpi->SetStartTime();
  }


  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
  bi.UnpackSize = 0;
  bi.PackSize = 0;
  CMyComPtr<ICryptoProperties> cp;
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  coder.QueryInterface(IID_ICryptoProperties, &cp);
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;

  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  // filter doesn't change the size of data
  compressedSize = 0;
  if (_encoderFilter)
    compressedSize = kBufferSize;

  // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
  UInt64 prev = 0;

  /* (mask == 0)      : CRC is calculated for each iteration.
     (mask == 0xFFFF) : CRC is calculated only for iterations, where low 16 bits
                        of (i) are zeros, and only if (NumIterations > 0xFFFF) */
  const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
  const bool useCrc = (mask < NumIterations);
  bool crcPrev_defined = false;
  UInt32 crcPrev = 0;

  bool useRealData_Enc = UseRealData_Enc;
  bool data_Was_Changed = false;
  if (useRealData_Enc)
  {
    /* we want memcpy() for each iteration including first iteration.
       So results will be equal for different number of iterations */
    data_Was_Changed = true;
  }

  const UInt64 numIterations = NumIterations;
  UInt64 i = numIterations;
    // printCallback->NewLine();

  // (i) is counted down: first iteration works with (i == numIterations - 1)
  while (i != 0)
  {
    i--;
    // we check for break request after each (1 << 26) bytes of processed unpacked data
    if (printCallback && bi.UnpackSize - prev >= (1 << 26))
    {
      prev = bi.UnpackSize;
      RINOK(printCallback->CheckBreak())
    }

    /*
    CBenchInfo info;
    progressInfoSpec[0]->SetStartTime();
    */

    bool calcCrc = false;
    if (useCrc)
      calcCrc = (((UInt32)i & mask) == 0);

    if (_encoderFilter)
    {
      /* default buffer for filtering is (rgCopy): it's filtered again and again
         without restoring of original data.
         The buffer of (outStreamSpec) is used for first iteration, for iterations
         with CRC calculation, and for all iterations in (useRealData_Enc) mode.
         Original data is restored there with memcpy(), if that buffer
         was already filtered (or always in (useRealData_Enc) mode). */
      Byte *filterData = rgCopy;
      if (i == numIterations - 1 || calcCrc || useRealData_Enc)
      {
        // printf("\nfilterData = (Byte *)*outStreamSpec;\n");
        filterData = (Byte *)*outStreamSpec;
        if (data_Was_Changed)
        {
          // printf("\nmemcpy(filterData, uncompressedDataPtr\n");
          memcpy(filterData, uncompressedDataPtr, kBufferSize);
        }
        data_Was_Changed = true;
      }
      _encoderFilter->Init();
      if (calcCrc)
      {
        // printf("\nInitCrc\n");
        outStreamSpec->InitCrc();
      }
      // printf("\nMy_FilterBench\n");
      My_FilterBench(_encoderFilter, filterData, kBufferSize,
          calcCrc ? &outStreamSpec->Crc : NULL);
    }
    else
    {
      outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
      inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
      RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
      // encoder must read all input data
      if (!inStreamSpec->WasFinished())
        return E_FAIL;
      // packed size is stored after first iteration, and it must be the same for next iterations
      if (compressedSize != outStreamSpec->Pos)
      {
        if (compressedSize != 0)
          return E_FAIL;
        compressedSize = outStreamSpec->Pos;
      }
    }

    // outStreamSpec->Print();

    if (calcCrc)
    {
      // CRC of output data must be the same for all iterations, where it was calculated
      const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
      if (crcPrev_defined && crcPrev != crc2)
        return E_FAIL;
      crcPrev = crc2;
      crcPrev_defined = true;
    }

    bi.UnpackSize += kBufferSize;
    bi.PackSize += compressedSize;

    /*
    {
      progressInfoSpec[0]->SetFinishTime(info);
      info.UnpackSize = 0;
      info.PackSize = 0;
      info.NumIterations = 1;

      info.UnpackSize = kBufferSize;
      info.PackSize = compressedSize;
      // printf("\n%7d\n", encoder.compressedSize);

      RINOK(callback->SetEncodeResult(info, true))
      printCallback->NewLine();
    }
    */

  }

  // encoder objects are not required after encoding
  _encoder.Release();
  _encoderFilter.Release();
  return S_OK;
}
1543 
1544 
/*
  Decoding part of benchmark for decoder (decoderIndex) of this encoder object.
  It decodes the data that was written to (outStreamSpec) by Encode()
  (NumIterations) times, and it checks the results:
    - the size of unpacked data must be (kBufferSize),
    - CRC of unpacked data must be equal to (crc), if CRC check is used
      for that iteration (see CheckCrcMode_Dec).
  Only (decoderIndex == 0) is allowed in filter mode.
  returns: S_OK,
           S_FALSE - if decoded data is wrong, or if input data was not
                     consumed as expected,
           or another error code.
*/
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
  CMyComPtr<IUnknown> coder;
  if (_decoderFilter)
  {
    if (decoderIndex != 0)
      return E_FAIL;
    coder = _decoderFilter;
  }
  else
    coder = decoder;

  // printf("\ndecoderIndex = %d, stack = %p", decoderIndex, &coder);

  // it's error, if encoder has written properties, but decoder can't accept them
  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
  if (!setDecProps && propStreamSpec->GetPos() != 0)
    return E_FAIL;

  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;

  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
  pi->BenchInfo.UnpackSize = 0;
  pi->BenchInfo.PackSize = 0;

  #ifndef Z7_ST
  {
    CMyComPtr<ICompressSetCoderMt> setCoderMt;
    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
    if (setCoderMt)
    {
      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
    }
  }
  #endif

  CMyComPtr<ICompressSetCoderProperties> scp;
  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
  if (scp)
  {
    const UInt64 reduceSize = _uncompressedDataSize;
    RINOK(_method.SetCoderProps(scp, &reduceSize))
  }

  CMyComPtr<ICryptoProperties> cp;
  coder.QueryInterface(IID_ICryptoProperties, &cp);

  if (setDecProps)
  {
    // decoder gets the properties that were written to (propsData) in Generate()
    RINOK(setDecProps->SetDecoderProperties2(
        /* (const Byte *)*propStreamSpec, */
        propsData,
        (UInt32)propStreamSpec->GetPos()))
  }

  {
    CMyComPtr<ICryptoSetPassword> sp;
    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    if (sp)
    {
      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
    }
  }

  UInt64 prev = 0;

  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  CMyComPtr<ICompressSetFinishMode> setFinishMode;

  if (_decoderFilter)
  {
    // (rgCopy) can be used as temp buffer for (compressedSize) bytes in the loop below
    if (compressedSize > rgCopy.Size())
      return E_FAIL;
  }
  else
  {
    decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
  }

  const UInt64 numIterations = NumIterations;
  const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;

  for (UInt64 i = 0; i < numIterations; i++)
  {
    // we check for break request after each (1 << 26) bytes of processed unpacked data
    if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
    {
      RINOK(printCallback->CheckBreak())
      prev = pi->BenchInfo.UnpackSize;
    }

    const UInt64 outSize = kBufferSize;
    bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);

    crcOutStreamSpec->Init();

    if (_decoderFilter)
    {
      /* filter works in place.
         k_CheckCrcMode_Always: the data is copied to (rgCopy) for each iteration
           and it is filtered there, so the data in (outStreamSpec) is not changed,
           and CRC is checked for each iteration.
         another modes (except of k_CheckCrcMode_Never): the data is filtered
           in the buffer of (outStreamSpec), and CRC is checked only
           for first iteration. */
      Byte *filterData = (Byte *)*outStreamSpec;
      if (calcCrc)
      {
        calcCrc = (i == 0);
        if (checkCrcMode == k_CheckCrcMode_Always)
        {
          calcCrc = true;
          memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
          filterData = rgCopy;
        }
      }
      _decoderFilter->Init();
      My_FilterBench(_decoderFilter, filterData, compressedSize,
          calcCrc ? &crcOutStreamSpec->Crc : NULL);
    }
    else
    {
      crcOutStreamSpec->CalcCrc = calcCrc;
      inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);

      if (setFinishMode)
      {
        RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
      }

      RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))

      // the consumption of input data is checked only for decoders that support finish mode
      if (setFinishMode)
      {
        if (!inStreamSpec->WasFinished())
          return S_FALSE;

        CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
        decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);

        if (getInStreamProcessedSize)
        {
          UInt64 processed;
          RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
          if (processed != compressedSize)
            return S_FALSE;
        }
      }

      if (crcOutStreamSpec->Pos != outSize)
        return S_FALSE;
    }

    // (crc) is CRC of original uncompressed data
    if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
      return S_FALSE;

    pi->BenchInfo.UnpackSize += kBufferSize;
    pi->BenchInfo.PackSize += compressedSize;
  }

  // decoder objects are not required after decoding
  decoder.Release();
  _decoderFilter.Release();
  return S_OK;
}
1709 
1710 
// NOTE(review): presumably it's the upper limit for the number of benchmark threads;
// it's not used in the nearby code - confirm at the places of use.
static const UInt32 kNumThreadsMax = (1 << 12);
1712 
1713 struct CBenchEncoders
1714 {
1715   CEncoderInfo *encoders;
1716   CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1717   ~CBenchEncoders() { delete []encoders; }
1718 };
1719 
1720 
1721 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1722 {
1723   if (numCommands < (1 << 4))
1724     numCommands = (1 << 4);
1725   UInt64 res = complexInCommands / numCommands;
1726   return (res == 0 ? 1 : res);
1727 }
1728 
1729 
1730 
1731 #ifndef Z7_ST
1732 
1733 // ---------- CBenchThreadsFlusher ----------
1734 
1735 struct CBenchThreadsFlusher
1736 {
1737   CBenchEncoders *EncodersSpec;
1738   CBenchSyncCommon Common;
1739   unsigned NumThreads;
1740   bool NeedClose;
1741 
1742   CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1743 
1744   ~CBenchThreadsFlusher()
1745   {
1746     StartAndWait(true);
1747   }
1748 
1749   WRes StartAndWait(bool exitMode = false);
1750 };
1751 
1752 
1753 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1754 {
1755   if (!NeedClose)
1756     return 0;
1757 
1758   Common.ExitMode = exitMode;
1759   WRes res = Common.StartEvent.Set();
1760 
1761   for (unsigned i = 0; i < NumThreads; i++)
1762   {
1763     NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1764     if (t.IsCreated())
1765     {
1766       WRes res2 = t.Wait_Close();
1767       if (res == 0)
1768         res = res2;
1769     }
1770   }
1771   NeedClose = false;
1772   return res;
1773 }
1774 
1775 #endif // Z7_ST
1776 
1777 
1778 
1779 static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
1780 {
1781   for (size_t i = 0; i < size; i++)
1782   {
1783     data[i] = (Byte)startValue;
1784     startValue++;
1785   }
1786 }
1787 
1788 
1789 
/*
  Runs the benchmark (encoding and then decoding) for one method.
    complexInCommands : it's divided by estimated number of commands per pass
                        to get the number of iterations (see GetNumIterations())
    oldLzmaBenchMode, numThreads, affinityMode : (only if Z7_ST is not defined)
                        threading mode of benchmark
    method2           : method name and properties (local copy can be changed here)
    uncompressedDataSize : size of data buffer for each encoder
    fileData          : real data for benchmark, or NULL to use generated data
    generateDictBits  : parameter for data generator (see CEncoderInfo::Generate())
    printCallback     : optional callback for break checking
    callback          : it gets the results: SetEncodeResult() / SetDecodeResult()
    benchProps        : complexity properties of method
  returns: S_OK, E_NOTIMPL (method or coder is not available),
           E_INVALIDARG (number of streams is not 1), or another error code.
*/
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    #ifndef Z7_ST
      bool oldLzmaBenchMode,
      UInt32 numThreads,
      const CAffinityMode *affinityMode,
    #endif
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  bool isFilter;
  const int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams, isFilter);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef Z7_ST
    numEncoderThreads = numThreads;

    /* old LZMA benchmark mode: if LZMA encoder uses more than one thread,
       we use 2 times smaller number of encoders, and 2 decoders per each encoder */
    if (oldLzmaBenchMode)
    if (methodId == k_LZMA)
    {
      if (numThreads == 1 && method.Get_NumThreads() < 0)
        method.AddProp_NumThreads(1);
      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        numEncoderThreads = (numThreads + 1) / 2; // 20.03
        numSubDecoderThreads = 2;
      }
    }

  // encoding is executed in separated threads also for one encoder, if affinity is required
  const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();

  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // ---------- creation of coders and setting of modes for each encoder ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.callback = (i == 0) ? callback : NULL;
    encoder.printCallback = printCallback;

    #ifndef Z7_ST
    encoder.EncoderIndex = i;
    encoder.NumEncoderInternalThreads = numSubDecoderThreads;
    encoder.AffinityMode = *affinityMode;

    /*
    if (numSubDecoderThreads > 1)
    if (encoder.AffinityMode.NeedAffinity()
        && encoder.AffinityMode.NumBundleThreads == 1)
    {
      // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
      if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
        encoder.AffinityMode.NumBundleThreads *= 2;
    }
    */

    #endif

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // fixed pseudo-random values for crypto methods
    SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
    SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
    SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }

    // real data and CRC check for each iteration are used for methods with (EncComplex > 30)
    encoder.UseRealData_Enc =
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;

    encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    if (benchProps->DecComplexCompr +
        benchProps->DecComplexUnc <= 30)
      encoder.CheckCrcMode_Dec =
          k_CheckCrcMode_FirstPass; // for filters
          // k_CheckCrcMode_Never; // for debug
          // k_CheckCrcMode_Always; // for debug
    // benchmark with file data always uses real data and full CRC check in decoder
    if (fileData)
    {
      encoder.UseRealData_Enc = true;
      encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    }
  }

  // CRC of file data is calculated once for all encoders.
  // For generated data (crc) is calculated later in CEncoderInfo::Generate()
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef Z7_ST
  /* (encoderFlusher) is declared after (encodersSpec), so its destructor is called
     before the destructor of (encodersSpec): for any return from this function
     the encoder threads are released and waited before the encoders are deleted */
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  // ---------- Encode ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
    // encoder.NumIterations = 3;
    {
      // (Salt) is derived from encoder index (i); it's used by data generator
#if 0
      #define kCrcPoly 0xEDB88320
      UInt32 r = i;
      unsigned num = numEncoderThreads < 256 ? 8 : 16;
      do
        r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
      while (--num);
      encoder.Salt = r;
#else
      UInt32 salt0 = g_CrcTable[(Byte)i];
      UInt32 salt1 = g_CrcTable[(Byte)(i >> 8)];
      encoder.Salt = salt0 ^ (salt1 << 3);
#endif
    }

    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf("\n encoder index = %d, Salt = %8x\n", i, encoder.Salt);

    encoder.KeySize = benchProps->KeySize;

    // two progress objects per encoder: progressInfo[j] is used by decoder (j)
    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    // only the first progress object of first encoder sends the results to (callback)
    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef Z7_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      encoder.AllocaSize = BENCH_ALLOCA_VALUE(i);
      #endif

      encoder.Common = &encoderFlusher.Common;
      encoder.IsGlobalMtMode = numEncoderThreads > 1;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak())
  }

  #ifndef Z7_ST
  if (mtEncMode)
  {
    // we wait until each encoder thread has finished Generate() (see CEncoderInfo::Encode())
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      const WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0])
    }

    // time measuring starts here: data generation is not included
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    // all encoder threads start encoding after common start signal
    const WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode())
  }

  RINOK(status.Res)

  CBenchInfo info;

  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  // sizes in (info) are the sums of sizes of one pass of each encoder
  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }

  RINOK(callback->SetEncodeResult(info, true))




  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
  #ifndef Z7_ST
  const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    /*
    #ifndef Z7_ST
    // encoder.affinityMode = *affinityMode;
    if (encoder.NumEncoderInternalThreads != 1)
      encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
    #endif
    */


    // the number of iterations is calculated from the sizes of first encoder,
    // and all another encoders use the same number
    if (i == 0)
    {
      encoder.NumIterations = GetNumIterations(
          benchProps->GetNumCommands_Dec(
              encoder.compressedSize,
              encoder.kBufferSize),
          complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef Z7_ST
    {
      const int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
    }
    if (mtDecoderMode)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        // only first decoder of first encoder works in callback mode
        const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , BENCH_ALLOCA_VALUE(i * numSubDecoderThreads + j)
            #endif
            );
        RINOK(res)
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0))
    }
  }

  #ifndef Z7_ST
  if (mtDecoderMode)
  {
    // we wait all decoder threads, and we keep the first error of each type
    WRes wres = 0;
    HRESULT res = S_OK;
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        const WRes wres2 = encoder.thread[j].
            // Wait(); // later we can get thread times from thread in UNDER_CE
            Wait_Close();
        if (wres == 0 && wres2 != 0)
          wres = wres2;
        const HRESULT res2 = encoder.Results[j];
        if (res == 0 && res2 != 0)
          res = res2;
      }
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    RINOK(res)
  }
  #endif // Z7_ST

  RINOK(status.Res)
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  /*
  #ifndef Z7_ST
  #ifdef UNDER_CE
  if (mtDecoderMode)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif
  */

  info.UnpackSize = 0;
  info.PackSize = 0;
  // each encoder object was decoded by (numSubDecoderThreads) decoders
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
  RINOK(callback->SetDecodeResult(info, true))

  return S_OK;
}
2170 
2171 
2172 
2173 static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
2174 {
2175   /*
2176   if (dictSizeLog < 32)
2177     return (UInt32)1 << dictSizeLog;
2178   else
2179     return (UInt32)(Int32)-1;
2180   */
2181   return (UInt64)1 << dictSizeLog;
2182 }
2183 
2184 
2185 // it's limit of current LZMA implementation that can be changed later
2186 #define kLzmaMaxDictSize ((UInt32)15 << 28)
2187 
2188 static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
2189 {
2190   if (dict == 0)
2191     dict = 1;
2192   if (dict > kLzmaMaxDictSize)
2193     dict = kLzmaMaxDictSize;
2194   UInt32 hs = (UInt32)dict - 1;
2195   hs |= (hs >> 1);
2196   hs |= (hs >> 2);
2197   hs |= (hs >> 4);
2198   hs |= (hs >> 8);
2199   hs >>= 1;
2200   hs |= 0xFFFF;
2201   if (hs > (1 << 24))
2202     hs >>= 1;
2203   hs++;
2204   hs += (1 << 16);
2205 
2206   const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
2207   UInt64 blockSize = (UInt64)dict + (1 << 16)
2208       + (multiThread ? (1 << 20) : 0);
2209   blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
2210   if (blockSize >= kBlockSizeMax)
2211     blockSize = kBlockSizeMax;
2212 
2213   UInt64 son = (UInt64)dict;
2214   if (btMode)
2215     son *= 2;
2216   const UInt64 v = (hs + son) * 4 + blockSize +
2217       (1 << 20) + (multiThread ? (6 << 20) : 0);
2218 
2219   // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
2220   // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
2221   return v;
2222 }
2223 
2224 
2225 UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2226 {
2227   const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2228   const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2229   if (level < 0)
2230     level = 5;
2231   const int algo = (level < 5 ? 0 : 1);
2232   const int btMode = (algo == 0 ? 0 : 1);
2233 
2234   UInt32 numBigThreads = numThreads;
2235   const bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2236   if (btMode)
2237   {
2238     if (!totalBench && lzmaMt)
2239       numBigThreads /= 2;
2240   }
2241   return ((UInt64)kBufferSize + kCompressedBufferSize +
2242     GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2243 }
2244 
2245 static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
2246 {
2247   // dictionary += (dictionary >> 9); // for page tables (virtual memory)
2248   return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20);
2249 }
2250 
2251 
2252 // ---------- CRC and HASH ----------
2253 
2254 struct CCrcInfo_Base
2255 {
2256   CMidAlignedBuffer Buffer;
2257   const Byte *Data;
2258   size_t Size;
2259   bool CreateLocalBuf;
2260   UInt32 CheckSum_Res;
2261 
2262   CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2263 
2264   HRESULT Generate(const Byte *data, size_t size);
2265   HRESULT CrcProcess(UInt64 numIterations,
2266       const UInt32 *checkSum, IHasher *hf,
2267       IBenchPrintCallback *callback);
2268 };
2269 
2270 
2271 // for debug: define it to test hash calling with unaligned data
2272 // #define Z7_BENCH_HASH_ALIGN_BUF_OFFSET  3
2273 
2274 HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2275 {
2276   Size = size;
2277   Data = data;
2278   if (!data || CreateLocalBuf)
2279   {
2280     Byte *buf;
2281     const size_t size2 = (size + k_RandBuf_AlignMask) & ~(size_t)k_RandBuf_AlignMask;
2282     if (size2 < size)
2283       return E_OUTOFMEMORY;
2284 #ifdef Z7_BENCH_HASH_ALIGN_BUF_OFFSET
2285     ALLOC_WITH_HRESULT(&Buffer, size2 + Z7_BENCH_HASH_ALIGN_BUF_OFFSET)
2286     buf = Buffer + Z7_BENCH_HASH_ALIGN_BUF_OFFSET;
2287 #else
2288     ALLOC_WITH_HRESULT(&Buffer, size2)
2289     buf = Buffer;
2290 #endif
2291     Data = buf;
2292     if (!data)
2293       RandGen_BufAfterPad(buf, size);
2294     else if (size != 0) // (CreateLocalBuf == true)
2295       memcpy(buf, data, size);
2296   }
2297   return S_OK;
2298 }
2299 
2300 
2301 HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2302     const UInt32 *checkSum, IHasher *hf,
2303     IBenchPrintCallback *callback)
2304 {
2305   MY_ALIGN(16)
2306   UInt32 hash32[64 / 4];
2307   memset(hash32, 0, sizeof(hash32));
2308 
2309   CheckSum_Res = 0;
2310 
2311   const UInt32 hashSize = hf->GetDigestSize();
2312   if (hashSize > sizeof(hash32))
2313     return S_FALSE;
2314 
2315   const Byte *buf = Data;
2316   const size_t size = Size;
2317   UInt32 checkSum_Prev = 0;
2318 
2319   UInt64 prev = 0;
2320   UInt64 cur = 0;
2321 
2322   do
2323   {
2324     hf->Init();
2325     size_t pos = 0;
2326     do
2327     {
2328       const size_t rem = size - pos;
2329       const UInt32 kStep = ((UInt32)1 << 31);
2330       const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2331       hf->Update(buf + pos, curSize);
2332       pos += curSize;
2333     }
2334     while (pos != size);
2335 
2336     hf->Final((Byte *)(void *)hash32);
2337     UInt32 sum = 0;
2338     for (UInt32 j = 0; j < hashSize; j += 4)
2339     {
2340       sum = rotlFixed(sum, 11);
2341       sum += GetUi32((const Byte *)(const void *)hash32 + j);
2342     }
2343     if (checkSum)
2344     {
2345       if (sum != *checkSum)
2346         return S_FALSE;
2347     }
2348     else
2349     {
2350       checkSum_Prev = sum;
2351       checkSum = &checkSum_Prev;
2352     }
2353     if (callback)
2354     {
2355       cur += size;
2356       if (cur - prev >= ((UInt32)1 << 30))
2357       {
2358         prev = cur;
2359         RINOK(callback->CheckBreak())
2360       }
2361     }
2362   }
2363   while (--numIterations);
2364 
2365   CheckSum_Res = checkSum_Prev;
2366   return S_OK;
2367 }
2368 
2369 extern
2370 UInt32 g_BenchCpuFreqTemp; // we need non-static variavble to disable compiler optimization
2371 UInt32 g_BenchCpuFreqTemp = 1;
2372 
2373 #define YY1 sum += val; sum ^= val;
2374 #define YY3 YY1 YY1 YY1 YY1
2375 #define YY5 YY3 YY3 YY3 YY3
2376 #define YY7 YY5 YY5 YY5 YY5
2377 static const UInt32 kNumFreqCommands = 128;
2378 
2379 EXTERN_C_BEGIN
2380 
2381 static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
2382 {
2383   for (UInt32 i = 0; i < num; i++)
2384   {
2385     YY7
2386   }
2387   return sum;
2388 }
2389 
2390 EXTERN_C_END
2391 
2392 
2393 #ifndef Z7_ST
2394 
2395 struct CBaseThreadInfo
2396 {
2397   NWindows::CThread Thread;
2398   IBenchPrintCallback *Callback;
2399   HRESULT CallbackRes;
2400 
2401   WRes Wait_If_Created()
2402   {
2403     if (!Thread.IsCreated())
2404       return 0;
2405     return Thread.Wait_Close();
2406   }
2407 };
2408 
2409 struct CFreqInfo: public CBaseThreadInfo
2410 {
2411   UInt32 ValRes;
2412   UInt32 Size;
2413   UInt64 NumIterations;
2414 };
2415 
2416 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2417 {
2418   CFreqInfo *p = (CFreqInfo *)param;
2419 
2420   UInt32 sum = g_BenchCpuFreqTemp;
2421   for (UInt64 k = p->NumIterations; k > 0; k--)
2422   {
2423     if (p->Callback)
2424     {
2425       p->CallbackRes = p->Callback->CheckBreak();
2426       if (p->CallbackRes != S_OK)
2427         break;
2428     }
2429     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2430   }
2431   p->ValRes = sum;
2432   return THREAD_FUNC_RET_ZERO;
2433 }
2434 
2435 struct CFreqThreads
2436 {
2437   CFreqInfo *Items;
2438   UInt32 NumThreads;
2439 
2440   CFreqThreads(): Items(NULL), NumThreads(0) {}
2441 
2442   WRes WaitAll()
2443   {
2444     WRes wres = 0;
2445     for (UInt32 i = 0; i < NumThreads; i++)
2446     {
2447       WRes wres2 = Items[i].Wait_If_Created();
2448       if (wres == 0 && wres2 != 0)
2449         wres = wres2;
2450     }
2451     NumThreads = 0;
2452     return wres;
2453   }
2454 
2455   ~CFreqThreads()
2456   {
2457     WaitAll();
2458     delete []Items;
2459   }
2460 };
2461 
2462 
2463 static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2464 
2465 struct CCrcInfo: public CBaseThreadInfo
2466 {
2467   const Byte *Data;
2468   size_t Size;
2469   UInt64 NumIterations;
2470   bool CheckSumDefined;
2471   UInt32 CheckSum;
2472   CMyComPtr<IHasher> Hasher;
2473   HRESULT Res;
2474   UInt32 CheckSum_Res;
2475 
2476   #ifndef Z7_ST
2477   NSynchronization::CManualResetEvent ReadyEvent;
2478   UInt32 ThreadIndex;
2479   CBenchSyncCommon *Common;
2480   CAffinityMode AffinityMode;
2481   #endif
2482 
2483   // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
2484   // so we uses non-local CCrcInfo_Base.
2485   CCrcInfo_Base crcib;
2486 
2487   HRESULT CreateThread()
2488   {
2489     WRes res = 0;
2490     if (!ReadyEvent.IsCreated())
2491       res = ReadyEvent.Create();
2492     if (res == 0)
2493       res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
2494           ThreadIndex);
2495     return HRESULT_FROM_WIN32(res);
2496   }
2497 
2498   #ifdef USE_ALLOCA
2499   size_t AllocaSize;
2500   #endif
2501 
2502   void Process();
2503 
2504   CCrcInfo(): Res(E_FAIL) {}
2505 };
2506 
2507 static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
2508 // static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2509 
2510 void CCrcInfo::Process()
2511 {
2512   crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2513   // we can use additional Generate() passes to reduce some time effects for new page allocation
2514   // for (unsigned y = 0; y < 10; y++)
2515   Res = crcib.Generate(Data, Size);
2516 
2517   // if (Common)
2518   {
2519     WRes wres = ReadyEvent.Set();
2520     if (wres != 0)
2521     {
2522       if (Res == 0)
2523         Res = HRESULT_FROM_WIN32(wres);
2524       return;
2525     }
2526     if (Res != 0)
2527       return;
2528 
2529     wres = Common->StartEvent.Lock();
2530 
2531     if (wres != 0)
2532     {
2533       Res = HRESULT_FROM_WIN32(wres);
2534       return;
2535     }
2536     if (Common->ExitMode)
2537       return;
2538   }
2539 
2540   Res = crcib.CrcProcess(NumIterations,
2541       CheckSumDefined ? &CheckSum : NULL, Hasher,
2542       Callback);
2543   CheckSum_Res = crcib.CheckSum_Res;
2544   /*
2545   We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2546   to time of benchmark. So we don't free Buffer here
2547   */
2548   // crcib.Buffer.Free();
2549 }
2550 
2551 
2552 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2553 {
2554   CCrcInfo *p = (CCrcInfo *)param;
2555 
2556   #ifdef USE_ALLOCA
2557   alloca(p->AllocaSize);
2558   #endif
2559   p->Process();
2560   return THREAD_FUNC_RET_ZERO;
2561 }
2562 
2563 
2564 struct CCrcThreads
2565 {
2566   CCrcInfo *Items;
2567   unsigned NumThreads;
2568   CBenchSyncCommon Common;
2569   bool NeedClose;
2570 
2571   CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}
2572 
2573   WRes StartAndWait(bool exitMode = false);
2574 
2575   ~CCrcThreads()
2576   {
2577     StartAndWait(true);
2578     delete []Items;
2579   }
2580 };
2581 
2582 
2583 WRes CCrcThreads::StartAndWait(bool exitMode)
2584 {
2585   if (!NeedClose)
2586     return 0;
2587 
2588   Common.ExitMode = exitMode;
2589   WRes wres = Common.StartEvent.Set();
2590 
2591   for (unsigned i = 0; i < NumThreads; i++)
2592   {
2593     WRes wres2 = Items[i].Wait_If_Created();
2594     if (wres == 0 && wres2 != 0)
2595       wres = wres2;
2596   }
2597   NumThreads = 0;
2598   NeedClose = false;
2599   return wres;
2600 }
2601 
2602 #endif
2603 
2604 
2605 /*
2606 static UInt32 CrcCalc1(const Byte *buf, size_t size)
2607 {
2608   UInt32 crc = CRC_INIT_VAL;
2609   for (size_t i = 0; i < size; i++)
2610     crc = CRC_UPDATE_BYTE(crc, buf[i]);
2611   return CRC_GET_DIGEST(crc);
2612 }
2613 */
2614 
2615 /*
2616 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2617 {
2618   RandGen(buf, size, RG);
2619   return CrcCalc1(buf, size);
2620 }
2621 */
2622 
2623 static bool CrcInternalTest()
2624 {
2625   CAlignedBuffer buffer;
2626   const size_t kBufSize = 1 << 11;
2627   const size_t kCheckSize = 1 << 6;
2628   buffer.Alloc(kBufSize);
2629   if (!buffer.IsAllocated())
2630     return false;
2631   Byte *buf = (Byte *)buffer;
2632   RandGen_BufAfterPad(buf, kBufSize);
2633   UInt32 sum = 0;
2634   for (size_t i = 0; i < kBufSize - kCheckSize * 2; i += kCheckSize - 1)
2635     for (size_t j = 0; j < kCheckSize; j++)
2636     {
2637       sum = rotlFixed(sum, 11);
2638       sum += CrcCalc(buf + i + j, j);
2639     }
2640   return sum == 0x28462c7c;
2641 }
2642 
2643 struct CBenchMethod
2644 {
2645   unsigned Weight;
2646   unsigned DictBits;
2647   Int32 EncComplex;
2648   Int32 DecComplexCompr;
2649   Int32 DecComplexUnc;
2650   const char *Name;
2651   // unsigned KeySize;
2652 };
2653 
2654 // #define USE_SW_CMPLX
2655 
2656 #ifdef USE_SW_CMPLX
2657 #define CMPLX(x) ((x) * 1000)
2658 #else
2659 #define CMPLX(x) (x)
2660 #endif
2661 
2662 static const CBenchMethod g_Bench[] =
2663 {
2664   // { 40, 17,  357,  145,   20, "LZMA:x1" },
2665   // { 20, 18,  360,  145,   20, "LZMA2:x1:mt2" },
2666 
2667   { 20, 18,  360,  145,   20, "LZMA:x1" },
2668   { 20, 22,  600,  145,   20, "LZMA:x3" },
2669 
2670   { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
2671   { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },
2672 
2673   { 10, 16,  124,   40,   14, "Deflate:x1" },
2674   { 20, 16,  376,   40,   14, "Deflate:x5" },
2675   { 10, 16, 1082,   40,   14, "Deflate:x7" },
2676   { 10, 17,  422,   40,   14, "Deflate64:x5" },
2677 
2678   { 10, 15,  590,   69,   69, "BZip2:x1" },
2679   { 20, 19,  815,  122,  122, "BZip2:x5" },
2680   { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
2681   { 10, 19, 2530,  122,  122, "BZip2:x7" },
2682 
2683   // { 10, 18, 1010,    0, 1150, "PPMDZip:x1" },
2684   { 10, 18, 1010,    0, 1150, "PPMD:x1" },
2685   // { 10, 22, 1655,    0, 1830, "PPMDZip:x5" },
2686   { 10, 22, 1655,    0, 1830, "PPMD:x5" },
2687 
2688   // {  2,  0,  -16,    0,  -16, "Swap2" },
2689   {  2,  0,  -16,    0,  -16, "Swap4" },
2690 
2691   // {  2,  0,    3,    0,    4, "Delta:1" },
2692   // {  2,  0,    3,    0,    4, "Delta:2" },
2693   // {  2,  0,    3,    0,    4, "Delta:3" },
2694   {  2,  0,    3,    0,    4, "Delta:4" },
2695   // {  2,  0,    3,    0,    4, "Delta:8" },
2696   // {  2,  0,    3,    0,    4, "Delta:32" },
2697 
2698   {  2,  0,    2,    0,    2, "BCJ" },
2699   {  2,  0,    1,    0,    1, "ARM64" },
2700   {  2,  0,    1,    0,    1, "RISCV" },
2701 
2702   // { 10,  0,   18,    0,   18, "AES128CBC:1" },
2703   // { 10,  0,   21,    0,   21, "AES192CBC:1" },
2704   { 10,  0,   24,    0,   24, "AES256CBC:1" },
2705 
2706   // { 10,  0,   18,    0,   18, "AES128CTR:1" },
2707   // { 10,  0,   21,    0,   21, "AES192CTR:1" },
2708   // { 10,  0,   24,    0,   24, "AES256CTR:1" },
2709   // {  2,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
2710   // {  2,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
2711   {  2,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },
2712 
2713   // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
2714   // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
2715   // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },
2716 
2717   // {  1,  0, CMPLX(6), 0, -2, "AES128CBC:3" },
2718   // {  1,  0, CMPLX(7), 0, -2, "AES192CBC:3" },
2719   {  1,  0, CMPLX(8), 0, -2, "AES256CBC:3" }
2720 
2721   // {  1,  0, CMPLX(1), 0, -2, "AES128CTR:3" },
2722   // {  1,  0, CMPLX(1), 0, -2, "AES192CTR:3" },
2723   // {  1,  0, CMPLX(1), 0, -2, "AES256CTR:3" },
2724 };
2725 
2726 struct CBenchHash
2727 {
2728   unsigned Weight;
2729   UInt32 Complex;
2730   UInt32 CheckSum;
2731   const char *Name;
2732 };
2733 
2734 // #define ARM_CRC_MUL 100
2735 #define ARM_CRC_MUL 1
2736 
2737 #define k_Hash_Complex_Mult 256
2738 
2739 static const CBenchHash g_Hash[] =
2740 {
2741   { 20,   256, 0x21e207bb, "CRC32:12" } ,
2742   {  2,   128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
2743   {  2,    64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
2744   { 10,   256, 0x41b901d1, "CRC64" },
2745   { 10,    64, 0x43eac94f, "XXH64" },
2746 
2747   { 10, 5100,       0x7913ba03, "SHA256:1" },
2748   {  2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
2749 
2750   { 10, 2340,       0xff769021, "SHA1:1" },
2751   {  2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
2752 
2753   {  2,  4096, 0x85189d02, "BLAKE2sp:1" },
2754   {  2,  1024, 0x85189d02, "BLAKE2sp:2" }, // sse2-way4-fast
2755   {  2,   512, 0x85189d02, "BLAKE2sp:3" }  // avx2-way8-fast
2756 #if 0
2757   , {  2,  2048, 0x85189d02, "BLAKE2sp:4" } // sse2-way1
2758   , {  2,  1024, 0x85189d02, "BLAKE2sp:5" } // sse2-way2
2759   , {  2,  1024, 0x85189d02, "BLAKE2sp:6" } // avx2-way2
2760   , {  2,  1024, 0x85189d02, "BLAKE2sp:7" } // avx2-way4
2761 #endif
2762 };
2763 
2764 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2765 {
2766   char s[128];
2767   unsigned startPos = (unsigned)sizeof(s) - 32;
2768   memset(s, ' ', startPos);
2769   ConvertUInt64ToString(value, s + startPos);
2770   // if (withSpace)
2771   {
2772     startPos--;
2773     size++;
2774   }
2775   unsigned len = (unsigned)strlen(s + startPos);
2776   if (size > len)
2777   {
2778     size -= len;
2779     if (startPos < size)
2780       startPos = 0;
2781     else
2782       startPos -= size;
2783   }
2784   f.Print(s + startPos);
2785 }
2786 
// Widths of the columns of benchmark report.
static const unsigned kFieldSize_Name      = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed     = 9;
static const unsigned kFieldSize_Usage     = 5;
static const unsigned kFieldSize_RU        = 6;
static const unsigned kFieldSize_Rating    = 6;
static const unsigned kFieldSize_EU        = 5;
static const unsigned kFieldSize_Effec     = 5;
static const unsigned kFieldSize_CrcSpeed  = 8;


// Combined widths. PrintNumber() adds one space before each field:
// the constants 4 and 2 are the numbers of fields in each group.
static const unsigned kFieldSize_TotalSize = 4
    + kFieldSize_Speed
    + kFieldSize_Usage
    + kFieldSize_RU
    + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2
    + kFieldSize_EU
    + kFieldSize_Effec;
2800 
2801 
2802 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2803 {
2804   PrintNumber(f, (rating + 500000) / 1000000, size);
2805 }
2806 
2807 
2808 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2809 {
2810   UInt64 v = 0;
2811   if (divider != 0)
2812     v = (val * 100 + divider / 2) / divider;
2813   PrintNumber(f, v, size);
2814 }
2815 
2816 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2817 {
2818   char s[256];
2819   memset(s, (Byte)c, size);
2820   s[size] = 0;
2821   f.Print(s);
2822 }
2823 
2824 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
2825 {
2826   PrintChars(f, ' ', size);
2827 }
2828 
2829 static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
2830 {
2831   PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
2832 }
2833 
2834 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2835 {
2836   PrintUsage(f, usage, kFieldSize_Usage);
2837   PrintRating(f, rpu, kFieldSize_RU);
2838   PrintRating(f, rating, kFieldSize_Rating);
2839   if (showFreq)
2840   {
2841     if (cpuFreq == 0)
2842       PrintSpaces(f, kFieldSize_EUAndEffec);
2843     else
2844     {
2845       PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2846       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2847     }
2848   }
2849 }
2850 
2851 
2852 void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
2853 {
2854   Speed = info.GetUnpackSizeSpeed();
2855   Usage = info.GetUsage();
2856   RPU = info.GetRatingPerUsage(Rating);
2857 }
2858 
2859 void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2860 {
2861   NumIterations2 *= weight;
2862   RPU *= weight;
2863   Rating *= weight;
2864   Usage *= weight;
2865   Speed *= weight;
2866 }
2867 
2868 void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2869 {
2870   Rating += r.Rating;
2871   Usage += r.Usage;
2872   RPU += r.RPU;
2873   Speed += r.Speed;
2874     // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2875   NumIterations2 += r.NumIterations2;
2876 }
2877 
2878 static void PrintResults(IBenchPrintCallback *f,
2879     const CBenchInfo &info,
2880     unsigned weight,
2881     UInt64 rating,
2882     bool showFreq, UInt64 cpuFreq,
2883     CTotalBenchRes *res)
2884 {
2885   CTotalBenchRes t;
2886   t.Rating = rating;
2887   t.NumIterations2 = 1;
2888   t.Generate_From_BenchInfo(info);
2889 
2890   if (f)
2891   {
2892     if (t.Speed != 0)
2893       PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2894     else
2895       PrintSpaces(*f, 1 + kFieldSize_Speed);
2896   }
2897   if (f)
2898   {
2899     PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2900   }
2901 
2902   if (res)
2903   {
2904     // res->NumIterations1++;
2905     t.Mult_For_Weight(weight);
2906     res->Update_With_Res(t);
2907   }
2908 }
2909 
2910 static void PrintTotals(IBenchPrintCallback &f,
2911     bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2912 {
2913   const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2914   const UInt64 speed = res.Speed / numIterations2;
2915   if (showSpeed && speed != 0)
2916     PrintNumber(f, speed / 1024, kFieldSize_Speed);
2917   else
2918     PrintSpaces(f, 1 + kFieldSize_Speed);
2919 
2920   // PrintSpaces(f, 1 + kFieldSize_Speed);
2921   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2922   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2923 }
2924 
2925 
2926 static void PrintHex(AString &s, UInt64 v)
2927 {
2928   char temp[32];
2929   ConvertUInt64ToHex(v, temp);
2930   s += temp;
2931 }
2932 
2933 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2934 {
2935   AString s;
2936   // s.Add_UInt32(ti.numProcessThreads);
2937   unsigned numSysThreads = ti.GetNumSystemThreads();
2938   if (ti.GetNumProcessThreads() != numSysThreads)
2939   {
2940     // if (ti.numProcessThreads != ti.numSysThreads)
2941     {
2942       s += " / ";
2943       s.Add_UInt32(numSysThreads);
2944     }
2945     s += " : ";
2946     #ifdef _WIN32
2947     PrintHex(s, ti.processAffinityMask);
2948     s += " / ";
2949     PrintHex(s, ti.systemAffinityMask);
2950     #else
2951     unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2952     if (i == 0)
2953       i = 4;
2954     for (; i >= 4; )
2955     {
2956       i -= 4;
2957       unsigned val = 0;
2958       for (unsigned k = 0; k < 4; k++)
2959       {
2960         const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2961         val += (bit << k);
2962       }
2963       PrintHex(s, val);
2964     }
2965     #endif
2966   }
2967   return s;
2968 }
2969 
2970 
2971 #ifdef Z7_LARGE_PAGES
2972 
2973 #ifdef _WIN32
2974 extern bool g_LargePagesMode;
2975 extern "C"
2976 {
2977   extern SIZE_T g_LargePageSize;
2978 }
2979 #endif
2980 
2981 void Add_LargePages_String(AString &s)
2982 {
2983   #ifdef _WIN32
2984   if (g_LargePagesMode || g_LargePageSize != 0)
2985   {
2986     s.Add_OptSpaced("(LP-");
2987     PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
2988     #ifdef MY_CPU_X86_OR_AMD64
2989     if (CPU_IsSupported_PageGB())
2990       s += "-1G";
2991     #endif
2992     if (!g_LargePagesMode)
2993       s += "-NA";
2994     s += ")";
2995   }
2996   #else
2997     s += "";
2998   #endif
2999 }
3000 
3001 #endif
3002 
3003 
3004 
3005 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
3006     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
3007 {
3008   f.Print("RAM ");
3009   f.Print(sizeString);
3010   if (size_Defined)
3011     PrintNumber(f, (size >> 20), 6);
3012   else
3013     f.Print("      ?");
3014   f.Print(" MB");
3015 
3016   #ifdef Z7_LARGE_PAGES
3017   {
3018     AString s;
3019     Add_LargePages_String(s);
3020     f.Print(s);
3021   }
3022   #endif
3023 
3024   f.Print(",  # ");
3025   f.Print(threadsString);
3026   PrintNumber(f, numThreads, 3);
3027 }
3028 
3029 
3030 
3031 struct CBenchCallbackToPrint Z7_final: public IBenchCallback
3032 {
3033   bool NeedPrint;
3034   bool Use2Columns;
3035   bool ShowFreq;
3036   unsigned NameFieldSize;
3037 
3038   unsigned EncodeWeight;
3039   unsigned DecodeWeight;
3040 
3041   UInt64 CpuFreq;
3042   UInt64 DictSize;
3043 
3044   IBenchPrintCallback *_file;
3045   CBenchProps BenchProps;
3046   CTotalBenchRes EncodeRes;
3047   CTotalBenchRes DecodeRes;
3048 
3049   CBenchInfo BenchInfo_Results[2];
3050 
3051   CBenchCallbackToPrint():
3052       NeedPrint(true),
3053       Use2Columns(false),
3054       ShowFreq(false),
3055       NameFieldSize(0),
3056       EncodeWeight(1),
3057       DecodeWeight(1),
3058       CpuFreq(0)
3059       {}
3060 
3061   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3062   void Print(const char *s);
3063   void NewLine();
3064 
3065   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3066   HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3067   HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3068 };
3069 
3070 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
3071 {
3072   ShowFreq = showFreq;
3073   CpuFreq = cpuFreq;
3074   return S_OK;
3075 }
3076 
3077 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3078 {
3079   RINOK(_file->CheckBreak())
3080   if (final)
3081     BenchInfo_Results[0] = info;
3082   if (final)
3083   if (NeedPrint)
3084   {
3085     const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3086     PrintResults(_file, info,
3087         EncodeWeight, rating,
3088         ShowFreq, CpuFreq, &EncodeRes);
3089     if (!Use2Columns)
3090       _file->NewLine();
3091   }
3092   return S_OK;
3093 }
3094 
3095 static const char * const kSep = "  | ";
3096 
3097 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3098 {
3099   RINOK(_file->CheckBreak())
3100   if (final)
3101     BenchInfo_Results[1] = info;
3102   if (final)
3103   if (NeedPrint)
3104   {
3105     const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3106     if (Use2Columns)
3107       _file->Print(kSep);
3108     else
3109       PrintSpaces(*_file, NameFieldSize);
3110     CBenchInfo info2 = info;
3111     info2.UnpackSize *= info2.NumIterations;
3112     info2.PackSize *= info2.NumIterations;
3113     info2.NumIterations = 1;
3114     PrintResults(_file, info2,
3115         DecodeWeight, rating,
3116         ShowFreq, CpuFreq, &DecodeRes);
3117   }
3118   return S_OK;
3119 }
3120 
3121 void CBenchCallbackToPrint::Print(const char *s)
3122 {
3123   _file->Print(s);
3124 }
3125 
3126 void CBenchCallbackToPrint::NewLine()
3127 {
3128   _file->NewLine();
3129 }
3130 
3131 static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3132 {
3133   f.Print(s);
3134   int numSpaces = (int)size - (int)MyStringLen(s);
3135   if (numSpaces > 0)
3136     PrintSpaces(f, (unsigned)numSpaces);
3137 }
3138 
3139 static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3140 {
3141   int numSpaces = (int)size - (int)MyStringLen(s);
3142   if (numSpaces > 0)
3143     PrintSpaces(f, (unsigned)numSpaces);
3144   f.Print(s);
3145 }
3146 
3147 
3148 static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3149 {
3150   UString wildc = GetUnicodeString(mask);
3151   UString bname = GetUnicodeString(name);
3152   wildc.MakeLower_Ascii();
3153   bname.MakeLower_Ascii();
3154   return DoesWildcardMatchName(wildc, bname);
3155 }
3156 
3157 
3158 static HRESULT TotalBench(
3159     DECL_EXTERNAL_CODECS_LOC_VARS
3160     const COneMethodInfo &methodMask,
3161     UInt64 complexInCommands,
3162   #ifndef Z7_ST
3163     UInt32 numThreads,
3164     const CAffinityMode *affinityMode,
3165   #endif
3166     bool forceUnpackSize,
3167     size_t unpackSize,
3168     const Byte *fileData,
3169     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3170 {
3171   for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3172   {
3173     const CBenchMethod &bench = g_Bench[i];
3174     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3175       continue;
3176     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3177     {
3178       unsigned keySize = 32;
3179            if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3180       else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3181       callback->BenchProps.KeySize = keySize;
3182     }
3183     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3184     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3185     callback->BenchProps.EncComplex = bench.EncComplex;
3186 
3187     COneMethodInfo method;
3188     NCOM::CPropVariant propVariant;
3189     propVariant = bench.Name;
3190     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3191 
3192     size_t unpackSize2 = unpackSize;
3193     if (!forceUnpackSize && bench.DictBits == 0)
3194       unpackSize2 = kFilterUnpackSize;
3195 
3196     callback->EncodeWeight = bench.Weight;
3197     callback->DecodeWeight = bench.Weight;
3198 
3199     const HRESULT res = MethodBench(
3200         EXTERNAL_CODECS_LOC_VARS
3201         complexInCommands,
3202         #ifndef Z7_ST
3203         false, numThreads, affinityMode,
3204         #endif
3205         method,
3206         unpackSize2, fileData,
3207         bench.DictBits,
3208         printCallback, callback, &callback->BenchProps);
3209 
3210     if (res == E_NOTIMPL)
3211     {
3212       // callback->Print(" ---");
3213       // we need additional empty line as line for decompression results
3214       if (!callback->Use2Columns)
3215         callback->NewLine();
3216     }
3217     else
3218     {
3219       RINOK(res)
3220     }
3221 
3222     callback->NewLine();
3223   }
3224   return S_OK;
3225 }
3226 
3227 
3228 struct CFreqBench
3229 {
3230   // in:
3231   UInt64 complexInCommands;
3232   UInt32 numThreads;
3233   bool showFreq;
3234   UInt64 specifiedFreq;
3235 
3236   // out:
3237   UInt64 CpuFreqRes;
3238   UInt64 UsageRes;
3239   UInt32 res;
3240 
3241   CFreqBench()
3242     {}
3243 
3244   HRESULT FreqBench(IBenchPrintCallback *_file
3245       #ifndef Z7_ST
3246       , const CAffinityMode *affinityMode
3247       #endif
3248       );
3249 };
3250 
3251 
3252 HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
3253     #ifndef Z7_ST
3254     , const CAffinityMode *affinityMode
3255     #endif
3256     )
3257 {
3258   res = 0;
3259   CpuFreqRes = 0;
3260   UsageRes = 0;
3261 
3262   if (numThreads == 0)
3263     numThreads = 1;
3264 
3265   #ifdef Z7_ST
3266   numThreads = 1;
3267   #endif
3268 
3269   const UInt32 complexity = kNumFreqCommands;
3270   UInt64 numIterations = complexInCommands / complexity;
3271   UInt32 numIterations2 = 1 << 30;
3272   if (numIterations > numIterations2)
3273     numIterations /= numIterations2;
3274   else
3275   {
3276     numIterations2 = (UInt32)numIterations;
3277     numIterations = 1;
3278   }
3279 
3280   CBenchInfoCalc progressInfoSpec;
3281 
3282   #ifndef Z7_ST
3283 
3284   bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();
3285 
3286   if (mtMode)
3287   {
3288     CFreqThreads threads;
3289     threads.Items = new CFreqInfo[numThreads];
3290     UInt32 i;
3291     for (i = 0; i < numThreads; i++)
3292     {
3293       CFreqInfo &info = threads.Items[i];
3294       info.Callback = _file;
3295       info.CallbackRes = S_OK;
3296       info.NumIterations = numIterations;
3297       info.Size = numIterations2;
3298     }
3299     progressInfoSpec.SetStartTime();
3300     for (i = 0; i < numThreads; i++)
3301     {
3302       // Sleep(10);
3303       CFreqInfo &info = threads.Items[i];
3304       WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
3305       if (info.Thread.IsCreated())
3306         threads.NumThreads++;
3307       if (wres != 0)
3308         return HRESULT_FROM_WIN32(wres);
3309     }
3310     WRes wres = threads.WaitAll();
3311     if (wres != 0)
3312       return HRESULT_FROM_WIN32(wres);
3313     for (i = 0; i < numThreads; i++)
3314     {
3315       RINOK(threads.Items[i].CallbackRes)
3316     }
3317   }
3318   else
3319   #endif
3320   {
3321     progressInfoSpec.SetStartTime();
3322     UInt32 sum = g_BenchCpuFreqTemp;
3323     UInt64 k = numIterations;
3324     do
3325     {
3326       sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
3327       if (_file)
3328       {
3329         RINOK(_file->CheckBreak())
3330       }
3331     }
3332     while (--k);
3333     res += sum;
3334   }
3335 
3336   if (res == 0x12345678)
3337   if (_file)
3338   {
3339     RINOK(_file->CheckBreak())
3340   }
3341 
3342   CBenchInfo info;
3343   progressInfoSpec.SetFinishTime(info);
3344 
3345   info.UnpackSize = 0;
3346   info.PackSize = 0;
3347   info.NumIterations = 1;
3348 
3349   const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
3350   const UInt64 rating = info.GetSpeed(numCommands);
3351   CpuFreqRes = rating / numThreads;
3352   UsageRes = info.GetUsage();
3353 
3354   if (_file)
3355   {
3356     PrintResults(_file, info,
3357           0, // weight
3358           rating,
3359           showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
3360     RINOK(_file->CheckBreak())
3361   }
3362 
3363   return S_OK;
3364 }
3365 
3366 
3367 
3368 static HRESULT CrcBench(
3369     DECL_EXTERNAL_CODECS_LOC_VARS
3370     UInt64 complexInCommands,
3371     UInt32 numThreads,
3372     const size_t bufferSize,
3373     const Byte *fileData,
3374 
3375     UInt64 &speed,
3376     UInt64 &usage,
3377 
3378     UInt32 complexity, unsigned benchWeight,
3379     const UInt32 *checkSum,
3380     const COneMethodInfo &method,
3381     IBenchPrintCallback *_file,
3382     #ifndef Z7_ST
3383     const CAffinityMode *affinityMode,
3384     #endif
3385     bool showRating,
3386     CTotalBenchRes *encodeRes,
3387     bool showFreq, UInt64 cpuFreq)
3388 {
3389   if (numThreads == 0)
3390     numThreads = 1;
3391 
3392   #ifdef Z7_ST
3393   numThreads = 1;
3394   #endif
3395 
3396   const AString &methodName = method.MethodName;
3397   // methodName.RemoveChar(L'-');
3398   CMethodId hashID;
3399   if (!FindHashMethod(
3400       EXTERNAL_CODECS_LOC_VARS
3401       methodName, hashID))
3402     return E_NOTIMPL;
3403 
3404   /*
3405   // if will generate random data in each thread, instead of global data
3406   CMidAlignedBuffer buffer;
3407   if (!fileData)
3408   {
3409     ALLOC_WITH_HRESULT(&buffer, bufferSize)
3410     RandGen(buffer, bufferSize);
3411     fileData = buffer;
3412   }
3413   */
3414 
3415   const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
3416   UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
3417   if (numIterations == 0)
3418     numIterations = 1;
3419 
3420   CBenchInfoCalc progressInfoSpec;
3421   CBenchInfo info;
3422 
3423   #ifndef Z7_ST
3424   bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
3425 
3426   if (mtEncMode)
3427   {
3428     CCrcThreads threads;
3429     threads.Items = new CCrcInfo[numThreads];
3430     {
3431       WRes wres = threads.Common.StartEvent.Create();
3432       if (wres != 0)
3433         return HRESULT_FROM_WIN32(wres);
3434       threads.NeedClose = true;
3435     }
3436 
3437     UInt32 i;
3438     for (i = 0; i < numThreads; i++)
3439     {
3440       CCrcInfo &ci = threads.Items[i];
3441       AString name;
3442       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
3443       if (!ci.Hasher)
3444         return E_NOTIMPL;
3445       CMyComPtr<ICompressSetCoderProperties> scp;
3446       ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3447       if (scp)
3448       {
3449         RINOK(method.SetCoderProps(scp))
3450       }
3451 
3452       ci.Callback = _file;
3453       ci.Data = fileData;
3454       ci.NumIterations = numIterations;
3455       ci.Size = bufferSize;
3456       ci.CheckSumDefined = false;
3457       if (checkSum)
3458       {
3459         ci.CheckSum = *checkSum;
3460         ci.CheckSumDefined = true;
3461       }
3462 
3463       #ifdef USE_ALLOCA
3464       ci.AllocaSize = BENCH_ALLOCA_VALUE(i);
3465       #endif
3466     }
3467 
3468     for (i = 0; i < numThreads; i++)
3469     {
3470       CCrcInfo &ci = threads.Items[i];
3471       ci.ThreadIndex = i;
3472       ci.Common = &threads.Common;
3473       ci.AffinityMode = *affinityMode;
3474       HRESULT hres = ci.CreateThread();
3475       if (ci.Thread.IsCreated())
3476         threads.NumThreads++;
3477       if (hres != 0)
3478         return hres;
3479     }
3480 
3481     for (i = 0; i < numThreads; i++)
3482     {
3483       CCrcInfo &ci = threads.Items[i];
3484       WRes wres = ci.ReadyEvent.Lock();
3485       if (wres != 0)
3486         return HRESULT_FROM_WIN32(wres);
3487       RINOK(ci.Res)
3488     }
3489 
3490     progressInfoSpec.SetStartTime();
3491 
3492     WRes wres = threads.StartAndWait();
3493     if (wres != 0)
3494       return HRESULT_FROM_WIN32(wres);
3495 
3496     progressInfoSpec.SetFinishTime(info);
3497 
3498     for (i = 0; i < numThreads; i++)
3499     {
3500       RINOK(threads.Items[i].Res)
3501       if (i != 0)
3502         if (threads.Items[i].CheckSum_Res !=
3503             threads.Items[i - 1].CheckSum_Res)
3504           return S_FALSE;
3505     }
3506   }
3507   else
3508   #endif
3509   {
3510     CMyComPtr<IHasher> hasher;
3511     AString name;
3512     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
3513     if (!hasher)
3514       return E_NOTIMPL;
3515     CMyComPtr<ICompressSetCoderProperties> scp;
3516     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3517     if (scp)
3518     {
3519       RINOK(method.SetCoderProps(scp))
3520     }
3521     CCrcInfo_Base crcib;
3522     crcib.CreateLocalBuf = false;
3523     RINOK(crcib.Generate(fileData, bufferSize))
3524     progressInfoSpec.SetStartTime();
3525     RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
3526     progressInfoSpec.SetFinishTime(info);
3527   }
3528 
3529 
3530   UInt64 unpSize = numIterations * bufferSize;
3531   UInt64 unpSizeThreads = unpSize * numThreads;
3532   info.UnpackSize = unpSizeThreads;
3533   info.PackSize = unpSizeThreads;
3534   info.NumIterations = 1;
3535 
3536   if (_file)
3537   {
3538     if (showRating)
3539     {
3540       UInt64 unpSizeThreads2 = unpSizeThreads;
3541       if (unpSizeThreads2 == 0)
3542         unpSizeThreads2 = numIterations * 1 * numThreads;
3543       const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
3544       const UInt64 rating = info.GetSpeed(numCommands);
3545       PrintResults(_file, info,
3546           benchWeight, rating,
3547           showFreq, cpuFreq, encodeRes);
3548     }
3549     RINOK(_file->CheckBreak())
3550   }
3551 
3552   speed = info.GetSpeed(unpSizeThreads);
3553   usage = info.GetUsage();
3554 
3555   return S_OK;
3556 }
3557 
3558 
3559 
3560 static HRESULT TotalBench_Hash(
3561     DECL_EXTERNAL_CODECS_LOC_VARS
3562     const COneMethodInfo &methodMask,
3563     UInt64 complexInCommands,
3564     UInt32 numThreads,
3565     size_t bufSize,
3566     const Byte *fileData,
3567     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3568     #ifndef Z7_ST
3569     const CAffinityMode *affinityMode,
3570     #endif
3571     CTotalBenchRes *encodeRes,
3572     bool showFreq, UInt64 cpuFreq)
3573 {
3574   for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3575   {
3576     const CBenchHash &bench = g_Hash[i];
3577     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3578       continue;
3579     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3580     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3581     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3582     // callback->BenchProps.EncComplex = bench.EncComplex;
3583 
3584     COneMethodInfo method;
3585     NCOM::CPropVariant propVariant;
3586     propVariant = bench.Name;
3587     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3588 
3589     UInt64 speed, usage;
3590 
3591     const HRESULT res = CrcBench(
3592         EXTERNAL_CODECS_LOC_VARS
3593         complexInCommands,
3594         numThreads, bufSize, fileData,
3595         speed, usage,
3596         bench.Complex, bench.Weight,
3597         (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3598         method,
3599         printCallback,
3600      #ifndef Z7_ST
3601         affinityMode,
3602      #endif
3603         true, // showRating
3604         encodeRes, showFreq, cpuFreq);
3605     if (res == E_NOTIMPL)
3606     {
3607       // callback->Print(" ---");
3608     }
3609     else
3610     {
3611       RINOK(res)
3612     }
3613     callback->NewLine();
3614   }
3615   return S_OK;
3616 }
3617 
3618 struct CTempValues
3619 {
3620   UInt64 *Values;
3621   CTempValues(): Values(NULL) {}
3622   void Alloc(UInt32 num) { Values = new UInt64[num]; }
3623   ~CTempValues() { delete []Values; }
3624 };
3625 
3626 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3627 {
3628   const wchar_t *end;
3629   UInt64 result = ConvertStringToUInt64(s, &end);
3630   if (*end != 0 || s.IsEmpty())
3631     prop = s;
3632   else if (result <= (UInt32)0xFFFFFFFF)
3633     prop = (UInt32)result;
3634   else
3635     prop = result;
3636 }
3637 
3638 
3639 static bool AreSameMethodNames(const char *fullName, const char *shortName)
3640 {
3641   return StringsAreEqualNoCase_Ascii(fullName, shortName);
3642 }
3643 
3644 
3645 
3646 
3647 static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3648 {
3649   PrintRequirements(f, "usage:", true, usage, "Benchmark threads:   ", threads);
3650 }
3651 
3652 
3653 static void Print_Delimiter(IBenchPrintCallback &f)
3654 {
3655   f.Print(" |");
3656 }
3657 
3658 static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3659 {
3660   char s[16];
3661   ConvertUInt32ToString(pow, s);
3662   unsigned pos = MyStringLen(s);
3663   s[pos++] = ':';
3664   s[pos] = 0;
3665   PrintLeft(f, s, kFieldSize_SmallName); // 4
3666 }
3667 
3668 static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3669     UInt64 usage, UInt64 speed)
3670 {
3671   PrintUsage(f, usage, kFieldSize_Usage);
3672   PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3673 }
3674 
3675 
3676 HRESULT Bench(
3677     DECL_EXTERNAL_CODECS_LOC_VARS
3678     IBenchPrintCallback *printCallback,
3679     IBenchCallback *benchCallback,
3680     const CObjectVector<CProperty> &props,
3681     UInt32 numIterations,
3682     bool multiDict,
3683     IBenchFreqCallback *freqCallback)
3684 {
3685   // for (int y = 0; y < 10000; y++)
3686   if (!CrcInternalTest())
3687     return E_FAIL;
3688 
3689   UInt32 numCPUs = 1;
3690   UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
3691 
3692   NSystem::CProcessAffinity threadsInfo;
3693   threadsInfo.InitST();
3694 
3695   #ifndef Z7_ST
3696 
3697   if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3698     numCPUs = threadsInfo.GetNumProcessThreads();
3699   else
3700     numCPUs = NSystem::GetNumberOfProcessors();
3701 
3702   #endif
3703 
3704   // numCPUs = 24;
3705   /*
3706   {
3707     DWORD_PTR mask = (1 << 0);
3708     DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3709     old = old;
3710     DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3711     old2 = old2;
3712     return 0;
3713   }
3714   */
3715 
3716   bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3717 
3718   UInt32 numThreadsSpecified = numCPUs;
3719   bool needSetComplexity = false;
3720   UInt32 testTimeMs = kComplexInMs;
3721   UInt32 startDicLog = 22;
3722   bool startDicLog_Defined = false;
3723   UInt64 specifiedFreq = 0;
3724   bool multiThreadTests = false;
3725   UInt64 complexInCommands = kComplexInCommands;
3726   UInt32 numThreads_Start = 1;
3727 
3728   #ifndef Z7_ST
3729   CAffinityMode affinityMode;
3730   #endif
3731 
3732 
3733   COneMethodInfo method;
3734 
3735   CMidAlignedBuffer fileDataBuffer;
3736   bool use_fileData = false;
3737   bool isFixedDict = false;
3738 
3739   {
3740   unsigned i;
3741 
3742   if (printCallback)
3743   {
3744     for (i = 0; i < props.Size(); i++)
3745     {
3746       const CProperty &property = props[i];
3747       printCallback->Print(" ");
3748       printCallback->Print(GetAnsiString(property.Name));
3749       if (!property.Value.IsEmpty())
3750       {
3751         printCallback->Print("=");
3752         printCallback->Print(GetAnsiString(property.Value));
3753       }
3754     }
3755     if (!props.IsEmpty())
3756       printCallback->NewLine();
3757   }
3758 
3759 
3760   for (i = 0; i < props.Size(); i++)
3761   {
3762     const CProperty &property = props[i];
3763     UString name (property.Name);
3764     name.MakeLower_Ascii();
3765 
3766     if (name.IsEqualTo("file"))
3767     {
3768       if (property.Value.IsEmpty())
3769         return E_INVALIDARG;
3770 
3771       NFile::NIO::CInFile file;
3772       if (!file.Open(us2fs(property.Value)))
3773         return GetLastError_noZero_HRESULT();
3774       size_t len;
3775       {
3776         UInt64 len64;
3777         if (!file.GetLength(len64))
3778           return GetLastError_noZero_HRESULT();
3779         if (printCallback)
3780         {
3781           printCallback->Print("file size =");
3782           PrintNumber(*printCallback, len64, 0);
3783           printCallback->NewLine();
3784         }
3785         len = (size_t)len64;
3786         if (len != len64)
3787           return E_INVALIDARG;
3788       }
3789 
3790       // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3791 
3792       ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3793       use_fileData = true;
3794 
3795       {
3796         size_t processed;
3797         if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3798           return GetLastError_noZero_HRESULT();
3799         if (processed != len)
3800           return E_FAIL;
3801       }
3802       continue;
3803     }
3804 
3805     NCOM::CPropVariant propVariant;
3806     if (!property.Value.IsEmpty())
3807       ParseNumberString(property.Value, propVariant);
3808 
3809     if (name.IsEqualTo("time"))
3810     {
3811       RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3812       needSetComplexity = true;
3813       testTimeMs *= 1000;
3814       continue;
3815     }
3816 
3817     if (name.IsEqualTo("timems"))
3818     {
3819       RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3820       needSetComplexity = true;
3821       continue;
3822     }
3823 
3824     if (name.IsEqualTo("tic"))
3825     {
3826       UInt32 v;
3827       RINOK(ParsePropToUInt32(UString(), propVariant, v))
3828       if (v >= 64)
3829         return E_INVALIDARG;
3830       complexInCommands = (UInt64)1 << v;
3831       continue;
3832     }
3833 
3834     const bool isCurrent_fixedDict = name.IsEqualTo("df");
3835     if (isCurrent_fixedDict)
3836       isFixedDict = true;
3837     if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3838     {
3839       RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3840       if (startDicLog > 32)
3841         return E_INVALIDARG;
3842       startDicLog_Defined = true;
3843       continue;
3844     }
3845 
3846     if (name.IsEqualTo("mts"))
3847     {
3848       RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3849       continue;
3850     }
3851 
3852     if (name.IsEqualTo("af"))
3853     {
3854       UInt32 bundle;
3855       RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3856       if (bundle > 0 && bundle < numCPUs)
3857       {
3858         #ifndef Z7_ST
3859         affinityMode.SetLevels(numCPUs, 2);
3860         affinityMode.NumBundleThreads = bundle;
3861         #endif
3862       }
3863       continue;
3864     }
3865 
3866     if (name.IsEqualTo("freq"))
3867     {
3868       UInt32 freq32 = 0;
3869       RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3870       if (freq32 == 0)
3871         return E_INVALIDARG;
3872       specifiedFreq = (UInt64)freq32 * 1000000;
3873 
3874       if (printCallback)
3875       {
3876         printCallback->Print("freq=");
3877         PrintNumber(*printCallback, freq32, 0);
3878         printCallback->NewLine();
3879       }
3880 
3881       continue;
3882     }
3883 
3884     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3885     {
3886       const UString s = name.Ptr(2);
3887       if (s.IsEqualTo("*")
3888           || (s.IsEmpty()
3889             && propVariant.vt == VT_BSTR
3890             && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3891       {
3892         multiThreadTests = true;
3893         continue;
3894       }
3895       #ifndef Z7_ST
3896       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3897       #endif
3898       continue;
3899     }
3900 
3901     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3902   }
3903   }
3904 
3905   if (printCallback)
3906   {
3907     AString s;
3908 
3909 #if 1 || !defined(Z7_MSC_VER_ORIGINAL) || (Z7_MSC_VER_ORIGINAL >= 1900)
3910     s += "Compiler: ";
3911     GetCompiler(s);
3912     printCallback->Print(s);
3913     printCallback->NewLine();
3914     s.Empty();
3915 #endif
3916 
3917     GetSystemInfoText(s);
3918     printCallback->Print(s);
3919     printCallback->NewLine();
3920   }
3921 
3922   if (printCallback)
3923   {
3924     printCallback->Print("1T CPU Freq (MHz):");
3925   }
3926 
3927   if (printCallback || freqCallback)
3928   {
3929     UInt64 numMilCommands = 1 << 6;
3930     if (specifiedFreq != 0)
3931     {
3932       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3933         numMilCommands >>= 1;
3934     }
3935 
3936     for (int jj = 0;; jj++)
3937     {
3938       if (printCallback)
3939         RINOK(printCallback->CheckBreak())
3940 
3941       UInt64 start = ::GetTimeCount();
3942       UInt32 sum = (UInt32)start;
3943       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3944       if (sum == 0xF1541213)
3945         if (printCallback)
3946           printCallback->Print("");
3947       const UInt64 realDelta = ::GetTimeCount() - start;
3948       start = realDelta;
3949       if (start == 0)
3950         start = 1;
3951       if (start > (UInt64)1 << 61)
3952         start = 1;
3953       const UInt64 freq = GetFreq();
3954       // mips is constant in some compilers
3955       const UInt64 hzVal = MyMultDiv64(numMilCommands * 1000000, freq, start);
3956       const UInt64 mipsVal = numMilCommands * freq / start;
3957       if (printCallback)
3958       {
3959         if (realDelta == 0)
3960         {
3961           printCallback->Print(" -");
3962         }
3963         else
3964         {
3965           // PrintNumber(*printCallback, start, 0);
3966           PrintNumber(*printCallback, mipsVal, 5);
3967         }
3968       }
3969       if (freqCallback)
3970       {
3971         RINOK(freqCallback->AddCpuFreq(1, hzVal, kBenchmarkUsageMult))
3972       }
3973 
3974       if (jj >= 1)
3975       {
3976         bool needStop = (numMilCommands >= (1 <<
3977           #ifdef _DEBUG
3978             7
3979           #else
3980             11
3981           #endif
3982           ));
3983         if (start >= freq * 16)
3984         {
3985           printCallback->Print(" (Cmplx)");
3986           if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
3987           {
3988             needSetComplexity = true;
3989           }
3990           needStop = true;
3991         }
3992         if (needSetComplexity)
3993           SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
3994         if (needStop)
3995           break;
3996         numMilCommands <<= 1;
3997       }
3998     }
3999     if (freqCallback)
4000     {
4001       RINOK(freqCallback->FreqsFinished(1))
4002     }
4003   }
4004 
4005   if (numThreadsSpecified >= 2)
4006   if (printCallback || freqCallback)
4007   {
4008     if (printCallback)
4009       printCallback->NewLine();
4010 
4011     /* it can show incorrect frequency for HT threads.
4012        so we reduce freq test to (numCPUs / 2) */
4013 
4014     UInt32 numThreads = (numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2 : numThreadsSpecified);
4015     if (numThreads < 1)
4016       numThreads = 1;
4017 
4018     if (printCallback)
4019     {
4020       char s[128];
4021       ConvertUInt64ToString(numThreads, s);
4022       printCallback->Print(s);
4023       printCallback->Print("T CPU Freq (MHz):");
4024     }
4025     UInt64 numMilCommands = 1 <<
4026           #ifdef _DEBUG
4027             7;
4028           #else
4029             10;
4030           #endif
4031 
4032     if (specifiedFreq != 0)
4033     {
4034       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
4035         numMilCommands >>= 1;
4036     }
4037 
4038     // for (int jj = 0;; jj++)
4039     for (;;)
4040     {
4041       if (printCallback)
4042         RINOK(printCallback->CheckBreak())
4043 
4044       {
4045         // PrintLeft(f, "CPU", kFieldSize_Name);
4046 
4047         // UInt32 resVal;
4048 
4049         CFreqBench fb;
4050         fb.complexInCommands = numMilCommands * 1000000;
4051         fb.numThreads = numThreads;
4052         // showFreq;
4053         // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4054         fb.showFreq = true;
4055         fb.specifiedFreq = 1;
4056 
4057         const HRESULT res = fb.FreqBench(NULL /* printCallback */
4058             #ifndef Z7_ST
4059               , &affinityMode
4060             #endif
4061             );
4062         RINOK(res)
4063 
4064         if (freqCallback)
4065         {
4066           RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4067         }
4068 
4069         if (printCallback)
4070         {
4071           /*
4072           if (realDelta == 0)
4073           {
4074             printCallback->Print(" -");
4075           }
4076           else
4077           */
4078           {
4079             // PrintNumber(*printCallback, start, 0);
4080             PrintUsage(*printCallback, fb.UsageRes, 3);
4081             printCallback->Print("%");
4082             PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4083             printCallback->Print("  ");
4084 
4085             // PrintNumber(*printCallback, fb.UsageRes, 5);
4086           }
4087         }
4088       }
4089       // if (jj >= 1)
4090       {
4091         const bool needStop = (numMilCommands >= (1 <<
4092           #ifdef _DEBUG
4093             7
4094           #else
4095             11
4096           #endif
4097           ));
4098         if (needStop)
4099           break;
4100         numMilCommands <<= 1;
4101       }
4102     }
4103     if (freqCallback)
4104     {
4105       RINOK(freqCallback->FreqsFinished(numThreads))
4106     }
4107   }
4108 
4109 
4110   if (printCallback)
4111   {
4112     printCallback->NewLine();
4113     printCallback->NewLine();
4114     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4115     printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4116     printCallback->NewLine();
4117   }
4118 
4119   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4120     return E_INVALIDARG;
4121 
4122   UInt64 dict = (UInt64)1 << startDicLog;
4123   const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4124 
4125   const unsigned level = method.GetLevel();
4126 
4127   AString &methodName = method.MethodName;
4128   const AString original_MethodName = methodName;
4129   if (methodName.IsEmpty())
4130     methodName = "LZMA";
4131 
4132   if (benchCallback)
4133   {
4134     CBenchProps benchProps;
4135     benchProps.SetLzmaCompexity();
4136     const UInt64 dictSize = method.Get_Lzma_DicSize();
4137 
4138     size_t uncompressedDataSize;
4139     if (use_fileData)
4140     {
4141       uncompressedDataSize = fileDataBuffer.Size();
4142     }
4143     else
4144     {
4145       uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4146       if (uncompressedDataSize < dictSize)
4147         return E_INVALIDARG;
4148     }
4149 
4150     return MethodBench(
4151         EXTERNAL_CODECS_LOC_VARS
4152         complexInCommands,
4153       #ifndef Z7_ST
4154         true, numThreadsSpecified,
4155         &affinityMode,
4156       #endif
4157         method,
4158         uncompressedDataSize, (const Byte *)fileDataBuffer,
4159         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4160   }
4161 
4162   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4163     methodName = "crc32";
4164 
4165   CMethodId hashID;
4166   const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4167   int codecIndex = -1;
4168   bool isFilter = false;
4169   if (!isHashMethod)
4170   {
4171     UInt32 numStreams;
4172     codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4173         true,  // encode
4174         hashID, numStreams, isFilter);
4175     // we can allow non filter for BW tests
4176     if (!isFilter) codecIndex = -1;
4177   }
4178 
4179   CBenchCallbackToPrint callback;
4180   callback.Init();
4181   callback._file = printCallback;
4182 
4183   if (isHashMethod || codecIndex != -1)
4184   {
4185     if (!printCallback)
4186       return S_FALSE;
4187     IBenchPrintCallback &f = *printCallback;
4188 
4189     UInt64 dict64 = dict;
4190     if (!dictIsDefined)
4191       dict64 = (1 << 27);
4192     if (use_fileData)
4193     {
4194       if (!dictIsDefined)
4195         dict64 = fileDataBuffer.Size();
4196       else if (dict64 > fileDataBuffer.Size())
4197         dict64 = fileDataBuffer.Size();
4198     }
4199 
4200     for (;;)
4201     {
4202       const int index = method.FindProp(NCoderPropID::kDictionarySize);
4203       if (index < 0)
4204         break;
4205       method.Props.Delete((unsigned)index);
4206     }
4207 
4208     // methodName.RemoveChar(L'-');
4209     Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4210     const UInt32 *checkSum = NULL;
4211     int benchIndex = -1;
4212 
4213     if (isHashMethod)
4214     {
4215       for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4216       {
4217         const CBenchHash &h = g_Hash[i];
4218         AString benchMethod (h.Name);
4219         AString benchProps;
4220         const int propPos = benchMethod.Find(':');
4221         if (propPos >= 0)
4222         {
4223           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4224           benchMethod.DeleteFrom((unsigned)propPos);
4225         }
4226 
4227         if (AreSameMethodNames(benchMethod, methodName))
4228         {
4229           const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4230           /*
4231           bool isMainMethod = method.PropsString.IsEmpty();
4232           if (isMainMethod)
4233             isMainMethod = !checkSum
4234                 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4235           if (sameProps || isMainMethod)
4236           */
4237           {
4238             complexity = (Int32)h.Complex;
4239             checkSum = &h.CheckSum;
4240             if (sameProps)
4241               break;
4242             /*
4243             if property. is not specified, we use the complexity
4244             for latest fastest method (crc32:64)
4245             */
4246           }
4247         }
4248       }
4249       // if (!checkSum) return E_NOTIMPL;
4250     }
4251     else
4252     {
4253       for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4254       {
4255         const CBenchMethod &bench = g_Bench[i];
4256         AString benchMethod (bench.Name);
4257         AString benchProps;
4258         const int propPos = benchMethod.Find(':');
4259         if (propPos >= 0)
4260         {
4261           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4262           benchMethod.DeleteFrom((unsigned)propPos);
4263         }
4264 
4265         if (AreSameMethodNames(benchMethod, methodName))
4266         {
4267           const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4268           // bool isMainMethod = method.PropsString.IsEmpty();
4269           // if (sameProps || isMainMethod)
4270           {
4271             benchIndex = (int)i;
4272             if (sameProps)
4273               break;
4274           }
4275         }
4276       }
4277       // if (benchIndex < 0) return E_NOTIMPL;
4278     }
4279 
4280     {
4281       /* we count usage only for crc and filter. non-filters are not supported */
4282       UInt64 usage = (1 << 20);
4283       UInt64 bufSize = dict64;
4284       UInt32 numBlocks = isHashMethod ? 1 : 3;
4285       if (use_fileData)
4286       {
4287         usage += fileDataBuffer.Size();
4288         if (bufSize > fileDataBuffer.Size())
4289           bufSize = fileDataBuffer.Size();
4290         if (isHashMethod)
4291         {
4292           numBlocks = 0;
4293           #ifndef Z7_ST
4294           if (numThreadsSpecified != 1)
4295             numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4296           #endif
4297         }
4298       }
4299       usage += numThreadsSpecified * bufSize * numBlocks;
4300       Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4301     }
4302 
4303     CUIntVector numThreadsVector;
4304     {
4305       unsigned nt = numThreads_Start;
4306       for (;;)
4307       {
4308         if (nt > numThreadsSpecified)
4309           break;
4310         numThreadsVector.Add(nt);
4311         const unsigned next = nt * 2;
4312         const UInt32 ntHalf= numThreadsSpecified / 2;
4313         if (ntHalf > nt && ntHalf < next)
4314           numThreadsVector.Add(ntHalf);
4315         if (numThreadsSpecified > nt && numThreadsSpecified < next)
4316           numThreadsVector.Add(numThreadsSpecified);
4317         nt = next;
4318       }
4319     }
4320 
4321     unsigned numColumns = isHashMethod ? 1 : 2;
4322     CTempValues speedTotals;
4323     CTempValues usageTotals;
4324     {
4325       const unsigned numItems = numThreadsVector.Size() * numColumns;
4326       speedTotals.Alloc(numItems);
4327       usageTotals.Alloc(numItems);
4328       for (unsigned i = 0; i < numItems; i++)
4329       {
4330         speedTotals.Values[i] = 0;
4331         usageTotals.Values[i] = 0;
4332       }
4333     }
4334 
4335     f.NewLine();
4336     for (unsigned line = 0; line < 3; line++)
4337     {
4338       f.NewLine();
4339       f.Print(line == 0 ? "THRD" : line == 1 ? "    " : "Size");
4340       FOR_VECTOR (ti, numThreadsVector)
4341       {
4342         if (ti != 0)
4343           Print_Delimiter(f);
4344         if (line == 0)
4345         {
4346           PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4347           PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4348         }
4349         else
4350         {
4351           for (unsigned c = 0; c < numColumns; c++)
4352           {
4353             PrintRight(f, line == 1 ? "Usage" : "%",    kFieldSize_Usage + 1);
4354             PrintRight(f, line == 1 ? "BW"    : "MB/s", kFieldSize_CrcSpeed + 1);
4355           }
4356         }
4357       }
4358     }
4359     f.NewLine();
4360 
4361     UInt64 numSteps = 0;
4362 
4363     // for (UInt32 iter = 0; iter < numIterations; iter++)
4364     // {
4365     unsigned pow = 10; // kNumHashDictBits
4366     if (startDicLog_Defined)
4367       pow = startDicLog;
4368 
4369     // #define NUM_SUB_BITS 2
4370     // pow <<= NUM_SUB_BITS;
4371     for (;; pow++)
4372     {
4373       const UInt64 bufSize = (UInt64)1 << pow;
4374       // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4375       // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4376 
4377       size_t dataSize = fileDataBuffer.Size();
4378       if (dataSize > bufSize || !use_fileData)
4379         dataSize = (size_t)bufSize;
4380 
4381       for (UInt32 iter = 0; iter < numIterations; iter++)
4382       {
4383         Print_Pow(f, pow);
4384         // PrintNumber(f, bufSize >> 10, 4);
4385 
4386         FOR_VECTOR (ti, numThreadsVector)
4387         {
4388           RINOK(f.CheckBreak())
4389           const UInt32 numThreads = numThreadsVector[ti];
4390           if (isHashMethod)
4391           {
4392             UInt64 speed = 0;
4393             UInt64 usage = 0;
4394             const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4395               numThreads,
4396               dataSize, (const Byte *)fileDataBuffer,
4397               speed, usage,
4398               (UInt32)complexity,
4399               1, // benchWeight,
4400               (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4401               method,
4402               &f,
4403             #ifndef Z7_ST
4404               &affinityMode,
4405             #endif
4406               false, // showRating
4407               NULL, false, 0);
4408             RINOK(res)
4409 
4410             if (ti != 0)
4411               Print_Delimiter(f);
4412 
4413             Bench_BW_Print_Usage_Speed(f, usage, speed);
4414             speedTotals.Values[ti] += speed;
4415             usageTotals.Values[ti] += usage;
4416           }
4417           else
4418           {
4419             {
4420               unsigned keySize = 32;
4421                    if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4422               else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4423               callback.BenchProps.KeySize = keySize;
4424             }
4425 
4426             COneMethodInfo method2 = method;
4427             unsigned bench_DictBits;
4428 
4429             if (benchIndex >= 0)
4430             {
4431               const CBenchMethod &bench = g_Bench[benchIndex];
4432               callback.BenchProps.EncComplex = bench.EncComplex;
4433               callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4434               callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4435               bench_DictBits = bench.DictBits;
4436               // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4437             }
4438             else
4439             {
4440               bench_DictBits = kOldLzmaDictBits; // = 32 default
4441               if (isFilter)
4442               {
4443                 const unsigned k_UnknownCoderComplexity = 4;
4444                 callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4445                 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4446               }
4447               else
4448               {
4449                 callback.BenchProps.EncComplex = 1 << 10;
4450                 callback.BenchProps.DecComplexUnc = 1 << 6;
4451               }
4452               callback.BenchProps.DecComplexCompr = 0;
4453             }
4454             callback.NeedPrint = false;
4455 
4456             if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4457             {
4458               const NCOM::CPropVariant propVariant = (UInt32)pow;
4459               RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4460             }
4461 
4462             const HRESULT res = MethodBench(
4463                 EXTERNAL_CODECS_LOC_VARS
4464                 complexInCommands,
4465               #ifndef Z7_ST
4466                 false, // oldLzmaBenchMode
4467                 numThreadsVector[ti],
4468                 &affinityMode,
4469               #endif
4470                 method2,
4471                 dataSize, (const Byte *)fileDataBuffer,
4472                 bench_DictBits,
4473                 printCallback,
4474                 &callback,
4475                 &callback.BenchProps);
4476             RINOK(res)
4477 
4478             if (ti != 0)
4479               Print_Delimiter(f);
4480 
4481             for (unsigned i = 0; i < 2; i++)
4482             {
4483               const CBenchInfo &bi = callback.BenchInfo_Results[i];
4484               const UInt64 usage = bi.GetUsage();
4485               const UInt64 speed = bi.GetUnpackSizeSpeed();
4486               usageTotals.Values[ti * 2 + i] += usage;
4487               speedTotals.Values[ti * 2 + i] += speed;
4488               Bench_BW_Print_Usage_Speed(f, usage, speed);
4489             }
4490           }
4491         }
4492 
4493         f.NewLine();
4494         numSteps++;
4495       }
4496       if (dataSize >= dict64)
4497         break;
4498     }
4499 
4500     if (numSteps != 0)
4501     {
4502       f.Print("Avg:");
4503       for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4504       {
4505         if (ti != 0)
4506           Print_Delimiter(f);
4507         for (unsigned i = 0; i < numColumns; i++)
4508           Bench_BW_Print_Usage_Speed(f,
4509               usageTotals.Values[ti * numColumns + i] / numSteps,
4510               speedTotals.Values[ti * numColumns + i] / numSteps);
4511       }
4512       f.NewLine();
4513     }
4514 
4515     return S_OK;
4516   }
4517 
4518   bool use2Columns = false;
4519 
4520   bool totalBenchMode = false;
4521   bool onlyHashBench = false;
4522   if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4523   {
4524     onlyHashBench = true;
4525     methodName = "*";
4526     totalBenchMode = true;
4527   }
4528   else if (methodName.Find('*') >= 0)
4529     totalBenchMode = true;
4530 
4531   // ---------- Threads loop ----------
4532   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4533   {
4534 
4535   UInt32 numThreads = numThreadsSpecified;
4536 
4537   if (!multiThreadTests)
4538   {
4539     if (threadsPassIndex != 0)
4540       break;
4541   }
4542   else
4543   {
4544     numThreads = 1;
4545     if (threadsPassIndex != 0)
4546     {
4547       if (numCPUs < 2)
4548         break;
4549       numThreads = numCPUs;
4550       if (threadsPassIndex == 1)
4551       {
4552         if (numCPUs >= 4)
4553           numThreads = numCPUs / 2;
4554       }
4555       else if (numCPUs < 4)
4556         break;
4557     }
4558   }
4559 
4560   IBenchPrintCallback &f = *printCallback;
4561 
4562   if (threadsPassIndex > 0)
4563   {
4564     f.NewLine();
4565     f.NewLine();
4566   }
4567 
4568   if (!dictIsDefined && !onlyHashBench)
4569   {
4570     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4571     unsigned dicSizeLog = dicSizeLog_Main;
4572 
4573     #ifdef UNDER_CE
4574     dicSizeLog = (UInt64)1 << 20;
4575     #endif
4576 
4577     if (ramSize_Defined)
4578     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4579       if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4580         break;
4581 
4582     dict = (UInt64)1 << dicSizeLog;
4583 
4584     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4585     {
4586       f.Print("Dictionary reduced to: ");
4587       PrintNumber(f, dicSizeLog, 1);
4588       f.NewLine();
4589     }
4590   }
4591 
4592   Print_Usage_and_Threads(f,
4593       onlyHashBench ?
4594         GetBenchMemoryUsage_Hash(numThreads, dict) :
4595         GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4596       numThreads);
4597 
4598   f.NewLine();
4599 
4600   f.NewLine();
4601 
4602   if (totalBenchMode)
4603   {
4604     callback.NameFieldSize = kFieldSize_Name;
4605     use2Columns = false;
4606   }
4607   else
4608   {
4609     callback.NameFieldSize = kFieldSize_SmallName;
4610     use2Columns = true;
4611   }
4612   callback.Use2Columns = use2Columns;
4613 
4614   bool showFreq = false;
4615   UInt64 cpuFreq = 0;
4616 
4617   if (totalBenchMode)
4618   {
4619     showFreq = true;
4620   }
4621 
4622   unsigned fileldSize = kFieldSize_TotalSize;
4623   if (showFreq)
4624     fileldSize += kFieldSize_EUAndEffec;
4625 
4626   if (use2Columns)
4627   {
4628     PrintSpaces(f, callback.NameFieldSize);
4629     PrintRight(f, "Compressing", fileldSize);
4630     f.Print(kSep);
4631     PrintRight(f, "Decompressing", fileldSize);
4632   }
4633   f.NewLine();
4634   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4635 
4636   int j;
4637 
4638   for (j = 0; j < 2; j++)
4639   {
4640     PrintRight(f, "Speed", kFieldSize_Speed + 1);
4641     PrintRight(f, "Usage", kFieldSize_Usage + 1);
4642     PrintRight(f, "R/U", kFieldSize_RU + 1);
4643     PrintRight(f, "Rating", kFieldSize_Rating + 1);
4644     if (showFreq)
4645     {
4646       PrintRight(f, "E/U", kFieldSize_EU + 1);
4647       PrintRight(f, "Effec", kFieldSize_Effec + 1);
4648     }
4649     if (!use2Columns)
4650       break;
4651     if (j == 0)
4652       f.Print(kSep);
4653   }
4654 
4655   f.NewLine();
4656   PrintSpaces(f, callback.NameFieldSize);
4657 
4658   for (j = 0; j < 2; j++)
4659   {
4660     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4661     PrintRight(f, "%", kFieldSize_Usage + 1);
4662     PrintRight(f, "MIPS", kFieldSize_RU + 1);
4663     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4664     if (showFreq)
4665     {
4666       PrintRight(f, "%", kFieldSize_EU + 1);
4667       PrintRight(f, "%", kFieldSize_Effec + 1);
4668     }
4669     if (!use2Columns)
4670       break;
4671     if (j == 0)
4672       f.Print(kSep);
4673   }
4674 
4675   f.NewLine();
4676   f.NewLine();
4677 
4678   if (specifiedFreq != 0)
4679     cpuFreq = specifiedFreq;
4680 
4681   // bool showTotalSpeed = false;
4682 
4683   if (totalBenchMode)
4684   {
4685     for (UInt32 i = 0; i < numIterations; i++)
4686     {
4687       if (i != 0)
4688         printCallback->NewLine();
4689 
4690       const unsigned kNumCpuTests = 3;
4691       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4692       {
4693         PrintLeft(f, "CPU", kFieldSize_Name);
4694 
4695         // UInt32 resVal;
4696 
4697         CFreqBench fb;
4698         fb.complexInCommands = complexInCommands;
4699         fb.numThreads = numThreads;
4700         // showFreq;
4701         fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4702         fb.specifiedFreq = specifiedFreq;
4703 
4704         const HRESULT res = fb.FreqBench(printCallback
4705             #ifndef Z7_ST
4706               , &affinityMode
4707             #endif
4708             );
4709         RINOK(res)
4710 
4711         cpuFreq = fb.CpuFreqRes;
4712         callback.NewLine();
4713 
4714         if (specifiedFreq != 0)
4715           cpuFreq = specifiedFreq;
4716 
4717         if (testTimeMs >= 1000)
4718         if (freqTest == kNumCpuTests - 1)
4719         {
4720           // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4721         }
4722       }
4723       callback.NewLine();
4724 
4725       // return S_OK; // change it
4726 
4727       callback.SetFreq(true, cpuFreq);
4728 
4729       if (!onlyHashBench)
4730       {
4731         size_t dataSize = (size_t)dict;
4732         if (use_fileData)
4733         {
4734           dataSize = fileDataBuffer.Size();
4735           if (dictIsDefined && dataSize > dict)
4736             dataSize = (size_t)dict;
4737         }
4738 
4739         const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4740             method, complexInCommands,
4741           #ifndef Z7_ST
4742             numThreads,
4743             &affinityMode,
4744           #endif
4745             dictIsDefined || use_fileData, // forceUnpackSize
4746             dataSize,
4747             (const Byte *)fileDataBuffer,
4748             printCallback, &callback);
4749         RINOK(res)
4750       }
4751 
4752       {
4753         size_t dataSize = (size_t)1 << kNumHashDictBits;
4754         if (dictIsDefined)
4755         {
4756           dataSize = (size_t)dict;
4757           if (dataSize != dict)
4758             return E_OUTOFMEMORY;
4759         }
4760         if (use_fileData)
4761         {
4762           dataSize = fileDataBuffer.Size();
4763           if (dictIsDefined && dataSize > dict)
4764             dataSize = (size_t)dict;
4765         }
4766 
4767         const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4768             method, complexInCommands,
4769             numThreads,
4770             dataSize, (const Byte *)fileDataBuffer,
4771             printCallback, &callback,
4772         #ifndef Z7_ST
4773           &affinityMode,
4774         #endif
4775           &callback.EncodeRes, true, cpuFreq);
4776         RINOK(res)
4777       }
4778 
4779       callback.NewLine();
4780       {
4781         PrintLeft(f, "CPU", kFieldSize_Name);
4782 
4783         CFreqBench fb;
4784         fb.complexInCommands = complexInCommands;
4785         fb.numThreads = numThreads;
4786         // showFreq;
4787         fb.showFreq = (specifiedFreq != 0);
4788         fb.specifiedFreq = specifiedFreq;
4789 
4790         const HRESULT res = fb.FreqBench(printCallback
4791           #ifndef Z7_ST
4792             , &affinityMode
4793           #endif
4794           );
4795         RINOK(res)
4796         callback.NewLine();
4797       }
4798     }
4799   }
4800   else
4801   {
4802     needSetComplexity = true;
4803     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4804     {
4805       unsigned i;
4806       for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4807       {
4808         const CBenchMethod &h = g_Bench[i];
4809         AString benchMethod (h.Name);
4810         AString benchProps;
4811         const int propPos = benchMethod.Find(':');
4812         if (propPos >= 0)
4813         {
4814           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4815           benchMethod.DeleteFrom((unsigned)propPos);
4816         }
4817 
4818         if (AreSameMethodNames(benchMethod, methodName))
4819         {
4820           if (benchProps.IsEmpty()
4821               || (benchProps == "x5" && method.PropsString.IsEmpty())
4822               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4823           {
4824             callback.BenchProps.EncComplex = h.EncComplex;
4825             callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4826             callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4827             needSetComplexity = false;
4828             break;
4829           }
4830         }
4831       }
4832       /*
4833       if (i == Z7_ARRAY_SIZE(g_Bench))
4834         return E_NOTIMPL;
4835       */
4836     }
4837     if (needSetComplexity)
4838       callback.BenchProps.SetLzmaCompexity();
4839 
4840   if (startDicLog < kBenchMinDicLogSize)
4841     startDicLog = kBenchMinDicLogSize;
4842 
4843   for (unsigned i = 0; i < numIterations; i++)
4844   {
4845     unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4846     if (!multiDict)
4847       pow = 32;
4848     while (GetDictSizeFromLog(pow) > dict && pow > 0)
4849       pow--;
4850     for (; GetDictSizeFromLog(pow) <= dict; pow++)
4851     {
4852       Print_Pow(f, pow);
4853       callback.DictSize = (UInt64)1 << pow;
4854 
4855       COneMethodInfo method2 = method;
4856 
4857       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4858       {
4859         // We add dictionary size property.
4860         // method2 can have two different dictionary size properties.
4861         // And last property is main.
4862         NCOM::CPropVariant propVariant = (UInt32)pow;
4863         RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4864       }
4865 
4866       size_t uncompressedDataSize;
4867       if (use_fileData)
4868       {
4869         uncompressedDataSize = fileDataBuffer.Size();
4870       }
4871       else
4872       {
4873         uncompressedDataSize = (size_t)callback.DictSize;
4874         if (uncompressedDataSize != callback.DictSize)
4875           return E_OUTOFMEMORY;
4876         if (uncompressedDataSize >= (1 << 18))
4877           uncompressedDataSize += kAdditionalSize;
4878       }
4879 
4880       const HRESULT res = MethodBench(
4881           EXTERNAL_CODECS_LOC_VARS
4882           complexInCommands,
4883         #ifndef Z7_ST
4884           true, numThreads,
4885           &affinityMode,
4886         #endif
4887           method2,
4888           uncompressedDataSize, (const Byte *)fileDataBuffer,
4889           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4890       f.NewLine();
4891       RINOK(res)
4892       if (!multiDict)
4893         break;
4894     }
4895   }
4896   }
4897 
4898   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4899 
4900   if (use2Columns)
4901   {
4902     f.Print(kSep);
4903     PrintChars(f, '-', fileldSize);
4904   }
4905 
4906   f.NewLine();
4907 
4908   if (use2Columns)
4909   {
4910     PrintLeft(f, "Avr:", callback.NameFieldSize);
4911     PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4912     f.Print(kSep);
4913     PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4914     f.NewLine();
4915   }
4916 
4917   PrintLeft(f, "Tot:", callback.NameFieldSize);
4918   CTotalBenchRes midRes;
4919   midRes = callback.EncodeRes;
4920   midRes.Update_With_Res(callback.DecodeRes);
4921 
4922   // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4923   PrintTotals(f, showFreq, cpuFreq, false, midRes);
4924   f.NewLine();
4925 
4926   }
4927   return S_OK;
4928 }
4929