• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 #include <stdio.h>
6 
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11 
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18 
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22 
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <malloc.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30 
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/Alloc.h"
33 #include "../../../../C/CpuArch.h"
34 
35 #include "../../../Windows/System.h"
36 
37 #ifndef _7ZIP_ST
38 #include "../../../Windows/Synchronization.h"
39 #include "../../../Windows/Thread.h"
40 #endif
41 
42 #if defined(_WIN32) || defined(UNIX_USE_WIN_FILE)
43 #define USE_WIN_FILE
44 #endif
45 
46 #ifdef USE_WIN_FILE
47 #include "../../../Windows/FileIO.h"
48 #endif
49 
50 
51 #include "../../../Common/IntToString.h"
52 #include "../../../Common/StringConvert.h"
53 #include "../../../Common/StringToInt.h"
54 
55 #include "../../Common/MethodProps.h"
56 #include "../../Common/StreamUtils.h"
57 
58 #include "Bench.h"
59 
60 using namespace NWindows;
61 
62 static const UInt32 k_LZMA = 0x030101;
63 
64 static const UInt64 kComplexInCommands = (UInt64)1 <<
65   #ifdef UNDER_CE
66     31;
67   #else
68     34;
69   #endif
70 
71 static const UInt32 kComplexInSeconds = 4;
72 
SetComplexCommands(UInt32 complexInSeconds,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)73 static void SetComplexCommands(UInt32 complexInSeconds,
74     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
75 {
76   complexInCommands = kComplexInCommands;
77   const UInt64 kMinFreq = (UInt64)1000000 * 4;
78   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
79   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
80     cpuFreq = kMinFreq;
81   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
82   {
83     if (complexInSeconds != 0)
84       complexInCommands = complexInSeconds * cpuFreq;
85     else
86       complexInCommands = cpuFreq >> 2;
87   }
88 }
89 
90 static const unsigned kNumHashDictBits = 17;
91 static const UInt32 kFilterUnpackSize = (48 << 10);
92 
93 static const unsigned kOldLzmaDictBits = 30;
94 
95 static const UInt32 kAdditionalSize = (1 << 16);
96 static const UInt32 kCompressedAdditionalSize = (1 << 10);
97 static const UInt32 kMaxLzmaPropSize = 5;
98 
99 class CBaseRandomGenerator
100 {
101   UInt32 A1;
102   UInt32 A2;
103 public:
CBaseRandomGenerator()104   CBaseRandomGenerator() { Init(); }
Init()105   void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()106   UInt32 GetRnd()
107   {
108     return
109       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
110       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
111   }
112 };
113 
114 
// Alignment (in bytes) requested by CBenchBuffer::Alloc(); must be a power of
// two because (kBufferAlignment - 1) is used as a mask.
static const unsigned kBufferAlignment = 16;
116 
117 struct CBenchBuffer
118 {
119   size_t BufferSize;
120 
121   #ifdef _WIN32
122 
123   Byte *Buffer;
124 
CBenchBufferCBenchBuffer125   CBenchBuffer(): BufferSize(0), Buffer(NULL) {}
~CBenchBufferCBenchBuffer126   ~CBenchBuffer() { ::MidFree(Buffer); }
127 
AllocAlignedMaskCBenchBuffer128   void AllocAlignedMask(size_t size, size_t)
129   {
130     ::MidFree(Buffer);
131     BufferSize = 0;
132     Buffer = (Byte *)::MidAlloc(size);
133     if (Buffer)
134       BufferSize = size;
135   }
136 
137   #else
138 
139   Byte *Buffer;
140   Byte *_bufBase;
141 
CBenchBufferCBenchBuffer142   CBenchBuffer(): BufferSize(0), Buffer(NULL), _bufBase(NULL){}
~CBenchBufferCBenchBuffer143   ~CBenchBuffer() { ::MidFree(_bufBase); }
144 
AllocAlignedMaskCBenchBuffer145   void AllocAlignedMask(size_t size, size_t alignMask)
146   {
147     ::MidFree(_bufBase);
148     Buffer = NULL;
149     BufferSize = 0;
150     _bufBase = (Byte *)::MidAlloc(size + alignMask);
151 
152     if (_bufBase)
153     {
154       // Buffer = (Byte *)(((uintptr_t)_bufBase + alignMask) & ~(uintptr_t)alignMask);
155          Buffer = (Byte *)(((ptrdiff_t)_bufBase + alignMask) & ~(ptrdiff_t)alignMask);
156       BufferSize = size;
157     }
158   }
159 
160   #endif
161 
AllocCBenchBuffer162   bool Alloc(size_t size)
163   {
164     if (Buffer && BufferSize == size)
165       return true;
166     AllocAlignedMask(size, kBufferAlignment - 1);
167     return (Buffer != NULL || size == 0);
168   }
169 };
170 
171 
172 class CBenchRandomGenerator: public CBenchBuffer
173 {
GetVal(UInt32 & res,unsigned numBits)174   static UInt32 GetVal(UInt32 &res, unsigned numBits)
175   {
176     UInt32 val = res & (((UInt32)1 << numBits) - 1);
177     res >>= numBits;
178     return val;
179   }
180 
GetLen(UInt32 & r)181   static UInt32 GetLen(UInt32 &r)
182   {
183     UInt32 len = GetVal(r, 2);
184     return GetVal(r, 1 + len);
185   }
186 
187 public:
188 
GenerateSimpleRandom(CBaseRandomGenerator * _RG_)189   void GenerateSimpleRandom(CBaseRandomGenerator *_RG_)
190   {
191     CBaseRandomGenerator rg = *_RG_;
192     const size_t bufSize = BufferSize;
193     Byte *buf = Buffer;
194     for (size_t i = 0; i < bufSize; i++)
195       buf[i] = (Byte)rg.GetRnd();
196     *_RG_ = rg;
197   }
198 
GenerateLz(unsigned dictBits,CBaseRandomGenerator * _RG_)199   void GenerateLz(unsigned dictBits, CBaseRandomGenerator *_RG_)
200   {
201     CBaseRandomGenerator rg = *_RG_;
202     UInt32 pos = 0;
203     UInt32 rep0 = 1;
204     const size_t bufSize = BufferSize;
205     Byte *buf = Buffer;
206     unsigned posBits = 1;
207 
208     while (pos < bufSize)
209     {
210       UInt32 r = rg.GetRnd();
211       if (GetVal(r, 1) == 0 || pos < 1024)
212         buf[pos++] = (Byte)(r & 0xFF);
213       else
214       {
215         UInt32 len;
216         len = 1 + GetLen(r);
217 
218         if (GetVal(r, 3) != 0)
219         {
220           len += GetLen(r);
221 
222           while (((UInt32)1 << posBits) < pos)
223             posBits++;
224 
225           unsigned numBitsMax = dictBits;
226           if (numBitsMax > posBits)
227             numBitsMax = posBits;
228 
229           const unsigned kAddBits = 6;
230           unsigned numLogBits = 5;
231           if (numBitsMax <= (1 << 4) - 1 + kAddBits)
232             numLogBits = 4;
233 
234           for (;;)
235           {
236             UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
237             r = rg.GetRnd();
238             if (ppp > numBitsMax)
239               continue;
240             rep0 = GetVal(r, ppp);
241             if (rep0 < pos)
242               break;
243             r = rg.GetRnd();
244           }
245           rep0++;
246         }
247 
248         {
249           UInt32 rem = (UInt32)bufSize - pos;
250           if (len > rem)
251             len = rem;
252         }
253         Byte *dest = buf + pos;
254         const Byte *src = dest - rep0;
255         pos += len;
256         for (UInt32 i = 0; i < len; i++)
257           *dest++ = *src++;
258       }
259     }
260 
261     *_RG_ = rg;
262   }
263 };
264 
265 
266 class CBenchmarkInStream:
267   public ISequentialInStream,
268   public CMyUnknownImp
269 {
270   const Byte *Data;
271   size_t Pos;
272   size_t Size;
273 public:
274   MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)275   void Init(const Byte *data, size_t size)
276   {
277     Data = data;
278     Size = size;
279     Pos = 0;
280   }
281   STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
282 };
283 
Read(void * data,UInt32 size,UInt32 * processedSize)284 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
285 {
286   size_t remain = Size - Pos;
287   UInt32 kMaxBlockSize = (1 << 20);
288   if (size > kMaxBlockSize)
289     size = kMaxBlockSize;
290   if (size > remain)
291     size = (UInt32)remain;
292   for (UInt32 i = 0; i < size; i++)
293     ((Byte *)data)[i] = Data[Pos + i];
294   Pos += size;
295   if (processedSize)
296     *processedSize = size;
297   return S_OK;
298 }
299 
300 class CBenchmarkOutStream:
301   public ISequentialOutStream,
302   public CBenchBuffer,
303   public CMyUnknownImp
304 {
305   // bool _overflow;
306 public:
307   size_t Pos;
308   bool RealCopy;
309   bool CalcCrc;
310   UInt32 Crc;
311 
312   // CBenchmarkOutStream(): _overflow(false) {}
Init(bool realCopy,bool calcCrc)313   void Init(bool realCopy, bool calcCrc)
314   {
315     Crc = CRC_INIT_VAL;
316     RealCopy = realCopy;
317     CalcCrc = calcCrc;
318     // _overflow = false;
319     Pos = 0;
320   }
321 
322   // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
323 
324   MY_UNKNOWN_IMP
325   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
326 };
327 
Write(const void * data,UInt32 size,UInt32 * processedSize)328 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
329 {
330   size_t curSize = BufferSize - Pos;
331   if (curSize > size)
332     curSize = size;
333   if (curSize != 0)
334   {
335     if (RealCopy)
336       memcpy(Buffer + Pos, data, curSize);
337     if (CalcCrc)
338       Crc = CrcUpdate(Crc, data, curSize);
339     Pos += curSize;
340   }
341   if (processedSize)
342     *processedSize = (UInt32)curSize;
343   if (curSize != size)
344   {
345     // _overflow = true;
346     return E_FAIL;
347   }
348   return S_OK;
349 }
350 
351 class CCrcOutStream:
352   public ISequentialOutStream,
353   public CMyUnknownImp
354 {
355 public:
356   bool CalcCrc;
357   UInt32 Crc;
358   MY_UNKNOWN_IMP
359 
CCrcOutStream()360   CCrcOutStream(): CalcCrc(true) {};
Init()361   void Init() { Crc = CRC_INIT_VAL; }
362   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
363 };
364 
Write(const void * data,UInt32 size,UInt32 * processedSize)365 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
366 {
367   if (CalcCrc)
368     Crc = CrcUpdate(Crc, data, size);
369   if (processedSize)
370     *processedSize = size;
371   return S_OK;
372 }
373 
GetTimeCount()374 static UInt64 GetTimeCount()
375 {
376   #ifdef USE_POSIX_TIME
377   #ifdef USE_POSIX_TIME2
378   timeval v;
379   if (gettimeofday(&v, 0) == 0)
380     return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
381   return (UInt64)time(NULL) * 1000000;
382   #else
383   return time(NULL);
384   #endif
385   #else
386   /*
387   LARGE_INTEGER value;
388   if (::QueryPerformanceCounter(&value))
389     return value.QuadPart;
390   */
391   return GetTickCount();
392   #endif
393 }
394 
GetFreq()395 static UInt64 GetFreq()
396 {
397   #ifdef USE_POSIX_TIME
398   #ifdef USE_POSIX_TIME2
399   return 1000000;
400   #else
401   return 1;
402   #endif
403   #else
404   /*
405   LARGE_INTEGER value;
406   if (::QueryPerformanceFrequency(&value))
407     return value.QuadPart;
408   */
409   return 1000;
410   #endif
411 }
412 
413 #ifdef USE_POSIX_TIME
414 
415 struct CUserTime
416 {
417   UInt64 Sum;
418   clock_t Prev;
419 
InitCUserTime420   void Init()
421   {
422     Prev = clock();
423     Sum = 0;
424   }
425 
GetUserTimeCUserTime426   UInt64 GetUserTime()
427   {
428     clock_t v = clock();
429     Sum += v - Prev;
430     Prev = v;
431     return Sum;
432   }
433 };
434 
435 #else
436 
GetTime64(const FILETIME & t)437 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
GetWinUserTime()438 UInt64 GetWinUserTime()
439 {
440   FILETIME creationTime, exitTime, kernelTime, userTime;
441   if (
442   #ifdef UNDER_CE
443     ::GetThreadTimes(::GetCurrentThread()
444   #else
445     ::GetProcessTimes(::GetCurrentProcess()
446   #endif
447     , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
448     return GetTime64(userTime) + GetTime64(kernelTime);
449   return (UInt64)GetTickCount() * 10000;
450 }
451 
452 struct CUserTime
453 {
454   UInt64 StartTime;
455 
InitCUserTime456   void Init() { StartTime = GetWinUserTime(); }
GetUserTimeCUserTime457   UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
458 };
459 
460 #endif
461 
GetUserFreq()462 static UInt64 GetUserFreq()
463 {
464   #ifdef USE_POSIX_TIME
465   return CLOCKS_PER_SEC;
466   #else
467   return 10000000;
468   #endif
469 }
470 
471 class CBenchProgressStatus
472 {
473   #ifndef _7ZIP_ST
474   NSynchronization::CCriticalSection CS;
475   #endif
476 public:
477   HRESULT Res;
478   bool EncodeMode;
SetResult(HRESULT res)479   void SetResult(HRESULT res)
480   {
481     #ifndef _7ZIP_ST
482     NSynchronization::CCriticalSectionLock lock(CS);
483     #endif
484     Res = res;
485   }
GetResult()486   HRESULT GetResult()
487   {
488     #ifndef _7ZIP_ST
489     NSynchronization::CCriticalSectionLock lock(CS);
490     #endif
491     return Res;
492   }
493 };
494 
495 struct CBenchInfoCalc
496 {
497   CBenchInfo BenchInfo;
498   CUserTime UserTime;
499 
500   void SetStartTime();
501   void SetFinishTime(CBenchInfo &dest);
502 };
503 
SetStartTime()504 void CBenchInfoCalc::SetStartTime()
505 {
506   BenchInfo.GlobalFreq = GetFreq();
507   BenchInfo.UserFreq = GetUserFreq();
508   BenchInfo.GlobalTime = ::GetTimeCount();
509   BenchInfo.UserTime = 0;
510   UserTime.Init();
511 }
512 
SetFinishTime(CBenchInfo & dest)513 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
514 {
515   dest = BenchInfo;
516   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
517   dest.UserTime = UserTime.GetUserTime();
518 }
519 
520 class CBenchProgressInfo:
521   public ICompressProgressInfo,
522   public CMyUnknownImp,
523   public CBenchInfoCalc
524 {
525 public:
526   CBenchProgressStatus *Status;
527   HRESULT Res;
528   IBenchCallback *Callback;
529 
CBenchProgressInfo()530   CBenchProgressInfo(): Callback(0) {}
531   MY_UNKNOWN_IMP
532   STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
533 };
534 
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)535 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
536 {
537   HRESULT res = Status->GetResult();
538   if (res != S_OK)
539     return res;
540   if (!Callback)
541     return res;
542   CBenchInfo info;
543   SetFinishTime(info);
544   if (Status->EncodeMode)
545   {
546     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
547     info.PackSize = BenchInfo.PackSize + *outSize;
548     res = Callback->SetEncodeResult(info, false);
549   }
550   else
551   {
552     info.PackSize = BenchInfo.PackSize + *inSize;
553     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
554     res = Callback->SetDecodeResult(info, false);
555   }
556   if (res != S_OK)
557     Status->SetResult(res);
558   return res;
559 }
560 
// Number of fractional bits in the value returned by GetLogSize().
static const unsigned kSubBits = 8;
562 
GetLogSize(UInt32 size)563 static UInt32 GetLogSize(UInt32 size)
564 {
565   for (unsigned i = kSubBits; i < 32; i++)
566     for (UInt32 j = 0; j < (1 << kSubBits); j++)
567       if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
568         return (i << kSubBits) + j;
569   return (32 << kSubBits);
570 }
571 
NormalizeVals(UInt64 & v1,UInt64 & v2)572 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
573 {
574   while (v1 > 1000000)
575   {
576     v1 >>= 1;
577     v2 >>= 1;
578   }
579 }
580 
GetUsage() const581 UInt64 CBenchInfo::GetUsage() const
582 {
583   UInt64 userTime = UserTime;
584   UInt64 userFreq = UserFreq;
585   UInt64 globalTime = GlobalTime;
586   UInt64 globalFreq = GlobalFreq;
587   NormalizeVals(userTime, userFreq);
588   NormalizeVals(globalFreq, globalTime);
589   if (userFreq == 0)
590     userFreq = 1;
591   if (globalTime == 0)
592     globalTime = 1;
593   return userTime * globalFreq * 1000000 / userFreq / globalTime;
594 }
595 
GetRatingPerUsage(UInt64 rating) const596 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
597 {
598   UInt64 userTime = UserTime;
599   UInt64 userFreq = UserFreq;
600   UInt64 globalTime = GlobalTime;
601   UInt64 globalFreq = GlobalFreq;
602   NormalizeVals(userFreq, userTime);
603   NormalizeVals(globalTime, globalFreq);
604   if (globalFreq == 0)
605     globalFreq = 1;
606   if (userTime == 0)
607     userTime = 1;
608   return userFreq * globalTime / globalFreq * rating / userTime;
609 }
610 
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)611 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
612 {
613   UInt64 elTime = elapsedTime;
614   NormalizeVals(freq, elTime);
615   if (elTime == 0)
616     elTime = 1;
617   return value * freq / elTime;
618 }
619 
GetSpeed(UInt64 numCommands) const620 UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
621 {
622   return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
623 }
624 
625 struct CBenchProps
626 {
627   bool LzmaRatingMode;
628 
629   UInt32 EncComplex;
630   UInt32 DecComplexCompr;
631   UInt32 DecComplexUnc;
632 
CBenchPropsCBenchProps633   CBenchProps(): LzmaRatingMode(false) {}
634   void SetLzmaCompexity();
635 
GeComprCommandsCBenchProps636   UInt64 GeComprCommands(UInt64 unpackSize)
637   {
638     return unpackSize * EncComplex;
639   }
640 
GeDecomprCommandsCBenchProps641   UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
642   {
643     return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
644   }
645 
646   UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
647   UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
648 };
649 
SetLzmaCompexity()650 void CBenchProps::SetLzmaCompexity()
651 {
652   EncComplex = 1200;
653   DecComplexUnc = 4;
654   DecComplexCompr = 190;
655   LzmaRatingMode = true;
656 }
657 
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)658 UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
659 {
660   if (dictSize < (1 << kBenchMinDicLogSize))
661     dictSize = (1 << kBenchMinDicLogSize);
662   UInt64 encComplex = EncComplex;
663   if (LzmaRatingMode)
664   {
665     UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
666     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
667   }
668   UInt64 numCommands = (UInt64)size * encComplex;
669   return MyMultDiv64(numCommands, elapsedTime, freq);
670 }
671 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)672 UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
673 {
674   UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
675   return MyMultDiv64(numCommands, elapsedTime, freq);
676 }
677 
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)678 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
679 {
680   CBenchProps props;
681   props.SetLzmaCompexity();
682   return props.GetCompressRating(dictSize, elapsedTime, freq, size);
683 }
684 
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)685 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
686 {
687   CBenchProps props;
688   props.SetLzmaCompexity();
689   return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
690 }
691 
692 struct CEncoderInfo;
693 
694 struct CEncoderInfo
695 {
696   #ifndef _7ZIP_ST
697   NWindows::CThread thread[2];
698   UInt32 NumDecoderSubThreads;
699   #endif
700   CMyComPtr<ICompressCoder> _encoder;
701   CMyComPtr<ICompressFilter> _encoderFilter;
702   CBenchProgressInfo *progressInfoSpec[2];
703   CMyComPtr<ICompressProgressInfo> progressInfo[2];
704   UInt64 NumIterations;
705 
706   #ifdef USE_ALLOCA
707   size_t AllocaSize;
708   #endif
709 
710   Byte _key[32];
711   Byte _iv[16];
712   Byte _psw[16];
713   bool CheckCrc_Enc;
714   bool CheckCrc_Dec;
715 
716   struct CDecoderInfo
717   {
718     CEncoderInfo *Encoder;
719     UInt32 DecoderIndex;
720     bool CallbackMode;
721 
722     #ifdef USE_ALLOCA
723     size_t AllocaSize;
724     #endif
725   };
726   CDecoderInfo decodersInfo[2];
727 
728   CMyComPtr<ICompressCoder> _decoders[2];
729   CMyComPtr<ICompressFilter> _decoderFilter;
730 
731   HRESULT Results[2];
732   CBenchmarkOutStream *outStreamSpec;
733   CMyComPtr<ISequentialOutStream> outStream;
734   IBenchCallback *callback;
735   IBenchPrintCallback *printCallback;
736   UInt32 crc;
737   size_t kBufferSize;
738   size_t compressedSize;
739   const Byte *uncompressedDataPtr;
740 
741   const Byte *fileData;
742   CBenchRandomGenerator rg;
743 
744   CBenchBuffer rgCopy; // it must be 16-byte aligned !!!
745   CBenchmarkOutStream *propStreamSpec;
746   CMyComPtr<ISequentialOutStream> propStream;
747 
748   // for decode
749   COneMethodInfo _method;
750   size_t _uncompressedDataSize;
751 
752   HRESULT Init(
753       const COneMethodInfo &method,
754       unsigned generateDictBits,
755       CBaseRandomGenerator *rg);
756   HRESULT Encode();
757   HRESULT Decode(UInt32 decoderIndex);
758 
CEncoderInfoCEncoderInfo759   CEncoderInfo():
760     fileData(NULL),
761     CheckCrc_Enc(true),
762     CheckCrc_Dec(true),
763     outStreamSpec(0), callback(0), printCallback(0), propStreamSpec(0) {}
764 
765   #ifndef _7ZIP_ST
766 
EncodeThreadFunctionCEncoderInfo767   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
768   {
769     HRESULT res;
770     CEncoderInfo *encoder = (CEncoderInfo *)param;
771     try
772     {
773       #ifdef USE_ALLOCA
774       alloca(encoder->AllocaSize);
775       #endif
776 
777       res = encoder->Encode();
778       encoder->Results[0] = res;
779     }
780     catch(...)
781     {
782       res = E_FAIL;
783     }
784     if (res != S_OK)
785       encoder->progressInfoSpec[0]->Status->SetResult(res);
786     return 0;
787   }
788 
DecodeThreadFunctionCEncoderInfo789   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
790   {
791     CDecoderInfo *decoder = (CDecoderInfo *)param;
792 
793     #ifdef USE_ALLOCA
794     alloca(decoder->AllocaSize);
795     #endif
796 
797     CEncoderInfo *encoder = decoder->Encoder;
798     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
799     return 0;
800   }
801 
CreateEncoderThreadCEncoderInfo802   HRESULT CreateEncoderThread()
803   {
804     return thread[0].Create(EncodeThreadFunction, this);
805   }
806 
CreateDecoderThreadCEncoderInfo807   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
808       #ifdef USE_ALLOCA
809       , size_t allocaSize
810       #endif
811       )
812   {
813     CDecoderInfo &decoder = decodersInfo[index];
814     decoder.DecoderIndex = index;
815     decoder.Encoder = this;
816 
817     #ifdef USE_ALLOCA
818     decoder.AllocaSize = allocaSize;
819     #endif
820 
821     decoder.CallbackMode = callbackMode;
822     return thread[index].Create(DecodeThreadFunction, &decoder);
823   }
824 
825   #endif
826 };
827 
828 
Init(const COneMethodInfo & method,unsigned generateDictBits,CBaseRandomGenerator * rgLoc)829 HRESULT CEncoderInfo::Init(
830     const COneMethodInfo &method,
831     unsigned generateDictBits,
832     CBaseRandomGenerator *rgLoc)
833 {
834   // we need extra space, if input data is already compressed
835   const size_t kCompressedBufferSize =
836       kCompressedAdditionalSize +
837       kBufferSize + kBufferSize / 16;
838       // kBufferSize / 2;
839 
840   if (kCompressedBufferSize < kBufferSize)
841     return E_FAIL;
842 
843   uncompressedDataPtr = fileData;
844 
845   if (!fileData)
846   {
847     if (!rg.Alloc(kBufferSize))
848       return E_OUTOFMEMORY;
849 
850     // DWORD ttt = GetTickCount();
851     if (generateDictBits == 0)
852       rg.GenerateSimpleRandom(rgLoc);
853     else
854       rg.GenerateLz(generateDictBits, rgLoc);
855     // printf("\n%d\n            ", GetTickCount() - ttt);
856 
857     crc = CrcCalc(rg.Buffer, rg.BufferSize);
858     uncompressedDataPtr = rg.Buffer;
859   }
860 
861   if (_encoderFilter)
862   {
863     if (!rgCopy.Alloc(kBufferSize))
864       return E_OUTOFMEMORY;
865   }
866 
867 
868   outStreamSpec = new CBenchmarkOutStream;
869   outStream = outStreamSpec;
870   if (!outStreamSpec->Alloc(kCompressedBufferSize))
871     return E_OUTOFMEMORY;
872 
873   propStreamSpec = 0;
874   if (!propStream)
875   {
876     propStreamSpec = new CBenchmarkOutStream;
877     propStream = propStreamSpec;
878   }
879   if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
880     return E_OUTOFMEMORY;
881   propStreamSpec->Init(true, false);
882 
883 
884   CMyComPtr<IUnknown> coder;
885   if (_encoderFilter)
886     coder = _encoderFilter;
887   else
888     coder = _encoder;
889   {
890     CMyComPtr<ICompressSetCoderProperties> scp;
891     coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
892     if (scp)
893     {
894       UInt64 reduceSize = kBufferSize;
895       RINOK(method.SetCoderProps(scp, &reduceSize));
896     }
897     else
898     {
899       if (method.AreThereNonOptionalProps())
900         return E_INVALIDARG;
901     }
902 
903     CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
904     coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
905     if (writeCoderProps)
906     {
907       RINOK(writeCoderProps->WriteCoderProperties(propStream));
908     }
909 
910     {
911       CMyComPtr<ICryptoSetPassword> sp;
912       coder.QueryInterface(IID_ICryptoSetPassword, &sp);
913       if (sp)
914       {
915         RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
916 
917         // we must call encoding one time to calculate password key for key cache.
918         // it must be after WriteCoderProperties!
919         Byte temp[16];
920         memset(temp, 0, sizeof(temp));
921 
922         if (_encoderFilter)
923         {
924           _encoderFilter->Init();
925           _encoderFilter->Filter(temp, sizeof(temp));
926         }
927         else
928         {
929           CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
930           CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
931           inStreamSpec->Init(temp, sizeof(temp));
932 
933           CCrcOutStream *crcStreamSpec = new CCrcOutStream;
934           CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
935           crcStreamSpec->Init();
936 
937           RINOK(_encoder->Code(inStream, crcStream, 0, 0, NULL));
938         }
939       }
940     }
941   }
942 
943   return S_OK;
944 }
945 
946 
My_FilterBench(ICompressFilter * filter,Byte * data,size_t size)947 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
948 {
949   while (size != 0)
950   {
951     UInt32 cur = (UInt32)1 << 31;
952     if (cur > size)
953       cur = (UInt32)size;
954     UInt32 processed = filter->Filter(data, cur);
955     data += processed;
956     // if (processed > size) (in AES filter), we must fill last block with zeros.
957     // but it is not important for benchmark. So we just copy that data without filtering.
958     if (processed > size || processed == 0)
959       break;
960     size -= processed;
961   }
962 }
963 
964 
Encode()965 HRESULT CEncoderInfo::Encode()
966 {
967   CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
968   bi.UnpackSize = 0;
969   bi.PackSize = 0;
970   CMyComPtr<ICryptoProperties> cp;
971   CMyComPtr<IUnknown> coder;
972   if (_encoderFilter)
973     coder = _encoderFilter;
974   else
975     coder = _encoder;
976   coder.QueryInterface(IID_ICryptoProperties, &cp);
977   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
978   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
979   UInt64 prev = 0;
980 
981   UInt32 crcPrev = 0;
982 
983   if (cp)
984   {
985     RINOK(cp->SetKey(_key, sizeof(_key)));
986     RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
987   }
988 
989   for (UInt64 i = 0; i < NumIterations; i++)
990   {
991     if (printCallback && bi.UnpackSize - prev > (1 << 20))
992     {
993       RINOK(printCallback->CheckBreak());
994       prev = bi.UnpackSize;
995     }
996 
997     bool isLast = (i == NumIterations - 1);
998     bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
999     outStreamSpec->Init(isLast, calcCrc);
1000 
1001     if (_encoderFilter)
1002     {
1003       memcpy(rgCopy.Buffer, uncompressedDataPtr, kBufferSize);
1004       _encoderFilter->Init();
1005       My_FilterBench(_encoderFilter, rgCopy.Buffer, kBufferSize);
1006       RINOK(WriteStream(outStream, rgCopy.Buffer, kBufferSize));
1007     }
1008     else
1009     {
1010       inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1011       RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]));
1012     }
1013 
1014     // outStreamSpec->Print();
1015 
1016     UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
1017     if (i == 0)
1018       crcPrev = crcNew;
1019     else if (calcCrc && crcPrev != crcNew)
1020       return E_FAIL;
1021 
1022     compressedSize = outStreamSpec->Pos;
1023     bi.UnpackSize += kBufferSize;
1024     bi.PackSize += compressedSize;
1025   }
1026 
1027   _encoder.Release();
1028   _encoderFilter.Release();
1029   return S_OK;
1030 }
1031 
1032 
Decode(UInt32 decoderIndex)1033 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1034 {
1035   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1036   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1037   CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1038   CMyComPtr<IUnknown> coder;
1039   if (_decoderFilter)
1040   {
1041     if (decoderIndex != 0)
1042       return E_FAIL;
1043     coder = _decoderFilter;
1044   }
1045   else
1046     coder = decoder;
1047 
1048   CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1049   coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
1050   if (!setDecProps && propStreamSpec->Pos != 0)
1051     return E_FAIL;
1052 
1053   CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1054   CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1055 
1056   CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1057   pi->BenchInfo.UnpackSize = 0;
1058   pi->BenchInfo.PackSize = 0;
1059 
1060   #ifndef _7ZIP_ST
1061   {
1062     CMyComPtr<ICompressSetCoderMt> setCoderMt;
1063     coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1064     if (setCoderMt)
1065     {
1066       RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
1067     }
1068   }
1069   #endif
1070 
1071   CMyComPtr<ICompressSetCoderProperties> scp;
1072   coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1073   if (scp)
1074   {
1075     UInt64 reduceSize = _uncompressedDataSize;
1076     RINOK(_method.SetCoderProps(scp, &reduceSize));
1077   }
1078 
1079   CMyComPtr<ICryptoProperties> cp;
1080   coder.QueryInterface(IID_ICryptoProperties, &cp);
1081 
1082   if (setDecProps)
1083   {
1084     RINOK(setDecProps->SetDecoderProperties2(propStreamSpec->Buffer, (UInt32)propStreamSpec->Pos));
1085   }
1086 
1087   {
1088     CMyComPtr<ICryptoSetPassword> sp;
1089     coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1090     if (sp)
1091     {
1092       RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
1093     }
1094   }
1095 
1096   UInt64 prev = 0;
1097 
1098   if (cp)
1099   {
1100     RINOK(cp->SetKey(_key, sizeof(_key)));
1101     RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
1102   }
1103 
1104   for (UInt64 i = 0; i < NumIterations; i++)
1105   {
1106     if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
1107     {
1108       RINOK(printCallback->CheckBreak());
1109       prev = pi->BenchInfo.UnpackSize;
1110     }
1111 
1112     inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
1113     crcOutStreamSpec->Init();
1114 
1115     UInt64 outSize = kBufferSize;
1116     crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);
1117 
1118     if (_decoderFilter)
1119     {
1120       if (compressedSize > rgCopy.BufferSize)
1121         return E_FAIL;
1122       memcpy(rgCopy.Buffer, outStreamSpec->Buffer, compressedSize);
1123       _decoderFilter->Init();
1124       My_FilterBench(_decoderFilter, rgCopy.Buffer, compressedSize);
1125       RINOK(WriteStream(crcOutStream, rgCopy.Buffer, compressedSize));
1126     }
1127     else
1128     {
1129       RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
1130     }
1131 
1132     if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1133       return S_FALSE;
1134     pi->BenchInfo.UnpackSize += kBufferSize;
1135     pi->BenchInfo.PackSize += compressedSize;
1136   }
1137 
1138   decoder.Release();
1139   _decoderFilter.Release();
1140   return S_OK;
1141 }
1142 
1143 
// Upper limit (1 << 12 = 4096) for a thread count. It is not referenced in
// this part of the file; presumably it caps the number of benchmark threads
// (TODO confirm against the callers).
static const UInt32 kNumThreadsMax = (1 << 12);
1145 
1146 struct CBenchEncoders
1147 {
1148   CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders1149   CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders1150   ~CBenchEncoders() { delete []encoders; }
1151 };
1152 
1153 
// Returns how many passes of cost `numCommands` fit into the budget
// `complexInCommands`.
//
// The per-pass cost is raised to at least 16, so the division is always
// defined, and the result is never smaller than 1.
static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
{
  const UInt64 kMinCommands = (UInt64)1 << 4;
  const UInt64 cost = (numCommands < kMinCommands) ? kMinCommands : numCommands;
  const UInt64 count = complexInCommands / cost;
  if (count == 0)
    return 1;
  return count;
}
1161 
1162 
// Benchmarks one compression method: an encode pass followed by a decode pass.
//
// Steps, in order:
//   1. Resolve the method name to an id (E_NOTIMPL if unknown, E_INVALIDARG
//      if the method does not use exactly one stream).
//   2. Create one CEncoderInfo per encoder thread, each with its encoder and
//      `numSubDecoderThreads` decoders (E_NOTIMPL if a coder is unavailable).
//   3. Initialise every encoder's input data (Init).
//   4. Run the encode pass (threads if more than one encoder), then report
//      it through callback->SetEncodeResult(info, true).
//   5. Run the decode pass the same way, then report it through
//      callback->SetDecodeResult(info, false) and (info, true).
//
// Parameters:
//   complexInCommands - work budget used to derive the iteration counts.
//   oldLzmaBenchMode  - only named in multithreaded builds; enables the
//                       LZMA-specific thread split below.
//   numThreads        - only named in multithreaded builds; number of
//                       encoder threads requested.
//   method2           - method description; a private copy is modified.
//   uncompressedDataSize, fileData, generateDictBits - input description
//                       forwarded to each encoder.
//   printCallback     - stored in every encoder.
//   callback          - receives the results; must not be NULL, it is
//                       dereferenced unconditionally.
//   benchProps        - complexity figures; must not be NULL.
//
// Returns S_OK on success, or the first failing HRESULT.
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    bool
      #ifndef _7ZIP_ST
        oldLzmaBenchMode
      #endif
    ,
    UInt32
      #ifndef _7ZIP_ST
        numThreads
      #endif
    ,
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  // Work on a copy: the LZMA branch below may add a thread-count property.
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  if (!FindMethod(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, methodId, numStreams))
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  // Single-threaded defaults; overridden below in multithreaded builds.
  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef _7ZIP_ST
    numEncoderThreads = numThreads;

    if (oldLzmaBenchMode && methodId == k_LZMA)
    {
      bool fixedNumber;
      UInt32 numLzmaThreads = method.Get_Lzma_NumThreads(fixedNumber);
      if (!fixedNumber && numThreads == 1)
        method.AddProp_NumThreads(1);
      // When LZMA itself uses more than one thread, run half as many
      // encoders and give each of them two decoder threads.
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        numEncoderThreads = numThreads / 2;
        numSubDecoderThreads = 2;
      }
    }
  #endif

  // encodersSpec frees the array on every return path of this function.
  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // Create the coder objects for every encoder slot.
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // Only the first encoder gets the result callback.
    encoder.callback = (i == 0) ? callback : 0;
    encoder.printCallback = printCallback;

    {
      CCreatedCoder cod;
      RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId, true, encoder._encoderFilter, cod));
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // CRC checking is enabled only when the method's complexity figure
    // exceeds 30.
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
    encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;

    // All-zero IV, key and password buffers.
    memset(encoder._iv, 0, sizeof(encoder._iv));
    memset(encoder._key, 0, sizeof(encoder._key));
    memset(encoder._psw, 0, sizeof(encoder._psw));

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod));
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }
  }

  // One generator instance is passed to every encoder's Init().
  CBaseRandomGenerator rg;
  rg.Init();

  // CRC of the caller-supplied data, if any; stays 0 otherwise.
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;

    RINOK(encoders[i].Init(method, generateDictBits, &rg));
  }

  // ---------- Encode pass ----------

  // Shared status object; every progress-info object points at it.
  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);

    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    // The first encoder's progress object carries the callback and the
    // start time of the whole pass.
    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
      bpi->SetStartTime();
    }

    #ifndef _7ZIP_ST
    if (numEncoderThreads > 1)
    {
      #ifdef USE_ALLOCA
      // Per-thread alloca size: a different value below 2 KB for each index.
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      // NOTE(review): on failure this returns while threads started in
      // earlier iterations may still be running, and `encodersSpec` is
      // destroyed on return - confirm that this is safe.
      RINOK(encoder.CreateEncoderThread())
    }
    else
    #endif
    {
      RINOK(encoder.Encode());
    }
  }

  #ifndef _7ZIP_ST
  if (numEncoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      encoders[i].thread[0].Wait();
  #endif

  RINOK(status.Res);

  CBenchInfo info;

  // Timing comes from the first encoder; sizes are summed over all encoders.
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  RINOK(callback->SetEncodeResult(info, true));


  // ---------- Decode pass ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    if (i == 0)
    {
      // The iteration count is derived from the first encoder's compressed
      // size and then reused for all other encoders.
      encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef _7ZIP_ST
    {
      // A non-positive thread count from the method is treated as 1.
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
    }
    if (numDecoderThreads > 1)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        // The second argument is true only for the very first decoder thread.
        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res);
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0));
    }
  }

  #ifndef _7ZIP_ST
  // Wait for every decoder thread; the last non-S_OK result is returned.
  HRESULT res = S_OK;
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        encoder.thread[j].Wait();
        if (encoder.Results[j] != S_OK)
          res = encoder.Results[j];
      }
  RINOK(res);
  #endif

  RINOK(status.Res);
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  #ifndef _7ZIP_ST
  #ifdef UNDER_CE
  // On this platform the kernel + user time of each decoder thread is added
  // to info.UserTime explicitly.
  if (numDecoderThreads > 1)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // Report the same totals twice: first as non-final, then as final.
  RINOK(callback->SetDecodeResult(info, false));
  RINOK(callback->SetDecodeResult(info, true));

  return S_OK;
}
1427 
1428 
// Returns an LZMA memory-usage estimate for a dictionary of `dictionary`
// bytes (result in the same units as `dictionary`).
//
// The estimate is
//   (t + 64K + 2 * dictionary) * 4  +  dictionary * 3 / 2  +  1M  [+ 6M if multiThread]
// where `t` is derived from (dictionary - 1) by smearing its set bits
// downwards, halving, forcing the low 16 bits on, halving once more when the
// value exceeds 1 << 24, and finally adding 1.
static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
{
  UInt32 t = dictionary - 1;
  for (unsigned shift = 1; shift <= 8; shift <<= 1)
    t |= (t >> shift);
  t = (t >> 1) | 0xFFFF;
  if (t > (1 << 24))
    t >>= 1;
  t++;

  const UInt64 dict = dictionary;
  UInt64 total = ((t + (1 << 16)) + dict * 2) * 4;
  total += dict * 3 / 2;
  total += (1 << 20);
  if (multiThread)
    total += (6 << 20);
  return total;
}
1444 
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary,bool totalBench)1445 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
1446 {
1447   const UInt32 kBufferSize = dictionary;
1448   const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
1449   bool lzmaMt = (totalBench || numThreads > 1);
1450   UInt32 numBigThreads = numThreads;
1451   if (!totalBench && lzmaMt)
1452     numBigThreads /= 2;
1453   return ((UInt64)kBufferSize + kCompressedBufferSize +
1454     GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
1455 }
1456 
CrcBig(const void * data,UInt32 size,UInt64 numIterations,const UInt32 * checkSum,IHasher * hf,IBenchPrintCallback * callback)1457 static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
1458     const UInt32 *checkSum, IHasher *hf,
1459     IBenchPrintCallback *callback)
1460 {
1461   Byte hash[64];
1462   UInt64 i;
1463   for (i = 0; i < sizeof(hash); i++)
1464     hash[i] = 0;
1465   for (i = 0; i < numIterations; i++)
1466   {
1467     if (callback && (i & 0xFF) == 0)
1468     {
1469       RINOK(callback->CheckBreak());
1470     }
1471     hf->Init();
1472     hf->Update(data, size);
1473     hf->Final(hash);
1474     UInt32 hashSize = hf->GetDigestSize();
1475     if (hashSize > sizeof(hash))
1476       return S_FALSE;
1477     UInt32 sum = 0;
1478     for (UInt32 j = 0; j < hashSize; j += 4)
1479       sum ^= GetUi32(hash + j);
1480     if (checkSum && sum != *checkSum)
1481     {
1482       return S_FALSE;
1483     }
1484   }
1485   return S_OK;
1486 }
1487 
// Start value and operand of the CPU-frequency loop. It is a non-const global
// with external linkage, presumably so the compiler cannot fold the loop into
// a constant (TODO confirm).
UInt32 g_BenchCpuFreqTemp = 1;

// Unrolling helpers for CountCpuFreq(). YY1 is one add plus one xor on `sum`;
// every following level repeats the previous level four times, so YY7 expands
// to 64 copies of YY1, i.e. 128 operations.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// Number of operations in one expansion of YY7 (64 * 2).
static const UInt32 kNumFreqCommands = 128;
1495 
EXTERN_C_BEGIN

// Measurement kernel of the frequency benchmark: executes the YY7 add/xor
// sequence `num` times, starting from `sum` and using `val` as the operand,
// and returns the final `sum`.
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}

EXTERN_C_END
1508 
1509 
1510 #ifndef _7ZIP_ST
1511 
// Per-thread state of the frequency benchmark; a pointer to it is the
// argument of FreqThreadFunction.
struct CFreqInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback; // polled with CheckBreak() before every iteration
  HRESULT CallbackRes;           // last CheckBreak() result
  UInt32 ValRes;                 // final sum; written only if all iterations ran
  UInt32 Size;                   // `num` argument passed to CountCpuFreq()
  UInt64 NumIterations;          // number of CountCpuFreq() calls to make

  // Waits for the thread to finish and then closes it.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
1527 
FreqThreadFunction(void * param)1528 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
1529 {
1530   CFreqInfo *p = (CFreqInfo *)param;
1531 
1532   UInt32 sum = g_BenchCpuFreqTemp;
1533   for (UInt64 k = p->NumIterations; k > 0; k--)
1534   {
1535     p->CallbackRes = p->Callback->CheckBreak();
1536     if (p->CallbackRes != S_OK)
1537       return 0;
1538     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
1539   }
1540   p->ValRes = sum;
1541   return 0;
1542 }
1543 
1544 struct CFreqThreads
1545 {
1546   CFreqInfo *Items;
1547   UInt32 NumThreads;
1548 
CFreqThreadsCFreqThreads1549   CFreqThreads(): Items(0), NumThreads(0) {}
WaitAllCFreqThreads1550   void WaitAll()
1551   {
1552     for (UInt32 i = 0; i < NumThreads; i++)
1553       Items[i].Wait();
1554     NumThreads = 0;
1555   }
~CFreqThreadsCFreqThreads1556   ~CFreqThreads()
1557   {
1558     WaitAll();
1559     delete []Items;
1560   }
1561 };
1562 
// Per-thread state of the hash benchmark; a pointer to it is the argument of
// CrcThreadFunction.
struct CCrcInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback; // forwarded to CrcBig() for break polling
  HRESULT CallbackRes;           // not touched by CrcThreadFunction

  const Byte *Data;              // data block to hash
  UInt32 Size;                   // size of Data in bytes
  UInt64 NumIterations;          // number of hashing passes
  bool CheckSumDefined;          // true: CrcBig() verifies against CheckSum
  UInt32 CheckSum;               // expected folded digest
  CMyComPtr<IHasher> Hasher;     // hasher used by this thread
  HRESULT Res;                   // result of CrcBig(), written by the thread

  #ifdef USE_ALLOCA
  size_t AllocaSize;             // bytes passed to alloca() at thread start
  #endif

  // Waits for the thread to finish and then closes it.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
1587 
CrcThreadFunction(void * param)1588 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
1589 {
1590   CCrcInfo *p = (CCrcInfo *)param;
1591 
1592   #ifdef USE_ALLOCA
1593   alloca(p->AllocaSize);
1594   #endif
1595 
1596   p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
1597       p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
1598       p->Callback);
1599   return 0;
1600 }
1601 
1602 struct CCrcThreads
1603 {
1604   CCrcInfo *Items;
1605   UInt32 NumThreads;
1606 
CCrcThreadsCCrcThreads1607   CCrcThreads(): Items(0), NumThreads(0) {}
WaitAllCCrcThreads1608   void WaitAll()
1609   {
1610     for (UInt32 i = 0; i < NumThreads; i++)
1611       Items[i].Wait();
1612     NumThreads = 0;
1613   }
~CCrcThreadsCCrcThreads1614   ~CCrcThreads()
1615   {
1616     WaitAll();
1617     delete []Items;
1618   }
1619 };
1620 
1621 #endif
1622 
CrcCalc1(const Byte * buf,UInt32 size)1623 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
1624 {
1625   UInt32 crc = CRC_INIT_VAL;;
1626   for (UInt32 i = 0; i < size; i++)
1627     crc = CRC_UPDATE_BYTE(crc, buf[i]);
1628   return CRC_GET_DIGEST(crc);
1629 }
1630 
RandGen(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)1631 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1632 {
1633   for (UInt32 i = 0; i < size; i++)
1634     buf[i] = (Byte)RG.GetRnd();
1635 }
1636 
RandGenCrc(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)1637 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1638 {
1639   RandGen(buf, size, RG);
1640   return CrcCalc1(buf, size);
1641 }
1642 
CrcInternalTest()1643 bool CrcInternalTest()
1644 {
1645   CBenchBuffer buffer;
1646   const UInt32 kBufferSize0 = (1 << 8);
1647   const UInt32 kBufferSize1 = (1 << 10);
1648   const UInt32 kCheckSize = (1 << 5);
1649   if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
1650     return false;
1651   Byte *buf = buffer.Buffer;
1652   UInt32 i;
1653   for (i = 0; i < kBufferSize0; i++)
1654     buf[i] = (Byte)i;
1655   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
1656   if (crc1 != 0x29058C73)
1657     return false;
1658   CBaseRandomGenerator RG;
1659   RandGen(buf + kBufferSize0, kBufferSize1, RG);
1660   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
1661     for (UInt32 j = 0; j < kCheckSize; j++)
1662       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
1663         return false;
1664   return true;
1665 }
1666 
// One row of the total-benchmark method table (g_Bench).
struct CBenchMethod
{
  unsigned Weight;          // weight of this row's results in the totals
  unsigned DictBits;        // passed to MethodBench as generateDictBits;
                            // 0 selects the filter unpack size in TotalBench
  UInt32 EncComplex;        // copied to CBenchProps::EncComplex
  UInt32 DecComplexCompr;   // copied to CBenchProps::DecComplexCompr
  UInt32 DecComplexUnc;     // copied to CBenchProps::DecComplexUnc
  const char *Name;         // method string that is parsed and also printed
};
1676 
// Methods run by TotalBench(), in this order.
// Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name.
static const CBenchMethod g_Bench[] =
{
  { 40, 17,  357,  145,   20, "LZMA:x1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },

  { 10, 16,  124,   40,   14, "Deflate:x1" },
  { 20, 16,  376,   40,   14, "Deflate:x5" },
  { 10, 16, 1082,   40,   14, "Deflate:x7" },
  { 10, 17,  422,   40,   14, "Deflate64:x5" },

  { 10, 15,  590,   69,   69, "BZip2:x1" },
  { 20, 19,  815,  122,  122, "BZip2:x5" },
  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 10, 19, 2530,  122,  122, "BZip2:x7" },

  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
  { 10, 22, 1655,    0, 1830, "PPMD:x5" },

  // Rows with DictBits == 0 use the filter unpack size unless a size is forced.
  {  2,  0,    6,    0,    6, "Delta:4" },
  {  2,  0,    4,    0,    4, "BCJ" },

  { 10,  0,   24,    0,   24, "AES256CBC:1" },
  {  2,  0,    8,    0,    2, "AES256CBC:2" }
};
1702 
// One row of the hash-benchmark table (g_Hash). The code that consumes the
// table is outside this part of the file; the field notes below are inferred
// from the names and from CrcBench's parameters - TODO confirm.
struct CBenchHash
{
  unsigned Weight;   // presumably the weight of this row in the totals
  UInt32 Complex;    // presumably the complexity figure used for the rating
  UInt32 CheckSum;   // presumably the expected folded digest (see CrcBig)
  const char *Name;  // hash method string
};
1710 
// Hash methods known to the benchmark.
// Columns: Weight, Complex, CheckSum, Name.
// The three CRC32 rows share one checksum value.
static const CBenchHash g_Hash[] =
{
  {  1,  1820, 0x8F8FEDAB, "CRC32:1" },
  { 10,   558, 0x8F8FEDAB, "CRC32:4" },
  { 10,   339, 0x8F8FEDAB, "CRC32:8" },
  { 10,   512, 0xDF1C17CC, "CRC64" },
  { 10,  5100, 0x2D79FF2E, "SHA256" },
  { 10,  2340, 0x4C25132B, "SHA1" },
  {  2,  5500, 0xE084E913, "BLAKE2sp" }
};
1721 
// Running totals of weighted benchmark results. PrintResults() adds
// weight-multiplied values; PrintTotals() divides by NumIterations2 to get
// the weighted averages.
struct CTotalBenchRes
{
  // UInt64 NumIterations1; // for Usage
  UInt64 NumIterations2; // for Rating / RPU

  UInt64 Rating;
  UInt64 Usage;
  UInt64 RPU;

  // Resets every accumulator to zero.
  void Init()
  {
    // NumIterations1 = 0;
    NumIterations2 = 0;
    Rating = 0;
    Usage = 0;
    RPU = 0;
  }

  // Stores the field-wise sum of `r1` and `r2` in this object.
  void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
  {
    // NumIterations1 = r1.NumIterations1 + r2.NumIterations1;
    NumIterations2 = r1.NumIterations2 + r2.NumIterations2;
    Rating = r1.Rating + r2.Rating;
    Usage = r1.Usage + r2.Usage;
    RPU = r1.RPU + r2.RPU;
  }
};
1742 
PrintNumber(IBenchPrintCallback & f,UInt64 value,unsigned size)1743 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
1744 {
1745   char s[128];
1746   unsigned startPos = (unsigned)sizeof(s) - 32;
1747   memset(s, ' ', startPos);
1748   ConvertUInt64ToString(value, s + startPos);
1749   // if (withSpace)
1750   {
1751     startPos--;
1752     size++;
1753   }
1754   unsigned len = (unsigned)strlen(s + startPos);
1755   if (size > len)
1756   {
1757     startPos -= (size - len);
1758     if (startPos < 0)
1759       startPos = 0;
1760   }
1761   f.Print(s + startPos);
1762 }
1763 
// Column widths, in characters, of the benchmark result table. They are
// passed as the `size` argument of the Print* helpers.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;

// Combined widths of column groups. The leading constants (4 and 2) presumably
// account for the one extra space PrintNumber() puts in front of each field
// (TODO confirm).
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
1775 
1776 
PrintRating(IBenchPrintCallback & f,UInt64 rating,unsigned size)1777 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
1778 {
1779   PrintNumber(f, (rating + 500000) / 1000000, size);
1780 }
1781 
1782 
PrintPercents(IBenchPrintCallback & f,UInt64 val,UInt64 divider,unsigned size)1783 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
1784 {
1785   PrintNumber(f, (val * 100 + divider / 2) / divider, size);
1786 }
1787 
PrintChars(IBenchPrintCallback & f,char c,unsigned size)1788 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
1789 {
1790   char s[256];
1791   memset(s, (Byte)c, size);
1792   s[size] = 0;
1793   f.Print(s);
1794 }
1795 
PrintSpaces(IBenchPrintCallback & f,unsigned size)1796 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
1797 {
1798   PrintChars(f, ' ', size);
1799 }
1800 
PrintResults(IBenchPrintCallback & f,UInt64 usage,UInt64 rpu,UInt64 rating,bool showFreq,UInt64 cpuFreq)1801 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
1802 {
1803   PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
1804   PrintRating(f, rpu, kFieldSize_RU);
1805   PrintRating(f, rating, kFieldSize_Rating);
1806   if (showFreq)
1807   {
1808     if (cpuFreq == 0)
1809       PrintSpaces(f, kFieldSize_EUAndEffec);
1810     else
1811     {
1812       UInt64 ddd = cpuFreq * usage / 100;
1813       if (ddd == 0)
1814         ddd = 1;
1815       PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
1816       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
1817     }
1818   }
1819 }
1820 
PrintResults(IBenchPrintCallback * f,const CBenchInfo & info,unsigned weight,UInt64 rating,bool showFreq,UInt64 cpuFreq,CTotalBenchRes * res)1821 static void PrintResults(IBenchPrintCallback *f,
1822     const CBenchInfo &info,
1823     unsigned weight,
1824     UInt64 rating,
1825     bool showFreq, UInt64 cpuFreq,
1826     CTotalBenchRes *res)
1827 {
1828   UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
1829   if (f)
1830   {
1831     if (speed != 0)
1832       PrintNumber(*f, speed / 1024, kFieldSize_Speed);
1833     else
1834       PrintSpaces(*f, 1 + kFieldSize_Speed);
1835   }
1836   UInt64 usage = info.GetUsage();
1837   UInt64 rpu = info.GetRatingPerUsage(rating);
1838   if (f)
1839   {
1840     PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
1841   }
1842 
1843   if (res)
1844   {
1845     // res->NumIterations1++;
1846     res->NumIterations2 += weight;
1847     res->RPU += (rpu * weight);
1848     res->Rating += (rating * weight);
1849     res->Usage += (usage * weight);
1850   }
1851 }
1852 
PrintTotals(IBenchPrintCallback & f,bool showFreq,UInt64 cpuFreq,const CTotalBenchRes & res)1853 static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
1854 {
1855   PrintSpaces(f, 1 + kFieldSize_Speed);
1856   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
1857   UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
1858   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
1859 }
1860 
PrintRequirements(IBenchPrintCallback & f,const char * sizeString,bool size_Defined,UInt64 size,const char * threadsString,UInt32 numThreads)1861 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
1862     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
1863 {
1864   f.Print("RAM ");
1865   f.Print(sizeString);
1866   if (size_Defined)
1867     PrintNumber(f, (size >> 20), 6);
1868   else
1869     f.Print("      ?");
1870   f.Print(" MB,  # ");
1871   f.Print(threadsString);
1872   PrintNumber(f, numThreads, 3);
1873   f.NewLine();
1874 }
1875 
1876 struct CBenchCallbackToPrint: public IBenchCallback
1877 {
1878   CBenchProps BenchProps;
1879   CTotalBenchRes EncodeRes;
1880   CTotalBenchRes DecodeRes;
1881   IBenchPrintCallback *_file;
1882   UInt32 DictSize;
1883 
1884   bool Use2Columns;
1885   unsigned NameFieldSize;
1886 
1887   bool ShowFreq;
1888   UInt64 CpuFreq;
1889 
1890   unsigned EncodeWeight;
1891   unsigned DecodeWeight;
1892 
CBenchCallbackToPrintCBenchCallbackToPrint1893   CBenchCallbackToPrint():
1894       Use2Columns(false),
1895       NameFieldSize(0),
1896       ShowFreq(false),
1897       CpuFreq(0),
1898       EncodeWeight(1),
1899       DecodeWeight(1)
1900       {}
1901 
InitCBenchCallbackToPrint1902   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
1903   void Print(const char *s);
1904   void NewLine();
1905 
1906   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
1907   HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
1908   HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
1909 };
1910 
SetFreq(bool showFreq,UInt64 cpuFreq)1911 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
1912 {
1913   ShowFreq = showFreq;
1914   CpuFreq = cpuFreq;
1915   return S_OK;
1916 }
1917 
SetEncodeResult(const CBenchInfo & info,bool final)1918 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
1919 {
1920   RINOK(_file->CheckBreak());
1921   if (final)
1922   {
1923     UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
1924     PrintResults(_file, info,
1925         EncodeWeight, rating,
1926         ShowFreq, CpuFreq, &EncodeRes);
1927     if (!Use2Columns)
1928       _file->NewLine();
1929   }
1930   return S_OK;
1931 }
1932 
// Separator printed between the encode and the decode columns when both are
// shown on one line (Use2Columns).
static const char *kSep = "  | ";
1934 
SetDecodeResult(const CBenchInfo & info,bool final)1935 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
1936 {
1937   RINOK(_file->CheckBreak());
1938   if (final)
1939   {
1940     UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
1941     if (Use2Columns)
1942       _file->Print(kSep);
1943     else
1944       PrintSpaces(*_file, NameFieldSize);
1945     CBenchInfo info2 = info;
1946     info2.UnpackSize *= info2.NumIterations;
1947     info2.PackSize *= info2.NumIterations;
1948     info2.NumIterations = 1;
1949     PrintResults(_file, info2,
1950         DecodeWeight, rating,
1951         ShowFreq, CpuFreq, &DecodeRes);
1952   }
1953   return S_OK;
1954 }
1955 
// Forwards the string `s` to the output sink `_file`.
void CBenchCallbackToPrint::Print(const char *s)
{
  _file->Print(s);
}
1960 
// Ends the current output line on the output sink `_file`.
void CBenchCallbackToPrint::NewLine()
{
  _file->NewLine();
}
1965 
PrintLeft(IBenchPrintCallback & f,const char * s,unsigned size)1966 void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
1967 {
1968   f.Print(s);
1969   int numSpaces = size - MyStringLen(s);
1970   if (numSpaces > 0)
1971     PrintSpaces(f, numSpaces);
1972 }
1973 
PrintRight(IBenchPrintCallback & f,const char * s,unsigned size)1974 void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
1975 {
1976   int numSpaces = size - MyStringLen(s);
1977   if (numSpaces > 0)
1978     PrintSpaces(f, numSpaces);
1979   f.Print(s);
1980 }
1981 
TotalBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,bool forceUnpackSize,size_t unpackSize,const Byte * fileData,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback)1982 static HRESULT TotalBench(
1983     DECL_EXTERNAL_CODECS_LOC_VARS
1984     UInt64 complexInCommands,
1985     UInt32 numThreads,
1986     bool forceUnpackSize,
1987     size_t unpackSize,
1988     const Byte *fileData,
1989     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
1990 {
1991   for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
1992   {
1993     const CBenchMethod &bench = g_Bench[i];
1994     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
1995     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
1996     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
1997     callback->BenchProps.EncComplex = bench.EncComplex;
1998 
1999     COneMethodInfo method;
2000     NCOM::CPropVariant propVariant;
2001     propVariant = bench.Name;
2002     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2003 
2004     size_t unpackSize2 = unpackSize;
2005     if (!forceUnpackSize && bench.DictBits == 0)
2006       unpackSize2 = kFilterUnpackSize;
2007 
2008     callback->EncodeWeight = bench.Weight;
2009     callback->DecodeWeight = bench.Weight;
2010 
2011     HRESULT res = MethodBench(
2012         EXTERNAL_CODECS_LOC_VARS
2013         complexInCommands,
2014         false, numThreads, method,
2015         unpackSize2, fileData,
2016         bench.DictBits,
2017         printCallback, callback, &callback->BenchProps);
2018 
2019     if (res == E_NOTIMPL)
2020     {
2021       // callback->Print(" ---");
2022       // we need additional empty line as line for decompression results
2023       if (!callback->Use2Columns)
2024         callback->NewLine();
2025     }
2026     else
2027     {
2028       RINOK(res);
2029     }
2030 
2031     callback->NewLine();
2032   }
2033   return S_OK;
2034 }
2035 
2036 
FreqBench(UInt64 complexInCommands,UInt32 numThreads,IBenchPrintCallback * _file,bool showFreq,UInt64 specifiedFreq,UInt64 & cpuFreq,UInt32 & res)2037 static HRESULT FreqBench(
2038     UInt64 complexInCommands,
2039     UInt32 numThreads,
2040     IBenchPrintCallback *_file,
2041     bool showFreq,
2042     UInt64 specifiedFreq,
2043     UInt64 &cpuFreq,
2044     UInt32 &res)
2045 {
2046   res = 0;
2047   cpuFreq = 0;
2048 
2049   UInt32 bufferSize = 1 << 20;
2050   UInt32 complexity = kNumFreqCommands;
2051   if (numThreads == 0)
2052     numThreads = 1;
2053 
2054   #ifdef _7ZIP_ST
2055   numThreads = 1;
2056   #endif
2057 
2058   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2059   UInt64 numIterations = complexInCommands / complexity / bsize;
2060   if (numIterations == 0)
2061     numIterations = 1;
2062 
2063   CBenchInfoCalc progressInfoSpec;
2064 
2065   #ifndef _7ZIP_ST
2066   CFreqThreads threads;
2067   if (numThreads > 1)
2068   {
2069     threads.Items = new CFreqInfo[numThreads];
2070     UInt32 i;
2071     for (i = 0; i < numThreads; i++)
2072     {
2073       CFreqInfo &info = threads.Items[i];
2074       info.Callback = _file;
2075       info.CallbackRes = S_OK;
2076       info.NumIterations = numIterations;
2077       info.Size = bufferSize;
2078     }
2079     progressInfoSpec.SetStartTime();
2080     for (i = 0; i < numThreads; i++)
2081     {
2082       CFreqInfo &info = threads.Items[i];
2083       RINOK(info.Thread.Create(FreqThreadFunction, &info));
2084       threads.NumThreads++;
2085     }
2086     threads.WaitAll();
2087     for (i = 0; i < numThreads; i++)
2088     {
2089       RINOK(threads.Items[i].CallbackRes);
2090     }
2091   }
2092   else
2093   #endif
2094   {
2095     progressInfoSpec.SetStartTime();
2096     UInt32 sum = g_BenchCpuFreqTemp;
2097     for (UInt64 k = numIterations; k > 0; k--)
2098     {
2099       RINOK(_file->CheckBreak());
2100       sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
2101     }
2102     res += sum;
2103   }
2104 
2105   CBenchInfo info;
2106   progressInfoSpec.SetFinishTime(info);
2107 
2108   info.UnpackSize = 0;
2109   info.PackSize = 0;
2110   info.NumIterations = 1;
2111 
2112   if (_file)
2113   {
2114     {
2115       UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
2116       UInt64 rating = info.GetSpeed(numCommands);
2117       cpuFreq = rating / numThreads;
2118       PrintResults(_file, info,
2119           0, // weight
2120           rating,
2121           showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
2122     }
2123     RINOK(_file->CheckBreak());
2124   }
2125 
2126   return S_OK;
2127 }
2128 
2129 
2130 
CrcBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufferSize,UInt64 & speed,UInt32 complexity,unsigned benchWeight,const UInt32 * checkSum,const COneMethodInfo & method,IBenchPrintCallback * _file,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2131 static HRESULT CrcBench(
2132     DECL_EXTERNAL_CODECS_LOC_VARS
2133     UInt64 complexInCommands,
2134     UInt32 numThreads, UInt32 bufferSize,
2135     UInt64 &speed,
2136     UInt32 complexity, unsigned benchWeight,
2137     const UInt32 *checkSum,
2138     const COneMethodInfo &method,
2139     IBenchPrintCallback *_file,
2140     CTotalBenchRes *encodeRes,
2141     bool showFreq, UInt64 cpuFreq)
2142 {
2143   if (numThreads == 0)
2144     numThreads = 1;
2145 
2146   #ifdef _7ZIP_ST
2147   numThreads = 1;
2148   #endif
2149 
2150   AString methodName = method.MethodName;
2151   // methodName.RemoveChar(L'-');
2152   CMethodId hashID;
2153   if (!FindHashMethod(
2154       EXTERNAL_CODECS_LOC_VARS
2155       methodName, hashID))
2156     return E_NOTIMPL;
2157 
2158   CBenchBuffer buffer;
2159   size_t totalSize = (size_t)bufferSize * numThreads;
2160   if (totalSize / numThreads != bufferSize)
2161     return E_OUTOFMEMORY;
2162   if (!buffer.Alloc(totalSize))
2163     return E_OUTOFMEMORY;
2164 
2165   Byte *buf = buffer.Buffer;
2166   CBaseRandomGenerator RG;
2167   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2168   UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
2169   if (numIterations == 0)
2170     numIterations = 1;
2171 
2172   CBenchInfoCalc progressInfoSpec;
2173 
2174   #ifndef _7ZIP_ST
2175   CCrcThreads threads;
2176   if (numThreads > 1)
2177   {
2178     threads.Items = new CCrcInfo[numThreads];
2179 
2180     UInt32 i;
2181     for (i = 0; i < numThreads; i++)
2182     {
2183       CCrcInfo &info = threads.Items[i];
2184       AString name;
2185       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
2186       if (!info.Hasher)
2187         return E_NOTIMPL;
2188       CMyComPtr<ICompressSetCoderProperties> scp;
2189       info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2190       if (scp)
2191       {
2192         UInt64 reduceSize = 1;
2193         RINOK(method.SetCoderProps(scp, &reduceSize));
2194       }
2195 
2196       Byte *data = buf + (size_t)bufferSize * i;
2197       info.Callback = _file;
2198       info.Data = data;
2199       info.NumIterations = numIterations;
2200       info.Size = bufferSize;
2201       /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
2202       info.CheckSumDefined = false;
2203       if (checkSum)
2204       {
2205         info.CheckSum = *checkSum;
2206         info.CheckSumDefined = (checkSum && (i == 0));
2207       }
2208 
2209       #ifdef USE_ALLOCA
2210       info.AllocaSize = (i * 16 * 21) & 0x7FF;
2211       #endif
2212     }
2213 
2214     progressInfoSpec.SetStartTime();
2215 
2216     for (i = 0; i < numThreads; i++)
2217     {
2218       CCrcInfo &info = threads.Items[i];
2219       RINOK(info.Thread.Create(CrcThreadFunction, &info));
2220       threads.NumThreads++;
2221     }
2222     threads.WaitAll();
2223     for (i = 0; i < numThreads; i++)
2224     {
2225       RINOK(threads.Items[i].Res);
2226     }
2227   }
2228   else
2229   #endif
2230   {
2231     /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
2232     progressInfoSpec.SetStartTime();
2233     CMyComPtr<IHasher> hasher;
2234     AString name;
2235     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
2236     if (!hasher)
2237       return E_NOTIMPL;
2238     CMyComPtr<ICompressSetCoderProperties> scp;
2239     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2240     if (scp)
2241     {
2242       UInt64 reduceSize = 1;
2243       RINOK(method.SetCoderProps(scp, &reduceSize));
2244     }
2245     RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
2246   }
2247 
2248   CBenchInfo info;
2249   progressInfoSpec.SetFinishTime(info);
2250 
2251   UInt64 unpSize = numIterations * bufferSize;
2252   UInt64 unpSizeThreads = unpSize * numThreads;
2253   info.UnpackSize = unpSizeThreads;
2254   info.PackSize = unpSizeThreads;
2255   info.NumIterations = 1;
2256 
2257   if (_file)
2258   {
2259     {
2260       UInt64 numCommands = unpSizeThreads * complexity / 256;
2261       UInt64 rating = info.GetSpeed(numCommands);
2262       PrintResults(_file, info,
2263           benchWeight, rating,
2264           showFreq, cpuFreq, encodeRes);
2265     }
2266     RINOK(_file->CheckBreak());
2267   }
2268 
2269   speed = info.GetSpeed(unpSizeThreads);
2270 
2271   return S_OK;
2272 }
2273 
TotalBench_Hash(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufSize,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2274 static HRESULT TotalBench_Hash(
2275     DECL_EXTERNAL_CODECS_LOC_VARS
2276     UInt64 complexInCommands,
2277     UInt32 numThreads, UInt32 bufSize,
2278     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
2279     CTotalBenchRes *encodeRes,
2280     bool showFreq, UInt64 cpuFreq)
2281 {
2282   for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2283   {
2284     const CBenchHash &bench = g_Hash[i];
2285     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2286     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2287     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2288     // callback->BenchProps.EncComplex = bench.EncComplex;
2289 
2290     COneMethodInfo method;
2291     NCOM::CPropVariant propVariant;
2292     propVariant = bench.Name;
2293     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2294 
2295     UInt64 speed;
2296     HRESULT res = CrcBench(
2297         EXTERNAL_CODECS_LOC_VARS
2298         complexInCommands,
2299         numThreads, bufSize,
2300         speed,
2301         bench.Complex, bench.Weight,
2302         &bench.CheckSum, method,
2303         printCallback, encodeRes, showFreq, cpuFreq);
2304     if (res == E_NOTIMPL)
2305     {
2306       // callback->Print(" ---");
2307     }
2308     else
2309     {
2310       RINOK(res);
2311     }
2312     callback->NewLine();
2313   }
2314   return S_OK;
2315 }
2316 
2317 struct CTempValues
2318 {
2319   UInt64 *Values;
CTempValuesCTempValues2320   CTempValues(UInt32 num) { Values = new UInt64[num]; }
~CTempValuesCTempValues2321   ~CTempValues() { delete []Values; }
2322 };
2323 
ParseNumberString(const UString & s,NCOM::CPropVariant & prop)2324 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
2325 {
2326   const wchar_t *end;
2327   UInt64 result = ConvertStringToUInt64(s, &end);
2328   if (*end != 0 || s.IsEmpty())
2329     prop = s;
2330   else if (result <= (UInt32)0xFFFFFFFF)
2331     prop = (UInt32)result;
2332   else
2333     prop = result;
2334 }
2335 
GetNumThreadsNext(unsigned i,UInt32 numThreads)2336 static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
2337 {
2338   if (i < 2)
2339     return i + 1;
2340   i -= 1;
2341   UInt32 num = (UInt32)(2 + (i & 1)) << (i >> 1);
2342   return (num <= numThreads) ? num : numThreads;
2343 }
2344 
AreSameMethodNames(const char * fullName,const char * shortName)2345 static bool AreSameMethodNames(const char *fullName, const char *shortName)
2346 {
2347   for (;;)
2348   {
2349     char c2 = *shortName++;
2350     if (c2 == 0)
2351       return true;
2352     char c1 = *fullName++;
2353     if (MyCharLower_Ascii(c1) != MyCharLower_Ascii(c2))
2354       return false;
2355   }
2356 }
2357 
2358 
2359 #ifdef MY_CPU_X86_OR_AMD64
2360 
PrintCpuChars(AString & s,UInt32 v)2361 static void PrintCpuChars(AString &s, UInt32 v)
2362 {
2363   for (int j = 0; j < 4; j++)
2364   {
2365     Byte b = (Byte)(v & 0xFF);
2366     v >>= 8;
2367     if (b == 0)
2368       break;
2369     s += (char)b;
2370   }
2371 }
2372 
x86cpuid_to_String(const Cx86cpuid & c,AString & s)2373 static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
2374 {
2375   s.Empty();
2376 
2377   UInt32 maxFunc2 = 0;
2378   UInt32 t[3];
2379 
2380   MyCPUID(0x80000000, &maxFunc2, &t[0], &t[1], &t[2]);
2381 
2382   bool fullNameIsAvail = (maxFunc2 >= 0x80000004);
2383 
2384   if (!fullNameIsAvail)
2385   {
2386     for (int i = 0; i < 3; i++)
2387       PrintCpuChars(s, c.vendor[i]);
2388   }
2389   else
2390   {
2391     for (int i = 0; i < 3; i++)
2392     {
2393       UInt32 d[4] = { 0 };
2394       MyCPUID(0x80000002 + i, &d[0], &d[1], &d[2], &d[3]);
2395       for (int j = 0; j < 4; j++)
2396         PrintCpuChars(s, d[j]);
2397     }
2398   }
2399 
2400   s.Add_Space_if_NotEmpty();
2401   {
2402     char temp[32];
2403     ConvertUInt32ToHex(c.ver, temp);
2404     s += '(';
2405     s += temp;
2406     s += ')';
2407   }
2408 }
2409 
2410 #endif
2411 
2412 
GetCpuName(AString & s)2413 void GetCpuName(AString &s)
2414 {
2415   s.Empty();
2416 
2417   #ifdef MY_CPU_X86_OR_AMD64
2418   {
2419     Cx86cpuid cpuid;
2420     if (x86cpuid_CheckAndRead(&cpuid))
2421     {
2422       x86cpuid_to_String(cpuid, s);
2423       return;
2424     }
2425     #ifdef MY_CPU_AMD64
2426     s = "x64";
2427     #else
2428     s = "x86";
2429     #endif
2430   }
2431   #else
2432 
2433     #ifdef MY_CPU_LE
2434       s = "LE";
2435     #elif defined(MY_CPU_BE)
2436       s = "BE";
2437     #endif
2438 
2439   #endif
2440 }
2441 
2442 
Bench(DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback * printCallback,IBenchCallback * benchCallback,const CObjectVector<CProperty> & props,UInt32 numIterations,bool multiDict)2443 HRESULT Bench(
2444     DECL_EXTERNAL_CODECS_LOC_VARS
2445     IBenchPrintCallback *printCallback,
2446     IBenchCallback *benchCallback,
2447     const CObjectVector<CProperty> &props,
2448     UInt32 numIterations,
2449     bool multiDict)
2450 {
2451   if (!CrcInternalTest())
2452     return S_FALSE;
2453 
2454   UInt32 numCPUs = 1;
2455   UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
2456 
2457   #ifndef _7ZIP_ST
2458   numCPUs = NSystem::GetNumberOfProcessors();
2459   #endif
2460 
2461   bool ramSize_Defined = NSystem::GetRamSize(ramSize);
2462 
2463   UInt32 numThreadsSpecified = numCPUs;
2464 
2465   UInt32 testTime = kComplexInSeconds;
2466 
2467   UInt64 specifiedFreq = 0;
2468 
2469   bool multiThreadTests = false;
2470 
2471   COneMethodInfo method;
2472 
2473   CBenchBuffer fileDataBuffer;
2474 
2475   {
2476   unsigned i;
2477   for (i = 0; i < props.Size(); i++)
2478   {
2479     const CProperty &property = props[i];
2480     UString name = property.Name;
2481     name.MakeLower_Ascii();
2482 
2483     if (name.IsEqualTo("file"))
2484     {
2485       if (property.Value.IsEmpty())
2486         return E_INVALIDARG;
2487 
2488       #ifdef USE_WIN_FILE
2489 
2490       NFile::NIO::CInFile file;
2491       if (!file.Open(us2fs(property.Value)))
2492         return E_INVALIDARG;
2493       UInt64 len;
2494       if (!file.GetLength(len))
2495         return E_FAIL;
2496       if (len >= ((UInt32)1 << 31) || len == 0)
2497         return E_INVALIDARG;
2498       if (!fileDataBuffer.Alloc((size_t)len))
2499         return E_OUTOFMEMORY;
2500       UInt32 processedSize;
2501       file.Read(fileDataBuffer.Buffer, (UInt32)len, processedSize);
2502       if (processedSize != len)
2503         return E_FAIL;
2504       if (printCallback)
2505       {
2506         printCallback->Print("file size =");
2507         // printCallback->Print(GetOemString(property.Value));
2508         PrintNumber(*printCallback, len, 0);
2509         printCallback->NewLine();
2510       }
2511       continue;
2512 
2513       #else
2514 
2515       return E_NOTIMPL;
2516 
2517       #endif
2518     }
2519 
2520     NCOM::CPropVariant propVariant;
2521     if (!property.Value.IsEmpty())
2522       ParseNumberString(property.Value, propVariant);
2523 
2524     if (name.IsEqualTo("time"))
2525     {
2526       RINOK(ParsePropToUInt32(L"", propVariant, testTime));
2527       continue;
2528     }
2529 
2530     if (name.IsEqualTo("freq"))
2531     {
2532       UInt32 freq32 = 0;
2533       RINOK(ParsePropToUInt32(L"", propVariant, freq32));
2534       if (freq32 == 0)
2535         return E_INVALIDARG;
2536       specifiedFreq = (UInt64)freq32 * 1000000;
2537 
2538       if (printCallback)
2539       {
2540         printCallback->Print("freq=");
2541         PrintNumber(*printCallback, freq32, 0);
2542         printCallback->NewLine();
2543       }
2544 
2545       continue;
2546     }
2547 
2548     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
2549     {
2550       UString s = name.Ptr(2);
2551       if (s == L"*")
2552       {
2553         multiThreadTests = true;
2554         continue;
2555       }
2556       if (s.IsEmpty() && propVariant.vt == VT_BSTR)
2557       {
2558         if (wcscmp(propVariant.bstrVal, L"*") == 0)
2559         {
2560           multiThreadTests = true;
2561           continue;
2562         }
2563       }
2564       #ifndef _7ZIP_ST
2565       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified));
2566       #endif
2567       continue;
2568     }
2569 
2570     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
2571   }
2572   }
2573 
2574   if (printCallback)
2575   {
2576     AString s;
2577     GetCpuName(s);
2578     printCallback->Print(s);
2579     printCallback->NewLine();
2580   }
2581 
2582   if (printCallback)
2583   {
2584     printCallback->Print("CPU Freq:");
2585   }
2586 
2587   UInt64 complexInCommands = kComplexInCommands;
2588 
2589   if (printCallback /* || benchCallback */)
2590   {
2591     UInt64 numMilCommands = 1 << 6;
2592     if (specifiedFreq != 0)
2593     {
2594       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
2595         numMilCommands >>= 1;
2596     }
2597 
2598     for (int jj = 0;; jj++)
2599     {
2600       if (printCallback)
2601         RINOK(printCallback->CheckBreak());
2602 
2603       UInt64 start = ::GetTimeCount();
2604       UInt32 sum = (UInt32)start;
2605       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
2606       const UInt64 realDelta = ::GetTimeCount() - start;
2607       start = realDelta;
2608       if (start == 0)
2609         start = 1;
2610       UInt64 freq = GetFreq();
2611       // mips is constant in some compilers
2612       const UInt64 mipsVal = numMilCommands * freq / start;
2613       if (printCallback)
2614       {
2615         if (realDelta == 0)
2616         {
2617           printCallback->Print(" -");
2618         }
2619         else
2620         {
2621           // PrintNumber(*printCallback, start, 0);
2622           PrintNumber(*printCallback, mipsVal, 5 + ((sum == 0xF1541213) ? 1 : 0));
2623         }
2624       }
2625       /*
2626       if (benchCallback)
2627         benchCallback->AddCpuFreq(mipsVal);
2628       */
2629 
2630       if (jj >= 3)
2631       {
2632         SetComplexCommands(testTime, false, mipsVal * 1000000, complexInCommands);
2633         if (jj >= 8 || start >= freq)
2634           break;
2635         // break; // change it
2636         numMilCommands <<= 1;
2637       }
2638     }
2639   }
2640 
2641   if (printCallback)
2642   {
2643     printCallback->NewLine();
2644     printCallback->NewLine();
2645     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
2646   }
2647 
2648   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
2649     return E_INVALIDARG;
2650 
2651   UInt32 dict;
2652   bool dictIsDefined = method.Get_DicSize(dict);
2653 
2654   if (method.MethodName.IsEmpty())
2655     method.MethodName = "LZMA";
2656 
2657   if (benchCallback)
2658   {
2659     CBenchProps benchProps;
2660     benchProps.SetLzmaCompexity();
2661     UInt32 dictSize = method.Get_Lzma_DicSize();
2662     UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
2663     return MethodBench(
2664         EXTERNAL_CODECS_LOC_VARS
2665         complexInCommands,
2666         true, numThreadsSpecified,
2667         method,
2668         uncompressedDataSize, fileDataBuffer.Buffer,
2669         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
2670   }
2671 
2672   AString methodName = method.MethodName;
2673   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
2674     methodName = "crc32";
2675   method.MethodName = methodName;
2676   CMethodId hashID;
2677 
2678   if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
2679   {
2680     if (!printCallback)
2681       return S_FALSE;
2682     IBenchPrintCallback &f = *printCallback;
2683     if (!dictIsDefined)
2684       dict = (1 << 24);
2685 
2686 
2687     // methhodName.RemoveChar(L'-');
2688     UInt32 complexity = 10000;
2689     const UInt32 *checkSum = NULL;
2690     {
2691       for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2692       {
2693         const CBenchHash &h = g_Hash[i];
2694         AString s = h.Name;
2695         AString hProp;
2696         int propPos = s.Find(':');
2697         if (propPos >= 0)
2698         {
2699           hProp = s.Ptr(propPos + 1);
2700           s.DeleteFrom(propPos);
2701         }
2702 
2703         if (AreSameMethodNames(s, methodName))
2704         {
2705           complexity = h.Complex;
2706           checkSum = &h.CheckSum;
2707           if (method.PropsString.IsEqualTo_Ascii_NoCase(hProp))
2708             break;
2709         }
2710       }
2711     }
2712 
2713     f.NewLine();
2714     f.Print("Size");
2715     const unsigned kFieldSize_CrcSpeed = 6;
2716     unsigned numThreadsTests = 0;
2717     for (;;)
2718     {
2719       UInt32 t = GetNumThreadsNext(numThreadsTests, numThreadsSpecified);
2720       PrintNumber(f, t, kFieldSize_CrcSpeed);
2721       numThreadsTests++;
2722       if (t >= numThreadsSpecified)
2723         break;
2724     }
2725     f.NewLine();
2726     f.NewLine();
2727     CTempValues speedTotals(numThreadsTests);
2728     {
2729       for (unsigned ti = 0; ti < numThreadsTests; ti++)
2730         speedTotals.Values[ti] = 0;
2731     }
2732 
2733     UInt64 numSteps = 0;
2734     for (UInt32 i = 0; i < numIterations; i++)
2735     {
2736       for (unsigned pow = 10; pow < 32; pow++)
2737       {
2738         UInt32 bufSize = (UInt32)1 << pow;
2739         if (bufSize > dict)
2740           break;
2741         char s[16];
2742         ConvertUInt32ToString(pow, s);
2743         unsigned pos = MyStringLen(s);
2744         s[pos++] = ':';
2745         s[pos++] = ' ';
2746         s[pos] = 0;
2747         f.Print(s);
2748 
2749         for (unsigned ti = 0; ti < numThreadsTests; ti++)
2750         {
2751           RINOK(f.CheckBreak());
2752           UInt32 t = GetNumThreadsNext(ti, numThreadsSpecified);
2753           UInt64 speed = 0;
2754           RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
2755               t, bufSize, speed,
2756               complexity,
2757               1, // benchWeight,
2758               (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
2759           PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
2760           speedTotals.Values[ti] += speed;
2761         }
2762         f.NewLine();
2763         numSteps++;
2764       }
2765     }
2766     if (numSteps != 0)
2767     {
2768       f.NewLine();
2769       f.Print("Avg:");
2770       for (unsigned ti = 0; ti < numThreadsTests; ti++)
2771       {
2772         PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
2773       }
2774       f.NewLine();
2775     }
2776     return S_OK;
2777   }
2778 
2779   bool use2Columns = false;
2780 
2781   bool totalBenchMode = (method.MethodName.IsEqualTo_Ascii_NoCase("*"));
2782   bool onlyHashBench = false;
2783   if (method.MethodName.IsEqualTo_Ascii_NoCase("hash"))
2784   {
2785     onlyHashBench = true;
2786     totalBenchMode = true;
2787   }
2788 
2789   // ---------- Threads loop ----------
2790   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
2791   {
2792 
2793   UInt32 numThreads = numThreadsSpecified;
2794 
2795   if (!multiThreadTests)
2796   {
2797     if (threadsPassIndex != 0)
2798       break;
2799   }
2800   else
2801   {
2802     numThreads = 1;
2803     if (threadsPassIndex != 0)
2804     {
2805       if (numCPUs < 2)
2806         break;
2807       numThreads = numCPUs;
2808       if (threadsPassIndex == 1)
2809       {
2810         if (numCPUs >= 4)
2811           numThreads = numCPUs / 2;
2812       }
2813       else if (numCPUs < 4)
2814         break;
2815     }
2816   }
2817 
2818   CBenchCallbackToPrint callback;
2819   callback.Init();
2820   callback._file = printCallback;
2821 
2822   IBenchPrintCallback &f = *printCallback;
2823 
2824   if (threadsPassIndex > 0)
2825   {
2826     f.NewLine();
2827     f.NewLine();
2828   }
2829 
2830   if (!dictIsDefined)
2831   {
2832     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
2833     unsigned dicSizeLog = dicSizeLog_Main;
2834 
2835     #ifdef UNDER_CE
2836     dicSizeLog = (UInt64)1 << 20;
2837     #endif
2838 
2839     if (ramSize_Defined)
2840     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
2841       if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
2842         break;
2843 
2844     dict = (UInt32)1 << dicSizeLog;
2845 
2846     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
2847     {
2848       f.Print("Dictionary reduced to: ");
2849       PrintNumber(f, dicSizeLog, 1);
2850       f.NewLine();
2851     }
2852   }
2853 
2854   PrintRequirements(f, "usage:", true, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), "Benchmark threads:   ", numThreads);
2855 
2856   f.NewLine();
2857 
2858   if (totalBenchMode)
2859   {
2860     callback.NameFieldSize = kFieldSize_Name;
2861     use2Columns = false;
2862   }
2863   else
2864   {
2865     callback.NameFieldSize = kFieldSize_SmallName;
2866     use2Columns = true;
2867   }
2868   callback.Use2Columns = use2Columns;
2869 
2870   bool showFreq = false;
2871   UInt64 cpuFreq = 0;
2872 
2873   if (totalBenchMode)
2874   {
2875     showFreq = true;
2876   }
2877 
2878   unsigned fileldSize = kFieldSize_TotalSize;
2879   if (showFreq)
2880     fileldSize += kFieldSize_EUAndEffec;
2881 
2882   if (use2Columns)
2883   {
2884     PrintSpaces(f, callback.NameFieldSize);
2885     PrintRight(f, "Compressing", fileldSize);
2886     f.Print(kSep);
2887     PrintRight(f, "Decompressing", fileldSize);
2888   }
2889   f.NewLine();
2890   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
2891 
2892   int j;
2893 
2894   for (j = 0; j < 2; j++)
2895   {
2896     PrintRight(f, "Speed", kFieldSize_Speed + 1);
2897     PrintRight(f, "Usage", kFieldSize_Usage + 1);
2898     PrintRight(f, "R/U", kFieldSize_RU + 1);
2899     PrintRight(f, "Rating", kFieldSize_Rating + 1);
2900     if (showFreq)
2901     {
2902       PrintRight(f, "E/U", kFieldSize_EU + 1);
2903       PrintRight(f, "Effec", kFieldSize_Effec + 1);
2904     }
2905     if (!use2Columns)
2906       break;
2907     if (j == 0)
2908       f.Print(kSep);
2909   }
2910 
2911   f.NewLine();
2912   PrintSpaces(f, callback.NameFieldSize);
2913 
2914   for (j = 0; j < 2; j++)
2915   {
2916     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
2917     PrintRight(f, "%", kFieldSize_Usage + 1);
2918     PrintRight(f, "MIPS", kFieldSize_RU + 1);
2919     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
2920     if (showFreq)
2921     {
2922       PrintRight(f, "%", kFieldSize_EU + 1);
2923       PrintRight(f, "%", kFieldSize_Effec + 1);
2924     }
2925     if (!use2Columns)
2926       break;
2927     if (j == 0)
2928       f.Print(kSep);
2929   }
2930 
2931   f.NewLine();
2932   f.NewLine();
2933 
2934   if (specifiedFreq != 0)
2935     cpuFreq = specifiedFreq;
2936 
2937 
2938   if (totalBenchMode)
2939   {
2940     for (UInt32 i = 0; i < numIterations; i++)
2941     {
2942       if (i != 0)
2943         printCallback->NewLine();
2944       HRESULT res;
2945 
2946       const unsigned kNumCpuTests = 3;
2947       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
2948       {
2949         PrintLeft(f, "CPU", kFieldSize_Name);
2950         UInt32 resVal;
2951         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
2952             (freqTest == kNumCpuTests - 1 || specifiedFreq != 0), // showFreq
2953             specifiedFreq,
2954             cpuFreq, resVal));
2955         callback.NewLine();
2956 
2957         if (specifiedFreq != 0)
2958           cpuFreq = specifiedFreq;
2959 
2960         if (freqTest == kNumCpuTests - 1)
2961           SetComplexCommands(testTime, specifiedFreq != 0, cpuFreq, complexInCommands);
2962       }
2963       callback.NewLine();
2964 
2965       callback.SetFreq(true, cpuFreq);
2966 
2967       if (!onlyHashBench)
2968       {
2969         res = TotalBench(EXTERNAL_CODECS_LOC_VARS
2970             complexInCommands, numThreads,
2971             dictIsDefined || fileDataBuffer.Buffer, // forceUnpackSize
2972             fileDataBuffer.Buffer ? fileDataBuffer.BufferSize : dict,
2973             fileDataBuffer.Buffer,
2974             printCallback, &callback);
2975         RINOK(res);
2976       }
2977 
2978       res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
2979           1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
2980       RINOK(res);
2981 
2982       callback.NewLine();
2983       {
2984         PrintLeft(f, "CPU", kFieldSize_Name);
2985         UInt32 resVal;
2986         UInt64 cpuFreqLastTemp = cpuFreq;
2987         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
2988             specifiedFreq != 0, // showFreq
2989             specifiedFreq,
2990             cpuFreqLastTemp, resVal));
2991         callback.NewLine();
2992       }
2993     }
2994   }
2995   else
2996   {
2997     bool needSetComplexity = true;
2998     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
2999     {
3000       for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
3001       {
3002         const CBenchMethod &h = g_Bench[i];
3003         AString s = h.Name;
3004         if (AreSameMethodNames(h.Name, methodName))
3005         {
3006           callback.BenchProps.EncComplex = h.EncComplex;
3007           callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
3008           callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
3009           needSetComplexity = false;
3010           break;
3011         }
3012       }
3013     }
3014     if (needSetComplexity)
3015       callback.BenchProps.SetLzmaCompexity();
3016 
3017   for (unsigned i = 0; i < numIterations; i++)
3018   {
3019     const unsigned kStartDicLog = 22;
3020     unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
3021     if (!multiDict)
3022       pow = 31;
3023     while (((UInt32)1 << pow) > dict && pow > 0)
3024       pow--;
3025     for (; ((UInt32)1 << pow) <= dict; pow++)
3026     {
3027       char s[16];
3028       ConvertUInt32ToString(pow, s);
3029       unsigned pos = MyStringLen(s);
3030       s[pos++] = ':';
3031       s[pos] = 0;
3032       PrintLeft(f, s, kFieldSize_SmallName);
3033       callback.DictSize = (UInt32)1 << pow;
3034 
3035       COneMethodInfo method2 = method;
3036 
3037       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
3038       {
3039         // We add dictionary size property.
3040         // method2 can have two different dictionary size properties.
3041         // And last property is main.
3042         NCOM::CPropVariant propVariant = (UInt32)pow;
3043         RINOK(method2.ParseMethodFromPROPVARIANT(L"d", propVariant));
3044       }
3045 
3046       size_t uncompressedDataSize;
3047       if (fileDataBuffer.Buffer)
3048       {
3049         uncompressedDataSize = fileDataBuffer.BufferSize;
3050       }
3051       else
3052       {
3053         uncompressedDataSize = callback.DictSize;
3054         if (uncompressedDataSize >= (1 << 18))
3055           uncompressedDataSize += kAdditionalSize;
3056       }
3057 
3058       HRESULT res = MethodBench(
3059           EXTERNAL_CODECS_LOC_VARS
3060           complexInCommands,
3061           true, numThreads,
3062           method2,
3063           uncompressedDataSize, fileDataBuffer.Buffer,
3064           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
3065       f.NewLine();
3066       RINOK(res);
3067       if (!multiDict)
3068         break;
3069     }
3070   }
3071   }
3072 
3073   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
3074 
3075   if (use2Columns)
3076   {
3077     f.Print(kSep);
3078     PrintChars(f, '-', fileldSize);
3079   }
3080 
3081   f.NewLine();
3082 
3083   if (use2Columns)
3084   {
3085     PrintLeft(f, "Avr:", callback.NameFieldSize);
3086     PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
3087     f.Print(kSep);
3088     PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
3089     f.NewLine();
3090   }
3091 
3092   PrintLeft(f, "Tot:", callback.NameFieldSize);
3093   CTotalBenchRes midRes;
3094   midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
3095   PrintTotals(f, showFreq, cpuFreq, midRes);
3096   f.NewLine();
3097 
3098   }
3099   return S_OK;
3100 }
3101