1 // Bench.cpp
2
3 #include "StdAfx.h"
4
5 #include <stdio.h>
6
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif
11
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #ifdef USE_POSIX_TIME2
15 #include <sys/time.h>
16 #endif
17 #endif
18
19 #ifdef _WIN32
20 #define USE_ALLOCA
21 #endif
22
23 #ifdef USE_ALLOCA
24 #ifdef _WIN32
25 #include <malloc.h>
26 #else
27 #include <stdlib.h>
28 #endif
29 #endif
30
31 #include "../../../../C/7zCrc.h"
32 #include "../../../../C/Alloc.h"
33 #include "../../../../C/CpuArch.h"
34
35 #include "../../../Windows/System.h"
36
37 #ifndef _7ZIP_ST
38 #include "../../../Windows/Synchronization.h"
39 #include "../../../Windows/Thread.h"
40 #endif
41
42 #if defined(_WIN32) || defined(UNIX_USE_WIN_FILE)
43 #define USE_WIN_FILE
44 #endif
45
46 #ifdef USE_WIN_FILE
47 #include "../../../Windows/FileIO.h"
48 #endif
49
50
51 #include "../../../Common/IntToString.h"
52 #include "../../../Common/StringConvert.h"
53 #include "../../../Common/StringToInt.h"
54
55 #include "../../Common/MethodProps.h"
56 #include "../../Common/StreamUtils.h"
57
58 #include "Bench.h"
59
60 using namespace NWindows;
61
62 static const UInt32 k_LZMA = 0x030101;
63
64 static const UInt64 kComplexInCommands = (UInt64)1 <<
65 #ifdef UNDER_CE
66 31;
67 #else
68 34;
69 #endif
70
71 static const UInt32 kComplexInSeconds = 4;
72
SetComplexCommands(UInt32 complexInSeconds,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)73 static void SetComplexCommands(UInt32 complexInSeconds,
74 bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
75 {
76 complexInCommands = kComplexInCommands;
77 const UInt64 kMinFreq = (UInt64)1000000 * 4;
78 const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
79 if (cpuFreq < kMinFreq && !isSpecifiedFreq)
80 cpuFreq = kMinFreq;
81 if (cpuFreq < kMaxFreq || isSpecifiedFreq)
82 {
83 if (complexInSeconds != 0)
84 complexInCommands = complexInSeconds * cpuFreq;
85 else
86 complexInCommands = cpuFreq >> 2;
87 }
88 }
89
90 static const unsigned kNumHashDictBits = 17;
91 static const UInt32 kFilterUnpackSize = (48 << 10);
92
93 static const unsigned kOldLzmaDictBits = 30;
94
95 static const UInt32 kAdditionalSize = (1 << 16);
96 static const UInt32 kCompressedAdditionalSize = (1 << 10);
97 static const UInt32 kMaxLzmaPropSize = 5;
98
99 class CBaseRandomGenerator
100 {
101 UInt32 A1;
102 UInt32 A2;
103 public:
CBaseRandomGenerator()104 CBaseRandomGenerator() { Init(); }
Init()105 void Init() { A1 = 362436069; A2 = 521288629;}
GetRnd()106 UInt32 GetRnd()
107 {
108 return
109 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
110 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
111 }
112 };
113
114
// Alignment in bytes that CBenchBuffer::Alloc() requests (as mask kBufferAlignment - 1).
static const unsigned kBufferAlignment = 16;
116
117 struct CBenchBuffer
118 {
119 size_t BufferSize;
120
121 #ifdef _WIN32
122
123 Byte *Buffer;
124
CBenchBufferCBenchBuffer125 CBenchBuffer(): BufferSize(0), Buffer(NULL) {}
~CBenchBufferCBenchBuffer126 ~CBenchBuffer() { ::MidFree(Buffer); }
127
AllocAlignedMaskCBenchBuffer128 void AllocAlignedMask(size_t size, size_t)
129 {
130 ::MidFree(Buffer);
131 BufferSize = 0;
132 Buffer = (Byte *)::MidAlloc(size);
133 if (Buffer)
134 BufferSize = size;
135 }
136
137 #else
138
139 Byte *Buffer;
140 Byte *_bufBase;
141
CBenchBufferCBenchBuffer142 CBenchBuffer(): BufferSize(0), Buffer(NULL), _bufBase(NULL){}
~CBenchBufferCBenchBuffer143 ~CBenchBuffer() { ::MidFree(_bufBase); }
144
AllocAlignedMaskCBenchBuffer145 void AllocAlignedMask(size_t size, size_t alignMask)
146 {
147 ::MidFree(_bufBase);
148 Buffer = NULL;
149 BufferSize = 0;
150 _bufBase = (Byte *)::MidAlloc(size + alignMask);
151
152 if (_bufBase)
153 {
154 // Buffer = (Byte *)(((uintptr_t)_bufBase + alignMask) & ~(uintptr_t)alignMask);
155 Buffer = (Byte *)(((ptrdiff_t)_bufBase + alignMask) & ~(ptrdiff_t)alignMask);
156 BufferSize = size;
157 }
158 }
159
160 #endif
161
AllocCBenchBuffer162 bool Alloc(size_t size)
163 {
164 if (Buffer && BufferSize == size)
165 return true;
166 AllocAlignedMask(size, kBufferAlignment - 1);
167 return (Buffer != NULL || size == 0);
168 }
169 };
170
171
172 class CBenchRandomGenerator: public CBenchBuffer
173 {
GetVal(UInt32 & res,unsigned numBits)174 static UInt32 GetVal(UInt32 &res, unsigned numBits)
175 {
176 UInt32 val = res & (((UInt32)1 << numBits) - 1);
177 res >>= numBits;
178 return val;
179 }
180
GetLen(UInt32 & r)181 static UInt32 GetLen(UInt32 &r)
182 {
183 UInt32 len = GetVal(r, 2);
184 return GetVal(r, 1 + len);
185 }
186
187 public:
188
GenerateSimpleRandom(CBaseRandomGenerator * _RG_)189 void GenerateSimpleRandom(CBaseRandomGenerator *_RG_)
190 {
191 CBaseRandomGenerator rg = *_RG_;
192 const size_t bufSize = BufferSize;
193 Byte *buf = Buffer;
194 for (size_t i = 0; i < bufSize; i++)
195 buf[i] = (Byte)rg.GetRnd();
196 *_RG_ = rg;
197 }
198
GenerateLz(unsigned dictBits,CBaseRandomGenerator * _RG_)199 void GenerateLz(unsigned dictBits, CBaseRandomGenerator *_RG_)
200 {
201 CBaseRandomGenerator rg = *_RG_;
202 UInt32 pos = 0;
203 UInt32 rep0 = 1;
204 const size_t bufSize = BufferSize;
205 Byte *buf = Buffer;
206 unsigned posBits = 1;
207
208 while (pos < bufSize)
209 {
210 UInt32 r = rg.GetRnd();
211 if (GetVal(r, 1) == 0 || pos < 1024)
212 buf[pos++] = (Byte)(r & 0xFF);
213 else
214 {
215 UInt32 len;
216 len = 1 + GetLen(r);
217
218 if (GetVal(r, 3) != 0)
219 {
220 len += GetLen(r);
221
222 while (((UInt32)1 << posBits) < pos)
223 posBits++;
224
225 unsigned numBitsMax = dictBits;
226 if (numBitsMax > posBits)
227 numBitsMax = posBits;
228
229 const unsigned kAddBits = 6;
230 unsigned numLogBits = 5;
231 if (numBitsMax <= (1 << 4) - 1 + kAddBits)
232 numLogBits = 4;
233
234 for (;;)
235 {
236 UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
237 r = rg.GetRnd();
238 if (ppp > numBitsMax)
239 continue;
240 rep0 = GetVal(r, ppp);
241 if (rep0 < pos)
242 break;
243 r = rg.GetRnd();
244 }
245 rep0++;
246 }
247
248 {
249 UInt32 rem = (UInt32)bufSize - pos;
250 if (len > rem)
251 len = rem;
252 }
253 Byte *dest = buf + pos;
254 const Byte *src = dest - rep0;
255 pos += len;
256 for (UInt32 i = 0; i < len; i++)
257 *dest++ = *src++;
258 }
259 }
260
261 *_RG_ = rg;
262 }
263 };
264
265
266 class CBenchmarkInStream:
267 public ISequentialInStream,
268 public CMyUnknownImp
269 {
270 const Byte *Data;
271 size_t Pos;
272 size_t Size;
273 public:
274 MY_UNKNOWN_IMP
Init(const Byte * data,size_t size)275 void Init(const Byte *data, size_t size)
276 {
277 Data = data;
278 Size = size;
279 Pos = 0;
280 }
281 STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
282 };
283
Read(void * data,UInt32 size,UInt32 * processedSize)284 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
285 {
286 size_t remain = Size - Pos;
287 UInt32 kMaxBlockSize = (1 << 20);
288 if (size > kMaxBlockSize)
289 size = kMaxBlockSize;
290 if (size > remain)
291 size = (UInt32)remain;
292 for (UInt32 i = 0; i < size; i++)
293 ((Byte *)data)[i] = Data[Pos + i];
294 Pos += size;
295 if (processedSize)
296 *processedSize = size;
297 return S_OK;
298 }
299
300 class CBenchmarkOutStream:
301 public ISequentialOutStream,
302 public CBenchBuffer,
303 public CMyUnknownImp
304 {
305 // bool _overflow;
306 public:
307 size_t Pos;
308 bool RealCopy;
309 bool CalcCrc;
310 UInt32 Crc;
311
312 // CBenchmarkOutStream(): _overflow(false) {}
Init(bool realCopy,bool calcCrc)313 void Init(bool realCopy, bool calcCrc)
314 {
315 Crc = CRC_INIT_VAL;
316 RealCopy = realCopy;
317 CalcCrc = calcCrc;
318 // _overflow = false;
319 Pos = 0;
320 }
321
322 // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
323
324 MY_UNKNOWN_IMP
325 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
326 };
327
Write(const void * data,UInt32 size,UInt32 * processedSize)328 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
329 {
330 size_t curSize = BufferSize - Pos;
331 if (curSize > size)
332 curSize = size;
333 if (curSize != 0)
334 {
335 if (RealCopy)
336 memcpy(Buffer + Pos, data, curSize);
337 if (CalcCrc)
338 Crc = CrcUpdate(Crc, data, curSize);
339 Pos += curSize;
340 }
341 if (processedSize)
342 *processedSize = (UInt32)curSize;
343 if (curSize != size)
344 {
345 // _overflow = true;
346 return E_FAIL;
347 }
348 return S_OK;
349 }
350
351 class CCrcOutStream:
352 public ISequentialOutStream,
353 public CMyUnknownImp
354 {
355 public:
356 bool CalcCrc;
357 UInt32 Crc;
358 MY_UNKNOWN_IMP
359
CCrcOutStream()360 CCrcOutStream(): CalcCrc(true) {};
Init()361 void Init() { Crc = CRC_INIT_VAL; }
362 STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
363 };
364
Write(const void * data,UInt32 size,UInt32 * processedSize)365 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
366 {
367 if (CalcCrc)
368 Crc = CrcUpdate(Crc, data, size);
369 if (processedSize)
370 *processedSize = size;
371 return S_OK;
372 }
373
GetTimeCount()374 static UInt64 GetTimeCount()
375 {
376 #ifdef USE_POSIX_TIME
377 #ifdef USE_POSIX_TIME2
378 timeval v;
379 if (gettimeofday(&v, 0) == 0)
380 return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
381 return (UInt64)time(NULL) * 1000000;
382 #else
383 return time(NULL);
384 #endif
385 #else
386 /*
387 LARGE_INTEGER value;
388 if (::QueryPerformanceCounter(&value))
389 return value.QuadPart;
390 */
391 return GetTickCount();
392 #endif
393 }
394
GetFreq()395 static UInt64 GetFreq()
396 {
397 #ifdef USE_POSIX_TIME
398 #ifdef USE_POSIX_TIME2
399 return 1000000;
400 #else
401 return 1;
402 #endif
403 #else
404 /*
405 LARGE_INTEGER value;
406 if (::QueryPerformanceFrequency(&value))
407 return value.QuadPart;
408 */
409 return 1000;
410 #endif
411 }
412
413 #ifdef USE_POSIX_TIME
414
415 struct CUserTime
416 {
417 UInt64 Sum;
418 clock_t Prev;
419
InitCUserTime420 void Init()
421 {
422 Prev = clock();
423 Sum = 0;
424 }
425
GetUserTimeCUserTime426 UInt64 GetUserTime()
427 {
428 clock_t v = clock();
429 Sum += v - Prev;
430 Prev = v;
431 return Sum;
432 }
433 };
434
435 #else
436
GetTime64(const FILETIME & t)437 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
GetWinUserTime()438 UInt64 GetWinUserTime()
439 {
440 FILETIME creationTime, exitTime, kernelTime, userTime;
441 if (
442 #ifdef UNDER_CE
443 ::GetThreadTimes(::GetCurrentThread()
444 #else
445 ::GetProcessTimes(::GetCurrentProcess()
446 #endif
447 , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
448 return GetTime64(userTime) + GetTime64(kernelTime);
449 return (UInt64)GetTickCount() * 10000;
450 }
451
452 struct CUserTime
453 {
454 UInt64 StartTime;
455
InitCUserTime456 void Init() { StartTime = GetWinUserTime(); }
GetUserTimeCUserTime457 UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
458 };
459
460 #endif
461
GetUserFreq()462 static UInt64 GetUserFreq()
463 {
464 #ifdef USE_POSIX_TIME
465 return CLOCKS_PER_SEC;
466 #else
467 return 10000000;
468 #endif
469 }
470
471 class CBenchProgressStatus
472 {
473 #ifndef _7ZIP_ST
474 NSynchronization::CCriticalSection CS;
475 #endif
476 public:
477 HRESULT Res;
478 bool EncodeMode;
SetResult(HRESULT res)479 void SetResult(HRESULT res)
480 {
481 #ifndef _7ZIP_ST
482 NSynchronization::CCriticalSectionLock lock(CS);
483 #endif
484 Res = res;
485 }
GetResult()486 HRESULT GetResult()
487 {
488 #ifndef _7ZIP_ST
489 NSynchronization::CCriticalSectionLock lock(CS);
490 #endif
491 return Res;
492 }
493 };
494
495 struct CBenchInfoCalc
496 {
497 CBenchInfo BenchInfo;
498 CUserTime UserTime;
499
500 void SetStartTime();
501 void SetFinishTime(CBenchInfo &dest);
502 };
503
SetStartTime()504 void CBenchInfoCalc::SetStartTime()
505 {
506 BenchInfo.GlobalFreq = GetFreq();
507 BenchInfo.UserFreq = GetUserFreq();
508 BenchInfo.GlobalTime = ::GetTimeCount();
509 BenchInfo.UserTime = 0;
510 UserTime.Init();
511 }
512
SetFinishTime(CBenchInfo & dest)513 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
514 {
515 dest = BenchInfo;
516 dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
517 dest.UserTime = UserTime.GetUserTime();
518 }
519
520 class CBenchProgressInfo:
521 public ICompressProgressInfo,
522 public CMyUnknownImp,
523 public CBenchInfoCalc
524 {
525 public:
526 CBenchProgressStatus *Status;
527 HRESULT Res;
528 IBenchCallback *Callback;
529
CBenchProgressInfo()530 CBenchProgressInfo(): Callback(0) {}
531 MY_UNKNOWN_IMP
532 STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
533 };
534
SetRatioInfo(const UInt64 * inSize,const UInt64 * outSize)535 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
536 {
537 HRESULT res = Status->GetResult();
538 if (res != S_OK)
539 return res;
540 if (!Callback)
541 return res;
542 CBenchInfo info;
543 SetFinishTime(info);
544 if (Status->EncodeMode)
545 {
546 info.UnpackSize = BenchInfo.UnpackSize + *inSize;
547 info.PackSize = BenchInfo.PackSize + *outSize;
548 res = Callback->SetEncodeResult(info, false);
549 }
550 else
551 {
552 info.PackSize = BenchInfo.PackSize + *inSize;
553 info.UnpackSize = BenchInfo.UnpackSize + *outSize;
554 res = Callback->SetDecodeResult(info, false);
555 }
556 if (res != S_OK)
557 Status->SetResult(res);
558 return res;
559 }
560
561 static const unsigned kSubBits = 8;
562
GetLogSize(UInt32 size)563 static UInt32 GetLogSize(UInt32 size)
564 {
565 for (unsigned i = kSubBits; i < 32; i++)
566 for (UInt32 j = 0; j < (1 << kSubBits); j++)
567 if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
568 return (i << kSubBits) + j;
569 return (32 << kSubBits);
570 }
571
NormalizeVals(UInt64 & v1,UInt64 & v2)572 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
573 {
574 while (v1 > 1000000)
575 {
576 v1 >>= 1;
577 v2 >>= 1;
578 }
579 }
580
GetUsage() const581 UInt64 CBenchInfo::GetUsage() const
582 {
583 UInt64 userTime = UserTime;
584 UInt64 userFreq = UserFreq;
585 UInt64 globalTime = GlobalTime;
586 UInt64 globalFreq = GlobalFreq;
587 NormalizeVals(userTime, userFreq);
588 NormalizeVals(globalFreq, globalTime);
589 if (userFreq == 0)
590 userFreq = 1;
591 if (globalTime == 0)
592 globalTime = 1;
593 return userTime * globalFreq * 1000000 / userFreq / globalTime;
594 }
595
GetRatingPerUsage(UInt64 rating) const596 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
597 {
598 UInt64 userTime = UserTime;
599 UInt64 userFreq = UserFreq;
600 UInt64 globalTime = GlobalTime;
601 UInt64 globalFreq = GlobalFreq;
602 NormalizeVals(userFreq, userTime);
603 NormalizeVals(globalTime, globalFreq);
604 if (globalFreq == 0)
605 globalFreq = 1;
606 if (userTime == 0)
607 userTime = 1;
608 return userFreq * globalTime / globalFreq * rating / userTime;
609 }
610
MyMultDiv64(UInt64 value,UInt64 elapsedTime,UInt64 freq)611 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
612 {
613 UInt64 elTime = elapsedTime;
614 NormalizeVals(freq, elTime);
615 if (elTime == 0)
616 elTime = 1;
617 return value * freq / elTime;
618 }
619
GetSpeed(UInt64 numCommands) const620 UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
621 {
622 return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
623 }
624
625 struct CBenchProps
626 {
627 bool LzmaRatingMode;
628
629 UInt32 EncComplex;
630 UInt32 DecComplexCompr;
631 UInt32 DecComplexUnc;
632
CBenchPropsCBenchProps633 CBenchProps(): LzmaRatingMode(false) {}
634 void SetLzmaCompexity();
635
GeComprCommandsCBenchProps636 UInt64 GeComprCommands(UInt64 unpackSize)
637 {
638 return unpackSize * EncComplex;
639 }
640
GeDecomprCommandsCBenchProps641 UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
642 {
643 return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
644 }
645
646 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
647 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
648 };
649
SetLzmaCompexity()650 void CBenchProps::SetLzmaCompexity()
651 {
652 EncComplex = 1200;
653 DecComplexUnc = 4;
654 DecComplexCompr = 190;
655 LzmaRatingMode = true;
656 }
657
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)658 UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
659 {
660 if (dictSize < (1 << kBenchMinDicLogSize))
661 dictSize = (1 << kBenchMinDicLogSize);
662 UInt64 encComplex = EncComplex;
663 if (LzmaRatingMode)
664 {
665 UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
666 encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
667 }
668 UInt64 numCommands = (UInt64)size * encComplex;
669 return MyMultDiv64(numCommands, elapsedTime, freq);
670 }
671
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)672 UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
673 {
674 UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
675 return MyMultDiv64(numCommands, elapsedTime, freq);
676 }
677
GetCompressRating(UInt32 dictSize,UInt64 elapsedTime,UInt64 freq,UInt64 size)678 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
679 {
680 CBenchProps props;
681 props.SetLzmaCompexity();
682 return props.GetCompressRating(dictSize, elapsedTime, freq, size);
683 }
684
GetDecompressRating(UInt64 elapsedTime,UInt64 freq,UInt64 outSize,UInt64 inSize,UInt64 numIterations)685 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
686 {
687 CBenchProps props;
688 props.SetLzmaCompexity();
689 return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
690 }
691
692 struct CEncoderInfo;
693
694 struct CEncoderInfo
695 {
696 #ifndef _7ZIP_ST
697 NWindows::CThread thread[2];
698 UInt32 NumDecoderSubThreads;
699 #endif
700 CMyComPtr<ICompressCoder> _encoder;
701 CMyComPtr<ICompressFilter> _encoderFilter;
702 CBenchProgressInfo *progressInfoSpec[2];
703 CMyComPtr<ICompressProgressInfo> progressInfo[2];
704 UInt64 NumIterations;
705
706 #ifdef USE_ALLOCA
707 size_t AllocaSize;
708 #endif
709
710 Byte _key[32];
711 Byte _iv[16];
712 Byte _psw[16];
713 bool CheckCrc_Enc;
714 bool CheckCrc_Dec;
715
716 struct CDecoderInfo
717 {
718 CEncoderInfo *Encoder;
719 UInt32 DecoderIndex;
720 bool CallbackMode;
721
722 #ifdef USE_ALLOCA
723 size_t AllocaSize;
724 #endif
725 };
726 CDecoderInfo decodersInfo[2];
727
728 CMyComPtr<ICompressCoder> _decoders[2];
729 CMyComPtr<ICompressFilter> _decoderFilter;
730
731 HRESULT Results[2];
732 CBenchmarkOutStream *outStreamSpec;
733 CMyComPtr<ISequentialOutStream> outStream;
734 IBenchCallback *callback;
735 IBenchPrintCallback *printCallback;
736 UInt32 crc;
737 size_t kBufferSize;
738 size_t compressedSize;
739 const Byte *uncompressedDataPtr;
740
741 const Byte *fileData;
742 CBenchRandomGenerator rg;
743
744 CBenchBuffer rgCopy; // it must be 16-byte aligned !!!
745 CBenchmarkOutStream *propStreamSpec;
746 CMyComPtr<ISequentialOutStream> propStream;
747
748 // for decode
749 COneMethodInfo _method;
750 size_t _uncompressedDataSize;
751
752 HRESULT Init(
753 const COneMethodInfo &method,
754 unsigned generateDictBits,
755 CBaseRandomGenerator *rg);
756 HRESULT Encode();
757 HRESULT Decode(UInt32 decoderIndex);
758
CEncoderInfoCEncoderInfo759 CEncoderInfo():
760 fileData(NULL),
761 CheckCrc_Enc(true),
762 CheckCrc_Dec(true),
763 outStreamSpec(0), callback(0), printCallback(0), propStreamSpec(0) {}
764
765 #ifndef _7ZIP_ST
766
EncodeThreadFunctionCEncoderInfo767 static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
768 {
769 HRESULT res;
770 CEncoderInfo *encoder = (CEncoderInfo *)param;
771 try
772 {
773 #ifdef USE_ALLOCA
774 alloca(encoder->AllocaSize);
775 #endif
776
777 res = encoder->Encode();
778 encoder->Results[0] = res;
779 }
780 catch(...)
781 {
782 res = E_FAIL;
783 }
784 if (res != S_OK)
785 encoder->progressInfoSpec[0]->Status->SetResult(res);
786 return 0;
787 }
788
DecodeThreadFunctionCEncoderInfo789 static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
790 {
791 CDecoderInfo *decoder = (CDecoderInfo *)param;
792
793 #ifdef USE_ALLOCA
794 alloca(decoder->AllocaSize);
795 #endif
796
797 CEncoderInfo *encoder = decoder->Encoder;
798 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
799 return 0;
800 }
801
CreateEncoderThreadCEncoderInfo802 HRESULT CreateEncoderThread()
803 {
804 return thread[0].Create(EncodeThreadFunction, this);
805 }
806
CreateDecoderThreadCEncoderInfo807 HRESULT CreateDecoderThread(unsigned index, bool callbackMode
808 #ifdef USE_ALLOCA
809 , size_t allocaSize
810 #endif
811 )
812 {
813 CDecoderInfo &decoder = decodersInfo[index];
814 decoder.DecoderIndex = index;
815 decoder.Encoder = this;
816
817 #ifdef USE_ALLOCA
818 decoder.AllocaSize = allocaSize;
819 #endif
820
821 decoder.CallbackMode = callbackMode;
822 return thread[index].Create(DecodeThreadFunction, &decoder);
823 }
824
825 #endif
826 };
827
828
Init(const COneMethodInfo & method,unsigned generateDictBits,CBaseRandomGenerator * rgLoc)829 HRESULT CEncoderInfo::Init(
830 const COneMethodInfo &method,
831 unsigned generateDictBits,
832 CBaseRandomGenerator *rgLoc)
833 {
834 // we need extra space, if input data is already compressed
835 const size_t kCompressedBufferSize =
836 kCompressedAdditionalSize +
837 kBufferSize + kBufferSize / 16;
838 // kBufferSize / 2;
839
840 if (kCompressedBufferSize < kBufferSize)
841 return E_FAIL;
842
843 uncompressedDataPtr = fileData;
844
845 if (!fileData)
846 {
847 if (!rg.Alloc(kBufferSize))
848 return E_OUTOFMEMORY;
849
850 // DWORD ttt = GetTickCount();
851 if (generateDictBits == 0)
852 rg.GenerateSimpleRandom(rgLoc);
853 else
854 rg.GenerateLz(generateDictBits, rgLoc);
855 // printf("\n%d\n ", GetTickCount() - ttt);
856
857 crc = CrcCalc(rg.Buffer, rg.BufferSize);
858 uncompressedDataPtr = rg.Buffer;
859 }
860
861 if (_encoderFilter)
862 {
863 if (!rgCopy.Alloc(kBufferSize))
864 return E_OUTOFMEMORY;
865 }
866
867
868 outStreamSpec = new CBenchmarkOutStream;
869 outStream = outStreamSpec;
870 if (!outStreamSpec->Alloc(kCompressedBufferSize))
871 return E_OUTOFMEMORY;
872
873 propStreamSpec = 0;
874 if (!propStream)
875 {
876 propStreamSpec = new CBenchmarkOutStream;
877 propStream = propStreamSpec;
878 }
879 if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
880 return E_OUTOFMEMORY;
881 propStreamSpec->Init(true, false);
882
883
884 CMyComPtr<IUnknown> coder;
885 if (_encoderFilter)
886 coder = _encoderFilter;
887 else
888 coder = _encoder;
889 {
890 CMyComPtr<ICompressSetCoderProperties> scp;
891 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
892 if (scp)
893 {
894 UInt64 reduceSize = kBufferSize;
895 RINOK(method.SetCoderProps(scp, &reduceSize));
896 }
897 else
898 {
899 if (method.AreThereNonOptionalProps())
900 return E_INVALIDARG;
901 }
902
903 CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
904 coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
905 if (writeCoderProps)
906 {
907 RINOK(writeCoderProps->WriteCoderProperties(propStream));
908 }
909
910 {
911 CMyComPtr<ICryptoSetPassword> sp;
912 coder.QueryInterface(IID_ICryptoSetPassword, &sp);
913 if (sp)
914 {
915 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
916
917 // we must call encoding one time to calculate password key for key cache.
918 // it must be after WriteCoderProperties!
919 Byte temp[16];
920 memset(temp, 0, sizeof(temp));
921
922 if (_encoderFilter)
923 {
924 _encoderFilter->Init();
925 _encoderFilter->Filter(temp, sizeof(temp));
926 }
927 else
928 {
929 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
930 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
931 inStreamSpec->Init(temp, sizeof(temp));
932
933 CCrcOutStream *crcStreamSpec = new CCrcOutStream;
934 CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
935 crcStreamSpec->Init();
936
937 RINOK(_encoder->Code(inStream, crcStream, 0, 0, NULL));
938 }
939 }
940 }
941 }
942
943 return S_OK;
944 }
945
946
My_FilterBench(ICompressFilter * filter,Byte * data,size_t size)947 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
948 {
949 while (size != 0)
950 {
951 UInt32 cur = (UInt32)1 << 31;
952 if (cur > size)
953 cur = (UInt32)size;
954 UInt32 processed = filter->Filter(data, cur);
955 data += processed;
956 // if (processed > size) (in AES filter), we must fill last block with zeros.
957 // but it is not important for benchmark. So we just copy that data without filtering.
958 if (processed > size || processed == 0)
959 break;
960 size -= processed;
961 }
962 }
963
964
Encode()965 HRESULT CEncoderInfo::Encode()
966 {
967 CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
968 bi.UnpackSize = 0;
969 bi.PackSize = 0;
970 CMyComPtr<ICryptoProperties> cp;
971 CMyComPtr<IUnknown> coder;
972 if (_encoderFilter)
973 coder = _encoderFilter;
974 else
975 coder = _encoder;
976 coder.QueryInterface(IID_ICryptoProperties, &cp);
977 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
978 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
979 UInt64 prev = 0;
980
981 UInt32 crcPrev = 0;
982
983 if (cp)
984 {
985 RINOK(cp->SetKey(_key, sizeof(_key)));
986 RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
987 }
988
989 for (UInt64 i = 0; i < NumIterations; i++)
990 {
991 if (printCallback && bi.UnpackSize - prev > (1 << 20))
992 {
993 RINOK(printCallback->CheckBreak());
994 prev = bi.UnpackSize;
995 }
996
997 bool isLast = (i == NumIterations - 1);
998 bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
999 outStreamSpec->Init(isLast, calcCrc);
1000
1001 if (_encoderFilter)
1002 {
1003 memcpy(rgCopy.Buffer, uncompressedDataPtr, kBufferSize);
1004 _encoderFilter->Init();
1005 My_FilterBench(_encoderFilter, rgCopy.Buffer, kBufferSize);
1006 RINOK(WriteStream(outStream, rgCopy.Buffer, kBufferSize));
1007 }
1008 else
1009 {
1010 inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1011 RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]));
1012 }
1013
1014 // outStreamSpec->Print();
1015
1016 UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
1017 if (i == 0)
1018 crcPrev = crcNew;
1019 else if (calcCrc && crcPrev != crcNew)
1020 return E_FAIL;
1021
1022 compressedSize = outStreamSpec->Pos;
1023 bi.UnpackSize += kBufferSize;
1024 bi.PackSize += compressedSize;
1025 }
1026
1027 _encoder.Release();
1028 _encoderFilter.Release();
1029 return S_OK;
1030 }
1031
1032
Decode(UInt32 decoderIndex)1033 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1034 {
1035 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1036 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1037 CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1038 CMyComPtr<IUnknown> coder;
1039 if (_decoderFilter)
1040 {
1041 if (decoderIndex != 0)
1042 return E_FAIL;
1043 coder = _decoderFilter;
1044 }
1045 else
1046 coder = decoder;
1047
1048 CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1049 coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
1050 if (!setDecProps && propStreamSpec->Pos != 0)
1051 return E_FAIL;
1052
1053 CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1054 CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1055
1056 CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1057 pi->BenchInfo.UnpackSize = 0;
1058 pi->BenchInfo.PackSize = 0;
1059
1060 #ifndef _7ZIP_ST
1061 {
1062 CMyComPtr<ICompressSetCoderMt> setCoderMt;
1063 coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1064 if (setCoderMt)
1065 {
1066 RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
1067 }
1068 }
1069 #endif
1070
1071 CMyComPtr<ICompressSetCoderProperties> scp;
1072 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1073 if (scp)
1074 {
1075 UInt64 reduceSize = _uncompressedDataSize;
1076 RINOK(_method.SetCoderProps(scp, &reduceSize));
1077 }
1078
1079 CMyComPtr<ICryptoProperties> cp;
1080 coder.QueryInterface(IID_ICryptoProperties, &cp);
1081
1082 if (setDecProps)
1083 {
1084 RINOK(setDecProps->SetDecoderProperties2(propStreamSpec->Buffer, (UInt32)propStreamSpec->Pos));
1085 }
1086
1087 {
1088 CMyComPtr<ICryptoSetPassword> sp;
1089 coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1090 if (sp)
1091 {
1092 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
1093 }
1094 }
1095
1096 UInt64 prev = 0;
1097
1098 if (cp)
1099 {
1100 RINOK(cp->SetKey(_key, sizeof(_key)));
1101 RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
1102 }
1103
1104 for (UInt64 i = 0; i < NumIterations; i++)
1105 {
1106 if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
1107 {
1108 RINOK(printCallback->CheckBreak());
1109 prev = pi->BenchInfo.UnpackSize;
1110 }
1111
1112 inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
1113 crcOutStreamSpec->Init();
1114
1115 UInt64 outSize = kBufferSize;
1116 crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);
1117
1118 if (_decoderFilter)
1119 {
1120 if (compressedSize > rgCopy.BufferSize)
1121 return E_FAIL;
1122 memcpy(rgCopy.Buffer, outStreamSpec->Buffer, compressedSize);
1123 _decoderFilter->Init();
1124 My_FilterBench(_decoderFilter, rgCopy.Buffer, compressedSize);
1125 RINOK(WriteStream(crcOutStream, rgCopy.Buffer, compressedSize));
1126 }
1127 else
1128 {
1129 RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
1130 }
1131
1132 if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1133 return S_FALSE;
1134 pi->BenchInfo.UnpackSize += kBufferSize;
1135 pi->BenchInfo.PackSize += compressedSize;
1136 }
1137
1138 decoder.Release();
1139 _decoderFilter.Release();
1140 return S_OK;
1141 }
1142
1143
1144 static const UInt32 kNumThreadsMax = (1 << 12);
1145
1146 struct CBenchEncoders
1147 {
1148 CEncoderInfo *encoders;
CBenchEncodersCBenchEncoders1149 CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
~CBenchEncodersCBenchEncoders1150 ~CBenchEncoders() { delete []encoders; }
1151 };
1152
1153
GetNumIterations(UInt64 numCommands,UInt64 complexInCommands)1154 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1155 {
1156 if (numCommands < (1 << 4))
1157 numCommands = (1 << 4);
1158 UInt64 res = complexInCommands / numCommands;
1159 return (res == 0 ? 1 : res);
1160 }
1161
1162
// Benchmarks one compression method.
// Creates numEncoderThreads encoder instances (each with its decoder(s)),
// runs the encode pass and reports it through callback->SetEncodeResult(),
// then runs the decode pass and reports it through callback->SetDecodeResult().
// Returns E_NOTIMPL when the method or one of its coders cannot be found or
// created, E_INVALIDARG when the method has more than one stream, otherwise
// the first failing HRESULT, or S_OK.
// In _7ZIP_ST builds the oldLzmaBenchMode and numThreads parameters are left
// unnamed and exactly one encoder instance with one decoder is used.
MethodBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,bool oldLzmaBenchMode,UInt32 numThreads,const COneMethodInfo & method2,size_t uncompressedDataSize,const Byte * fileData,unsigned generateDictBits,IBenchPrintCallback * printCallback,IBenchCallback * callback,CBenchProps * benchProps)1163 static HRESULT MethodBench(
1164 DECL_EXTERNAL_CODECS_LOC_VARS
1165 UInt64 complexInCommands,
1166 bool
1167 #ifndef _7ZIP_ST
1168 oldLzmaBenchMode
1169 #endif
1170 ,
1171 UInt32
1172 #ifndef _7ZIP_ST
1173 numThreads
1174 #endif
1175 ,
1176 const COneMethodInfo &method2,
1177 size_t uncompressedDataSize,
1178 const Byte *fileData,
1179 unsigned generateDictBits,
1180
1181 IBenchPrintCallback *printCallback,
1182 IBenchCallback *callback,
1183 CBenchProps *benchProps)
1184 {
// Work on a local copy: the LZMA branch below may add a thread-count property.
1185 COneMethodInfo method = method2;
1186 UInt64 methodId;
1187 UInt32 numStreams;
1188 if (!FindMethod(
1189 EXTERNAL_CODECS_LOC_VARS
1190 method.MethodName, methodId, numStreams))
1191 return E_NOTIMPL;
1192 if (numStreams != 1)
1193 return E_INVALIDARG;
1194
1195 UInt32 numEncoderThreads = 1;
1196 UInt32 numSubDecoderThreads = 1;
1197
1198 #ifndef _7ZIP_ST
1199 numEncoderThreads = numThreads;
1200
// Old LZMA mode: when both the benchmark and the LZMA coder are
// multi-threaded, halve the number of encoder instances and give each
// instance two decoders.
1201 if (oldLzmaBenchMode && methodId == k_LZMA)
1202 {
1203 bool fixedNumber;
1204 UInt32 numLzmaThreads = method.Get_Lzma_NumThreads(fixedNumber);
1205 if (!fixedNumber && numThreads == 1)
1206 method.AddProp_NumThreads(1);
1207 if (numThreads > 1 && numLzmaThreads > 1)
1208 {
1209 numEncoderThreads = numThreads / 2;
1210 numSubDecoderThreads = 2;
1211 }
1212 }
1213 #endif
1214
// NOTE(review): encoders[0] is read unconditionally further down, so this
// assumes numEncoderThreads >= 1 - confirm that callers never pass 0 threads.
1215 CBenchEncoders encodersSpec(numEncoderThreads);
1216 CEncoderInfo *encoders = encodersSpec.encoders;
1217
1218 UInt32 i;
1219
// Create the encoder and its decoder(s) for every instance.
// Only instance 0 receives the result callback.
1220 for (i = 0; i < numEncoderThreads; i++)
1221 {
1222 CEncoderInfo &encoder = encoders[i];
1223 encoder.callback = (i == 0) ? callback : 0;
1224 encoder.printCallback = printCallback;
1225
1226 {
1227 CCreatedCoder cod;
1228 RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId, true, encoder._encoderFilter, cod));
1229 encoder._encoder = cod.Coder;
1230 if (!encoder._encoder && !encoder._encoderFilter)
1231 return E_NOTIMPL;
1232 }
1233
// CRC checking is switched on only when the complexity weights exceed 30.
1234 encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
1235 encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;
1236
1237 memset(encoder._iv, 0, sizeof(encoder._iv));
1238 memset(encoder._key, 0, sizeof(encoder._key));
1239 memset(encoder._psw, 0, sizeof(encoder._psw));
1240
1241 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1242 {
1243 CCreatedCoder cod;
1244 CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
1245 RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod));
1246 decoder = cod.Coder;
1247 if (!encoder._decoderFilter && !decoder)
1248 return E_NOTIMPL;
1249 }
1250 }
1251
// A single random generator is passed to every Init() call below.
1252 CBaseRandomGenerator rg;
1253 rg.Init();
1254
// CRC of the caller-supplied data; stays 0 when no fileData is given.
1255 UInt32 crc = 0;
1256 if (fileData)
1257 crc = CrcCalc(fileData, uncompressedDataSize);
1258
1259 for (i = 0; i < numEncoderThreads; i++)
1260 {
1261 CEncoderInfo &encoder = encoders[i];
1262 encoder._method = method;
1263 encoder._uncompressedDataSize = uncompressedDataSize;
1264 encoder.kBufferSize = uncompressedDataSize;
1265 encoder.fileData = fileData;
1266 encoder.crc = crc;
1267
1268 RINOK(encoders[i].Init(method, generateDictBits, &rg));
1269 }
1270
// ---------- Encode pass ----------
1271 CBenchProgressStatus status;
1272 status.Res = S_OK;
1273 status.EncodeMode = true;
1274
1275 for (i = 0; i < numEncoderThreads; i++)
1276 {
1277 CEncoderInfo &encoder = encoders[i];
1278 encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);
1279
// Two progress objects per instance, both pointing at the shared status.
1280 for (int j = 0; j < 2; j++)
1281 {
1282 CBenchProgressInfo *spec = new CBenchProgressInfo;
1283 encoder.progressInfoSpec[j] = spec;
1284 encoder.progressInfo[j] = spec;
1285 spec->Status = &status;
1286 }
1287
// Instance 0 owns the callback and the start time of the encode pass.
1288 if (i == 0)
1289 {
1290 CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1291 bpi->Callback = callback;
1292 bpi->BenchInfo.NumIterations = numEncoderThreads;
1293 bpi->SetStartTime();
1294 }
1295
// With several instances each one encodes on its own thread; a single
// instance encodes inline on the calling thread.
1296 #ifndef _7ZIP_ST
1297 if (numEncoderThreads > 1)
1298 {
1299 #ifdef USE_ALLOCA
// Per-thread alloca size, different for each i and masked to at most 0x7FF.
1300 encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
1301 #endif
1302
1303 RINOK(encoder.CreateEncoderThread())
1304 }
1305 else
1306 #endif
1307 {
1308 RINOK(encoder.Encode());
1309 }
1310 }
1311
1312 #ifndef _7ZIP_ST
1313 if (numEncoderThreads > 1)
1314 for (i = 0; i < numEncoderThreads; i++)
1315 encoders[i].thread[0].Wait();
1316 #endif
1317
1318 RINOK(status.Res);
1319
// Collect the encode totals: sizes are summed over all instances, the
// iteration count is taken from instance 0.
1320 CBenchInfo info;
1321
1322 encoders[0].progressInfoSpec[0]->SetFinishTime(info);
1323 info.UnpackSize = 0;
1324 info.PackSize = 0;
1325 info.NumIterations = encoders[0].NumIterations;
1326
1327 for (i = 0; i < numEncoderThreads; i++)
1328 {
1329 CEncoderInfo &encoder = encoders[i];
1330 info.UnpackSize += encoder.kBufferSize;
1331 info.PackSize += encoder.compressedSize;
1332 }
1333
1334 RINOK(callback->SetEncodeResult(info, true));
1335
1336
// ---------- Decode pass ----------
1337 status.Res = S_OK;
1338 status.EncodeMode = false;
1339
1340 UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
1341
1342 for (i = 0; i < numEncoderThreads; i++)
1343 {
1344 CEncoderInfo &encoder = encoders[i];
1345
// The decode iteration count is derived once from instance 0's compressed
// size and reused by every other instance.
1346 if (i == 0)
1347 {
1348 encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
1349 CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1350 bpi->Callback = callback;
1351 bpi->BenchInfo.NumIterations = numDecoderThreads;
1352 bpi->SetStartTime();
1353 }
1354 else
1355 encoder.NumIterations = encoders[0].NumIterations;
1356
1357 #ifndef _7ZIP_ST
1358 {
// A non-positive thread count from the method properties is treated as 1.
1359 int numSubThreads = method.Get_NumThreads();
1360 encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
1361 }
1362 if (numDecoderThreads > 1)
1363 {
1364 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1365 {
// The second argument is true only for the very first decoder thread.
1366 HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
1367 #ifdef USE_ALLOCA
1368 , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
1369 #endif
1370 );
1371 RINOK(res);
1372 }
1373 }
1374 else
1375 #endif
1376 {
1377 RINOK(encoder.Decode(0));
1378 }
1379 }
1380
// Wait for every decoder thread; the last non-S_OK result is the one kept.
1381 #ifndef _7ZIP_ST
1382 HRESULT res = S_OK;
1383 if (numDecoderThreads > 1)
1384 for (i = 0; i < numEncoderThreads; i++)
1385 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1386 {
1387 CEncoderInfo &encoder = encoders[i];
1388 encoder.thread[j].Wait();
1389 if (encoder.Results[j] != S_OK)
1390 res = encoder.Results[j];
1391 }
1392 RINOK(res);
1393 #endif
1394
1395 RINOK(status.Res);
1396 encoders[0].progressInfoSpec[0]->SetFinishTime(info);
1397
// UNDER_CE only: add each decoder thread's user + kernel time to info.UserTime.
1398 #ifndef _7ZIP_ST
1399 #ifdef UNDER_CE
1400 if (numDecoderThreads > 1)
1401 for (i = 0; i < numEncoderThreads; i++)
1402 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1403 {
1404 FILETIME creationTime, exitTime, kernelTime, userTime;
1405 if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
1406 info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
1407 }
1408 #endif
1409 #endif
1410
// Collect the decode totals; iterations are scaled by the decoders per instance.
1411 info.UnpackSize = 0;
1412 info.PackSize = 0;
1413 info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
1414
1415 for (i = 0; i < numEncoderThreads; i++)
1416 {
1417 CEncoderInfo &encoder = encoders[i];
1418 info.UnpackSize += encoder.kBufferSize;
1419 info.PackSize += encoder.compressedSize;
1420 }
1421
// The decode result is reported twice: first as non-final, then as final.
1422 RINOK(callback->SetDecodeResult(info, false));
1423 RINOK(callback->SetDecodeResult(info, true));
1424
1425 return S_OK;
1426 }
1427
1428
GetLZMAUsage(bool multiThread,UInt32 dictionary)1429 static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
1430 {
1431 UInt32 hs = dictionary - 1;
1432 hs |= (hs >> 1);
1433 hs |= (hs >> 2);
1434 hs |= (hs >> 4);
1435 hs |= (hs >> 8);
1436 hs >>= 1;
1437 hs |= 0xFFFF;
1438 if (hs > (1 << 24))
1439 hs >>= 1;
1440 hs++;
1441 return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 +
1442 (1 << 20) + (multiThread ? (6 << 20) : 0);
1443 }
1444
GetBenchMemoryUsage(UInt32 numThreads,UInt32 dictionary,bool totalBench)1445 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
1446 {
1447 const UInt32 kBufferSize = dictionary;
1448 const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
1449 bool lzmaMt = (totalBench || numThreads > 1);
1450 UInt32 numBigThreads = numThreads;
1451 if (!totalBench && lzmaMt)
1452 numBigThreads /= 2;
1453 return ((UInt64)kBufferSize + kCompressedBufferSize +
1454 GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
1455 }
1456
CrcBig(const void * data,UInt32 size,UInt64 numIterations,const UInt32 * checkSum,IHasher * hf,IBenchPrintCallback * callback)1457 static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
1458 const UInt32 *checkSum, IHasher *hf,
1459 IBenchPrintCallback *callback)
1460 {
1461 Byte hash[64];
1462 UInt64 i;
1463 for (i = 0; i < sizeof(hash); i++)
1464 hash[i] = 0;
1465 for (i = 0; i < numIterations; i++)
1466 {
1467 if (callback && (i & 0xFF) == 0)
1468 {
1469 RINOK(callback->CheckBreak());
1470 }
1471 hf->Init();
1472 hf->Update(data, size);
1473 hf->Final(hash);
1474 UInt32 hashSize = hf->GetDigestSize();
1475 if (hashSize > sizeof(hash))
1476 return S_FALSE;
1477 UInt32 sum = 0;
1478 for (UInt32 j = 0; j < hashSize; j += 4)
1479 sum ^= GetUi32(hash + j);
1480 if (checkSum && sum != *checkSum)
1481 {
1482 return S_FALSE;
1483 }
1484 }
1485 return S_OK;
1486 }
1487
// Operand fed to CountCpuFreq() as both the initial sum and `val`.
// NOTE(review): presumably a mutable non-static global so the compiler cannot
// constant-fold the measurement loop - confirm.
1488 UInt32 g_BenchCpuFreqTemp = 1;
1489
// YY1 is two dependent operations on `sum`. Each following macro repeats the
// previous one four times, so YY7 expands to 64 x YY1 = 128 operations,
// which matches kNumFreqCommands below.
1490 #define YY1 sum += val; sum ^= val;
1491 #define YY3 YY1 YY1 YY1 YY1
1492 #define YY5 YY3 YY3 YY3 YY3
1493 #define YY7 YY5 YY5 YY5 YY5
// Number of operations performed by one CountCpuFreq() loop iteration.
1494 static const UInt32 kNumFreqCommands = 128;
1495
1496 EXTERN_C_BEGIN
1497
// Executes YY7 (128 add/xor operations on `sum`) `num` times and returns the
// resulting `sum`. FreqBench() times this function to estimate CPU frequency.
CountCpuFreq(UInt32 sum,UInt32 num,UInt32 val)1498 static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
1499 {
1500 for (UInt32 i = 0; i < num; i++)
1501 {
1502 YY7
1503 }
1504 return sum;
1505 }
1506
1507 EXTERN_C_END
1508
1509
1510 #ifndef _7ZIP_ST
1511
1512 struct CFreqInfo
1513 {
1514 NWindows::CThread Thread;
1515 IBenchPrintCallback *Callback;
1516 HRESULT CallbackRes;
1517 UInt32 ValRes;
1518 UInt32 Size;
1519 UInt64 NumIterations;
1520
WaitCFreqInfo1521 void Wait()
1522 {
1523 Thread.Wait();
1524 Thread.Close();
1525 }
1526 };
1527
FreqThreadFunction(void * param)1528 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
1529 {
1530 CFreqInfo *p = (CFreqInfo *)param;
1531
1532 UInt32 sum = g_BenchCpuFreqTemp;
1533 for (UInt64 k = p->NumIterations; k > 0; k--)
1534 {
1535 p->CallbackRes = p->Callback->CheckBreak();
1536 if (p->CallbackRes != S_OK)
1537 return 0;
1538 sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
1539 }
1540 p->ValRes = sum;
1541 return 0;
1542 }
1543
1544 struct CFreqThreads
1545 {
1546 CFreqInfo *Items;
1547 UInt32 NumThreads;
1548
CFreqThreadsCFreqThreads1549 CFreqThreads(): Items(0), NumThreads(0) {}
WaitAllCFreqThreads1550 void WaitAll()
1551 {
1552 for (UInt32 i = 0; i < NumThreads; i++)
1553 Items[i].Wait();
1554 NumThreads = 0;
1555 }
~CFreqThreadsCFreqThreads1556 ~CFreqThreads()
1557 {
1558 WaitAll();
1559 delete []Items;
1560 }
1561 };
1562
1563 struct CCrcInfo
1564 {
1565 NWindows::CThread Thread;
1566 IBenchPrintCallback *Callback;
1567 HRESULT CallbackRes;
1568
1569 const Byte *Data;
1570 UInt32 Size;
1571 UInt64 NumIterations;
1572 bool CheckSumDefined;
1573 UInt32 CheckSum;
1574 CMyComPtr<IHasher> Hasher;
1575 HRESULT Res;
1576
1577 #ifdef USE_ALLOCA
1578 size_t AllocaSize;
1579 #endif
1580
WaitCCrcInfo1581 void Wait()
1582 {
1583 Thread.Wait();
1584 Thread.Close();
1585 }
1586 };
1587
CrcThreadFunction(void * param)1588 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
1589 {
1590 CCrcInfo *p = (CCrcInfo *)param;
1591
1592 #ifdef USE_ALLOCA
1593 alloca(p->AllocaSize);
1594 #endif
1595
1596 p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
1597 p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
1598 p->Callback);
1599 return 0;
1600 }
1601
1602 struct CCrcThreads
1603 {
1604 CCrcInfo *Items;
1605 UInt32 NumThreads;
1606
CCrcThreadsCCrcThreads1607 CCrcThreads(): Items(0), NumThreads(0) {}
WaitAllCCrcThreads1608 void WaitAll()
1609 {
1610 for (UInt32 i = 0; i < NumThreads; i++)
1611 Items[i].Wait();
1612 NumThreads = 0;
1613 }
~CCrcThreadsCCrcThreads1614 ~CCrcThreads()
1615 {
1616 WaitAll();
1617 delete []Items;
1618 }
1619 };
1620
1621 #endif
1622
CrcCalc1(const Byte * buf,UInt32 size)1623 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
1624 {
1625 UInt32 crc = CRC_INIT_VAL;;
1626 for (UInt32 i = 0; i < size; i++)
1627 crc = CRC_UPDATE_BYTE(crc, buf[i]);
1628 return CRC_GET_DIGEST(crc);
1629 }
1630
RandGen(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)1631 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1632 {
1633 for (UInt32 i = 0; i < size; i++)
1634 buf[i] = (Byte)RG.GetRnd();
1635 }
1636
RandGenCrc(Byte * buf,UInt32 size,CBaseRandomGenerator & RG)1637 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1638 {
1639 RandGen(buf, size, RG);
1640 return CrcCalc1(buf, size);
1641 }
1642
CrcInternalTest()1643 bool CrcInternalTest()
1644 {
1645 CBenchBuffer buffer;
1646 const UInt32 kBufferSize0 = (1 << 8);
1647 const UInt32 kBufferSize1 = (1 << 10);
1648 const UInt32 kCheckSize = (1 << 5);
1649 if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
1650 return false;
1651 Byte *buf = buffer.Buffer;
1652 UInt32 i;
1653 for (i = 0; i < kBufferSize0; i++)
1654 buf[i] = (Byte)i;
1655 UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
1656 if (crc1 != 0x29058C73)
1657 return false;
1658 CBaseRandomGenerator RG;
1659 RandGen(buf + kBufferSize0, kBufferSize1, RG);
1660 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
1661 for (UInt32 j = 0; j < kCheckSize; j++)
1662 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
1663 return false;
1664 return true;
1665 }
1666
// One row of the total-benchmark method table (see TotalBench()).
1667 struct CBenchMethod
1668 {
// Weight of this method's results in the encode and decode totals.
1669 unsigned Weight;
// Passed to MethodBench() as generateDictBits; 0 makes TotalBench() use
// kFilterUnpackSize unless the unpack size is forced.
1670 unsigned DictBits;
// Complexity values copied into the callback's BenchProps before the run.
1671 UInt32 EncComplex;
1672 UInt32 DecComplexCompr;
1673 UInt32 DecComplexUnc;
// Method string, parsed with ParseMethodFromPROPVARIANT() and printed as the row label.
1674 const char *Name;
1675 };
1676
// Methods run by TotalBench(), in this order.
// Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name.
1677 static const CBenchMethod g_Bench[] =
1678 {
1679 { 40, 17, 357, 145, 20, "LZMA:x1" },
1680 { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" },
1681 { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" },
1682
1683 { 10, 16, 124, 40, 14, "Deflate:x1" },
1684 { 20, 16, 376, 40, 14, "Deflate:x5" },
1685 { 10, 16, 1082, 40, 14, "Deflate:x7" },
1686 { 10, 17, 422, 40, 14, "Deflate64:x5" },
1687
1688 { 10, 15, 590, 69, 69, "BZip2:x1" },
1689 { 20, 19, 815, 122, 122, "BZip2:x5" },
1690 { 10, 19, 815, 122, 122, "BZip2:x5:mt2" },
1691 { 10, 19, 2530, 122, 122, "BZip2:x7" },
1692
1693 { 10, 18, 1010, 0, 1150, "PPMD:x1" },
1694 { 10, 22, 1655, 0, 1830, "PPMD:x5" },
1695
1696 { 2, 0, 6, 0, 6, "Delta:4" },
1697 { 2, 0, 4, 0, 4, "BCJ" },
1698
1699 { 10, 0, 24, 0, 24, "AES256CBC:1" },
1700 { 2, 0, 8, 0, 2, "AES256CBC:2" }
1701 };
1702
// One row of the total-benchmark hash table (see TotalBench_Hash()).
1703 struct CBenchHash
1704 {
// Weight of this hash's result in the totals (CrcBench() benchWeight).
1705 unsigned Weight;
// Passed to CrcBench() as `complexity`.
1706 UInt32 Complex;
// Expected XOR of the digest's 32-bit words, verified by CrcBig().
1707 UInt32 CheckSum;
// Hash method string, parsed with ParseMethodFromPROPVARIANT() and printed as the row label.
1708 const char *Name;
1709 };
1710
// Hash methods run by TotalBench_Hash(), in this order.
// Columns: Weight, Complex, CheckSum, Name.
1711 static const CBenchHash g_Hash[] =
1712 {
1713 { 1, 1820, 0x8F8FEDAB, "CRC32:1" },
1714 { 10, 558, 0x8F8FEDAB, "CRC32:4" },
1715 { 10, 339, 0x8F8FEDAB, "CRC32:8" },
1716 { 10, 512, 0xDF1C17CC, "CRC64" },
1717 { 10, 5100, 0x2D79FF2E, "SHA256" },
1718 { 10, 2340, 0x4C25132B, "SHA1" },
1719 { 2, 5500, 0xE084E913, "BLAKE2sp" }
1720 };
1721
1722 struct CTotalBenchRes
1723 {
1724 // UInt64 NumIterations1; // for Usage
1725 UInt64 NumIterations2; // for Rating / RPU
1726
1727 UInt64 Rating;
1728 UInt64 Usage;
1729 UInt64 RPU;
1730
InitCTotalBenchRes1731 void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; }
1732
SetSumCTotalBenchRes1733 void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
1734 {
1735 Rating = (r1.Rating + r2.Rating);
1736 Usage = (r1.Usage + r2.Usage);
1737 RPU = (r1.RPU + r2.RPU);
1738 // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
1739 NumIterations2 = (r1.NumIterations2 + r2.NumIterations2);
1740 }
1741 };
1742
PrintNumber(IBenchPrintCallback & f,UInt64 value,unsigned size)1743 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
1744 {
1745 char s[128];
1746 unsigned startPos = (unsigned)sizeof(s) - 32;
1747 memset(s, ' ', startPos);
1748 ConvertUInt64ToString(value, s + startPos);
1749 // if (withSpace)
1750 {
1751 startPos--;
1752 size++;
1753 }
1754 unsigned len = (unsigned)strlen(s + startPos);
1755 if (size > len)
1756 {
1757 startPos -= (size - len);
1758 if (startPos < 0)
1759 startPos = 0;
1760 }
1761 f.Print(s + startPos);
1762 }
1763
// Column widths, in characters, of the benchmark result table.
1764 static const unsigned kFieldSize_Name = 12;
1765 static const unsigned kFieldSize_SmallName = 4;
1766 static const unsigned kFieldSize_Speed = 9;
1767 static const unsigned kFieldSize_Usage = 5;
1768 static const unsigned kFieldSize_RU = 6;
1769 static const unsigned kFieldSize_Rating = 6;
1770 static const unsigned kFieldSize_EU = 5;
1771 static const unsigned kFieldSize_Effec = 5;
1772
// Combined widths: the four numeric columns plus 4 (PrintNumber() emits one
// leading space per column), and the two frequency-related columns plus 2.
1773 static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
1774 static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
1775
1776
PrintRating(IBenchPrintCallback & f,UInt64 rating,unsigned size)1777 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
1778 {
1779 PrintNumber(f, (rating + 500000) / 1000000, size);
1780 }
1781
1782
PrintPercents(IBenchPrintCallback & f,UInt64 val,UInt64 divider,unsigned size)1783 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
1784 {
1785 PrintNumber(f, (val * 100 + divider / 2) / divider, size);
1786 }
1787
PrintChars(IBenchPrintCallback & f,char c,unsigned size)1788 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
1789 {
1790 char s[256];
1791 memset(s, (Byte)c, size);
1792 s[size] = 0;
1793 f.Print(s);
1794 }
1795
PrintSpaces(IBenchPrintCallback & f,unsigned size)1796 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
1797 {
1798 PrintChars(f, ' ', size);
1799 }
1800
PrintResults(IBenchPrintCallback & f,UInt64 usage,UInt64 rpu,UInt64 rating,bool showFreq,UInt64 cpuFreq)1801 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
1802 {
1803 PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
1804 PrintRating(f, rpu, kFieldSize_RU);
1805 PrintRating(f, rating, kFieldSize_Rating);
1806 if (showFreq)
1807 {
1808 if (cpuFreq == 0)
1809 PrintSpaces(f, kFieldSize_EUAndEffec);
1810 else
1811 {
1812 UInt64 ddd = cpuFreq * usage / 100;
1813 if (ddd == 0)
1814 ddd = 1;
1815 PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
1816 PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
1817 }
1818 }
1819 }
1820
PrintResults(IBenchPrintCallback * f,const CBenchInfo & info,unsigned weight,UInt64 rating,bool showFreq,UInt64 cpuFreq,CTotalBenchRes * res)1821 static void PrintResults(IBenchPrintCallback *f,
1822 const CBenchInfo &info,
1823 unsigned weight,
1824 UInt64 rating,
1825 bool showFreq, UInt64 cpuFreq,
1826 CTotalBenchRes *res)
1827 {
1828 UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
1829 if (f)
1830 {
1831 if (speed != 0)
1832 PrintNumber(*f, speed / 1024, kFieldSize_Speed);
1833 else
1834 PrintSpaces(*f, 1 + kFieldSize_Speed);
1835 }
1836 UInt64 usage = info.GetUsage();
1837 UInt64 rpu = info.GetRatingPerUsage(rating);
1838 if (f)
1839 {
1840 PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
1841 }
1842
1843 if (res)
1844 {
1845 // res->NumIterations1++;
1846 res->NumIterations2 += weight;
1847 res->RPU += (rpu * weight);
1848 res->Rating += (rating * weight);
1849 res->Usage += (usage * weight);
1850 }
1851 }
1852
PrintTotals(IBenchPrintCallback & f,bool showFreq,UInt64 cpuFreq,const CTotalBenchRes & res)1853 static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
1854 {
1855 PrintSpaces(f, 1 + kFieldSize_Speed);
1856 // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
1857 UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
1858 PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
1859 }
1860
PrintRequirements(IBenchPrintCallback & f,const char * sizeString,bool size_Defined,UInt64 size,const char * threadsString,UInt32 numThreads)1861 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
1862 bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
1863 {
1864 f.Print("RAM ");
1865 f.Print(sizeString);
1866 if (size_Defined)
1867 PrintNumber(f, (size >> 20), 6);
1868 else
1869 f.Print(" ?");
1870 f.Print(" MB, # ");
1871 f.Print(threadsString);
1872 PrintNumber(f, numThreads, 3);
1873 f.NewLine();
1874 }
1875
1876 struct CBenchCallbackToPrint: public IBenchCallback
1877 {
1878 CBenchProps BenchProps;
1879 CTotalBenchRes EncodeRes;
1880 CTotalBenchRes DecodeRes;
1881 IBenchPrintCallback *_file;
1882 UInt32 DictSize;
1883
1884 bool Use2Columns;
1885 unsigned NameFieldSize;
1886
1887 bool ShowFreq;
1888 UInt64 CpuFreq;
1889
1890 unsigned EncodeWeight;
1891 unsigned DecodeWeight;
1892
CBenchCallbackToPrintCBenchCallbackToPrint1893 CBenchCallbackToPrint():
1894 Use2Columns(false),
1895 NameFieldSize(0),
1896 ShowFreq(false),
1897 CpuFreq(0),
1898 EncodeWeight(1),
1899 DecodeWeight(1)
1900 {}
1901
InitCBenchCallbackToPrint1902 void Init() { EncodeRes.Init(); DecodeRes.Init(); }
1903 void Print(const char *s);
1904 void NewLine();
1905
1906 HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
1907 HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
1908 HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
1909 };
1910
SetFreq(bool showFreq,UInt64 cpuFreq)1911 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
1912 {
1913 ShowFreq = showFreq;
1914 CpuFreq = cpuFreq;
1915 return S_OK;
1916 }
1917
SetEncodeResult(const CBenchInfo & info,bool final)1918 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
1919 {
1920 RINOK(_file->CheckBreak());
1921 if (final)
1922 {
1923 UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
1924 PrintResults(_file, info,
1925 EncodeWeight, rating,
1926 ShowFreq, CpuFreq, &EncodeRes);
1927 if (!Use2Columns)
1928 _file->NewLine();
1929 }
1930 return S_OK;
1931 }
1932
1933 static const char *kSep = " | ";
1934
SetDecodeResult(const CBenchInfo & info,bool final)1935 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
1936 {
1937 RINOK(_file->CheckBreak());
1938 if (final)
1939 {
1940 UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
1941 if (Use2Columns)
1942 _file->Print(kSep);
1943 else
1944 PrintSpaces(*_file, NameFieldSize);
1945 CBenchInfo info2 = info;
1946 info2.UnpackSize *= info2.NumIterations;
1947 info2.PackSize *= info2.NumIterations;
1948 info2.NumIterations = 1;
1949 PrintResults(_file, info2,
1950 DecodeWeight, rating,
1951 ShowFreq, CpuFreq, &DecodeRes);
1952 }
1953 return S_OK;
1954 }
1955
Print(const char * s)1956 void CBenchCallbackToPrint::Print(const char *s)
1957 {
1958 _file->Print(s);
1959 }
1960
NewLine()1961 void CBenchCallbackToPrint::NewLine()
1962 {
1963 _file->NewLine();
1964 }
1965
PrintLeft(IBenchPrintCallback & f,const char * s,unsigned size)1966 void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
1967 {
1968 f.Print(s);
1969 int numSpaces = size - MyStringLen(s);
1970 if (numSpaces > 0)
1971 PrintSpaces(f, numSpaces);
1972 }
1973
PrintRight(IBenchPrintCallback & f,const char * s,unsigned size)1974 void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
1975 {
1976 int numSpaces = size - MyStringLen(s);
1977 if (numSpaces > 0)
1978 PrintSpaces(f, numSpaces);
1979 f.Print(s);
1980 }
1981
TotalBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,bool forceUnpackSize,size_t unpackSize,const Byte * fileData,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback)1982 static HRESULT TotalBench(
1983 DECL_EXTERNAL_CODECS_LOC_VARS
1984 UInt64 complexInCommands,
1985 UInt32 numThreads,
1986 bool forceUnpackSize,
1987 size_t unpackSize,
1988 const Byte *fileData,
1989 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
1990 {
1991 for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
1992 {
1993 const CBenchMethod &bench = g_Bench[i];
1994 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
1995 callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
1996 callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
1997 callback->BenchProps.EncComplex = bench.EncComplex;
1998
1999 COneMethodInfo method;
2000 NCOM::CPropVariant propVariant;
2001 propVariant = bench.Name;
2002 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2003
2004 size_t unpackSize2 = unpackSize;
2005 if (!forceUnpackSize && bench.DictBits == 0)
2006 unpackSize2 = kFilterUnpackSize;
2007
2008 callback->EncodeWeight = bench.Weight;
2009 callback->DecodeWeight = bench.Weight;
2010
2011 HRESULT res = MethodBench(
2012 EXTERNAL_CODECS_LOC_VARS
2013 complexInCommands,
2014 false, numThreads, method,
2015 unpackSize2, fileData,
2016 bench.DictBits,
2017 printCallback, callback, &callback->BenchProps);
2018
2019 if (res == E_NOTIMPL)
2020 {
2021 // callback->Print(" ---");
2022 // we need additional empty line as line for decompression results
2023 if (!callback->Use2Columns)
2024 callback->NewLine();
2025 }
2026 else
2027 {
2028 RINOK(res);
2029 }
2030
2031 callback->NewLine();
2032 }
2033 return S_OK;
2034 }
2035
2036
// Estimates the CPU frequency by timing CountCpuFreq() on numThreads threads.
//   complexInCommands - command budget that determines the iteration count.
//   numThreads        - 0 is treated as 1; forced to 1 in _7ZIP_ST builds.
//   _file             - output sink; also polled with CheckBreak().
//   showFreq, specifiedFreq - select the frequency handed to PrintResults().
//   cpuFreq (out)     - measured rating per thread; set only when _file is
//                       non-NULL, otherwise left at 0.
//   res (out)         - receives the computed sum in the single-threaded path
//                       only; stays 0 when worker threads are used.
// Returns the first failing CheckBreak() / thread creation result, or S_OK.
// NOTE(review): _file is dereferenced unconditionally by the CheckBreak()
// calls, while the printing part is guarded by `if (_file)` - confirm whether
// a NULL _file is actually supported.
FreqBench(UInt64 complexInCommands,UInt32 numThreads,IBenchPrintCallback * _file,bool showFreq,UInt64 specifiedFreq,UInt64 & cpuFreq,UInt32 & res)2037 static HRESULT FreqBench(
2038 UInt64 complexInCommands,
2039 UInt32 numThreads,
2040 IBenchPrintCallback *_file,
2041 bool showFreq,
2042 UInt64 specifiedFreq,
2043 UInt64 &cpuFreq,
2044 UInt32 &res)
2045 {
2046 res = 0;
2047 cpuFreq = 0;
2048
2049 UInt32 bufferSize = 1 << 20;
2050 UInt32 complexity = kNumFreqCommands;
2051 if (numThreads == 0)
2052 numThreads = 1;
2053
2054 #ifdef _7ZIP_ST
2055 numThreads = 1;
2056 #endif
2057
// Each iteration is one CountCpuFreq() call of bufferSize loop passes with
// `complexity` commands per pass; at least one iteration is always run.
2058 UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2059 UInt64 numIterations = complexInCommands / complexity / bsize;
2060 if (numIterations == 0)
2061 numIterations = 1;
2062
2063 CBenchInfoCalc progressInfoSpec;
2064
2065 #ifndef _7ZIP_ST
// `threads` waits for all started threads in its destructor, so the early
// returns below do not leave a worker running.
2066 CFreqThreads threads;
2067 if (numThreads > 1)
2068 {
2069 threads.Items = new CFreqInfo[numThreads];
2070 UInt32 i;
2071 for (i = 0; i < numThreads; i++)
2072 {
2073 CFreqInfo &info = threads.Items[i];
2074 info.Callback = _file;
2075 info.CallbackRes = S_OK;
2076 info.NumIterations = numIterations;
2077 info.Size = bufferSize;
2078 }
// Timing starts after all records are prepared and before the first thread
// is created.
2079 progressInfoSpec.SetStartTime();
2080 for (i = 0; i < numThreads; i++)
2081 {
2082 CFreqInfo &info = threads.Items[i];
2083 RINOK(info.Thread.Create(FreqThreadFunction, &info));
2084 threads.NumThreads++;
2085 }
2086 threads.WaitAll();
2087 for (i = 0; i < numThreads; i++)
2088 {
2089 RINOK(threads.Items[i].CallbackRes);
2090 }
2091 }
2092 else
2093 #endif
2094 {
// Single-threaded path: run the same loop inline on the calling thread.
2095 progressInfoSpec.SetStartTime();
2096 UInt32 sum = g_BenchCpuFreqTemp;
2097 for (UInt64 k = numIterations; k > 0; k--)
2098 {
2099 RINOK(_file->CheckBreak());
2100 sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
2101 }
2102 res += sum;
2103 }
2104
2105 CBenchInfo info;
2106 progressInfoSpec.SetFinishTime(info);
2107
2108 info.UnpackSize = 0;
2109 info.PackSize = 0;
2110 info.NumIterations = 1;
2111
2112 if (_file)
2113 {
2114 {
// Total commands executed by all threads; the rating is their speed and
// cpuFreq is the per-thread share of it.
2115 UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
2116 UInt64 rating = info.GetSpeed(numCommands);
2117 cpuFreq = rating / numThreads;
// A NULL totals pointer is passed, so nothing is accumulated for this row.
2118 PrintResults(_file, info,
2119 0, // weight
2120 rating,
2121 showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
2122 }
2123 RINOK(_file->CheckBreak());
2124 }
2125
2126 return S_OK;
2127 }
2128
2129
2130
// Benchmarks one hash method on numThreads threads.
//   complexInCommands - command budget that determines the iteration count.
//   numThreads        - 0 is treated as 1; forced to 1 in _7ZIP_ST builds.
//   bufferSize        - bytes hashed per iteration by each thread.
//   speed (out)       - info.GetSpeed() of the total number of bytes hashed.
//   complexity        - cost model value for the hash; must be non-zero
//                       because it is used as a divisor.
//   benchWeight       - weight of this result in *encodeRes.
//   checkSum          - optional expected folded digest (see CrcBig()).
//   method            - hash method; its properties are applied to a hasher
//                       that exposes ICompressSetCoderProperties.
//   _file             - output sink; when NULL nothing is printed or accumulated.
// Returns E_NOTIMPL when the hash method or hasher is unavailable,
// E_OUTOFMEMORY when the buffer cannot be sized or allocated, otherwise the
// first failing HRESULT, or S_OK.
CrcBench(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufferSize,UInt64 & speed,UInt32 complexity,unsigned benchWeight,const UInt32 * checkSum,const COneMethodInfo & method,IBenchPrintCallback * _file,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2131 static HRESULT CrcBench(
2132 DECL_EXTERNAL_CODECS_LOC_VARS
2133 UInt64 complexInCommands,
2134 UInt32 numThreads, UInt32 bufferSize,
2135 UInt64 &speed,
2136 UInt32 complexity, unsigned benchWeight,
2137 const UInt32 *checkSum,
2138 const COneMethodInfo &method,
2139 IBenchPrintCallback *_file,
2140 CTotalBenchRes *encodeRes,
2141 bool showFreq, UInt64 cpuFreq)
2142 {
2143 if (numThreads == 0)
2144 numThreads = 1;
2145
2146 #ifdef _7ZIP_ST
2147 numThreads = 1;
2148 #endif
2149
2150 AString methodName = method.MethodName;
2151 // methodName.RemoveChar(L'-');
2152 CMethodId hashID;
2153 if (!FindHashMethod(
2154 EXTERNAL_CODECS_LOC_VARS
2155 methodName, hashID))
2156 return E_NOTIMPL;
2157
// One contiguous buffer holds a bufferSize slice per thread; the division
// check detects overflow of the size_t multiplication.
2158 CBenchBuffer buffer;
2159 size_t totalSize = (size_t)bufferSize * numThreads;
2160 if (totalSize / numThreads != bufferSize)
2161 return E_OUTOFMEMORY;
2162 if (!buffer.Alloc(totalSize))
2163 return E_OUTOFMEMORY;
2164
2165 Byte *buf = buffer.Buffer;
2166 CBaseRandomGenerator RG;
// `complexity` is applied per 256 bytes here (note the matching "/ 256" in
// numCommands below); at least one iteration is always run.
2167 UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
2168 UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
2169 if (numIterations == 0)
2170 numIterations = 1;
2171
2172 CBenchInfoCalc progressInfoSpec;
2173
2174 #ifndef _7ZIP_ST
// `threads` waits for all started threads in its destructor, so the early
// returns below do not leave a worker running.
2175 CCrcThreads threads;
2176 if (numThreads > 1)
2177 {
2178 threads.Items = new CCrcInfo[numThreads];
2179
2180 UInt32 i;
// Prepare every thread record before any thread is started: its own hasher,
// its own buffer slice filled with random data, and the checksum setup.
2181 for (i = 0; i < numThreads; i++)
2182 {
2183 CCrcInfo &info = threads.Items[i];
2184 AString name;
2185 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
2186 if (!info.Hasher)
2187 return E_NOTIMPL;
2188 CMyComPtr<ICompressSetCoderProperties> scp;
2189 info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2190 if (scp)
2191 {
2192 UInt64 reduceSize = 1;
2193 RINOK(method.SetCoderProps(scp, &reduceSize));
2194 }
2195
2196 Byte *data = buf + (size_t)bufferSize * i;
2197 info.Callback = _file;
2198 info.Data = data;
2199 info.NumIterations = numIterations;
2200 info.Size = bufferSize;
2201 /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
// Only thread 0 verifies the digest against the expected checksum.
2202 info.CheckSumDefined = false;
2203 if (checkSum)
2204 {
2205 info.CheckSum = *checkSum;
2206 info.CheckSumDefined = (checkSum && (i == 0));
2207 }
2208
2209 #ifdef USE_ALLOCA
// Per-thread alloca size, different for each i and masked to at most 0x7FF.
2210 info.AllocaSize = (i * 16 * 21) & 0x7FF;
2211 #endif
2212 }
2213
// Timing covers thread creation and execution, not the setup above.
2214 progressInfoSpec.SetStartTime();
2215
2216 for (i = 0; i < numThreads; i++)
2217 {
2218 CCrcInfo &info = threads.Items[i];
2219 RINOK(info.Thread.Create(CrcThreadFunction, &info));
2220 threads.NumThreads++;
2221 }
2222 threads.WaitAll();
2223 for (i = 0; i < numThreads; i++)
2224 {
2225 RINOK(threads.Items[i].Res);
2226 }
2227 }
2228 else
2229 #endif
2230 {
// Single-threaded path. Unlike the threaded path, the timer starts before
// the hasher is created and configured.
2231 /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
2232 progressInfoSpec.SetStartTime();
2233 CMyComPtr<IHasher> hasher;
2234 AString name;
2235 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
2236 if (!hasher)
2237 return E_NOTIMPL;
2238 CMyComPtr<ICompressSetCoderProperties> scp;
2239 hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
2240 if (scp)
2241 {
2242 UInt64 reduceSize = 1;
2243 RINOK(method.SetCoderProps(scp, &reduceSize));
2244 }
2245 RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
2246 }
2247
2248 CBenchInfo info;
2249 progressInfoSpec.SetFinishTime(info);
2250
// Total number of bytes hashed by all threads, reported as one iteration.
2251 UInt64 unpSize = numIterations * bufferSize;
2252 UInt64 unpSizeThreads = unpSize * numThreads;
2253 info.UnpackSize = unpSizeThreads;
2254 info.PackSize = unpSizeThreads;
2255 info.NumIterations = 1;
2256
2257 if (_file)
2258 {
2259 {
2260 UInt64 numCommands = unpSizeThreads * complexity / 256;
2261 UInt64 rating = info.GetSpeed(numCommands);
2262 PrintResults(_file, info,
2263 benchWeight, rating,
2264 showFreq, cpuFreq, encodeRes);
2265 }
2266 RINOK(_file->CheckBreak());
2267 }
2268
2269 speed = info.GetSpeed(unpSizeThreads);
2270
2271 return S_OK;
2272 }
2273
TotalBench_Hash(DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands,UInt32 numThreads,UInt32 bufSize,IBenchPrintCallback * printCallback,CBenchCallbackToPrint * callback,CTotalBenchRes * encodeRes,bool showFreq,UInt64 cpuFreq)2274 static HRESULT TotalBench_Hash(
2275 DECL_EXTERNAL_CODECS_LOC_VARS
2276 UInt64 complexInCommands,
2277 UInt32 numThreads, UInt32 bufSize,
2278 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
2279 CTotalBenchRes *encodeRes,
2280 bool showFreq, UInt64 cpuFreq)
2281 {
2282 for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2283 {
2284 const CBenchHash &bench = g_Hash[i];
2285 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2286 // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2287 // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2288 // callback->BenchProps.EncComplex = bench.EncComplex;
2289
2290 COneMethodInfo method;
2291 NCOM::CPropVariant propVariant;
2292 propVariant = bench.Name;
2293 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
2294
2295 UInt64 speed;
2296 HRESULT res = CrcBench(
2297 EXTERNAL_CODECS_LOC_VARS
2298 complexInCommands,
2299 numThreads, bufSize,
2300 speed,
2301 bench.Complex, bench.Weight,
2302 &bench.CheckSum, method,
2303 printCallback, encodeRes, showFreq, cpuFreq);
2304 if (res == E_NOTIMPL)
2305 {
2306 // callback->Print(" ---");
2307 }
2308 else
2309 {
2310 RINOK(res);
2311 }
2312 callback->NewLine();
2313 }
2314 return S_OK;
2315 }
2316
2317 struct CTempValues
2318 {
2319 UInt64 *Values;
CTempValuesCTempValues2320 CTempValues(UInt32 num) { Values = new UInt64[num]; }
~CTempValuesCTempValues2321 ~CTempValues() { delete []Values; }
2322 };
2323
ParseNumberString(const UString & s,NCOM::CPropVariant & prop)2324 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
2325 {
2326 const wchar_t *end;
2327 UInt64 result = ConvertStringToUInt64(s, &end);
2328 if (*end != 0 || s.IsEmpty())
2329 prop = s;
2330 else if (result <= (UInt32)0xFFFFFFFF)
2331 prop = (UInt32)result;
2332 else
2333 prop = result;
2334 }
2335
GetNumThreadsNext(unsigned i,UInt32 numThreads)2336 static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
2337 {
2338 if (i < 2)
2339 return i + 1;
2340 i -= 1;
2341 UInt32 num = (UInt32)(2 + (i & 1)) << (i >> 1);
2342 return (num <= numThreads) ? num : numThreads;
2343 }
2344
AreSameMethodNames(const char * fullName,const char * shortName)2345 static bool AreSameMethodNames(const char *fullName, const char *shortName)
2346 {
2347 for (;;)
2348 {
2349 char c2 = *shortName++;
2350 if (c2 == 0)
2351 return true;
2352 char c1 = *fullName++;
2353 if (MyCharLower_Ascii(c1) != MyCharLower_Ascii(c2))
2354 return false;
2355 }
2356 }
2357
2358
2359 #ifdef MY_CPU_X86_OR_AMD64
2360
PrintCpuChars(AString & s,UInt32 v)2361 static void PrintCpuChars(AString &s, UInt32 v)
2362 {
2363 for (int j = 0; j < 4; j++)
2364 {
2365 Byte b = (Byte)(v & 0xFF);
2366 v >>= 8;
2367 if (b == 0)
2368 break;
2369 s += (char)b;
2370 }
2371 }
2372
x86cpuid_to_String(const Cx86cpuid & c,AString & s)2373 static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
2374 {
2375 s.Empty();
2376
2377 UInt32 maxFunc2 = 0;
2378 UInt32 t[3];
2379
2380 MyCPUID(0x80000000, &maxFunc2, &t[0], &t[1], &t[2]);
2381
2382 bool fullNameIsAvail = (maxFunc2 >= 0x80000004);
2383
2384 if (!fullNameIsAvail)
2385 {
2386 for (int i = 0; i < 3; i++)
2387 PrintCpuChars(s, c.vendor[i]);
2388 }
2389 else
2390 {
2391 for (int i = 0; i < 3; i++)
2392 {
2393 UInt32 d[4] = { 0 };
2394 MyCPUID(0x80000002 + i, &d[0], &d[1], &d[2], &d[3]);
2395 for (int j = 0; j < 4; j++)
2396 PrintCpuChars(s, d[j]);
2397 }
2398 }
2399
2400 s.Add_Space_if_NotEmpty();
2401 {
2402 char temp[32];
2403 ConvertUInt32ToHex(c.ver, temp);
2404 s += '(';
2405 s += temp;
2406 s += ')';
2407 }
2408 }
2409
2410 #endif
2411
2412
GetCpuName(AString & s)2413 void GetCpuName(AString &s)
2414 {
2415 s.Empty();
2416
2417 #ifdef MY_CPU_X86_OR_AMD64
2418 {
2419 Cx86cpuid cpuid;
2420 if (x86cpuid_CheckAndRead(&cpuid))
2421 {
2422 x86cpuid_to_String(cpuid, s);
2423 return;
2424 }
2425 #ifdef MY_CPU_AMD64
2426 s = "x64";
2427 #else
2428 s = "x86";
2429 #endif
2430 }
2431 #else
2432
2433 #ifdef MY_CPU_LE
2434 s = "LE";
2435 #elif defined(MY_CPU_BE)
2436 s = "BE";
2437 #endif
2438
2439 #endif
2440 }
2441
2442
Bench(DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback * printCallback,IBenchCallback * benchCallback,const CObjectVector<CProperty> & props,UInt32 numIterations,bool multiDict)2443 HRESULT Bench(
2444 DECL_EXTERNAL_CODECS_LOC_VARS
2445 IBenchPrintCallback *printCallback,
2446 IBenchCallback *benchCallback,
2447 const CObjectVector<CProperty> &props,
2448 UInt32 numIterations,
2449 bool multiDict)
2450 {
2451 if (!CrcInternalTest())
2452 return S_FALSE;
2453
2454 UInt32 numCPUs = 1;
2455 UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
2456
2457 #ifndef _7ZIP_ST
2458 numCPUs = NSystem::GetNumberOfProcessors();
2459 #endif
2460
2461 bool ramSize_Defined = NSystem::GetRamSize(ramSize);
2462
2463 UInt32 numThreadsSpecified = numCPUs;
2464
2465 UInt32 testTime = kComplexInSeconds;
2466
2467 UInt64 specifiedFreq = 0;
2468
2469 bool multiThreadTests = false;
2470
2471 COneMethodInfo method;
2472
2473 CBenchBuffer fileDataBuffer;
2474
2475 {
2476 unsigned i;
2477 for (i = 0; i < props.Size(); i++)
2478 {
2479 const CProperty &property = props[i];
2480 UString name = property.Name;
2481 name.MakeLower_Ascii();
2482
2483 if (name.IsEqualTo("file"))
2484 {
2485 if (property.Value.IsEmpty())
2486 return E_INVALIDARG;
2487
2488 #ifdef USE_WIN_FILE
2489
2490 NFile::NIO::CInFile file;
2491 if (!file.Open(us2fs(property.Value)))
2492 return E_INVALIDARG;
2493 UInt64 len;
2494 if (!file.GetLength(len))
2495 return E_FAIL;
2496 if (len >= ((UInt32)1 << 31) || len == 0)
2497 return E_INVALIDARG;
2498 if (!fileDataBuffer.Alloc((size_t)len))
2499 return E_OUTOFMEMORY;
2500 UInt32 processedSize;
2501 file.Read(fileDataBuffer.Buffer, (UInt32)len, processedSize);
2502 if (processedSize != len)
2503 return E_FAIL;
2504 if (printCallback)
2505 {
2506 printCallback->Print("file size =");
2507 // printCallback->Print(GetOemString(property.Value));
2508 PrintNumber(*printCallback, len, 0);
2509 printCallback->NewLine();
2510 }
2511 continue;
2512
2513 #else
2514
2515 return E_NOTIMPL;
2516
2517 #endif
2518 }
2519
2520 NCOM::CPropVariant propVariant;
2521 if (!property.Value.IsEmpty())
2522 ParseNumberString(property.Value, propVariant);
2523
2524 if (name.IsEqualTo("time"))
2525 {
2526 RINOK(ParsePropToUInt32(L"", propVariant, testTime));
2527 continue;
2528 }
2529
2530 if (name.IsEqualTo("freq"))
2531 {
2532 UInt32 freq32 = 0;
2533 RINOK(ParsePropToUInt32(L"", propVariant, freq32));
2534 if (freq32 == 0)
2535 return E_INVALIDARG;
2536 specifiedFreq = (UInt64)freq32 * 1000000;
2537
2538 if (printCallback)
2539 {
2540 printCallback->Print("freq=");
2541 PrintNumber(*printCallback, freq32, 0);
2542 printCallback->NewLine();
2543 }
2544
2545 continue;
2546 }
2547
2548 if (name.IsPrefixedBy_Ascii_NoCase("mt"))
2549 {
2550 UString s = name.Ptr(2);
2551 if (s == L"*")
2552 {
2553 multiThreadTests = true;
2554 continue;
2555 }
2556 if (s.IsEmpty() && propVariant.vt == VT_BSTR)
2557 {
2558 if (wcscmp(propVariant.bstrVal, L"*") == 0)
2559 {
2560 multiThreadTests = true;
2561 continue;
2562 }
2563 }
2564 #ifndef _7ZIP_ST
2565 RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified));
2566 #endif
2567 continue;
2568 }
2569
2570 RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
2571 }
2572 }
2573
2574 if (printCallback)
2575 {
2576 AString s;
2577 GetCpuName(s);
2578 printCallback->Print(s);
2579 printCallback->NewLine();
2580 }
2581
2582 if (printCallback)
2583 {
2584 printCallback->Print("CPU Freq:");
2585 }
2586
2587 UInt64 complexInCommands = kComplexInCommands;
2588
2589 if (printCallback /* || benchCallback */)
2590 {
2591 UInt64 numMilCommands = 1 << 6;
2592 if (specifiedFreq != 0)
2593 {
2594 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
2595 numMilCommands >>= 1;
2596 }
2597
2598 for (int jj = 0;; jj++)
2599 {
2600 if (printCallback)
2601 RINOK(printCallback->CheckBreak());
2602
2603 UInt64 start = ::GetTimeCount();
2604 UInt32 sum = (UInt32)start;
2605 sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
2606 const UInt64 realDelta = ::GetTimeCount() - start;
2607 start = realDelta;
2608 if (start == 0)
2609 start = 1;
2610 UInt64 freq = GetFreq();
2611 // mips is constant in some compilers
2612 const UInt64 mipsVal = numMilCommands * freq / start;
2613 if (printCallback)
2614 {
2615 if (realDelta == 0)
2616 {
2617 printCallback->Print(" -");
2618 }
2619 else
2620 {
2621 // PrintNumber(*printCallback, start, 0);
2622 PrintNumber(*printCallback, mipsVal, 5 + ((sum == 0xF1541213) ? 1 : 0));
2623 }
2624 }
2625 /*
2626 if (benchCallback)
2627 benchCallback->AddCpuFreq(mipsVal);
2628 */
2629
2630 if (jj >= 3)
2631 {
2632 SetComplexCommands(testTime, false, mipsVal * 1000000, complexInCommands);
2633 if (jj >= 8 || start >= freq)
2634 break;
2635 // break; // change it
2636 numMilCommands <<= 1;
2637 }
2638 }
2639 }
2640
2641 if (printCallback)
2642 {
2643 printCallback->NewLine();
2644 printCallback->NewLine();
2645 PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
2646 }
2647
2648 if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
2649 return E_INVALIDARG;
2650
2651 UInt32 dict;
2652 bool dictIsDefined = method.Get_DicSize(dict);
2653
2654 if (method.MethodName.IsEmpty())
2655 method.MethodName = "LZMA";
2656
2657 if (benchCallback)
2658 {
2659 CBenchProps benchProps;
2660 benchProps.SetLzmaCompexity();
2661 UInt32 dictSize = method.Get_Lzma_DicSize();
2662 UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
2663 return MethodBench(
2664 EXTERNAL_CODECS_LOC_VARS
2665 complexInCommands,
2666 true, numThreadsSpecified,
2667 method,
2668 uncompressedDataSize, fileDataBuffer.Buffer,
2669 kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
2670 }
2671
2672 AString methodName = method.MethodName;
2673 if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
2674 methodName = "crc32";
2675 method.MethodName = methodName;
2676 CMethodId hashID;
2677
2678 if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
2679 {
2680 if (!printCallback)
2681 return S_FALSE;
2682 IBenchPrintCallback &f = *printCallback;
2683 if (!dictIsDefined)
2684 dict = (1 << 24);
2685
2686
2687 // methhodName.RemoveChar(L'-');
2688 UInt32 complexity = 10000;
2689 const UInt32 *checkSum = NULL;
2690 {
2691 for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2692 {
2693 const CBenchHash &h = g_Hash[i];
2694 AString s = h.Name;
2695 AString hProp;
2696 int propPos = s.Find(':');
2697 if (propPos >= 0)
2698 {
2699 hProp = s.Ptr(propPos + 1);
2700 s.DeleteFrom(propPos);
2701 }
2702
2703 if (AreSameMethodNames(s, methodName))
2704 {
2705 complexity = h.Complex;
2706 checkSum = &h.CheckSum;
2707 if (method.PropsString.IsEqualTo_Ascii_NoCase(hProp))
2708 break;
2709 }
2710 }
2711 }
2712
2713 f.NewLine();
2714 f.Print("Size");
2715 const unsigned kFieldSize_CrcSpeed = 6;
2716 unsigned numThreadsTests = 0;
2717 for (;;)
2718 {
2719 UInt32 t = GetNumThreadsNext(numThreadsTests, numThreadsSpecified);
2720 PrintNumber(f, t, kFieldSize_CrcSpeed);
2721 numThreadsTests++;
2722 if (t >= numThreadsSpecified)
2723 break;
2724 }
2725 f.NewLine();
2726 f.NewLine();
2727 CTempValues speedTotals(numThreadsTests);
2728 {
2729 for (unsigned ti = 0; ti < numThreadsTests; ti++)
2730 speedTotals.Values[ti] = 0;
2731 }
2732
2733 UInt64 numSteps = 0;
2734 for (UInt32 i = 0; i < numIterations; i++)
2735 {
2736 for (unsigned pow = 10; pow < 32; pow++)
2737 {
2738 UInt32 bufSize = (UInt32)1 << pow;
2739 if (bufSize > dict)
2740 break;
2741 char s[16];
2742 ConvertUInt32ToString(pow, s);
2743 unsigned pos = MyStringLen(s);
2744 s[pos++] = ':';
2745 s[pos++] = ' ';
2746 s[pos] = 0;
2747 f.Print(s);
2748
2749 for (unsigned ti = 0; ti < numThreadsTests; ti++)
2750 {
2751 RINOK(f.CheckBreak());
2752 UInt32 t = GetNumThreadsNext(ti, numThreadsSpecified);
2753 UInt64 speed = 0;
2754 RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
2755 t, bufSize, speed,
2756 complexity,
2757 1, // benchWeight,
2758 (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
2759 PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
2760 speedTotals.Values[ti] += speed;
2761 }
2762 f.NewLine();
2763 numSteps++;
2764 }
2765 }
2766 if (numSteps != 0)
2767 {
2768 f.NewLine();
2769 f.Print("Avg:");
2770 for (unsigned ti = 0; ti < numThreadsTests; ti++)
2771 {
2772 PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
2773 }
2774 f.NewLine();
2775 }
2776 return S_OK;
2777 }
2778
2779 bool use2Columns = false;
2780
2781 bool totalBenchMode = (method.MethodName.IsEqualTo_Ascii_NoCase("*"));
2782 bool onlyHashBench = false;
2783 if (method.MethodName.IsEqualTo_Ascii_NoCase("hash"))
2784 {
2785 onlyHashBench = true;
2786 totalBenchMode = true;
2787 }
2788
2789 // ---------- Threads loop ----------
2790 for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
2791 {
2792
2793 UInt32 numThreads = numThreadsSpecified;
2794
2795 if (!multiThreadTests)
2796 {
2797 if (threadsPassIndex != 0)
2798 break;
2799 }
2800 else
2801 {
2802 numThreads = 1;
2803 if (threadsPassIndex != 0)
2804 {
2805 if (numCPUs < 2)
2806 break;
2807 numThreads = numCPUs;
2808 if (threadsPassIndex == 1)
2809 {
2810 if (numCPUs >= 4)
2811 numThreads = numCPUs / 2;
2812 }
2813 else if (numCPUs < 4)
2814 break;
2815 }
2816 }
2817
2818 CBenchCallbackToPrint callback;
2819 callback.Init();
2820 callback._file = printCallback;
2821
2822 IBenchPrintCallback &f = *printCallback;
2823
2824 if (threadsPassIndex > 0)
2825 {
2826 f.NewLine();
2827 f.NewLine();
2828 }
2829
2830 if (!dictIsDefined)
2831 {
2832 const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
2833 unsigned dicSizeLog = dicSizeLog_Main;
2834
2835 #ifdef UNDER_CE
2836 dicSizeLog = (UInt64)1 << 20;
2837 #endif
2838
2839 if (ramSize_Defined)
2840 for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
2841 if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
2842 break;
2843
2844 dict = (UInt32)1 << dicSizeLog;
2845
2846 if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
2847 {
2848 f.Print("Dictionary reduced to: ");
2849 PrintNumber(f, dicSizeLog, 1);
2850 f.NewLine();
2851 }
2852 }
2853
2854 PrintRequirements(f, "usage:", true, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), "Benchmark threads: ", numThreads);
2855
2856 f.NewLine();
2857
2858 if (totalBenchMode)
2859 {
2860 callback.NameFieldSize = kFieldSize_Name;
2861 use2Columns = false;
2862 }
2863 else
2864 {
2865 callback.NameFieldSize = kFieldSize_SmallName;
2866 use2Columns = true;
2867 }
2868 callback.Use2Columns = use2Columns;
2869
2870 bool showFreq = false;
2871 UInt64 cpuFreq = 0;
2872
2873 if (totalBenchMode)
2874 {
2875 showFreq = true;
2876 }
2877
2878 unsigned fileldSize = kFieldSize_TotalSize;
2879 if (showFreq)
2880 fileldSize += kFieldSize_EUAndEffec;
2881
2882 if (use2Columns)
2883 {
2884 PrintSpaces(f, callback.NameFieldSize);
2885 PrintRight(f, "Compressing", fileldSize);
2886 f.Print(kSep);
2887 PrintRight(f, "Decompressing", fileldSize);
2888 }
2889 f.NewLine();
2890 PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
2891
2892 int j;
2893
2894 for (j = 0; j < 2; j++)
2895 {
2896 PrintRight(f, "Speed", kFieldSize_Speed + 1);
2897 PrintRight(f, "Usage", kFieldSize_Usage + 1);
2898 PrintRight(f, "R/U", kFieldSize_RU + 1);
2899 PrintRight(f, "Rating", kFieldSize_Rating + 1);
2900 if (showFreq)
2901 {
2902 PrintRight(f, "E/U", kFieldSize_EU + 1);
2903 PrintRight(f, "Effec", kFieldSize_Effec + 1);
2904 }
2905 if (!use2Columns)
2906 break;
2907 if (j == 0)
2908 f.Print(kSep);
2909 }
2910
2911 f.NewLine();
2912 PrintSpaces(f, callback.NameFieldSize);
2913
2914 for (j = 0; j < 2; j++)
2915 {
2916 PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
2917 PrintRight(f, "%", kFieldSize_Usage + 1);
2918 PrintRight(f, "MIPS", kFieldSize_RU + 1);
2919 PrintRight(f, "MIPS", kFieldSize_Rating + 1);
2920 if (showFreq)
2921 {
2922 PrintRight(f, "%", kFieldSize_EU + 1);
2923 PrintRight(f, "%", kFieldSize_Effec + 1);
2924 }
2925 if (!use2Columns)
2926 break;
2927 if (j == 0)
2928 f.Print(kSep);
2929 }
2930
2931 f.NewLine();
2932 f.NewLine();
2933
2934 if (specifiedFreq != 0)
2935 cpuFreq = specifiedFreq;
2936
2937
2938 if (totalBenchMode)
2939 {
2940 for (UInt32 i = 0; i < numIterations; i++)
2941 {
2942 if (i != 0)
2943 printCallback->NewLine();
2944 HRESULT res;
2945
2946 const unsigned kNumCpuTests = 3;
2947 for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
2948 {
2949 PrintLeft(f, "CPU", kFieldSize_Name);
2950 UInt32 resVal;
2951 RINOK(FreqBench(complexInCommands, numThreads, printCallback,
2952 (freqTest == kNumCpuTests - 1 || specifiedFreq != 0), // showFreq
2953 specifiedFreq,
2954 cpuFreq, resVal));
2955 callback.NewLine();
2956
2957 if (specifiedFreq != 0)
2958 cpuFreq = specifiedFreq;
2959
2960 if (freqTest == kNumCpuTests - 1)
2961 SetComplexCommands(testTime, specifiedFreq != 0, cpuFreq, complexInCommands);
2962 }
2963 callback.NewLine();
2964
2965 callback.SetFreq(true, cpuFreq);
2966
2967 if (!onlyHashBench)
2968 {
2969 res = TotalBench(EXTERNAL_CODECS_LOC_VARS
2970 complexInCommands, numThreads,
2971 dictIsDefined || fileDataBuffer.Buffer, // forceUnpackSize
2972 fileDataBuffer.Buffer ? fileDataBuffer.BufferSize : dict,
2973 fileDataBuffer.Buffer,
2974 printCallback, &callback);
2975 RINOK(res);
2976 }
2977
2978 res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
2979 1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
2980 RINOK(res);
2981
2982 callback.NewLine();
2983 {
2984 PrintLeft(f, "CPU", kFieldSize_Name);
2985 UInt32 resVal;
2986 UInt64 cpuFreqLastTemp = cpuFreq;
2987 RINOK(FreqBench(complexInCommands, numThreads, printCallback,
2988 specifiedFreq != 0, // showFreq
2989 specifiedFreq,
2990 cpuFreqLastTemp, resVal));
2991 callback.NewLine();
2992 }
2993 }
2994 }
2995 else
2996 {
2997 bool needSetComplexity = true;
2998 if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
2999 {
3000 for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
3001 {
3002 const CBenchMethod &h = g_Bench[i];
3003 AString s = h.Name;
3004 if (AreSameMethodNames(h.Name, methodName))
3005 {
3006 callback.BenchProps.EncComplex = h.EncComplex;
3007 callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
3008 callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
3009 needSetComplexity = false;
3010 break;
3011 }
3012 }
3013 }
3014 if (needSetComplexity)
3015 callback.BenchProps.SetLzmaCompexity();
3016
3017 for (unsigned i = 0; i < numIterations; i++)
3018 {
3019 const unsigned kStartDicLog = 22;
3020 unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
3021 if (!multiDict)
3022 pow = 31;
3023 while (((UInt32)1 << pow) > dict && pow > 0)
3024 pow--;
3025 for (; ((UInt32)1 << pow) <= dict; pow++)
3026 {
3027 char s[16];
3028 ConvertUInt32ToString(pow, s);
3029 unsigned pos = MyStringLen(s);
3030 s[pos++] = ':';
3031 s[pos] = 0;
3032 PrintLeft(f, s, kFieldSize_SmallName);
3033 callback.DictSize = (UInt32)1 << pow;
3034
3035 COneMethodInfo method2 = method;
3036
3037 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
3038 {
3039 // We add dictionary size property.
3040 // method2 can have two different dictionary size properties.
3041 // And last property is main.
3042 NCOM::CPropVariant propVariant = (UInt32)pow;
3043 RINOK(method2.ParseMethodFromPROPVARIANT(L"d", propVariant));
3044 }
3045
3046 size_t uncompressedDataSize;
3047 if (fileDataBuffer.Buffer)
3048 {
3049 uncompressedDataSize = fileDataBuffer.BufferSize;
3050 }
3051 else
3052 {
3053 uncompressedDataSize = callback.DictSize;
3054 if (uncompressedDataSize >= (1 << 18))
3055 uncompressedDataSize += kAdditionalSize;
3056 }
3057
3058 HRESULT res = MethodBench(
3059 EXTERNAL_CODECS_LOC_VARS
3060 complexInCommands,
3061 true, numThreads,
3062 method2,
3063 uncompressedDataSize, fileDataBuffer.Buffer,
3064 kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
3065 f.NewLine();
3066 RINOK(res);
3067 if (!multiDict)
3068 break;
3069 }
3070 }
3071 }
3072
3073 PrintChars(f, '-', callback.NameFieldSize + fileldSize);
3074
3075 if (use2Columns)
3076 {
3077 f.Print(kSep);
3078 PrintChars(f, '-', fileldSize);
3079 }
3080
3081 f.NewLine();
3082
3083 if (use2Columns)
3084 {
3085 PrintLeft(f, "Avr:", callback.NameFieldSize);
3086 PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
3087 f.Print(kSep);
3088 PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
3089 f.NewLine();
3090 }
3091
3092 PrintLeft(f, "Tot:", callback.NameFieldSize);
3093 CTotalBenchRes midRes;
3094 midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
3095 PrintTotals(f, showFreq, cpuFreq, midRes);
3096 f.NewLine();
3097
3098 }
3099 return S_OK;
3100 }
3101