1 // Bench.cpp
2
3 #include "StdAfx.h"
4
5 // #include <stdio.h>
6
7 #ifndef _WIN32
8 #define USE_POSIX_TIME
9 #define USE_POSIX_TIME2
10 #endif // _WIN32
11
12 #ifdef USE_POSIX_TIME
13 #include <time.h>
14 #include <unistd.h>
15 #ifdef USE_POSIX_TIME2
16 #include <sys/time.h>
17 #include <sys/times.h>
18 #endif
19 #endif // USE_POSIX_TIME
20
21 #ifdef _WIN32
22 #define USE_ALLOCA
23 #endif
24
25 #ifdef USE_ALLOCA
26 #ifdef _WIN32
27 #include <malloc.h>
28 #else
29 #include <stdlib.h>
30 #endif
31 #define BENCH_ALLOCA_VALUE(index) (((index) * 64 * 21) & 0x7FF)
32 #endif
33
34 #include "../../../../C/7zCrc.h"
35 #include "../../../../C/RotateDefs.h"
36 #include "../../../../C/CpuArch.h"
37
38 #ifndef Z7_ST
39 #include "../../../Windows/Synchronization.h"
40 #include "../../../Windows/Thread.h"
41 #endif
42
43 #include "../../../Windows/FileFind.h"
44 #include "../../../Windows/FileIO.h"
45 #include "../../../Windows/SystemInfo.h"
46
47 #include "../../../Common/MyBuffer2.h"
48 #include "../../../Common/IntToString.h"
49 #include "../../../Common/StringConvert.h"
50 #include "../../../Common/StringToInt.h"
51 #include "../../../Common/Wildcard.h"
52
53 #include "../../Common/MethodProps.h"
54 #include "../../Common/StreamObjects.h"
55 #include "../../Common/StreamUtils.h"
56
57 #include "Bench.h"
58
59 using namespace NWindows;
60
#ifndef Z7_ST
// NOTE(review): presumably the method ID of the LZMA codec - confirm against the codec registry.
// It is defined only for multithreaded builds (Z7_ST is not defined).
static const UInt32 k_LZMA = 0x030101;
#endif

// Default complexity budget: SetComplexCommandsMs() stores this value into
// (complexInCommands) before any frequency-based adjustment.
// It is 2^31 for UNDER_CE builds and 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// NOTE(review): by its name it's a duration in milliseconds;
// it is not referenced in the visible part of the file.
static const UInt32 kComplexInMs = 4000;
73
SetComplexCommandsMs(UInt32 complexInMs,bool isSpecifiedFreq,UInt64 cpuFreq,UInt64 & complexInCommands)74 static void SetComplexCommandsMs(UInt32 complexInMs,
75 bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
76 {
77 complexInCommands = kComplexInCommands;
78 const UInt64 kMinFreq = (UInt64)1000000 * 4;
79 const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
80 if (cpuFreq < kMinFreq && !isSpecifiedFreq)
81 cpuFreq = kMinFreq;
82 if (cpuFreq < kMaxFreq || isSpecifiedFreq)
83 {
84 if (complexInMs != 0)
85 complexInCommands = complexInMs * cpuFreq / 1000;
86 else
87 complexInCommands = cpuFreq >> 2;
88 }
89 }
90
// const UInt64 kBenchmarkUsageMult = 1000000; // for debug
// Usage values are fixed-point numbers with the scale kBenchmarkUsageMult (1 << 16):
// Benchmark_GetUsage_Percents() maps (usage == kBenchmarkUsageMult) to 100.
static const unsigned kBenchmarkUsageMultBits = 16;
static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
94
Benchmark_GetUsage_Percents(UInt64 usage)95 UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
96 {
97 return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
98 }
99
// Size-related tuning constants of the benchmark.
// NOTE(review): kNumHashDictBits, kFilterUnpackSize, kOldLzmaDictBits and kAdditionalSize
// are not referenced in the visible part of the file; their roles below are not confirmed here.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test

static const unsigned kOldLzmaDictBits = 32;

// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
static const size_t kAdditionalSize = (size_t)1 << 16;
// fixed reserve that GetBenchCompressedSize() adds to the size of the buffer for compressed data
static const size_t kCompressedAdditionalSize = 1 << 10;

// size of CEncoderInfo::propsData: the buffer that receives the coder properties in Generate()
static const UInt32 kMaxMethodPropSize = 1 << 6;
110
111
/*
  ALLOC_WITH_HRESULT(_buffer_, _size_)
  Calls (_buffer_)->Alloc(_size_) and makes the ENCLOSING function
  return E_OUTOFMEMORY, if a non-zero size was requested and
  the buffer reports that it is not allocated.
  Notes:
    - (_size_) is evaluated twice, so it must have no side effects.
    - the expansion is a complete { ... } block: the call sites
      in this file don't write ';' after the macro call.
*/
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
  if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
115
116
117 class CBaseRandomGenerator
118 {
119 UInt32 A1;
120 UInt32 A2;
121 UInt32 Salt;
122 public:
CBaseRandomGenerator(UInt32 salt=0)123 CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()124 void Init() { A1 = 362436069; A2 = 521288629;}
125 Z7_FORCE_INLINE
GetRnd()126 UInt32 GetRnd()
127 {
128 #if 0
129 // for debug:
130 return 0x0c080400;
131 // return 0;
132 #else
133 return Salt ^
134 (
135 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
136 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
137 );
138 #endif
139 }
140 };
141
142
143 static const size_t k_RandBuf_AlignMask = 4 - 1;
144
145 Z7_NO_INLINE
RandGen_BufAfterPad(Byte * buf,size_t size)146 static void RandGen_BufAfterPad(Byte *buf, size_t size)
147 {
148 CBaseRandomGenerator RG;
149 for (size_t i = 0; i < size; i += 4)
150 {
151 const UInt32 v = RG.GetRnd();
152 SetUi32a(buf + i, v)
153 }
154 /*
155 UInt32 v = RG.GetRnd();
156 for (; i < size; i++)
157 {
158 buf[i] = (Byte)v;
159 v >>= 8;
160 }
161 */
162 }
163
164
/*
  Buffer (CMidAlignedBuffer) that can fill itself with benchmark data:
    GenerateSimpleRandom() : one low byte of the generator output per buffer byte
    GenerateLz()           : mix of random literal bytes and copies of
                             earlier data (matches)
  Both functions fill Size() bytes, so the buffer must be allocated before the call.
  The generated data is defined by the exact sequence of rg.GetRnd() calls,
  so the order of the statements here must not be changed.
*/
class CBenchRandomGenerator: public CMidAlignedBuffer
{
  // extracts (numBits) low bits from (res) and removes them from (res)
  static UInt32 GetVal(UInt32 &res, unsigned numBits)
  {
    const UInt32 val = res & (((UInt32)1 << numBits) - 1);
    res >>= numBits;
    return val;
  }

  // reads 2-bit selector (0..3), and then reads value of (1 + selector) bits:
  // the result is in range 0..15, and small values are more probable
  static UInt32 GetLen(UInt32 &r)
  {
    const unsigned len = (unsigned)GetVal(r, 2);
    return GetVal(r, 1 + len);
  }

public:

  // fills the whole buffer: each byte is the low byte of separate GetRnd() value
  void GenerateSimpleRandom(UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    for (size_t i = 0; i < bufSize; i++)
      buf[i] = (Byte)rg.GetRnd();
  }

  /*
    fills the whole buffer with literals and matches.
      dictBits : limits the number of bits of a newly selected match distance
      salt     : salt for the random generator
  */
  void GenerateLz(unsigned dictBits, UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    size_t pos = 0;
    size_t rep0 = 1; // current match distance; it's reused, if new distance is not selected
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    unsigned posBits = 1; // it grows until ((size_t)1 << posBits) >= pos

    // printf("\n dictBits = %d\n", (UInt32)dictBits);
    // printf("\n bufSize = 0x%p\n", (const void *)bufSize);

    while (pos < bufSize)
    {
      /*
      if (pos >= ((UInt32)1 << 31))
        printf(" %x\n", pos);
      */
      UInt32 r = rg.GetRnd();
      // literal: for zero selector bit, and always for the first 1024 bytes
      if (GetVal(r, 1) == 0 || pos < 1024)
        buf[pos++] = (Byte)(r & 0xFF);
      else
      {
        UInt32 len;
        len = 1 + GetLen(r);

        // 7 cases of 8: longer match with new distance; otherwise previous rep0 is reused
        if (GetVal(r, 3) != 0)
        {
          len += GetLen(r);

          while (((size_t)1 << posBits) < pos)
            posBits++;

          // the number of distance bits is limited by (dictBits) and by current position
          unsigned numBitsMax = dictBits;
          if (numBitsMax > posBits)
            numBitsMax = posBits;

          const unsigned kAddBits = 6;
          unsigned numLogBits = 5;
          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
            numLogBits = 4;

          // we repeat the selection until the distance is inside the generated data
          for (;;)
          {
            const UInt32 ppp = GetVal(r, numLogBits) + kAddBits; // number of bits of distance
            r = rg.GetRnd();
            if (ppp > numBitsMax)
              continue;
            // rep0 = GetVal(r, ppp);
            rep0 = r & (((size_t)1 << ppp) - 1);
            if (rep0 < pos)
              break;
            r = rg.GetRnd();
          }
          rep0++; // now (1 <= rep0 <= pos)
        }

        // len *= 300; // for debug
        {
          // the match must not cross the end of the buffer
          const size_t rem = bufSize - pos;
          if (len > rem)
            len = (UInt32)rem;
        }
        Byte *dest = buf + pos;
        const Byte *src = dest - rep0;
        pos += len;
        // byte-by-byte copy: source and destination can overlap, if (rep0 < len)
        for (UInt32 i = 0; i < len; i++)
          *dest++ = *src++;
      }
    }
    // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
  }
};
264
265
266 Z7_CLASS_IMP_NOQIB_1(
267 CBenchmarkInStream
268 , ISequentialInStream
269 )
270 const Byte *Data;
271 size_t Pos;
272 size_t Size;
273 public:
274 void Init(const Byte *data, size_t size)
275 {
276 Data = data;
277 Size = size;
278 Pos = 0;
279 }
280 bool WasFinished() const { return Pos == Size; }
281 };
282
283 Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
284 {
285 const UInt32 kMaxBlockSize = (1 << 20);
286 if (size > kMaxBlockSize)
287 size = kMaxBlockSize;
288 const size_t remain = Size - Pos;
289 if (size > remain)
290 size = (UInt32)remain;
291
292 if (size)
293 memcpy(data, Data + Pos, size);
294
295 Pos += size;
296 if (processedSize)
297 *processedSize = size;
298 return S_OK;
299 }
300
301
302 class CBenchmarkOutStream Z7_final:
303 public ISequentialOutStream,
304 public CMyUnknownImp,
305 public CMidAlignedBuffer
306 {
307 Z7_COM_UNKNOWN_IMP_0
308 Z7_IFACE_COM7_IMP(ISequentialOutStream)
309 // bool _overflow;
310 public:
311 size_t Pos;
312 bool RealCopy;
313 bool CalcCrc;
314 UInt32 Crc;
315
316 // CBenchmarkOutStream(): _overflow(false) {}
317 void Init(bool realCopy, bool calcCrc)
318 {
319 Crc = CRC_INIT_VAL;
320 RealCopy = realCopy;
321 CalcCrc = calcCrc;
322 // _overflow = false;
323 Pos = 0;
324 }
325
326 void InitCrc()
327 {
328 Crc = CRC_INIT_VAL;
329 }
330
331 void Calc(const void *data, size_t size)
332 {
333 Crc = CrcUpdate(Crc, data, size);
334 }
335
336 size_t GetPos() const { return Pos; }
337
338 // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
339 };
340
341 Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
342 {
343 size_t curSize = Size() - Pos;
344 if (curSize > size)
345 curSize = size;
346 if (curSize != 0)
347 {
348 if (RealCopy)
349 memcpy(((Byte *)*this) + Pos, data, curSize);
350 if (CalcCrc)
351 Calc(data, curSize);
352 Pos += curSize;
353 }
354 if (processedSize)
355 *processedSize = (UInt32)curSize;
356 if (curSize != size)
357 {
358 // _overflow = true;
359 return E_FAIL;
360 }
361 return S_OK;
362 }
363
364
365 Z7_CLASS_IMP_NOQIB_1(
366 CCrcOutStream
367 , ISequentialOutStream
368 )
369 public:
370 bool CalcCrc;
371 UInt32 Crc;
372 UInt64 Pos;
373
374 CCrcOutStream(): CalcCrc(true) {}
375 void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
376 void Calc(const void *data, size_t size)
377 {
378 Crc = CrcUpdate(Crc, data, size);
379 }
380 };
381
382 Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
383 {
384 if (CalcCrc)
385 Calc(data, size);
386 Pos += size;
387 if (processedSize)
388 *processedSize = size;
389 return S_OK;
390 }
391
392 // #include "../../../../C/My_sys_time.h"
393
394 static UInt64 GetTimeCount()
395 {
396 #ifdef USE_POSIX_TIME
397 #ifdef USE_POSIX_TIME2
398 timeval v;
399 if (gettimeofday(&v, NULL) == 0)
400 return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
401 return (UInt64)time(NULL) * 1000000;
402 #else
403 return time(NULL);
404 #endif
405 #else
406 LARGE_INTEGER value;
407 if (::QueryPerformanceCounter(&value))
408 return (UInt64)value.QuadPart;
409 return GetTickCount();
410 #endif
411 }
412
413 static UInt64 GetFreq()
414 {
415 #ifdef USE_POSIX_TIME
416 #ifdef USE_POSIX_TIME2
417 return 1000000;
418 #else
419 return 1;
420 #endif
421 #else
422 LARGE_INTEGER value;
423 if (::QueryPerformanceFrequency(&value))
424 return (UInt64)value.QuadPart;
425 return 1000;
426 #endif
427 }
428
429
430 #ifdef USE_POSIX_TIME
431
432 struct CUserTime
433 {
434 UInt64 Sum;
435 clock_t Prev;
436
437 void Init()
438 {
439 // Prev = clock();
440 Sum = 0;
441 Prev = 0;
442 Update();
443 Sum = 0;
444 }
445
446 void Update()
447 {
448 tms t;
449 /* clock_t res = */ times(&t);
450 clock_t newVal = t.tms_utime + t.tms_stime;
451 Sum += (UInt64)(newVal - Prev);
452 Prev = newVal;
453
454 /*
455 clock_t v = clock();
456 if (v != -1)
457 {
458 Sum += v - Prev;
459 Prev = v;
460 }
461 */
462 }
463 UInt64 GetUserTime()
464 {
465 Update();
466 return Sum;
467 }
468 };
469
470 #else
471
472
473 struct CUserTime
474 {
475 bool UseTick;
476 DWORD Prev_Tick;
477 UInt64 Prev;
478 UInt64 Sum;
479
480 void Init()
481 {
482 UseTick = false;
483 Prev_Tick = 0;
484 Prev = 0;
485 Sum = 0;
486 Update();
487 Sum = 0;
488 }
489 UInt64 GetUserTime()
490 {
491 Update();
492 return Sum;
493 }
494 void Update();
495 };
496
497 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
498
499 void CUserTime::Update()
500 {
501 DWORD new_Tick = GetTickCount();
502 FILETIME creationTime, exitTime, kernelTime, userTime;
503 if (!UseTick &&
504 #ifdef UNDER_CE
505 ::GetThreadTimes(::GetCurrentThread()
506 #else
507 ::GetProcessTimes(::GetCurrentProcess()
508 #endif
509 , &creationTime, &exitTime, &kernelTime, &userTime))
510 {
511 UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
512 Sum += newVal - Prev;
513 Prev = newVal;
514 }
515 else
516 {
517 UseTick = true;
518 Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
519 }
520 Prev_Tick = new_Tick;
521 }
522
523
524 #endif
525
526 static UInt64 GetUserFreq()
527 {
528 #ifdef USE_POSIX_TIME
529 // return CLOCKS_PER_SEC;
530 return (UInt64)sysconf(_SC_CLK_TCK);
531 #else
532 return 10000000;
533 #endif
534 }
535
536 class CBenchProgressStatus Z7_final
537 {
538 #ifndef Z7_ST
539 NSynchronization::CCriticalSection CS;
540 #endif
541 public:
542 HRESULT Res;
543 bool EncodeMode;
544 void SetResult(HRESULT res)
545 {
546 #ifndef Z7_ST
547 NSynchronization::CCriticalSectionLock lock(CS);
548 #endif
549 Res = res;
550 }
551 HRESULT GetResult()
552 {
553 #ifndef Z7_ST
554 NSynchronization::CCriticalSectionLock lock(CS);
555 #endif
556 return Res;
557 }
558 };
559
560 struct CBenchInfoCalc
561 {
562 CBenchInfo BenchInfo;
563 CUserTime UserTime;
564
565 void SetStartTime();
566 void SetFinishTime(CBenchInfo &dest);
567 };
568
569 void CBenchInfoCalc::SetStartTime()
570 {
571 BenchInfo.GlobalFreq = GetFreq();
572 BenchInfo.UserFreq = GetUserFreq();
573 BenchInfo.GlobalTime = ::GetTimeCount();
574 BenchInfo.UserTime = 0;
575 UserTime.Init();
576 }
577
578 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
579 {
580 dest = BenchInfo;
581 dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
582 dest.UserTime = UserTime.GetUserTime();
583 }
584
585 class CBenchProgressInfo Z7_final:
586 public ICompressProgressInfo,
587 public CMyUnknownImp,
588 public CBenchInfoCalc
589 {
590 Z7_COM_UNKNOWN_IMP_0
591 Z7_IFACE_COM7_IMP(ICompressProgressInfo)
592 public:
593 CBenchProgressStatus *Status;
594 IBenchCallback *Callback;
595
596 CBenchProgressInfo(): Callback(NULL) {}
597 };
598
599
600 Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
601 {
602 HRESULT res = Status->GetResult();
603 if (res != S_OK)
604 return res;
605 if (!Callback)
606 return res;
607
608 /*
609 static UInt64 inSizePrev = 0;
610 static UInt64 outSizePrev = 0;
611 UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
612 if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; }
613 if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; }
614 UInt64 percents = delta2 * 1000;
615 if (delta1 != 0)
616 percents /= delta1;
617 printf("=== %7d %7d %7d %7d ratio = %4d\n",
618 (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
619 (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
620 (unsigned)percents);
621 */
622
623 CBenchInfo info;
624 SetFinishTime(info);
625 if (Status->EncodeMode)
626 {
627 info.UnpackSize = BenchInfo.UnpackSize + *inSize;
628 info.PackSize = BenchInfo.PackSize + *outSize;
629 res = Callback->SetEncodeResult(info, false);
630 }
631 else
632 {
633 info.PackSize = BenchInfo.PackSize + *inSize;
634 info.UnpackSize = BenchInfo.UnpackSize + *outSize;
635 res = Callback->SetDecodeResult(info, false);
636 }
637 if (res != S_OK)
638 Status->SetResult(res);
639 return res;
640 }
641
// the number of fractional bits in the fixed-point logarithm returned by GetLogSize_Sub()
static const unsigned kSubBits = 8;
643
644 static unsigned GetLogSize(UInt64 size)
645 {
646 unsigned i = 0;
647 for (;;)
648 {
649 i++; size >>= 1; if (size == 0) break;
650 }
651 return i;
652 }
653
654
655 static UInt32 GetLogSize_Sub(UInt64 size)
656 {
657 if (size <= 1)
658 return 0;
659 const unsigned i = GetLogSize(size) - 1;
660 UInt32 v;
661 if (i <= kSubBits)
662 v = (UInt32)(size) << (kSubBits - i);
663 else
664 v = (UInt32)(size >> (i - kSubBits));
665 return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
666 }
667
668
669 static UInt64 Get_UInt64_from_double(double v)
670 {
671 const UInt64 kMaxVal = (UInt64)1 << 62;
672 if (v > (double)(Int64)kMaxVal)
673 return kMaxVal;
674 return (UInt64)v;
675 }
676
677 static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
678 {
679 if (d == 0)
680 d = 1;
681 const double v =
682 (double)(Int64)m1 *
683 (double)(Int64)m2 /
684 (double)(Int64)d;
685 return Get_UInt64_from_double(v);
686 /*
687 unsigned n1 = GetLogSize(m1);
688 unsigned n2 = GetLogSize(m2);
689 while (n1 + n2 > 64)
690 {
691 if (n1 >= n2)
692 {
693 m1 >>= 1;
694 n1--;
695 }
696 else
697 {
698 m2 >>= 1;
699 n2--;
700 }
701 d >>= 1;
702 }
703
704 if (d == 0)
705 d = 1;
706 return m1 * m2 / d;
707 */
708 }
709
710
711 UInt64 CBenchInfo::GetUsage() const
712 {
713 UInt64 userTime = UserTime;
714 UInt64 userFreq = UserFreq;
715 UInt64 globalTime = GlobalTime;
716 UInt64 globalFreq = GlobalFreq;
717
718 if (userFreq == 0)
719 userFreq = 1;
720 if (globalTime == 0)
721 globalTime = 1;
722
723 const double v =
724 ((double)(Int64)userTime / (double)(Int64)userFreq)
725 * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
726 * (double)(Int64)kBenchmarkUsageMult;
727 return Get_UInt64_from_double(v);
728 /*
729 return MyMultDiv64(
730 MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
731 globalFreq, globalTime);
732 */
733 }
734
735
736 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
737 {
738 if (UserTime == 0)
739 {
740 return 0;
741 // userTime = 1;
742 }
743 UInt64 globalFreq = GlobalFreq;
744 if (globalFreq == 0)
745 globalFreq = 1;
746
747 const double v =
748 ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
749 * ((double)(Int64)UserFreq / (double)(Int64)UserTime)
750 * (double)(Int64)rating;
751 return Get_UInt64_from_double(v);
752 /*
753 return MyMultDiv64(
754 MyMultDiv64(rating, UserFreq, UserTime),
755 GlobalTime, globalFreq);
756 */
757 }
758
759
760 UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
761 {
762 return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
763 }
764
765 static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
766 {
767 return complexity >= 0 ?
768 size * (UInt32)complexity :
769 size / (UInt32)(-complexity);
770 }
771
772 struct CBenchProps
773 {
774 bool LzmaRatingMode;
775
776 Int32 EncComplex;
777 Int32 DecComplexCompr;
778 Int32 DecComplexUnc;
779
780 unsigned KeySize;
781
782 CBenchProps():
783 LzmaRatingMode(false),
784 KeySize(0)
785 {}
786
787 void SetLzmaCompexity();
788
789 UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
790 {
791 const UInt32 kMinSize = 100;
792 if (unpackSize < kMinSize)
793 unpackSize = kMinSize;
794 return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
795 }
796
797 UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
798 {
799 return
800 GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
801 GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
802 }
803
804 UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
805 UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
806 };
807
808 void CBenchProps::SetLzmaCompexity()
809 {
810 EncComplex = 1200;
811 DecComplexUnc = 4;
812 DecComplexCompr = 190;
813 LzmaRatingMode = true;
814 }
815
816 UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
817 {
818 if (dictSize < (1 << kBenchMinDicLogSize))
819 dictSize = (1 << kBenchMinDicLogSize);
820 Int32 encComplex = EncComplex;
821 if (LzmaRatingMode)
822 {
823 /*
824 for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
825 {
826 unsigned rr = GetLogSize_Sub(uu);
827 printf("\n%16I64x , log = %4x", uu, rr);
828 uu += 1;
829 uu += uu / 50;
830 }
831 */
832 // throw 1;
833 const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
834 encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
835 }
836 const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
837 return MyMultDiv64(numCommands, freq, elapsedTime);
838 }
839
840 UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
841 {
842 const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
843 return MyMultDiv64(numCommands, freq, elapsedTime);
844 }
845
846
847
848 UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
849 {
850 CBenchProps props;
851 props.SetLzmaCompexity();
852 return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
853 }
854
855 UInt64 CBenchInfo::GetRating_LzmaDec() const
856 {
857 CBenchProps props;
858 props.SetLzmaCompexity();
859 return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
860 }
861
862
863 #ifndef Z7_ST
864
865 #define NUM_CPU_LEVELS_MAX 3
866
867 struct CAffinityMode
868 {
869 unsigned NumBundleThreads;
870 unsigned NumLevels;
871 unsigned NumCoreThreads;
872 unsigned NumCores;
873 // unsigned DivideNum;
874 UInt32 Sizes[NUM_CPU_LEVELS_MAX];
875
876 void SetLevels(unsigned numCores, unsigned numCoreThreads);
877 DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
878 bool NeedAffinity() const { return NumBundleThreads != 0; }
879
880 WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
881 {
882 if (NeedAffinity())
883 {
884 CCpuSet cpuSet;
885 GetAffinityMask(bundleIndex, &cpuSet);
886 return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
887 }
888 return thread.Create(startAddress, parameter);
889 }
890
891 CAffinityMode():
892 NumBundleThreads(0),
893 NumLevels(0),
894 NumCoreThreads(1)
895 // DivideNum(1)
896 {}
897 };
898
899 void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
900 {
901 NumCores = numCores;
902 NumCoreThreads = numCoreThreads;
903 NumLevels = 0;
904 if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
905 return;
906 UInt32 c = numCores / numCoreThreads;
907 UInt32 c2 = 1;
908 while ((c & 1) == 0)
909 {
910 c >>= 1;
911 c2 <<= 1;
912 }
913 if (c2 != 1)
914 Sizes[NumLevels++] = c2;
915 if (c != 1)
916 Sizes[NumLevels++] = c;
917 if (numCoreThreads != 1)
918 Sizes[NumLevels++] = numCoreThreads;
919 if (NumLevels == 0)
920 Sizes[NumLevels++] = 1;
921
922 /*
923 printf("\n Cores:");
924 for (unsigned i = 0; i < NumLevels; i++)
925 {
926 printf(" %d", Sizes[i]);
927 }
928 printf("\n");
929 */
930 }
931
932
933 DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
934 {
935 CpuSet_Zero(cpuSet);
936
937 if (NumLevels == 0)
938 return 0;
939
940 // printf("\n%2d", bundleIndex);
941
942 /*
943 UInt32 low = 0;
944 if (DivideNum != 1)
945 {
946 low = bundleIndex % DivideNum;
947 bundleIndex /= DivideNum;
948 }
949 */
950
951 UInt32 numGroups = NumCores / NumBundleThreads;
952 UInt32 m = bundleIndex % numGroups;
953 UInt32 v = 0;
954 for (unsigned i = 0; i < NumLevels; i++)
955 {
956 UInt32 size = Sizes[i];
957 while ((size & 1) == 0)
958 {
959 v *= 2;
960 v |= (m & 1);
961 m >>= 1;
962 size >>= 1;
963 }
964 v *= size;
965 v += m % size;
966 m /= size;
967 }
968
969 // UInt32 nb = NumBundleThreads / DivideNum;
970 UInt32 nb = NumBundleThreads;
971
972 DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
973 // v += low;
974 mask <<= v;
975
976 // printf(" %2d %8x \n ", v, (unsigned)mask);
977 #ifdef _WIN32
978 *cpuSet = mask;
979 #else
980 {
981 for (unsigned k = 0; k < nb; k++)
982 CpuSet_Set(cpuSet, v + k);
983 }
984 #endif
985
986 return mask;
987 }
988
989
990 struct CBenchSyncCommon
991 {
992 bool ExitMode;
993 NSynchronization::CManualResetEvent StartEvent;
994
995 CBenchSyncCommon(): ExitMode(false) {}
996 };
997
998 #endif
999
1000
1001
// CRC check mode for decoding (the type of CEncoderInfo::CheckCrcMode_Dec).
// NOTE(review): the meanings below are taken from the names - confirm in the decoding code.
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never = 0,     // presumably: CRC of decoded data is not checked
  k_CheckCrcMode_Always = 1,    // presumably: CRC is checked after each pass
  k_CheckCrcMode_FirstPass = 2  // presumably: CRC is checked only after the first pass
};
1008
1009 class CEncoderInfo;
1010
1011 class CEncoderInfo Z7_final
1012 {
1013 Z7_CLASS_NO_COPY(CEncoderInfo)
1014
1015 public:
1016
1017 #ifndef Z7_ST
1018 NWindows::CThread thread[2];
1019 NSynchronization::CManualResetEvent ReadyEvent;
1020 UInt32 NumDecoderSubThreads;
1021 CBenchSyncCommon *Common;
1022 UInt32 EncoderIndex;
1023 UInt32 NumEncoderInternalThreads;
1024 CAffinityMode AffinityMode;
1025 bool IsGlobalMtMode; // if more than one benchmark encoder threads
1026 #endif
1027
1028 CMyComPtr<ICompressCoder> _encoder;
1029 CMyComPtr<ICompressFilter> _encoderFilter;
1030 CBenchProgressInfo *progressInfoSpec[2];
1031 CMyComPtr<ICompressProgressInfo> progressInfo[2];
1032 UInt64 NumIterations;
1033
1034 UInt32 Salt;
1035
1036 #ifdef USE_ALLOCA
1037 size_t AllocaSize;
1038 #endif
1039
1040 unsigned KeySize;
1041 Byte _key[32];
1042 Byte _iv[16];
1043
1044 HRESULT Set_Key_and_IV(ICryptoProperties *cp)
1045 {
1046 RINOK(cp->SetKey(_key, KeySize))
1047 return cp->SetInitVector(_iv, sizeof(_iv));
1048 }
1049
1050 Byte _psw[16];
1051
1052 bool CheckCrc_Enc; /* = 1, if we want to check packed data crcs after each pass
1053 used for filter and usual coders */
1054 bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
1055 used only for filter */
1056 E_CheckCrcMode CheckCrcMode_Dec;
1057
1058 struct CDecoderInfo
1059 {
1060 CEncoderInfo *Encoder;
1061 UInt32 DecoderIndex;
1062 bool CallbackMode;
1063
1064 #ifdef USE_ALLOCA
1065 size_t AllocaSize;
1066 #endif
1067 };
1068 CDecoderInfo decodersInfo[2];
1069
1070 CMyComPtr<ICompressCoder> _decoders[2];
1071 CMyComPtr<ICompressFilter> _decoderFilter;
1072
1073 HRESULT Results[2];
1074 CBenchmarkOutStream *outStreamSpec;
1075 CMyComPtr<ISequentialOutStream> outStream;
1076 IBenchCallback *callback;
1077 IBenchPrintCallback *printCallback;
1078 UInt32 crc;
1079 size_t kBufferSize;
1080 size_t compressedSize;
1081 const Byte *uncompressedDataPtr;
1082
1083 const Byte *fileData;
1084 CBenchRandomGenerator rg;
1085
1086 CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
1087
1088 // CBenchmarkOutStream *propStreamSpec;
1089 Byte propsData[kMaxMethodPropSize];
1090 CBufPtrSeqOutStream *propStreamSpec;
1091 CMyComPtr<ISequentialOutStream> propStream;
1092
1093 unsigned generateDictBits;
1094 COneMethodInfo _method;
1095
1096 // for decode
1097 size_t _uncompressedDataSize;
1098
1099 HRESULT Generate();
1100 HRESULT Encode();
1101 HRESULT Decode(UInt32 decoderIndex);
1102
1103 CEncoderInfo():
1104 #ifndef Z7_ST
1105 Common(NULL),
1106 IsGlobalMtMode(true),
1107 #endif
1108 Salt(0),
1109 KeySize(0),
1110 CheckCrc_Enc(true),
1111 UseRealData_Enc(true),
1112 CheckCrcMode_Dec(k_CheckCrcMode_Always),
1113 outStreamSpec(NULL),
1114 callback(NULL),
1115 printCallback(NULL),
1116 fileData(NULL),
1117 propStreamSpec(NULL)
1118 {}
1119
1120 #ifndef Z7_ST
1121
1122 static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
1123 {
1124 HRESULT res;
1125 CEncoderInfo *encoder = (CEncoderInfo *)param;
1126 try
1127 {
1128 #ifdef USE_ALLOCA
1129 alloca(encoder->AllocaSize);
1130 #endif
1131
1132 res = encoder->Encode();
1133 }
1134 catch(...)
1135 {
1136 res = E_FAIL;
1137 }
1138 encoder->Results[0] = res;
1139 if (res != S_OK)
1140 encoder->progressInfoSpec[0]->Status->SetResult(res);
1141 encoder->ReadyEvent.Set();
1142 return THREAD_FUNC_RET_ZERO;
1143 }
1144
1145 static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
1146 {
1147 CDecoderInfo *decoder = (CDecoderInfo *)param;
1148
1149 #ifdef USE_ALLOCA
1150 alloca(decoder->AllocaSize);
1151 // printf("\nalloca=%d\n", (unsigned)decoder->AllocaSize);
1152 #endif
1153
1154 CEncoderInfo *encoder = decoder->Encoder;
1155 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
1156 return THREAD_FUNC_RET_ZERO;
1157 }
1158
1159 HRESULT CreateEncoderThread()
1160 {
1161 WRes res = 0;
1162 if (!ReadyEvent.IsCreated())
1163 res = ReadyEvent.Create();
1164 if (res == 0)
1165 res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
1166 EncoderIndex);
1167 return HRESULT_FROM_WIN32(res);
1168 }
1169
1170 HRESULT CreateDecoderThread(unsigned index, bool callbackMode
1171 #ifdef USE_ALLOCA
1172 , size_t allocaSize
1173 #endif
1174 )
1175 {
1176 CDecoderInfo &decoder = decodersInfo[index];
1177 decoder.DecoderIndex = index;
1178 decoder.Encoder = this;
1179
1180 #ifdef USE_ALLOCA
1181 decoder.AllocaSize = allocaSize;
1182 #endif
1183
1184 decoder.CallbackMode = callbackMode;
1185
1186 WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
1187 // EncoderIndex * NumEncoderInternalThreads + index
1188 EncoderIndex
1189 );
1190
1191 return HRESULT_FROM_WIN32(res);
1192 }
1193
1194 #endif
1195 };
1196
1197
1198
1199
1200 static size_t GetBenchCompressedSize(size_t bufferSize)
1201 {
1202 return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1203 // kBufferSize / 2;
1204 }
1205
1206
/*
  Prepares the object for encoding:
    1) selects or generates uncompressed data (uncompressedDataPtr)
    2) allocates output buffer (outStreamSpec) and temp buffers for filter mode
    3) creates the stream for coder properties (propsData)
    4) sends the properties of (_method) to the coder, and reads coder properties
    5) sets the password for crypto coder, and runs one small encoding
  returns:
    E_FAIL        : the size of compressed buffer overflows size_t
    E_OUTOFMEMORY : allocation error
    E_INVALIDARG  : unsupported (generateDictBits), or the coder doesn't accept non-optional props
    or error code from the coder.
*/
HRESULT CEncoderInfo::Generate()
{
  const COneMethodInfo &method = _method;

  // we need extra space, if input data is already compressed
  const size_t kCompressedBufferSize = _encoderFilter ?
      kBufferSize :
      GetBenchCompressedSize(kBufferSize);

  // it's the check for overflow in GetBenchCompressedSize()
  if (kCompressedBufferSize < kBufferSize)
    return E_FAIL;

  uncompressedDataPtr = fileData;
  if (fileData)
  {
    // external data is used; (crc) is not calculated in this branch
    #if !defined(Z7_ST)
    if (IsGlobalMtMode)
    {
      /* we copy the data to local buffer of thread to eliminate
         using of shared buffer by different threads */
      ALLOC_WITH_HRESULT(&rg, kBufferSize)
      memcpy((Byte *)rg, fileData, kBufferSize);
      uncompressedDataPtr = (const Byte *)rg;
    }
    #endif
  }
  else
  {
    ALLOC_WITH_HRESULT(&rg, kBufferSize)
    // DWORD ttt = GetTickCount();
    if (generateDictBits == 0)
      rg.GenerateSimpleRandom(Salt);
    else
    {
      if (generateDictBits >= sizeof(size_t) * 8
          && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
        return E_INVALIDARG;
      rg.GenerateLz(generateDictBits, Salt);
      // return E_ABORT; // for debug
    }
    // printf("\n%d\n            ", GetTickCount() - ttt);

    crc = CrcCalc((const Byte *)rg, rg.Size());
    uncompressedDataPtr = (const Byte *)rg;
  }

  // the stream objects are created once, and they are reused in next calls
  if (!outStream)
  {
    outStreamSpec = new CBenchmarkOutStream;
    outStream = outStreamSpec;
  }

  ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)

  if (_encoderFilter)
  {
    /* we try to reduce the number of memcpy() in main encoding loop.
       so we copy data to temp buffers here */
    ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
    memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
    memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
  }

  if (!propStream)
  {
    propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
    propStream = propStreamSpec;
  }
  // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
  // propStreamSpec->Init(true, false);
  propStreamSpec->Init(propsData, sizeof(propsData));


  // (coder) is the object that is asked for optional interfaces: the filter or the coder
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  {
    CMyComPtr<ICompressSetCoderProperties> scp;
    coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      const UInt64 reduceSize = kBufferSize;

      /* in posix new thread uses same affinity as parent thread,
         so we don't need to send affinity to coder in posix */
      UInt64 affMask;
      #if !defined(Z7_ST) && defined(_WIN32)
      {
        CCpuSet cpuSet;
        affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
      }
      #else
      affMask = 0;
      #endif
      // affMask <<= 3; // debug line: to test no affinity in coder;
      // affMask = 0;

      // zero mask means "no affinity": then NULL is sent instead of the mask
      RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
    }
    else
    {
      // the coder has no properties interface, so we can't send required props
      if (method.AreThereNonOptionalProps())
        return E_INVALIDARG;
    }

    // the coder writes its properties to (propsData) via (propStream)
    CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
    coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
    if (writeCoderProps)
    {
      RINOK(writeCoderProps->WriteCoderProperties(propStream))
    }

    {
      CMyComPtr<ICryptoSetPassword> sp;
      coder.QueryInterface(IID_ICryptoSetPassword, &sp);
      if (sp)
      {
        RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))

        // we must call encoding one time to calculate password key for key cache.
        // it must be after WriteCoderProperties!
        Byte temp[16];
        memset(temp, 0, sizeof(temp));

        if (_encoderFilter)
        {
          // NOTE(review): the results of Init() and Filter() are ignored here
          _encoderFilter->Init();
          _encoderFilter->Filter(temp, sizeof(temp));
        }
        else
        {
          // 16 zero bytes are encoded, and the output is only counted by CCrcOutStream
          CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
          CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
          inStreamSpec->Init(temp, sizeof(temp));

          CCrcOutStream *crcStreamSpec = new CCrcOutStream;
          CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
          crcStreamSpec->Init();

          RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
        }
      }
    }
  }

  return S_OK;
}
1356
1357
1358 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1359 {
1360 while (size != 0)
1361 {
1362 UInt32 cur = crc ? 1 << 17 : 1 << 24;
1363 if (cur > size)
1364 cur = (UInt32)size;
1365 UInt32 processed = filter->Filter(data, cur);
1366 /* if (processed > size) (in AES filter), we must fill last block with zeros.
1367 but it is not important for benchmark. So we just copy that data without filtering.
1368 if (processed == 0) then filter can't process more */
1369 if (processed > size || processed == 0)
1370 processed = (UInt32)size;
1371 if (crc)
1372 *crc = CrcUpdate(*crc, data, processed);
1373 data += processed;
1374 size -= processed;
1375 }
1376 }
1377
1378
/*
  CEncoderInfo::Encode()
  Encoding part of the benchmark for this CEncoderInfo object:
    1) calls Generate();
    2) if (Common) is set (only when Z7_ST is not defined): stores S_OK to Results[0],
       signals ReadyEvent and then blocks on Common->StartEvent;
       returns S_OK without encoding, if Common->ExitMode is set after wake-up.
       Otherwise it calls SetStartTime() for progressInfoSpec[0];
    3) runs (NumIterations) passes with filter (_encoderFilter) or coder (_encoder),
       and accumulates UnpackSize / PackSize in progressInfoSpec[0]->BenchInfo;
    4) releases _encoder and _encoderFilter.
  Returns E_FAIL, if the coder did not read all input data, if the compressed size
  differs between passes, or if CRC of output differs between the passes where CRC is calculated.
  Errors of called functions are passed through RINOK
  (RINOK is assumed to return from function for non-zero result).
*/
1379 HRESULT CEncoderInfo::Encode()
1380 {
1381 // printf("\nCEncoderInfo::Generate\n");
1382
1383 RINOK(Generate())
1384
1385 // printf("\n2222\n");
1386
1387 #ifndef Z7_ST
1388 if (Common)
1389 {
// report "ready" state and wait for the common start signal
1390 Results[0] = S_OK;
1391 WRes wres = ReadyEvent.Set();
1392 if (wres == 0)
1393 wres = Common->StartEvent.Lock();
1394 if (wres != 0)
1395 return HRESULT_FROM_WIN32(wres);
1396 if (Common->ExitMode)
1397 return S_OK;
1398 }
1399 else
1400 #endif
1401 {
1402 CBenchProgressInfo *bpi = progressInfoSpec[0];
1403 bpi->SetStartTime();
1404 }
1405
1406
1407 CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
1408 bi.UnpackSize = 0;
1409 bi.PackSize = 0;
1410 CMyComPtr<ICryptoProperties> cp;
1411 CMyComPtr<IUnknown> coder;
1412 if (_encoderFilter)
1413 coder = _encoderFilter;
1414 else
1415 coder = _encoder;
1416 coder.QueryInterface(IID_ICryptoProperties, &cp);
1417 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1418 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1419
1420 if (cp)
1421 {
1422 RINOK(Set_Key_and_IV(cp))
1423 }
1424
// for filter the size of output is set to size of input (kBufferSize).
// for coder (compressedSize) is set after first pass in the loop below.
1425 compressedSize = 0;
1426 if (_encoderFilter)
1427 compressedSize = kBufferSize;
1428
1429 // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
1430 UInt64 prev = 0;
1431
// (mask == 0)      : CRC is calculated at each pass.
// (mask == 0xFFFF) : CRC is calculated only if (NumIterations > 0xFFFF),
//                    and only at passes where low 16 bits of (i) are zeros.
1432 const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
1433 const bool useCrc = (mask < NumIterations);
1434 bool crcPrev_defined = false;
1435 UInt32 crcPrev = 0;
1436
1437 bool useRealData_Enc = UseRealData_Enc;
1438 bool data_Was_Changed = false;
1439 if (useRealData_Enc)
1440 {
1441 /* we want memcpy() for each iteration including first iteration.
1442 So results will be equal for different number of iterations */
1443 data_Was_Changed = true;
1444 }
1445
// (i) counts down: (i == numIterations - 1) at first pass, and (i == 0) at last pass
1446 const UInt64 numIterations = NumIterations;
1447 UInt64 i = numIterations;
1448 // printCallback->NewLine();
1449
1450 while (i != 0)
1451 {
1452 i--;
// CheckBreak() is called after each (1 << 26) bytes of processed unpacked data
1453 if (printCallback && bi.UnpackSize - prev >= (1 << 26))
1454 {
1455 prev = bi.UnpackSize;
1456 RINOK(printCallback->CheckBreak())
1457 }
1458
1459 /*
1460 CBenchInfo info;
1461 progressInfoSpec[0]->SetStartTime();
1462 */
1463
1464 bool calcCrc = false;
1465 if (useCrc)
1466 calcCrc = (((UInt32)i & mask) == 0);
1467
1468 if (_encoderFilter)
1469 {
// filter works in place. (rgCopy) is used as work buffer, except of the cases:
// first pass, CRC pass, or real data mode. In these cases the buffer of (outStreamSpec) is used,
// and it is refilled from (uncompressedDataPtr), if it was changed by previous pass.
1470 Byte *filterData = rgCopy;
1471 if (i == numIterations - 1 || calcCrc || useRealData_Enc)
1472 {
1473 // printf("\nfilterData = (Byte *)*outStreamSpec;\n");
1474 filterData = (Byte *)*outStreamSpec;
1475 if (data_Was_Changed)
1476 {
1477 // printf("\nmemcpy(filterData, uncompressedDataPtr\n");
1478 memcpy(filterData, uncompressedDataPtr, kBufferSize);
1479 }
1480 data_Was_Changed = true;
1481 }
1482 _encoderFilter->Init();
1483 if (calcCrc)
1484 {
1485 // printf("\nInitCrc\n");
1486 outStreamSpec->InitCrc();
1487 }
1488 // printf("\nMy_FilterBench\n");
1489 My_FilterBench(_encoderFilter, filterData, kBufferSize,
1490 calcCrc ? &outStreamSpec->Crc : NULL);
1491 }
1492 else
1493 {
1494 outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
1495 inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1496 RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
1497 if (!inStreamSpec->WasFinished())
1498 return E_FAIL;
// compressed size must be same for all passes
1499 if (compressedSize != outStreamSpec->Pos)
1500 {
1501 if (compressedSize != 0)
1502 return E_FAIL;
1503 compressedSize = outStreamSpec->Pos;
1504 }
1505 }
1506
1507 // outStreamSpec->Print();
1508
// CRC of output must be same for all passes where it is calculated
1509 if (calcCrc)
1510 {
1511 const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
1512 if (crcPrev_defined && crcPrev != crc2)
1513 return E_FAIL;
1514 crcPrev = crc2;
1515 crcPrev_defined = true;
1516 }
1517
1518 bi.UnpackSize += kBufferSize;
1519 bi.PackSize += compressedSize;
1520
1521 /*
1522 {
1523 progressInfoSpec[0]->SetFinishTime(info);
1524 info.UnpackSize = 0;
1525 info.PackSize = 0;
1526 info.NumIterations = 1;
1527
1528 info.UnpackSize = kBufferSize;
1529 info.PackSize = compressedSize;
1530 // printf("\n%7d\n", encoder.compressedSize);
1531
1532 RINOK(callback->SetEncodeResult(info, true))
1533 printCallback->NewLine();
1534 }
1535 */
1536
1537 }
1538
1539 _encoder.Release();
1540 _encoderFilter.Release();
1541 return S_OK;
1542 }
1543
1544
/*
  CEncoderInfo::Decode(decoderIndex)
  Decoding part of the benchmark for one decoder of this CEncoderInfo object.
  It uses _decoderFilter (only (decoderIndex == 0) is allowed in that case),
  or _decoders[decoderIndex].
  Setup: number of threads (if Z7_ST is not defined), coder properties,
  decoder properties from (propsData), password, key and IV.
  Then it runs (NumIterations) passes over the data in the buffer of (outStreamSpec),
  and accumulates UnpackSize / PackSize in progressInfoSpec[decoderIndex]->BenchInfo.
  The used decoder object and _decoderFilter are released at the end.
  Returns:
    E_FAIL  : filter with (decoderIndex != 0), or there are properties from encoder,
              but the decoder doesn't support ICompressSetDecoderProperties2,
              or (compressedSize > rgCopy.Size()) in filter mode.
    S_FALSE : the decoder did not read all input data, or unexpected size of
              processed input / output, or CRC of decoded data differs from (crc).
  Errors of called functions are passed through RINOK
  (RINOK is assumed to return from function for non-zero result).
*/
1545 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1546 {
1547 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1548 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1549 CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1550 CMyComPtr<IUnknown> coder;
1551 if (_decoderFilter)
1552 {
1553 if (decoderIndex != 0)
1554 return E_FAIL;
1555 coder = _decoderFilter;
1556 }
1557 else
1558 coder = decoder;
1559
1560 // printf("\ndecoderIndex = %d, stack = %p", decoderIndex, &coder);
1561
// if there are written properties, the decoder must be able to get them
1562 CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1563 coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
1564 if (!setDecProps && propStreamSpec->GetPos() != 0)
1565 return E_FAIL;
1566
1567 CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1568 CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1569
1570 CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1571 pi->BenchInfo.UnpackSize = 0;
1572 pi->BenchInfo.PackSize = 0;
1573
1574 #ifndef Z7_ST
1575 {
1576 CMyComPtr<ICompressSetCoderMt> setCoderMt;
1577 coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1578 if (setCoderMt)
1579 {
1580 RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
1581 }
1582 }
1583 #endif
1584
1585 CMyComPtr<ICompressSetCoderProperties> scp;
1586 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1587 if (scp)
1588 {
1589 const UInt64 reduceSize = _uncompressedDataSize;
1590 RINOK(_method.SetCoderProps(scp, &reduceSize))
1591 }
1592
1593 CMyComPtr<ICryptoProperties> cp;
1594 coder.QueryInterface(IID_ICryptoProperties, &cp);
1595
1596 if (setDecProps)
1597 {
1598 RINOK(setDecProps->SetDecoderProperties2(
1599 /* (const Byte *)*propStreamSpec, */
1600 propsData,
1601 (UInt32)propStreamSpec->GetPos()))
1602 }
1603
1604 {
1605 CMyComPtr<ICryptoSetPassword> sp;
1606 coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1607 if (sp)
1608 {
1609 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1610 }
1611 }
1612
1613 UInt64 prev = 0;
1614
1615 if (cp)
1616 {
1617 RINOK(Set_Key_and_IV(cp))
1618 }
1619
1620 CMyComPtr<ICompressSetFinishMode> setFinishMode;
1621
1622 if (_decoderFilter)
1623 {
// (rgCopy) can be used as work buffer for (compressedSize) bytes in the loop below
1624 if (compressedSize > rgCopy.Size())
1625 return E_FAIL;
1626 }
1627 else
1628 {
1629 decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
1630 }
1631
1632 const UInt64 numIterations = NumIterations;
1633 const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;
1634
1635 for (UInt64 i = 0; i < numIterations; i++)
1636 {
// CheckBreak() is called after each (1 << 26) bytes of produced unpacked data
1637 if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
1638 {
1639 RINOK(printCallback->CheckBreak())
1640 prev = pi->BenchInfo.UnpackSize;
1641 }
1642
1643 const UInt64 outSize = kBufferSize;
1644 bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);
1645
1646 crcOutStreamSpec->Init();
1647
1648 if (_decoderFilter)
1649 {
// filter works in place in the buffer of (outStreamSpec).
// CRC is calculated only at first pass, except of (k_CheckCrcMode_Always) mode,
// where each pass works with fresh copy of data in (rgCopy) and calculates CRC.
1650 Byte *filterData = (Byte *)*outStreamSpec;
1651 if (calcCrc)
1652 {
1653 calcCrc = (i == 0);
1654 if (checkCrcMode == k_CheckCrcMode_Always)
1655 {
1656 calcCrc = true;
1657 memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
1658 filterData = rgCopy;
1659 }
1660 }
1661 _decoderFilter->Init();
1662 My_FilterBench(_decoderFilter, filterData, compressedSize,
1663 calcCrc ? &crcOutStreamSpec->Crc : NULL);
1664 }
1665 else
1666 {
1667 crcOutStreamSpec->CalcCrc = calcCrc;
1668 inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
1669
1670 if (setFinishMode)
1671 {
1672 RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
1673 }
1674
1675 RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))
1676
// additional checks for decoder that supports finish mode:
// all input data must be read, and the reported size of processed input must be (compressedSize)
1677 if (setFinishMode)
1678 {
1679 if (!inStreamSpec->WasFinished())
1680 return S_FALSE;
1681
1682 CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
1683 decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);
1684
1685 if (getInStreamProcessedSize)
1686 {
1687 UInt64 processed;
1688 RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
1689 if (processed != compressedSize)
1690 return S_FALSE;
1691 }
1692 }
1693
1694 if (crcOutStreamSpec->Pos != outSize)
1695 return S_FALSE;
1696 }
1697
// CRC of decoded data must be equal to (crc) member of this object
1698 if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1699 return S_FALSE;
1700
1701 pi->BenchInfo.UnpackSize += kBufferSize;
1702 pi->BenchInfo.PackSize += compressedSize;
1703 }
1704
1705 decoder.Release();
1706 _decoderFilter.Release();
1707 return S_OK;
1708 }
1709
1710
// (1 << 12) == 4096.
// NOTE(review): presumably it's the upper limit for number of benchmark threads.
// It's not referenced in nearby code - confirm at use sites.
1711 static const UInt32 kNumThreadsMax = (1 << 12);
1712
1713 struct CBenchEncoders
1714 {
1715 CEncoderInfo *encoders;
1716 CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1717 ~CBenchEncoders() { delete []encoders; }
1718 };
1719
1720
1721 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1722 {
1723 if (numCommands < (1 << 4))
1724 numCommands = (1 << 4);
1725 UInt64 res = complexInCommands / numCommands;
1726 return (res == 0 ? 1 : res);
1727 }
1728
1729
1730
1731 #ifndef Z7_ST
1732
1733 // ---------- CBenchThreadsFlusher ----------
1734
1735 struct CBenchThreadsFlusher
1736 {
1737 CBenchEncoders *EncodersSpec;
1738 CBenchSyncCommon Common;
1739 unsigned NumThreads;
1740 bool NeedClose;
1741
1742 CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1743
1744 ~CBenchThreadsFlusher()
1745 {
1746 StartAndWait(true);
1747 }
1748
1749 WRes StartAndWait(bool exitMode = false);
1750 };
1751
1752
1753 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1754 {
1755 if (!NeedClose)
1756 return 0;
1757
1758 Common.ExitMode = exitMode;
1759 WRes res = Common.StartEvent.Set();
1760
1761 for (unsigned i = 0; i < NumThreads; i++)
1762 {
1763 NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1764 if (t.IsCreated())
1765 {
1766 WRes res2 = t.Wait_Close();
1767 if (res == 0)
1768 res = res2;
1769 }
1770 }
1771 NeedClose = false;
1772 return res;
1773 }
1774
1775 #endif // Z7_ST
1776
1777
1778
1779 static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
1780 {
1781 for (size_t i = 0; i < size; i++)
1782 {
1783 data[i] = (Byte)startValue;
1784 startValue++;
1785 }
1786 }
1787
1788
1789
/*
  MethodBench()
  Runs encoding benchmark and then decoding benchmark for one method (method2).
  Steps:
    1) finds the codec for method name; selects number of encoder objects
       and number of decoders per encoder object;
    2) creates coders and sets parameters of each CEncoderInfo;
    3) encoding: in multithreading mode it creates encoder threads, waits until all of them
       are ready, sets start time and releases them with common start event.
       Otherwise it calls Encode() of single encoder in current thread.
       The result is reported with callback->SetEncodeResult();
    4) decoding: in multithreading mode it creates decoder threads and waits for them.
       Otherwise it calls Decode(0) for each encoder object in current thread.
       The result is reported with callback->SetDecodeResult().
  Returns:
    E_NOTIMPL    : the method was not found, or coder object was not created.
    E_INVALIDARG : the method has more than one stream.
    Other errors are passed through RINOK
    (RINOK is assumed to return from function for non-zero result).
  (callback) and (benchProps) are dereferenced without check for NULL.
*/
1790 static HRESULT MethodBench(
1791 DECL_EXTERNAL_CODECS_LOC_VARS
1792 UInt64 complexInCommands,
1793 #ifndef Z7_ST
1794 bool oldLzmaBenchMode,
1795 UInt32 numThreads,
1796 const CAffinityMode *affinityMode,
1797 #endif
1798 const COneMethodInfo &method2,
1799 size_t uncompressedDataSize,
1800 const Byte *fileData,
1801 unsigned generateDictBits,
1802
1803 IBenchPrintCallback *printCallback,
1804 IBenchCallback *callback,
1805 CBenchProps *benchProps)
1806 {
// local copy of method: properties can be changed below for old LZMA benchmark mode
1807 COneMethodInfo method = method2;
1808 UInt64 methodId;
1809 UInt32 numStreams;
1810 bool isFilter;
1811 const int codecIndex = FindMethod_Index(
1812 EXTERNAL_CODECS_LOC_VARS
1813 method.MethodName, true,
1814 methodId, numStreams, isFilter);
1815 if (codecIndex < 0)
1816 return E_NOTIMPL;
1817 if (numStreams != 1)
1818 return E_INVALIDARG;
1819
1820 UInt32 numEncoderThreads = 1;
1821 UInt32 numSubDecoderThreads = 1;
1822
1823 #ifndef Z7_ST
1824 numEncoderThreads = numThreads;
1825
// old LZMA benchmark mode: if LZMA encoder uses more than one thread,
// the number of encoder objects is halved, and two decoders are used per encoder object
1826 if (oldLzmaBenchMode)
1827 if (methodId == k_LZMA)
1828 {
1829 if (numThreads == 1 && method.Get_NumThreads() < 0)
1830 method.AddProp_NumThreads(1);
1831 const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
1832 if (numThreads > 1 && numLzmaThreads > 1)
1833 {
1834 numEncoderThreads = (numThreads + 1) / 2; // 20.03
1835 numSubDecoderThreads = 2;
1836 }
1837 }
1838
// encoders work in separate threads, if there are several encoders, or if affinity is required
1839 const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();
1840
1841 #endif
1842
1843 CBenchEncoders encodersSpec(numEncoderThreads);
1844 CEncoderInfo *encoders = encodersSpec.encoders;
1845
1846 UInt32 i;
1847
// ---------- create coders and set CRC check modes ----------
1848 for (i = 0; i < numEncoderThreads; i++)
1849 {
1850 CEncoderInfo &encoder = encoders[i];
1851 encoder.callback = (i == 0) ? callback : NULL;
1852 encoder.printCallback = printCallback;
1853
1854 #ifndef Z7_ST
1855 encoder.EncoderIndex = i;
1856 encoder.NumEncoderInternalThreads = numSubDecoderThreads;
1857 encoder.AffinityMode = *affinityMode;
1858
1859 /*
1860 if (numSubDecoderThreads > 1)
1861 if (encoder.AffinityMode.NeedAffinity()
1862 && encoder.AffinityMode.NumBundleThreads == 1)
1863 {
1864 // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
1865 if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
1866 encoder.AffinityMode.NumBundleThreads *= 2;
1867 }
1868 */
1869
1870 #endif
1871
1872 {
1873 CCreatedCoder cod;
1874 RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
1875 encoder._encoder = cod.Coder;
1876 if (!encoder._encoder && !encoder._encoderFilter)
1877 return E_NOTIMPL;
1878 }
1879
// same fixed byte sequences for IV, key and password are used for all encoder objects
1880 SetPseudoRand(encoder._iv, sizeof(encoder._iv), 17);
1881 SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
1882 SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);
1883
1884 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1885 {
1886 CCreatedCoder cod;
1887 CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
1888 RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
1889 decoder = cod.Coder;
1890 if (!encoder._decoderFilter && !decoder)
1891 return E_NOTIMPL;
1892 }
1893
// real data and CRC check at each encoding pass are used only for methods with (EncComplex > 30).
// CRC check at each decoding pass is used only if (DecComplexCompr + DecComplexUnc > 30).
1894 encoder.UseRealData_Enc =
1895 encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;
1896
1897 encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1898 if (benchProps->DecComplexCompr +
1899 benchProps->DecComplexUnc <= 30)
1900 encoder.CheckCrcMode_Dec =
1901 k_CheckCrcMode_FirstPass; // for filters
1902 // k_CheckCrcMode_Never; // for debug
1903 // k_CheckCrcMode_Always; // for debug
// data from file: real data is always used for encoding, and CRC is checked at each decoding pass
1904 if (fileData)
1905 {
1906 encoder.UseRealData_Enc = true;
1907 encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1908 }
1909 }
1910
// (crc) is calculated here only for data from file
1911 UInt32 crc = 0;
1912 if (fileData)
1913 crc = CrcCalc(fileData, uncompressedDataSize);
1914
1915 for (i = 0; i < numEncoderThreads; i++)
1916 {
1917 CEncoderInfo &encoder = encoders[i];
1918 encoder._method = method;
1919 encoder.generateDictBits = generateDictBits;
1920 encoder._uncompressedDataSize = uncompressedDataSize;
1921 encoder.kBufferSize = uncompressedDataSize;
1922 encoder.fileData = fileData;
1923 encoder.crc = crc;
1924 }
1925
// ---------- Encode ----------
1926 CBenchProgressStatus status;
1927 status.Res = S_OK;
1928 status.EncodeMode = true;
1929
1930 #ifndef Z7_ST
// destructor of (encoderFlusher) releases waiting encoder threads in exit mode, if we return early
1931 CBenchThreadsFlusher encoderFlusher;
1932 if (mtEncMode)
1933 {
1934 WRes wres = encoderFlusher.Common.StartEvent.Create();
1935 if (wres != 0)
1936 return HRESULT_FROM_WIN32(wres);
1937 encoderFlusher.NumThreads = numEncoderThreads;
1938 encoderFlusher.EncodersSpec = &encodersSpec;
1939 encoderFlusher.NeedClose = true;
1940 }
1941 #endif
1942
1943 for (i = 0; i < numEncoderThreads; i++)
1944 {
1945 CEncoderInfo &encoder = encoders[i];
1946 encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
1947 // encoder.NumIterations = 3;
1948 {
1949 #if 0
1950 #define kCrcPoly 0xEDB88320
1951 UInt32 r = i;
1952 unsigned num = numEncoderThreads < 256 ? 8 : 16;
1953 do
1954 r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
1955 while (--num);
1956 encoder.Salt = r;
1957 #else
// salt depends on index of encoder: it's mix of two CRC table items for two low bytes of (i)
1958 UInt32 salt0 = g_CrcTable[(Byte)i];
1959 UInt32 salt1 = g_CrcTable[(Byte)(i >> 8)];
1960 encoder.Salt = salt0 ^ (salt1 << 3);
1961 #endif
1962 }
1963
1964 // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
1965 // printf("\n encoder index = %d, Salt = %8x\n", i, encoder.Salt);
1966
1967 encoder.KeySize = benchProps->KeySize;
1968
// two progress objects per encoder object; all of them share (status)
1969 for (int j = 0; j < 2; j++)
1970 {
1971 CBenchProgressInfo *spec = new CBenchProgressInfo;
1972 encoder.progressInfoSpec[j] = spec;
1973 encoder.progressInfo[j] = spec;
1974 spec->Status = &status;
1975 }
1976
// only first progress object of first encoder gets the callback
1977 if (i == 0)
1978 {
1979 CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1980 bpi->Callback = callback;
1981 bpi->BenchInfo.NumIterations = numEncoderThreads;
1982 }
1983
1984 #ifndef Z7_ST
1985 if (mtEncMode)
1986 {
1987 #ifdef USE_ALLOCA
1988 encoder.AllocaSize = BENCH_ALLOCA_VALUE(i);
1989 #endif
1990
1991 encoder.Common = &encoderFlusher.Common;
1992 encoder.IsGlobalMtMode = numEncoderThreads > 1;
1993 RINOK(encoder.CreateEncoderThread())
1994 }
1995 #endif
1996 }
1997
1998 if (printCallback)
1999 {
2000 RINOK(printCallback->CheckBreak())
2001 }
2002
2003 #ifndef Z7_ST
2004 if (mtEncMode)
2005 {
// wait until each encoder thread reports ready state, and check its result
2006 for (i = 0; i < numEncoderThreads; i++)
2007 {
2008 CEncoderInfo &encoder = encoders[i];
2009 const WRes wres = encoder.ReadyEvent.Lock();
2010 if (wres != 0)
2011 return HRESULT_FROM_WIN32(wres);
2012 RINOK(encoder.Results[0])
2013 }
2014
// start time is set after all threads are ready, and before they are released
2015 CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
2016 bpi->SetStartTime();
2017
2018 const WRes wres = encoderFlusher.StartAndWait();
2019 if (status.Res == 0 && wres != 0)
2020 return HRESULT_FROM_WIN32(wres);
2021 }
2022 else
2023 #endif
2024 {
2025 RINOK(encoders[0].Encode())
2026 }
2027
2028 RINOK(status.Res)
2029
2030 CBenchInfo info;
2031
// time is taken from progress object of first encoder; sizes are summed for all encoders
2032 encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2033 info.UnpackSize = 0;
2034 info.PackSize = 0;
2035 info.NumIterations = encoders[0].NumIterations;
2036
2037 for (i = 0; i < numEncoderThreads; i++)
2038 {
2039 const CEncoderInfo &encoder = encoders[i];
2040 info.UnpackSize += encoder.kBufferSize;
2041 info.PackSize += encoder.compressedSize;
2042 // printf("\n%7d\n", encoder.compressedSize);
2043 }
2044
2045 RINOK(callback->SetEncodeResult(info, true))
2046
2047
2048
2049
2050 // ---------- Decode ----------
2051
2052 status.Res = S_OK;
2053 status.EncodeMode = false;
2054
2055 const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
2056 #ifndef Z7_ST
2057 const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
2058 #endif
2059
2060 for (i = 0; i < numEncoderThreads; i++)
2061 {
2062 CEncoderInfo &encoder = encoders[i];
2063
2064 /*
2065 #ifndef Z7_ST
2066 // encoder.affinityMode = *affinityMode;
2067 if (encoder.NumEncoderInternalThreads != 1)
2068 encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
2069 #endif
2070 */
2071
2072
// number of decoding passes is calculated from sizes of first encoder, and it's used for all encoders.
// start time of decoding is set here, before first decoder is started.
2073 if (i == 0)
2074 {
2075 encoder.NumIterations = GetNumIterations(
2076 benchProps->GetNumCommands_Dec(
2077 encoder.compressedSize,
2078 encoder.kBufferSize),
2079 complexInCommands);
2080 CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
2081 bpi->Callback = callback;
2082 bpi->BenchInfo.NumIterations = numDecoderThreads;
2083 bpi->SetStartTime();
2084 }
2085 else
2086 encoder.NumIterations = encoders[0].NumIterations;
2087
2088 #ifndef Z7_ST
2089 {
2090 const int numSubThreads = method.Get_NumThreads();
2091 encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
2092 }
2093 if (mtDecoderMode)
2094 {
2095 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2096 {
2097 const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
2098 #ifdef USE_ALLOCA
2099 , BENCH_ALLOCA_VALUE(i * numSubDecoderThreads + j)
2100 #endif
2101 );
2102 RINOK(res)
2103 }
2104 }
2105 else
2106 #endif
2107 {
2108 RINOK(encoder.Decode(0))
2109 }
2110 }
2111
2112 #ifndef Z7_ST
2113 if (mtDecoderMode)
2114 {
// wait for all decoder threads; first non-zero wait result and first non-zero thread result are kept
2115 WRes wres = 0;
2116 HRESULT res = S_OK;
2117 for (i = 0; i < numEncoderThreads; i++)
2118 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2119 {
2120 CEncoderInfo &encoder = encoders[i];
2121 const WRes wres2 = encoder.thread[j].
2122 // Wait(); // later we can get thread times from thread in UNDER_CE
2123 Wait_Close();
2124 if (wres == 0 && wres2 != 0)
2125 wres = wres2;
2126 const HRESULT res2 = encoder.Results[j];
2127 if (res == 0 && res2 != 0)
2128 res = res2;
2129 }
2130 if (wres != 0)
2131 return HRESULT_FROM_WIN32(wres);
2132 RINOK(res)
2133 }
2134 #endif // Z7_ST
2135
2136 RINOK(status.Res)
2137 encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2138
2139 /*
2140 #ifndef Z7_ST
2141 #ifdef UNDER_CE
2142 if (mtDecoderMode)
2143 for (i = 0; i < numEncoderThreads; i++)
2144 for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2145 {
2146 FILETIME creationTime, exitTime, kernelTime, userTime;
2147 if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
2148 info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
2149 }
2150 #endif
2151 #endif
2152 */
2153
// sizes are summed per encoder object; number of decoders per encoder is included to (NumIterations)
2154 info.UnpackSize = 0;
2155 info.PackSize = 0;
2156 info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
2157
2158 for (i = 0; i < numEncoderThreads; i++)
2159 {
2160 const CEncoderInfo &encoder = encoders[i];
2161 info.UnpackSize += encoder.kBufferSize;
2162 info.PackSize += encoder.compressedSize;
2163 }
2164
2165 // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
2166 RINOK(callback->SetDecodeResult(info, true))
2167
2168 return S_OK;
2169 }
2170
2171
2172
2173 static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
2174 {
2175 /*
2176 if (dictSizeLog < 32)
2177 return (UInt32)1 << dictSizeLog;
2178 else
2179 return (UInt32)(Int32)-1;
2180 */
2181 return (UInt64)1 << dictSizeLog;
2182 }
2183
2184
2185 // it's limit of current LZMA implementation that can be changed later
2186 #define kLzmaMaxDictSize ((UInt32)15 << 28)
2187
2188 static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
2189 {
2190 if (dict == 0)
2191 dict = 1;
2192 if (dict > kLzmaMaxDictSize)
2193 dict = kLzmaMaxDictSize;
2194 UInt32 hs = (UInt32)dict - 1;
2195 hs |= (hs >> 1);
2196 hs |= (hs >> 2);
2197 hs |= (hs >> 4);
2198 hs |= (hs >> 8);
2199 hs >>= 1;
2200 hs |= 0xFFFF;
2201 if (hs > (1 << 24))
2202 hs >>= 1;
2203 hs++;
2204 hs += (1 << 16);
2205
2206 const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
2207 UInt64 blockSize = (UInt64)dict + (1 << 16)
2208 + (multiThread ? (1 << 20) : 0);
2209 blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
2210 if (blockSize >= kBlockSizeMax)
2211 blockSize = kBlockSizeMax;
2212
2213 UInt64 son = (UInt64)dict;
2214 if (btMode)
2215 son *= 2;
2216 const UInt64 v = (hs + son) * 4 + blockSize +
2217 (1 << 20) + (multiThread ? (6 << 20) : 0);
2218
2219 // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
2220 // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
2221 return v;
2222 }
2223
2224
2225 UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2226 {
2227 const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2228 const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2229 if (level < 0)
2230 level = 5;
2231 const int algo = (level < 5 ? 0 : 1);
2232 const int btMode = (algo == 0 ? 0 : 1);
2233
2234 UInt32 numBigThreads = numThreads;
2235 const bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2236 if (btMode)
2237 {
2238 if (!totalBench && lzmaMt)
2239 numBigThreads /= 2;
2240 }
2241 return ((UInt64)kBufferSize + kCompressedBufferSize +
2242 GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2243 }
2244
2245 static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
2246 {
2247 // dictionary += (dictionary >> 9); // for page tables (virtual memory)
2248 return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20);
2249 }
2250
2251
2252 // ---------- CRC and HASH ----------
2253
2254 struct CCrcInfo_Base
2255 {
2256 CMidAlignedBuffer Buffer;
2257 const Byte *Data;
2258 size_t Size;
2259 bool CreateLocalBuf;
2260 UInt32 CheckSum_Res;
2261
2262 CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2263
2264 HRESULT Generate(const Byte *data, size_t size);
2265 HRESULT CrcProcess(UInt64 numIterations,
2266 const UInt32 *checkSum, IHasher *hf,
2267 IBenchPrintCallback *callback);
2268 };
2269
2270
2271 // for debug: define it to test hash calling with unaligned data
2272 // #define Z7_BENCH_HASH_ALIGN_BUF_OFFSET 3
2273
2274 HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2275 {
2276 Size = size;
2277 Data = data;
2278 if (!data || CreateLocalBuf)
2279 {
2280 Byte *buf;
2281 const size_t size2 = (size + k_RandBuf_AlignMask) & ~(size_t)k_RandBuf_AlignMask;
2282 if (size2 < size)
2283 return E_OUTOFMEMORY;
2284 #ifdef Z7_BENCH_HASH_ALIGN_BUF_OFFSET
2285 ALLOC_WITH_HRESULT(&Buffer, size2 + Z7_BENCH_HASH_ALIGN_BUF_OFFSET)
2286 buf = Buffer + Z7_BENCH_HASH_ALIGN_BUF_OFFSET;
2287 #else
2288 ALLOC_WITH_HRESULT(&Buffer, size2)
2289 buf = Buffer;
2290 #endif
2291 Data = buf;
2292 if (!data)
2293 RandGen_BufAfterPad(buf, size);
2294 else if (size != 0) // (CreateLocalBuf == true)
2295 memcpy(buf, data, size);
2296 }
2297 return S_OK;
2298 }
2299
2300
2301 HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2302 const UInt32 *checkSum, IHasher *hf,
2303 IBenchPrintCallback *callback)
2304 {
2305 MY_ALIGN(16)
2306 UInt32 hash32[64 / 4];
2307 memset(hash32, 0, sizeof(hash32));
2308
2309 CheckSum_Res = 0;
2310
2311 const UInt32 hashSize = hf->GetDigestSize();
2312 if (hashSize > sizeof(hash32))
2313 return S_FALSE;
2314
2315 const Byte *buf = Data;
2316 const size_t size = Size;
2317 UInt32 checkSum_Prev = 0;
2318
2319 UInt64 prev = 0;
2320 UInt64 cur = 0;
2321
2322 do
2323 {
2324 hf->Init();
2325 size_t pos = 0;
2326 do
2327 {
2328 const size_t rem = size - pos;
2329 const UInt32 kStep = ((UInt32)1 << 31);
2330 const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2331 hf->Update(buf + pos, curSize);
2332 pos += curSize;
2333 }
2334 while (pos != size);
2335
2336 hf->Final((Byte *)(void *)hash32);
2337 UInt32 sum = 0;
2338 for (UInt32 j = 0; j < hashSize; j += 4)
2339 {
2340 sum = rotlFixed(sum, 11);
2341 sum += GetUi32((const Byte *)(const void *)hash32 + j);
2342 }
2343 if (checkSum)
2344 {
2345 if (sum != *checkSum)
2346 return S_FALSE;
2347 }
2348 else
2349 {
2350 checkSum_Prev = sum;
2351 checkSum = &checkSum_Prev;
2352 }
2353 if (callback)
2354 {
2355 cur += size;
2356 if (cur - prev >= ((UInt32)1 << 30))
2357 {
2358 prev = cur;
2359 RINOK(callback->CheckBreak())
2360 }
2361 }
2362 }
2363 while (--numIterations);
2364
2365 CheckSum_Res = checkSum_Prev;
2366 return S_OK;
2367 }
2368
2369 extern
2370 UInt32 g_BenchCpuFreqTemp; // we need non-static variable to disable compiler optimization
2371 UInt32 g_BenchCpuFreqTemp = 1;
2372
/*
  YY1 is one pair of statements "sum += val; sum ^= val;" (2 operations).
  Each next macro repeats previous macro 4 times. So YY7 is expanded to
  (4 * 4 * 4 = 64) copies of YY1 = 128 operations, and that number is equal to kNumFreqCommands.
  NOTE(review): presumably kNumFreqCommands is the number of operations per one iteration
  of loop in CountCpuFreq(), and callers use it for frequency calculation - confirm at use sites.
*/
2373 #define YY1 sum += val; sum ^= val;
2374 #define YY3 YY1 YY1 YY1 YY1
2375 #define YY5 YY3 YY3 YY3 YY3
2376 #define YY7 YY5 YY5 YY5 YY5
2377 static const UInt32 kNumFreqCommands = 128;
2378
2379 EXTERN_C_BEGIN
2380
// It executes YY7 sequence (num) times for (sum) with operand (val), and returns new (sum).
// Each operation uses the result of previous operation.
2381 static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
2382 {
2383 for (UInt32 i = 0; i < num; i++)
2384 {
2385 YY7
2386 }
2387 return sum;
2388 }
2389
2390 EXTERN_C_END
2391
2392
2393 #ifndef Z7_ST
2394
2395 struct CBaseThreadInfo
2396 {
2397 NWindows::CThread Thread;
2398 IBenchPrintCallback *Callback;
2399 HRESULT CallbackRes;
2400
2401 WRes Wait_If_Created()
2402 {
2403 if (!Thread.IsCreated())
2404 return 0;
2405 return Thread.Wait_Close();
2406 }
2407 };
2408
2409 struct CFreqInfo: public CBaseThreadInfo
2410 {
2411 UInt32 ValRes;
2412 UInt32 Size;
2413 UInt64 NumIterations;
2414 };
2415
2416 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2417 {
2418 CFreqInfo *p = (CFreqInfo *)param;
2419
2420 UInt32 sum = g_BenchCpuFreqTemp;
2421 for (UInt64 k = p->NumIterations; k > 0; k--)
2422 {
2423 if (p->Callback)
2424 {
2425 p->CallbackRes = p->Callback->CheckBreak();
2426 if (p->CallbackRes != S_OK)
2427 break;
2428 }
2429 sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2430 }
2431 p->ValRes = sum;
2432 return THREAD_FUNC_RET_ZERO;
2433 }
2434
2435 struct CFreqThreads
2436 {
2437 CFreqInfo *Items;
2438 UInt32 NumThreads;
2439
2440 CFreqThreads(): Items(NULL), NumThreads(0) {}
2441
2442 WRes WaitAll()
2443 {
2444 WRes wres = 0;
2445 for (UInt32 i = 0; i < NumThreads; i++)
2446 {
2447 WRes wres2 = Items[i].Wait_If_Created();
2448 if (wres == 0 && wres2 != 0)
2449 wres = wres2;
2450 }
2451 NumThreads = 0;
2452 return wres;
2453 }
2454
2455 ~CFreqThreads()
2456 {
2457 WaitAll();
2458 delete []Items;
2459 }
2460 };
2461
2462
2463 static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2464
2465 struct CCrcInfo: public CBaseThreadInfo
2466 {
2467 const Byte *Data;
2468 size_t Size;
2469 UInt64 NumIterations;
2470 bool CheckSumDefined;
2471 UInt32 CheckSum;
2472 CMyComPtr<IHasher> Hasher;
2473 HRESULT Res;
2474 UInt32 CheckSum_Res;
2475
2476 #ifndef Z7_ST
2477 NSynchronization::CManualResetEvent ReadyEvent;
2478 UInt32 ThreadIndex;
2479 CBenchSyncCommon *Common;
2480 CAffinityMode AffinityMode;
2481 #endif
2482
2483 // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
2484 // so we uses non-local CCrcInfo_Base.
2485 CCrcInfo_Base crcib;
2486
2487 HRESULT CreateThread()
2488 {
2489 WRes res = 0;
2490 if (!ReadyEvent.IsCreated())
2491 res = ReadyEvent.Create();
2492 if (res == 0)
2493 res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
2494 ThreadIndex);
2495 return HRESULT_FROM_WIN32(res);
2496 }
2497
2498 #ifdef USE_ALLOCA
2499 size_t AllocaSize;
2500 #endif
2501
2502 void Process();
2503
2504 CCrcInfo(): Res(E_FAIL) {}
2505 };
2506
2507 static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
2508 // static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2509
2510 void CCrcInfo::Process()
2511 {
2512 crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2513 // we can use additional Generate() passes to reduce some time effects for new page allocation
2514 // for (unsigned y = 0; y < 10; y++)
2515 Res = crcib.Generate(Data, Size);
2516
2517 // if (Common)
2518 {
2519 WRes wres = ReadyEvent.Set();
2520 if (wres != 0)
2521 {
2522 if (Res == 0)
2523 Res = HRESULT_FROM_WIN32(wres);
2524 return;
2525 }
2526 if (Res != 0)
2527 return;
2528
2529 wres = Common->StartEvent.Lock();
2530
2531 if (wres != 0)
2532 {
2533 Res = HRESULT_FROM_WIN32(wres);
2534 return;
2535 }
2536 if (Common->ExitMode)
2537 return;
2538 }
2539
2540 Res = crcib.CrcProcess(NumIterations,
2541 CheckSumDefined ? &CheckSum : NULL, Hasher,
2542 Callback);
2543 CheckSum_Res = crcib.CheckSum_Res;
2544 /*
2545 We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2546 to time of benchmark. So we don't free Buffer here
2547 */
2548 // crcib.Buffer.Free();
2549 }
2550
2551
2552 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2553 {
2554 CCrcInfo *p = (CCrcInfo *)param;
2555
2556 #ifdef USE_ALLOCA
2557 alloca(p->AllocaSize);
2558 #endif
2559 p->Process();
2560 return THREAD_FUNC_RET_ZERO;
2561 }
2562
2563
2564 struct CCrcThreads
2565 {
2566 CCrcInfo *Items;
2567 unsigned NumThreads;
2568 CBenchSyncCommon Common;
2569 bool NeedClose;
2570
2571 CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}
2572
2573 WRes StartAndWait(bool exitMode = false);
2574
2575 ~CCrcThreads()
2576 {
2577 StartAndWait(true);
2578 delete []Items;
2579 }
2580 };
2581
2582
2583 WRes CCrcThreads::StartAndWait(bool exitMode)
2584 {
2585 if (!NeedClose)
2586 return 0;
2587
2588 Common.ExitMode = exitMode;
2589 WRes wres = Common.StartEvent.Set();
2590
2591 for (unsigned i = 0; i < NumThreads; i++)
2592 {
2593 WRes wres2 = Items[i].Wait_If_Created();
2594 if (wres == 0 && wres2 != 0)
2595 wres = wres2;
2596 }
2597 NumThreads = 0;
2598 NeedClose = false;
2599 return wres;
2600 }
2601
2602 #endif
2603
2604
2605 /*
2606 static UInt32 CrcCalc1(const Byte *buf, size_t size)
2607 {
2608 UInt32 crc = CRC_INIT_VAL;
2609 for (size_t i = 0; i < size; i++)
2610 crc = CRC_UPDATE_BYTE(crc, buf[i]);
2611 return CRC_GET_DIGEST(crc);
2612 }
2613 */
2614
2615 /*
2616 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2617 {
2618 RandGen(buf, size, RG);
2619 return CrcCalc1(buf, size);
2620 }
2621 */
2622
2623 static bool CrcInternalTest()
2624 {
2625 CAlignedBuffer buffer;
2626 const size_t kBufSize = 1 << 11;
2627 const size_t kCheckSize = 1 << 6;
2628 buffer.Alloc(kBufSize);
2629 if (!buffer.IsAllocated())
2630 return false;
2631 Byte *buf = (Byte *)buffer;
2632 RandGen_BufAfterPad(buf, kBufSize);
2633 UInt32 sum = 0;
2634 for (size_t i = 0; i < kBufSize - kCheckSize * 2; i += kCheckSize - 1)
2635 for (size_t j = 0; j < kCheckSize; j++)
2636 {
2637 sum = rotlFixed(sum, 11);
2638 sum += CrcCalc(buf + i + j, j);
2639 }
2640 return sum == 0x28462c7c;
2641 }
2642
2643 struct CBenchMethod
2644 {
2645 unsigned Weight;
2646 unsigned DictBits;
2647 Int32 EncComplex;
2648 Int32 DecComplexCompr;
2649 Int32 DecComplexUnc;
2650 const char *Name;
2651 // unsigned KeySize;
2652 };
2653
2654 // #define USE_SW_CMPLX
2655
2656 #ifdef USE_SW_CMPLX
2657 #define CMPLX(x) ((x) * 1000)
2658 #else
2659 #define CMPLX(x) (x)
2660 #endif
2661
2662 static const CBenchMethod g_Bench[] =
2663 {
2664 // { 40, 17, 357, 145, 20, "LZMA:x1" },
2665 // { 20, 18, 360, 145, 20, "LZMA2:x1:mt2" },
2666
2667 { 20, 18, 360, 145, 20, "LZMA:x1" },
2668 { 20, 22, 600, 145, 20, "LZMA:x3" },
2669
2670 { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" },
2671 { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" },
2672
2673 { 10, 16, 124, 40, 14, "Deflate:x1" },
2674 { 20, 16, 376, 40, 14, "Deflate:x5" },
2675 { 10, 16, 1082, 40, 14, "Deflate:x7" },
2676 { 10, 17, 422, 40, 14, "Deflate64:x5" },
2677
2678 { 10, 15, 590, 69, 69, "BZip2:x1" },
2679 { 20, 19, 815, 122, 122, "BZip2:x5" },
2680 { 10, 19, 815, 122, 122, "BZip2:x5:mt2" },
2681 { 10, 19, 2530, 122, 122, "BZip2:x7" },
2682
2683 // { 10, 18, 1010, 0, 1150, "PPMDZip:x1" },
2684 { 10, 18, 1010, 0, 1150, "PPMD:x1" },
2685 // { 10, 22, 1655, 0, 1830, "PPMDZip:x5" },
2686 { 10, 22, 1655, 0, 1830, "PPMD:x5" },
2687
2688 // { 2, 0, -16, 0, -16, "Swap2" },
2689 { 2, 0, -16, 0, -16, "Swap4" },
2690
2691 // { 2, 0, 3, 0, 4, "Delta:1" },
2692 // { 2, 0, 3, 0, 4, "Delta:2" },
2693 // { 2, 0, 3, 0, 4, "Delta:3" },
2694 { 2, 0, 3, 0, 4, "Delta:4" },
2695 // { 2, 0, 3, 0, 4, "Delta:8" },
2696 // { 2, 0, 3, 0, 4, "Delta:32" },
2697
2698 { 2, 0, 2, 0, 2, "BCJ" },
2699 { 2, 0, 1, 0, 1, "ARM64" },
2700 { 2, 0, 1, 0, 1, "RISCV" },
2701
2702 // { 10, 0, 18, 0, 18, "AES128CBC:1" },
2703 // { 10, 0, 21, 0, 21, "AES192CBC:1" },
2704 { 10, 0, 24, 0, 24, "AES256CBC:1" },
2705
2706 // { 10, 0, 18, 0, 18, "AES128CTR:1" },
2707 // { 10, 0, 21, 0, 21, "AES192CTR:1" },
2708 // { 10, 0, 24, 0, 24, "AES256CTR:1" },
2709 // { 2, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
2710 // { 2, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
2711 { 2, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },
2712
2713 // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
2714 // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
2715 // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },
2716
2717 // { 1, 0, CMPLX(6), 0, -2, "AES128CBC:3" },
2718 // { 1, 0, CMPLX(7), 0, -2, "AES192CBC:3" },
2719 { 1, 0, CMPLX(8), 0, -2, "AES256CBC:3" }
2720
2721 // { 1, 0, CMPLX(1), 0, -2, "AES128CTR:3" },
2722 // { 1, 0, CMPLX(1), 0, -2, "AES192CTR:3" },
2723 // { 1, 0, CMPLX(1), 0, -2, "AES256CTR:3" },
2724 };
2725
2726 struct CBenchHash
2727 {
2728 unsigned Weight;
2729 UInt32 Complex;
2730 UInt32 CheckSum;
2731 const char *Name;
2732 };
2733
2734 // #define ARM_CRC_MUL 100
2735 #define ARM_CRC_MUL 1
2736
2737 #define k_Hash_Complex_Mult 256
2738
2739 static const CBenchHash g_Hash[] =
2740 {
2741 { 20, 256, 0x21e207bb, "CRC32:12" } ,
2742 { 2, 128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
2743 { 2, 64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
2744 { 10, 256, 0x41b901d1, "CRC64" },
2745 { 10, 64, 0x43eac94f, "XXH64" },
2746
2747 { 10, 5100, 0x7913ba03, "SHA256:1" },
2748 { 2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
2749
2750 { 10, 2340, 0xff769021, "SHA1:1" },
2751 { 2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
2752
2753 { 2, 4096, 0x85189d02, "BLAKE2sp:1" },
2754 { 2, 1024, 0x85189d02, "BLAKE2sp:2" }, // sse2-way4-fast
2755 { 2, 512, 0x85189d02, "BLAKE2sp:3" } // avx2-way8-fast
2756 #if 0
2757 , { 2, 2048, 0x85189d02, "BLAKE2sp:4" } // sse2-way1
2758 , { 2, 1024, 0x85189d02, "BLAKE2sp:5" } // sse2-way2
2759 , { 2, 1024, 0x85189d02, "BLAKE2sp:6" } // avx2-way2
2760 , { 2, 1024, 0x85189d02, "BLAKE2sp:7" } // avx2-way4
2761 #endif
2762 };
2763
2764 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2765 {
2766 char s[128];
2767 unsigned startPos = (unsigned)sizeof(s) - 32;
2768 memset(s, ' ', startPos);
2769 ConvertUInt64ToString(value, s + startPos);
2770 // if (withSpace)
2771 {
2772 startPos--;
2773 size++;
2774 }
2775 unsigned len = (unsigned)strlen(s + startPos);
2776 if (size > len)
2777 {
2778 size -= len;
2779 if (startPos < size)
2780 startPos = 0;
2781 else
2782 startPos -= size;
2783 }
2784 f.Print(s + startPos);
2785 }
2786
// Widths (in characters) of the columns of benchmark tables.
// They are passed as (size) to PrintNumber() / PrintLeft() / PrintSpaces().
static const unsigned kFieldSize_Name      = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed     = 9;
static const unsigned kFieldSize_Usage     = 5;
static const unsigned kFieldSize_RU        = 6;
static const unsigned kFieldSize_Rating    = 6;
static const unsigned kFieldSize_EU        = 5;
static const unsigned kFieldSize_Effec     = 5;
static const unsigned kFieldSize_CrcSpeed  = 8;


// Combined widths of column groups
static const unsigned kFieldSize_TotalSize =
    4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec =
    2 + kFieldSize_EU + kFieldSize_Effec;
2800
2801
2802 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2803 {
2804 PrintNumber(f, (rating + 500000) / 1000000, size);
2805 }
2806
2807
2808 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2809 {
2810 UInt64 v = 0;
2811 if (divider != 0)
2812 v = (val * 100 + divider / 2) / divider;
2813 PrintNumber(f, v, size);
2814 }
2815
2816 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2817 {
2818 char s[256];
2819 memset(s, (Byte)c, size);
2820 s[size] = 0;
2821 f.Print(s);
2822 }
2823
2824 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
2825 {
2826 PrintChars(f, ' ', size);
2827 }
2828
2829 static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
2830 {
2831 PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
2832 }
2833
2834 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2835 {
2836 PrintUsage(f, usage, kFieldSize_Usage);
2837 PrintRating(f, rpu, kFieldSize_RU);
2838 PrintRating(f, rating, kFieldSize_Rating);
2839 if (showFreq)
2840 {
2841 if (cpuFreq == 0)
2842 PrintSpaces(f, kFieldSize_EUAndEffec);
2843 else
2844 {
2845 PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2846 PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2847 }
2848 }
2849 }
2850
2851
2852 void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
2853 {
2854 Speed = info.GetUnpackSizeSpeed();
2855 Usage = info.GetUsage();
2856 RPU = info.GetRatingPerUsage(Rating);
2857 }
2858
2859 void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2860 {
2861 NumIterations2 *= weight;
2862 RPU *= weight;
2863 Rating *= weight;
2864 Usage *= weight;
2865 Speed *= weight;
2866 }
2867
2868 void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2869 {
2870 Rating += r.Rating;
2871 Usage += r.Usage;
2872 RPU += r.RPU;
2873 Speed += r.Speed;
2874 // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2875 NumIterations2 += r.NumIterations2;
2876 }
2877
2878 static void PrintResults(IBenchPrintCallback *f,
2879 const CBenchInfo &info,
2880 unsigned weight,
2881 UInt64 rating,
2882 bool showFreq, UInt64 cpuFreq,
2883 CTotalBenchRes *res)
2884 {
2885 CTotalBenchRes t;
2886 t.Rating = rating;
2887 t.NumIterations2 = 1;
2888 t.Generate_From_BenchInfo(info);
2889
2890 if (f)
2891 {
2892 if (t.Speed != 0)
2893 PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2894 else
2895 PrintSpaces(*f, 1 + kFieldSize_Speed);
2896 }
2897 if (f)
2898 {
2899 PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2900 }
2901
2902 if (res)
2903 {
2904 // res->NumIterations1++;
2905 t.Mult_For_Weight(weight);
2906 res->Update_With_Res(t);
2907 }
2908 }
2909
2910 static void PrintTotals(IBenchPrintCallback &f,
2911 bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2912 {
2913 const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2914 const UInt64 speed = res.Speed / numIterations2;
2915 if (showSpeed && speed != 0)
2916 PrintNumber(f, speed / 1024, kFieldSize_Speed);
2917 else
2918 PrintSpaces(f, 1 + kFieldSize_Speed);
2919
2920 // PrintSpaces(f, 1 + kFieldSize_Speed);
2921 // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2922 PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2923 }
2924
2925
2926 static void PrintHex(AString &s, UInt64 v)
2927 {
2928 char temp[32];
2929 ConvertUInt64ToHex(v, temp);
2930 s += temp;
2931 }
2932
2933 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2934 {
2935 AString s;
2936 // s.Add_UInt32(ti.numProcessThreads);
2937 unsigned numSysThreads = ti.GetNumSystemThreads();
2938 if (ti.GetNumProcessThreads() != numSysThreads)
2939 {
2940 // if (ti.numProcessThreads != ti.numSysThreads)
2941 {
2942 s += " / ";
2943 s.Add_UInt32(numSysThreads);
2944 }
2945 s += " : ";
2946 #ifdef _WIN32
2947 PrintHex(s, ti.processAffinityMask);
2948 s += " / ";
2949 PrintHex(s, ti.systemAffinityMask);
2950 #else
2951 unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2952 if (i == 0)
2953 i = 4;
2954 for (; i >= 4; )
2955 {
2956 i -= 4;
2957 unsigned val = 0;
2958 for (unsigned k = 0; k < 4; k++)
2959 {
2960 const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2961 val += (bit << k);
2962 }
2963 PrintHex(s, val);
2964 }
2965 #endif
2966 }
2967 return s;
2968 }
2969
2970
#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
// these variables are defined in another source file
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

/*
  Appends information about large pages to (s):
    "(LP-" + <large page size> + ["-1G"] + ["-NA"] + ")"
  "-1G" is added, if CPU_IsSupported_PageGB() (x86/x64 only).
  "-NA" is added, if large pages mode is not enabled.
  Nothing is added in _WIN32 code, if large pages mode is off and the size is zero.
*/
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32

  if (!g_LargePagesMode && g_LargePageSize == 0)
    return;

  s.Add_OptSpaced("(LP-");
  PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
  #ifdef MY_CPU_X86_OR_AMD64
  if (CPU_IsSupported_PageGB())
    s += "-1G";
  #endif
  if (!g_LargePagesMode)
    s += "-NA";
  s += ")";

  #else

  s += "";

  #endif
}

#endif
3002
3003
3004
3005 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
3006 bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
3007 {
3008 f.Print("RAM ");
3009 f.Print(sizeString);
3010 if (size_Defined)
3011 PrintNumber(f, (size >> 20), 6);
3012 else
3013 f.Print(" ?");
3014 f.Print(" MB");
3015
3016 #ifdef Z7_LARGE_PAGES
3017 {
3018 AString s;
3019 Add_LargePages_String(s);
3020 f.Print(s);
3021 }
3022 #endif
3023
3024 f.Print(", # ");
3025 f.Print(threadsString);
3026 PrintNumber(f, numThreads, 3);
3027 }
3028
3029
3030
3031 struct CBenchCallbackToPrint Z7_final: public IBenchCallback
3032 {
3033 bool NeedPrint;
3034 bool Use2Columns;
3035 bool ShowFreq;
3036 unsigned NameFieldSize;
3037
3038 unsigned EncodeWeight;
3039 unsigned DecodeWeight;
3040
3041 UInt64 CpuFreq;
3042 UInt64 DictSize;
3043
3044 IBenchPrintCallback *_file;
3045 CBenchProps BenchProps;
3046 CTotalBenchRes EncodeRes;
3047 CTotalBenchRes DecodeRes;
3048
3049 CBenchInfo BenchInfo_Results[2];
3050
3051 CBenchCallbackToPrint():
3052 NeedPrint(true),
3053 Use2Columns(false),
3054 ShowFreq(false),
3055 NameFieldSize(0),
3056 EncodeWeight(1),
3057 DecodeWeight(1),
3058 CpuFreq(0)
3059 {}
3060
3061 void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3062 void Print(const char *s);
3063 void NewLine();
3064
3065 HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3066 HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3067 HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3068 };
3069
3070 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
3071 {
3072 ShowFreq = showFreq;
3073 CpuFreq = cpuFreq;
3074 return S_OK;
3075 }
3076
3077 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3078 {
3079 RINOK(_file->CheckBreak())
3080 if (final)
3081 BenchInfo_Results[0] = info;
3082 if (final)
3083 if (NeedPrint)
3084 {
3085 const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3086 PrintResults(_file, info,
3087 EncodeWeight, rating,
3088 ShowFreq, CpuFreq, &EncodeRes);
3089 if (!Use2Columns)
3090 _file->NewLine();
3091 }
3092 return S_OK;
3093 }
3094
3095 static const char * const kSep = " | ";
3096
3097 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3098 {
3099 RINOK(_file->CheckBreak())
3100 if (final)
3101 BenchInfo_Results[1] = info;
3102 if (final)
3103 if (NeedPrint)
3104 {
3105 const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3106 if (Use2Columns)
3107 _file->Print(kSep);
3108 else
3109 PrintSpaces(*_file, NameFieldSize);
3110 CBenchInfo info2 = info;
3111 info2.UnpackSize *= info2.NumIterations;
3112 info2.PackSize *= info2.NumIterations;
3113 info2.NumIterations = 1;
3114 PrintResults(_file, info2,
3115 DecodeWeight, rating,
3116 ShowFreq, CpuFreq, &DecodeRes);
3117 }
3118 return S_OK;
3119 }
3120
3121 void CBenchCallbackToPrint::Print(const char *s)
3122 {
3123 _file->Print(s);
3124 }
3125
3126 void CBenchCallbackToPrint::NewLine()
3127 {
3128 _file->NewLine();
3129 }
3130
3131 static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3132 {
3133 f.Print(s);
3134 int numSpaces = (int)size - (int)MyStringLen(s);
3135 if (numSpaces > 0)
3136 PrintSpaces(f, (unsigned)numSpaces);
3137 }
3138
3139 static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3140 {
3141 int numSpaces = (int)size - (int)MyStringLen(s);
3142 if (numSpaces > 0)
3143 PrintSpaces(f, (unsigned)numSpaces);
3144 f.Print(s);
3145 }
3146
3147
3148 static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3149 {
3150 UString wildc = GetUnicodeString(mask);
3151 UString bname = GetUnicodeString(name);
3152 wildc.MakeLower_Ascii();
3153 bname.MakeLower_Ascii();
3154 return DoesWildcardMatchName(wildc, bname);
3155 }
3156
3157
3158 static HRESULT TotalBench(
3159 DECL_EXTERNAL_CODECS_LOC_VARS
3160 const COneMethodInfo &methodMask,
3161 UInt64 complexInCommands,
3162 #ifndef Z7_ST
3163 UInt32 numThreads,
3164 const CAffinityMode *affinityMode,
3165 #endif
3166 bool forceUnpackSize,
3167 size_t unpackSize,
3168 const Byte *fileData,
3169 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3170 {
3171 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3172 {
3173 const CBenchMethod &bench = g_Bench[i];
3174 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3175 continue;
3176 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3177 {
3178 unsigned keySize = 32;
3179 if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3180 else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3181 callback->BenchProps.KeySize = keySize;
3182 }
3183 callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3184 callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3185 callback->BenchProps.EncComplex = bench.EncComplex;
3186
3187 COneMethodInfo method;
3188 NCOM::CPropVariant propVariant;
3189 propVariant = bench.Name;
3190 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3191
3192 size_t unpackSize2 = unpackSize;
3193 if (!forceUnpackSize && bench.DictBits == 0)
3194 unpackSize2 = kFilterUnpackSize;
3195
3196 callback->EncodeWeight = bench.Weight;
3197 callback->DecodeWeight = bench.Weight;
3198
3199 const HRESULT res = MethodBench(
3200 EXTERNAL_CODECS_LOC_VARS
3201 complexInCommands,
3202 #ifndef Z7_ST
3203 false, numThreads, affinityMode,
3204 #endif
3205 method,
3206 unpackSize2, fileData,
3207 bench.DictBits,
3208 printCallback, callback, &callback->BenchProps);
3209
3210 if (res == E_NOTIMPL)
3211 {
3212 // callback->Print(" ---");
3213 // we need additional empty line as line for decompression results
3214 if (!callback->Use2Columns)
3215 callback->NewLine();
3216 }
3217 else
3218 {
3219 RINOK(res)
3220 }
3221
3222 callback->NewLine();
3223 }
3224 return S_OK;
3225 }
3226
3227
3228 struct CFreqBench
3229 {
3230 // in:
3231 UInt64 complexInCommands;
3232 UInt32 numThreads;
3233 bool showFreq;
3234 UInt64 specifiedFreq;
3235
3236 // out:
3237 UInt64 CpuFreqRes;
3238 UInt64 UsageRes;
3239 UInt32 res;
3240
3241 CFreqBench()
3242 {}
3243
3244 HRESULT FreqBench(IBenchPrintCallback *_file
3245 #ifndef Z7_ST
3246 , const CAffinityMode *affinityMode
3247 #endif
3248 );
3249 };
3250
3251
3252 HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
3253 #ifndef Z7_ST
3254 , const CAffinityMode *affinityMode
3255 #endif
3256 )
3257 {
3258 res = 0;
3259 CpuFreqRes = 0;
3260 UsageRes = 0;
3261
3262 if (numThreads == 0)
3263 numThreads = 1;
3264
3265 #ifdef Z7_ST
3266 numThreads = 1;
3267 #endif
3268
3269 const UInt32 complexity = kNumFreqCommands;
3270 UInt64 numIterations = complexInCommands / complexity;
3271 UInt32 numIterations2 = 1 << 30;
3272 if (numIterations > numIterations2)
3273 numIterations /= numIterations2;
3274 else
3275 {
3276 numIterations2 = (UInt32)numIterations;
3277 numIterations = 1;
3278 }
3279
3280 CBenchInfoCalc progressInfoSpec;
3281
3282 #ifndef Z7_ST
3283
3284 bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();
3285
3286 if (mtMode)
3287 {
3288 CFreqThreads threads;
3289 threads.Items = new CFreqInfo[numThreads];
3290 UInt32 i;
3291 for (i = 0; i < numThreads; i++)
3292 {
3293 CFreqInfo &info = threads.Items[i];
3294 info.Callback = _file;
3295 info.CallbackRes = S_OK;
3296 info.NumIterations = numIterations;
3297 info.Size = numIterations2;
3298 }
3299 progressInfoSpec.SetStartTime();
3300 for (i = 0; i < numThreads; i++)
3301 {
3302 // Sleep(10);
3303 CFreqInfo &info = threads.Items[i];
3304 WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
3305 if (info.Thread.IsCreated())
3306 threads.NumThreads++;
3307 if (wres != 0)
3308 return HRESULT_FROM_WIN32(wres);
3309 }
3310 WRes wres = threads.WaitAll();
3311 if (wres != 0)
3312 return HRESULT_FROM_WIN32(wres);
3313 for (i = 0; i < numThreads; i++)
3314 {
3315 RINOK(threads.Items[i].CallbackRes)
3316 }
3317 }
3318 else
3319 #endif
3320 {
3321 progressInfoSpec.SetStartTime();
3322 UInt32 sum = g_BenchCpuFreqTemp;
3323 UInt64 k = numIterations;
3324 do
3325 {
3326 sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
3327 if (_file)
3328 {
3329 RINOK(_file->CheckBreak())
3330 }
3331 }
3332 while (--k);
3333 res += sum;
3334 }
3335
3336 if (res == 0x12345678)
3337 if (_file)
3338 {
3339 RINOK(_file->CheckBreak())
3340 }
3341
3342 CBenchInfo info;
3343 progressInfoSpec.SetFinishTime(info);
3344
3345 info.UnpackSize = 0;
3346 info.PackSize = 0;
3347 info.NumIterations = 1;
3348
3349 const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
3350 const UInt64 rating = info.GetSpeed(numCommands);
3351 CpuFreqRes = rating / numThreads;
3352 UsageRes = info.GetUsage();
3353
3354 if (_file)
3355 {
3356 PrintResults(_file, info,
3357 0, // weight
3358 rating,
3359 showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
3360 RINOK(_file->CheckBreak())
3361 }
3362
3363 return S_OK;
3364 }
3365
3366
3367
3368 static HRESULT CrcBench(
3369 DECL_EXTERNAL_CODECS_LOC_VARS
3370 UInt64 complexInCommands,
3371 UInt32 numThreads,
3372 const size_t bufferSize,
3373 const Byte *fileData,
3374
3375 UInt64 &speed,
3376 UInt64 &usage,
3377
3378 UInt32 complexity, unsigned benchWeight,
3379 const UInt32 *checkSum,
3380 const COneMethodInfo &method,
3381 IBenchPrintCallback *_file,
3382 #ifndef Z7_ST
3383 const CAffinityMode *affinityMode,
3384 #endif
3385 bool showRating,
3386 CTotalBenchRes *encodeRes,
3387 bool showFreq, UInt64 cpuFreq)
3388 {
3389 if (numThreads == 0)
3390 numThreads = 1;
3391
3392 #ifdef Z7_ST
3393 numThreads = 1;
3394 #endif
3395
3396 const AString &methodName = method.MethodName;
3397 // methodName.RemoveChar(L'-');
3398 CMethodId hashID;
3399 if (!FindHashMethod(
3400 EXTERNAL_CODECS_LOC_VARS
3401 methodName, hashID))
3402 return E_NOTIMPL;
3403
3404 /*
3405 // if will generate random data in each thread, instead of global data
3406 CMidAlignedBuffer buffer;
3407 if (!fileData)
3408 {
3409 ALLOC_WITH_HRESULT(&buffer, bufferSize)
3410 RandGen(buffer, bufferSize);
3411 fileData = buffer;
3412 }
3413 */
3414
3415 const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
3416 UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
3417 if (numIterations == 0)
3418 numIterations = 1;
3419
3420 CBenchInfoCalc progressInfoSpec;
3421 CBenchInfo info;
3422
3423 #ifndef Z7_ST
3424 bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
3425
3426 if (mtEncMode)
3427 {
3428 CCrcThreads threads;
3429 threads.Items = new CCrcInfo[numThreads];
3430 {
3431 WRes wres = threads.Common.StartEvent.Create();
3432 if (wres != 0)
3433 return HRESULT_FROM_WIN32(wres);
3434 threads.NeedClose = true;
3435 }
3436
3437 UInt32 i;
3438 for (i = 0; i < numThreads; i++)
3439 {
3440 CCrcInfo &ci = threads.Items[i];
3441 AString name;
3442 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
3443 if (!ci.Hasher)
3444 return E_NOTIMPL;
3445 CMyComPtr<ICompressSetCoderProperties> scp;
3446 ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3447 if (scp)
3448 {
3449 RINOK(method.SetCoderProps(scp))
3450 }
3451
3452 ci.Callback = _file;
3453 ci.Data = fileData;
3454 ci.NumIterations = numIterations;
3455 ci.Size = bufferSize;
3456 ci.CheckSumDefined = false;
3457 if (checkSum)
3458 {
3459 ci.CheckSum = *checkSum;
3460 ci.CheckSumDefined = true;
3461 }
3462
3463 #ifdef USE_ALLOCA
3464 ci.AllocaSize = BENCH_ALLOCA_VALUE(i);
3465 #endif
3466 }
3467
3468 for (i = 0; i < numThreads; i++)
3469 {
3470 CCrcInfo &ci = threads.Items[i];
3471 ci.ThreadIndex = i;
3472 ci.Common = &threads.Common;
3473 ci.AffinityMode = *affinityMode;
3474 HRESULT hres = ci.CreateThread();
3475 if (ci.Thread.IsCreated())
3476 threads.NumThreads++;
3477 if (hres != 0)
3478 return hres;
3479 }
3480
3481 for (i = 0; i < numThreads; i++)
3482 {
3483 CCrcInfo &ci = threads.Items[i];
3484 WRes wres = ci.ReadyEvent.Lock();
3485 if (wres != 0)
3486 return HRESULT_FROM_WIN32(wres);
3487 RINOK(ci.Res)
3488 }
3489
3490 progressInfoSpec.SetStartTime();
3491
3492 WRes wres = threads.StartAndWait();
3493 if (wres != 0)
3494 return HRESULT_FROM_WIN32(wres);
3495
3496 progressInfoSpec.SetFinishTime(info);
3497
3498 for (i = 0; i < numThreads; i++)
3499 {
3500 RINOK(threads.Items[i].Res)
3501 if (i != 0)
3502 if (threads.Items[i].CheckSum_Res !=
3503 threads.Items[i - 1].CheckSum_Res)
3504 return S_FALSE;
3505 }
3506 }
3507 else
3508 #endif
3509 {
3510 CMyComPtr<IHasher> hasher;
3511 AString name;
3512 RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
3513 if (!hasher)
3514 return E_NOTIMPL;
3515 CMyComPtr<ICompressSetCoderProperties> scp;
3516 hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3517 if (scp)
3518 {
3519 RINOK(method.SetCoderProps(scp))
3520 }
3521 CCrcInfo_Base crcib;
3522 crcib.CreateLocalBuf = false;
3523 RINOK(crcib.Generate(fileData, bufferSize))
3524 progressInfoSpec.SetStartTime();
3525 RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
3526 progressInfoSpec.SetFinishTime(info);
3527 }
3528
3529
3530 UInt64 unpSize = numIterations * bufferSize;
3531 UInt64 unpSizeThreads = unpSize * numThreads;
3532 info.UnpackSize = unpSizeThreads;
3533 info.PackSize = unpSizeThreads;
3534 info.NumIterations = 1;
3535
3536 if (_file)
3537 {
3538 if (showRating)
3539 {
3540 UInt64 unpSizeThreads2 = unpSizeThreads;
3541 if (unpSizeThreads2 == 0)
3542 unpSizeThreads2 = numIterations * 1 * numThreads;
3543 const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
3544 const UInt64 rating = info.GetSpeed(numCommands);
3545 PrintResults(_file, info,
3546 benchWeight, rating,
3547 showFreq, cpuFreq, encodeRes);
3548 }
3549 RINOK(_file->CheckBreak())
3550 }
3551
3552 speed = info.GetSpeed(unpSizeThreads);
3553 usage = info.GetUsage();
3554
3555 return S_OK;
3556 }
3557
3558
3559
3560 static HRESULT TotalBench_Hash(
3561 DECL_EXTERNAL_CODECS_LOC_VARS
3562 const COneMethodInfo &methodMask,
3563 UInt64 complexInCommands,
3564 UInt32 numThreads,
3565 size_t bufSize,
3566 const Byte *fileData,
3567 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3568 #ifndef Z7_ST
3569 const CAffinityMode *affinityMode,
3570 #endif
3571 CTotalBenchRes *encodeRes,
3572 bool showFreq, UInt64 cpuFreq)
3573 {
3574 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3575 {
3576 const CBenchHash &bench = g_Hash[i];
3577 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3578 continue;
3579 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3580 // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3581 // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3582 // callback->BenchProps.EncComplex = bench.EncComplex;
3583
3584 COneMethodInfo method;
3585 NCOM::CPropVariant propVariant;
3586 propVariant = bench.Name;
3587 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3588
3589 UInt64 speed, usage;
3590
3591 const HRESULT res = CrcBench(
3592 EXTERNAL_CODECS_LOC_VARS
3593 complexInCommands,
3594 numThreads, bufSize, fileData,
3595 speed, usage,
3596 bench.Complex, bench.Weight,
3597 (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3598 method,
3599 printCallback,
3600 #ifndef Z7_ST
3601 affinityMode,
3602 #endif
3603 true, // showRating
3604 encodeRes, showFreq, cpuFreq);
3605 if (res == E_NOTIMPL)
3606 {
3607 // callback->Print(" ---");
3608 }
3609 else
3610 {
3611 RINOK(res)
3612 }
3613 callback->NewLine();
3614 }
3615 return S_OK;
3616 }
3617
3618 struct CTempValues
3619 {
3620 UInt64 *Values;
3621 CTempValues(): Values(NULL) {}
3622 void Alloc(UInt32 num) { Values = new UInt64[num]; }
3623 ~CTempValues() { delete []Values; }
3624 };
3625
3626 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3627 {
3628 const wchar_t *end;
3629 UInt64 result = ConvertStringToUInt64(s, &end);
3630 if (*end != 0 || s.IsEmpty())
3631 prop = s;
3632 else if (result <= (UInt32)0xFFFFFFFF)
3633 prop = (UInt32)result;
3634 else
3635 prop = result;
3636 }
3637
3638
3639 static bool AreSameMethodNames(const char *fullName, const char *shortName)
3640 {
3641 return StringsAreEqualNoCase_Ascii(fullName, shortName);
3642 }
3643
3644
3645
3646
3647 static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3648 {
3649 PrintRequirements(f, "usage:", true, usage, "Benchmark threads: ", threads);
3650 }
3651
3652
3653 static void Print_Delimiter(IBenchPrintCallback &f)
3654 {
3655 f.Print(" |");
3656 }
3657
3658 static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3659 {
3660 char s[16];
3661 ConvertUInt32ToString(pow, s);
3662 unsigned pos = MyStringLen(s);
3663 s[pos++] = ':';
3664 s[pos] = 0;
3665 PrintLeft(f, s, kFieldSize_SmallName); // 4
3666 }
3667
3668 static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3669 UInt64 usage, UInt64 speed)
3670 {
3671 PrintUsage(f, usage, kFieldSize_Usage);
3672 PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3673 }
3674
3675
3676 HRESULT Bench(
3677 DECL_EXTERNAL_CODECS_LOC_VARS
3678 IBenchPrintCallback *printCallback,
3679 IBenchCallback *benchCallback,
3680 const CObjectVector<CProperty> &props,
3681 UInt32 numIterations,
3682 bool multiDict,
3683 IBenchFreqCallback *freqCallback)
3684 {
3685 // for (int y = 0; y < 10000; y++)
3686 if (!CrcInternalTest())
3687 return E_FAIL;
3688
3689 UInt32 numCPUs = 1;
3690 UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
3691
3692 NSystem::CProcessAffinity threadsInfo;
3693 threadsInfo.InitST();
3694
3695 #ifndef Z7_ST
3696
3697 if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3698 numCPUs = threadsInfo.GetNumProcessThreads();
3699 else
3700 numCPUs = NSystem::GetNumberOfProcessors();
3701
3702 #endif
3703
3704 // numCPUs = 24;
3705 /*
3706 {
3707 DWORD_PTR mask = (1 << 0);
3708 DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3709 old = old;
3710 DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3711 old2 = old2;
3712 return 0;
3713 }
3714 */
3715
3716 bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3717
3718 UInt32 numThreadsSpecified = numCPUs;
3719 bool needSetComplexity = false;
3720 UInt32 testTimeMs = kComplexInMs;
3721 UInt32 startDicLog = 22;
3722 bool startDicLog_Defined = false;
3723 UInt64 specifiedFreq = 0;
3724 bool multiThreadTests = false;
3725 UInt64 complexInCommands = kComplexInCommands;
3726 UInt32 numThreads_Start = 1;
3727
3728 #ifndef Z7_ST
3729 CAffinityMode affinityMode;
3730 #endif
3731
3732
3733 COneMethodInfo method;
3734
3735 CMidAlignedBuffer fileDataBuffer;
3736 bool use_fileData = false;
3737 bool isFixedDict = false;
3738
3739 {
3740 unsigned i;
3741
3742 if (printCallback)
3743 {
3744 for (i = 0; i < props.Size(); i++)
3745 {
3746 const CProperty &property = props[i];
3747 printCallback->Print(" ");
3748 printCallback->Print(GetAnsiString(property.Name));
3749 if (!property.Value.IsEmpty())
3750 {
3751 printCallback->Print("=");
3752 printCallback->Print(GetAnsiString(property.Value));
3753 }
3754 }
3755 if (!props.IsEmpty())
3756 printCallback->NewLine();
3757 }
3758
3759
3760 for (i = 0; i < props.Size(); i++)
3761 {
3762 const CProperty &property = props[i];
3763 UString name (property.Name);
3764 name.MakeLower_Ascii();
3765
3766 if (name.IsEqualTo("file"))
3767 {
3768 if (property.Value.IsEmpty())
3769 return E_INVALIDARG;
3770
3771 NFile::NIO::CInFile file;
3772 if (!file.Open(us2fs(property.Value)))
3773 return GetLastError_noZero_HRESULT();
3774 size_t len;
3775 {
3776 UInt64 len64;
3777 if (!file.GetLength(len64))
3778 return GetLastError_noZero_HRESULT();
3779 if (printCallback)
3780 {
3781 printCallback->Print("file size =");
3782 PrintNumber(*printCallback, len64, 0);
3783 printCallback->NewLine();
3784 }
3785 len = (size_t)len64;
3786 if (len != len64)
3787 return E_INVALIDARG;
3788 }
3789
3790 // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3791
3792 ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3793 use_fileData = true;
3794
3795 {
3796 size_t processed;
3797 if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3798 return GetLastError_noZero_HRESULT();
3799 if (processed != len)
3800 return E_FAIL;
3801 }
3802 continue;
3803 }
3804
3805 NCOM::CPropVariant propVariant;
3806 if (!property.Value.IsEmpty())
3807 ParseNumberString(property.Value, propVariant);
3808
3809 if (name.IsEqualTo("time"))
3810 {
3811 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3812 needSetComplexity = true;
3813 testTimeMs *= 1000;
3814 continue;
3815 }
3816
3817 if (name.IsEqualTo("timems"))
3818 {
3819 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3820 needSetComplexity = true;
3821 continue;
3822 }
3823
3824 if (name.IsEqualTo("tic"))
3825 {
3826 UInt32 v;
3827 RINOK(ParsePropToUInt32(UString(), propVariant, v))
3828 if (v >= 64)
3829 return E_INVALIDARG;
3830 complexInCommands = (UInt64)1 << v;
3831 continue;
3832 }
3833
3834 const bool isCurrent_fixedDict = name.IsEqualTo("df");
3835 if (isCurrent_fixedDict)
3836 isFixedDict = true;
3837 if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3838 {
3839 RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3840 if (startDicLog > 32)
3841 return E_INVALIDARG;
3842 startDicLog_Defined = true;
3843 continue;
3844 }
3845
3846 if (name.IsEqualTo("mts"))
3847 {
3848 RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3849 continue;
3850 }
3851
3852 if (name.IsEqualTo("af"))
3853 {
3854 UInt32 bundle;
3855 RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3856 if (bundle > 0 && bundle < numCPUs)
3857 {
3858 #ifndef Z7_ST
3859 affinityMode.SetLevels(numCPUs, 2);
3860 affinityMode.NumBundleThreads = bundle;
3861 #endif
3862 }
3863 continue;
3864 }
3865
3866 if (name.IsEqualTo("freq"))
3867 {
3868 UInt32 freq32 = 0;
3869 RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3870 if (freq32 == 0)
3871 return E_INVALIDARG;
3872 specifiedFreq = (UInt64)freq32 * 1000000;
3873
3874 if (printCallback)
3875 {
3876 printCallback->Print("freq=");
3877 PrintNumber(*printCallback, freq32, 0);
3878 printCallback->NewLine();
3879 }
3880
3881 continue;
3882 }
3883
3884 if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3885 {
3886 const UString s = name.Ptr(2);
3887 if (s.IsEqualTo("*")
3888 || (s.IsEmpty()
3889 && propVariant.vt == VT_BSTR
3890 && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3891 {
3892 multiThreadTests = true;
3893 continue;
3894 }
3895 #ifndef Z7_ST
3896 RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3897 #endif
3898 continue;
3899 }
3900
3901 RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3902 }
3903 }
3904
3905 if (printCallback)
3906 {
3907 AString s;
3908
3909 #if 1 || !defined(Z7_MSC_VER_ORIGINAL) || (Z7_MSC_VER_ORIGINAL >= 1900)
3910 s += "Compiler: ";
3911 GetCompiler(s);
3912 printCallback->Print(s);
3913 printCallback->NewLine();
3914 s.Empty();
3915 #endif
3916
3917 GetSystemInfoText(s);
3918 printCallback->Print(s);
3919 printCallback->NewLine();
3920 }
3921
3922 if (printCallback)
3923 {
3924 printCallback->Print("1T CPU Freq (MHz):");
3925 }
3926
3927 if (printCallback || freqCallback)
3928 {
3929 UInt64 numMilCommands = 1 << 6;
3930 if (specifiedFreq != 0)
3931 {
3932 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3933 numMilCommands >>= 1;
3934 }
3935
3936 for (int jj = 0;; jj++)
3937 {
3938 if (printCallback)
3939 RINOK(printCallback->CheckBreak())
3940
3941 UInt64 start = ::GetTimeCount();
3942 UInt32 sum = (UInt32)start;
3943 sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3944 if (sum == 0xF1541213)
3945 if (printCallback)
3946 printCallback->Print("");
3947 const UInt64 realDelta = ::GetTimeCount() - start;
3948 start = realDelta;
3949 if (start == 0)
3950 start = 1;
3951 if (start > (UInt64)1 << 61)
3952 start = 1;
3953 const UInt64 freq = GetFreq();
3954 // mips is constant in some compilers
3955 const UInt64 hzVal = MyMultDiv64(numMilCommands * 1000000, freq, start);
3956 const UInt64 mipsVal = numMilCommands * freq / start;
3957 if (printCallback)
3958 {
3959 if (realDelta == 0)
3960 {
3961 printCallback->Print(" -");
3962 }
3963 else
3964 {
3965 // PrintNumber(*printCallback, start, 0);
3966 PrintNumber(*printCallback, mipsVal, 5);
3967 }
3968 }
3969 if (freqCallback)
3970 {
3971 RINOK(freqCallback->AddCpuFreq(1, hzVal, kBenchmarkUsageMult))
3972 }
3973
3974 if (jj >= 1)
3975 {
3976 bool needStop = (numMilCommands >= (1 <<
3977 #ifdef _DEBUG
3978 7
3979 #else
3980 11
3981 #endif
3982 ));
3983 if (start >= freq * 16)
3984 {
3985 printCallback->Print(" (Cmplx)");
3986 if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
3987 {
3988 needSetComplexity = true;
3989 }
3990 needStop = true;
3991 }
3992 if (needSetComplexity)
3993 SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
3994 if (needStop)
3995 break;
3996 numMilCommands <<= 1;
3997 }
3998 }
3999 if (freqCallback)
4000 {
4001 RINOK(freqCallback->FreqsFinished(1))
4002 }
4003 }
4004
4005 if (numThreadsSpecified >= 2)
4006 if (printCallback || freqCallback)
4007 {
4008 if (printCallback)
4009 printCallback->NewLine();
4010
4011 /* it can show incorrect frequency for HT threads.
4012 so we reduce freq test to (numCPUs / 2) */
4013
4014 UInt32 numThreads = (numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2 : numThreadsSpecified);
4015 if (numThreads < 1)
4016 numThreads = 1;
4017
4018 if (printCallback)
4019 {
4020 char s[128];
4021 ConvertUInt64ToString(numThreads, s);
4022 printCallback->Print(s);
4023 printCallback->Print("T CPU Freq (MHz):");
4024 }
4025 UInt64 numMilCommands = 1 <<
4026 #ifdef _DEBUG
4027 7;
4028 #else
4029 10;
4030 #endif
4031
4032 if (specifiedFreq != 0)
4033 {
4034 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
4035 numMilCommands >>= 1;
4036 }
4037
4038 // for (int jj = 0;; jj++)
4039 for (;;)
4040 {
4041 if (printCallback)
4042 RINOK(printCallback->CheckBreak())
4043
4044 {
4045 // PrintLeft(f, "CPU", kFieldSize_Name);
4046
4047 // UInt32 resVal;
4048
4049 CFreqBench fb;
4050 fb.complexInCommands = numMilCommands * 1000000;
4051 fb.numThreads = numThreads;
4052 // showFreq;
4053 // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4054 fb.showFreq = true;
4055 fb.specifiedFreq = 1;
4056
4057 const HRESULT res = fb.FreqBench(NULL /* printCallback */
4058 #ifndef Z7_ST
4059 , &affinityMode
4060 #endif
4061 );
4062 RINOK(res)
4063
4064 if (freqCallback)
4065 {
4066 RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4067 }
4068
4069 if (printCallback)
4070 {
4071 /*
4072 if (realDelta == 0)
4073 {
4074 printCallback->Print(" -");
4075 }
4076 else
4077 */
4078 {
4079 // PrintNumber(*printCallback, start, 0);
4080 PrintUsage(*printCallback, fb.UsageRes, 3);
4081 printCallback->Print("%");
4082 PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4083 printCallback->Print(" ");
4084
4085 // PrintNumber(*printCallback, fb.UsageRes, 5);
4086 }
4087 }
4088 }
4089 // if (jj >= 1)
4090 {
4091 const bool needStop = (numMilCommands >= (1 <<
4092 #ifdef _DEBUG
4093 7
4094 #else
4095 11
4096 #endif
4097 ));
4098 if (needStop)
4099 break;
4100 numMilCommands <<= 1;
4101 }
4102 }
4103 if (freqCallback)
4104 {
4105 RINOK(freqCallback->FreqsFinished(numThreads))
4106 }
4107 }
4108
4109
4110 if (printCallback)
4111 {
4112 printCallback->NewLine();
4113 printCallback->NewLine();
4114 PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4115 printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4116 printCallback->NewLine();
4117 }
4118
4119 if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4120 return E_INVALIDARG;
4121
4122 UInt64 dict = (UInt64)1 << startDicLog;
4123 const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4124
4125 const unsigned level = method.GetLevel();
4126
4127 AString &methodName = method.MethodName;
4128 const AString original_MethodName = methodName;
4129 if (methodName.IsEmpty())
4130 methodName = "LZMA";
4131
4132 if (benchCallback)
4133 {
4134 CBenchProps benchProps;
4135 benchProps.SetLzmaCompexity();
4136 const UInt64 dictSize = method.Get_Lzma_DicSize();
4137
4138 size_t uncompressedDataSize;
4139 if (use_fileData)
4140 {
4141 uncompressedDataSize = fileDataBuffer.Size();
4142 }
4143 else
4144 {
4145 uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4146 if (uncompressedDataSize < dictSize)
4147 return E_INVALIDARG;
4148 }
4149
4150 return MethodBench(
4151 EXTERNAL_CODECS_LOC_VARS
4152 complexInCommands,
4153 #ifndef Z7_ST
4154 true, numThreadsSpecified,
4155 &affinityMode,
4156 #endif
4157 method,
4158 uncompressedDataSize, (const Byte *)fileDataBuffer,
4159 kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4160 }
4161
4162 if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4163 methodName = "crc32";
4164
4165 CMethodId hashID;
4166 const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4167 int codecIndex = -1;
4168 bool isFilter = false;
4169 if (!isHashMethod)
4170 {
4171 UInt32 numStreams;
4172 codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4173 true, // encode
4174 hashID, numStreams, isFilter);
4175 // we can allow non filter for BW tests
4176 if (!isFilter) codecIndex = -1;
4177 }
4178
4179 CBenchCallbackToPrint callback;
4180 callback.Init();
4181 callback._file = printCallback;
4182
4183 if (isHashMethod || codecIndex != -1)
4184 {
4185 if (!printCallback)
4186 return S_FALSE;
4187 IBenchPrintCallback &f = *printCallback;
4188
4189 UInt64 dict64 = dict;
4190 if (!dictIsDefined)
4191 dict64 = (1 << 27);
4192 if (use_fileData)
4193 {
4194 if (!dictIsDefined)
4195 dict64 = fileDataBuffer.Size();
4196 else if (dict64 > fileDataBuffer.Size())
4197 dict64 = fileDataBuffer.Size();
4198 }
4199
4200 for (;;)
4201 {
4202 const int index = method.FindProp(NCoderPropID::kDictionarySize);
4203 if (index < 0)
4204 break;
4205 method.Props.Delete((unsigned)index);
4206 }
4207
4208 // methodName.RemoveChar(L'-');
4209 Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4210 const UInt32 *checkSum = NULL;
4211 int benchIndex = -1;
4212
4213 if (isHashMethod)
4214 {
4215 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4216 {
4217 const CBenchHash &h = g_Hash[i];
4218 AString benchMethod (h.Name);
4219 AString benchProps;
4220 const int propPos = benchMethod.Find(':');
4221 if (propPos >= 0)
4222 {
4223 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4224 benchMethod.DeleteFrom((unsigned)propPos);
4225 }
4226
4227 if (AreSameMethodNames(benchMethod, methodName))
4228 {
4229 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4230 /*
4231 bool isMainMethod = method.PropsString.IsEmpty();
4232 if (isMainMethod)
4233 isMainMethod = !checkSum
4234 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4235 if (sameProps || isMainMethod)
4236 */
4237 {
4238 complexity = (Int32)h.Complex;
4239 checkSum = &h.CheckSum;
4240 if (sameProps)
4241 break;
4242 /*
4243 if property. is not specified, we use the complexity
4244 for latest fastest method (crc32:64)
4245 */
4246 }
4247 }
4248 }
4249 // if (!checkSum) return E_NOTIMPL;
4250 }
4251 else
4252 {
4253 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4254 {
4255 const CBenchMethod &bench = g_Bench[i];
4256 AString benchMethod (bench.Name);
4257 AString benchProps;
4258 const int propPos = benchMethod.Find(':');
4259 if (propPos >= 0)
4260 {
4261 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4262 benchMethod.DeleteFrom((unsigned)propPos);
4263 }
4264
4265 if (AreSameMethodNames(benchMethod, methodName))
4266 {
4267 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4268 // bool isMainMethod = method.PropsString.IsEmpty();
4269 // if (sameProps || isMainMethod)
4270 {
4271 benchIndex = (int)i;
4272 if (sameProps)
4273 break;
4274 }
4275 }
4276 }
4277 // if (benchIndex < 0) return E_NOTIMPL;
4278 }
4279
4280 {
4281 /* we count usage only for crc and filter. non-filters are not supported */
4282 UInt64 usage = (1 << 20);
4283 UInt64 bufSize = dict64;
4284 UInt32 numBlocks = isHashMethod ? 1 : 3;
4285 if (use_fileData)
4286 {
4287 usage += fileDataBuffer.Size();
4288 if (bufSize > fileDataBuffer.Size())
4289 bufSize = fileDataBuffer.Size();
4290 if (isHashMethod)
4291 {
4292 numBlocks = 0;
4293 #ifndef Z7_ST
4294 if (numThreadsSpecified != 1)
4295 numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4296 #endif
4297 }
4298 }
4299 usage += numThreadsSpecified * bufSize * numBlocks;
4300 Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4301 }
4302
4303 CUIntVector numThreadsVector;
4304 {
4305 unsigned nt = numThreads_Start;
4306 for (;;)
4307 {
4308 if (nt > numThreadsSpecified)
4309 break;
4310 numThreadsVector.Add(nt);
4311 const unsigned next = nt * 2;
4312 const UInt32 ntHalf= numThreadsSpecified / 2;
4313 if (ntHalf > nt && ntHalf < next)
4314 numThreadsVector.Add(ntHalf);
4315 if (numThreadsSpecified > nt && numThreadsSpecified < next)
4316 numThreadsVector.Add(numThreadsSpecified);
4317 nt = next;
4318 }
4319 }
4320
4321 unsigned numColumns = isHashMethod ? 1 : 2;
4322 CTempValues speedTotals;
4323 CTempValues usageTotals;
4324 {
4325 const unsigned numItems = numThreadsVector.Size() * numColumns;
4326 speedTotals.Alloc(numItems);
4327 usageTotals.Alloc(numItems);
4328 for (unsigned i = 0; i < numItems; i++)
4329 {
4330 speedTotals.Values[i] = 0;
4331 usageTotals.Values[i] = 0;
4332 }
4333 }
4334
4335 f.NewLine();
4336 for (unsigned line = 0; line < 3; line++)
4337 {
4338 f.NewLine();
4339 f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size");
4340 FOR_VECTOR (ti, numThreadsVector)
4341 {
4342 if (ti != 0)
4343 Print_Delimiter(f);
4344 if (line == 0)
4345 {
4346 PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4347 PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4348 }
4349 else
4350 {
4351 for (unsigned c = 0; c < numColumns; c++)
4352 {
4353 PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1);
4354 PrintRight(f, line == 1 ? "BW" : "MB/s", kFieldSize_CrcSpeed + 1);
4355 }
4356 }
4357 }
4358 }
4359 f.NewLine();
4360
4361 UInt64 numSteps = 0;
4362
4363 // for (UInt32 iter = 0; iter < numIterations; iter++)
4364 // {
4365 unsigned pow = 10; // kNumHashDictBits
4366 if (startDicLog_Defined)
4367 pow = startDicLog;
4368
4369 // #define NUM_SUB_BITS 2
4370 // pow <<= NUM_SUB_BITS;
4371 for (;; pow++)
4372 {
4373 const UInt64 bufSize = (UInt64)1 << pow;
4374 // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4375 // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4376
4377 size_t dataSize = fileDataBuffer.Size();
4378 if (dataSize > bufSize || !use_fileData)
4379 dataSize = (size_t)bufSize;
4380
4381 for (UInt32 iter = 0; iter < numIterations; iter++)
4382 {
4383 Print_Pow(f, pow);
4384 // PrintNumber(f, bufSize >> 10, 4);
4385
4386 FOR_VECTOR (ti, numThreadsVector)
4387 {
4388 RINOK(f.CheckBreak())
4389 const UInt32 numThreads = numThreadsVector[ti];
4390 if (isHashMethod)
4391 {
4392 UInt64 speed = 0;
4393 UInt64 usage = 0;
4394 const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4395 numThreads,
4396 dataSize, (const Byte *)fileDataBuffer,
4397 speed, usage,
4398 (UInt32)complexity,
4399 1, // benchWeight,
4400 (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4401 method,
4402 &f,
4403 #ifndef Z7_ST
4404 &affinityMode,
4405 #endif
4406 false, // showRating
4407 NULL, false, 0);
4408 RINOK(res)
4409
4410 if (ti != 0)
4411 Print_Delimiter(f);
4412
4413 Bench_BW_Print_Usage_Speed(f, usage, speed);
4414 speedTotals.Values[ti] += speed;
4415 usageTotals.Values[ti] += usage;
4416 }
4417 else
4418 {
4419 {
4420 unsigned keySize = 32;
4421 if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4422 else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4423 callback.BenchProps.KeySize = keySize;
4424 }
4425
4426 COneMethodInfo method2 = method;
4427 unsigned bench_DictBits;
4428
4429 if (benchIndex >= 0)
4430 {
4431 const CBenchMethod &bench = g_Bench[benchIndex];
4432 callback.BenchProps.EncComplex = bench.EncComplex;
4433 callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4434 callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4435 bench_DictBits = bench.DictBits;
4436 // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4437 }
4438 else
4439 {
4440 bench_DictBits = kOldLzmaDictBits; // = 32 default
4441 if (isFilter)
4442 {
4443 const unsigned k_UnknownCoderComplexity = 4;
4444 callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4445 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4446 }
4447 else
4448 {
4449 callback.BenchProps.EncComplex = 1 << 10;
4450 callback.BenchProps.DecComplexUnc = 1 << 6;
4451 }
4452 callback.BenchProps.DecComplexCompr = 0;
4453 }
4454 callback.NeedPrint = false;
4455
4456 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4457 {
4458 const NCOM::CPropVariant propVariant = (UInt32)pow;
4459 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4460 }
4461
4462 const HRESULT res = MethodBench(
4463 EXTERNAL_CODECS_LOC_VARS
4464 complexInCommands,
4465 #ifndef Z7_ST
4466 false, // oldLzmaBenchMode
4467 numThreadsVector[ti],
4468 &affinityMode,
4469 #endif
4470 method2,
4471 dataSize, (const Byte *)fileDataBuffer,
4472 bench_DictBits,
4473 printCallback,
4474 &callback,
4475 &callback.BenchProps);
4476 RINOK(res)
4477
4478 if (ti != 0)
4479 Print_Delimiter(f);
4480
4481 for (unsigned i = 0; i < 2; i++)
4482 {
4483 const CBenchInfo &bi = callback.BenchInfo_Results[i];
4484 const UInt64 usage = bi.GetUsage();
4485 const UInt64 speed = bi.GetUnpackSizeSpeed();
4486 usageTotals.Values[ti * 2 + i] += usage;
4487 speedTotals.Values[ti * 2 + i] += speed;
4488 Bench_BW_Print_Usage_Speed(f, usage, speed);
4489 }
4490 }
4491 }
4492
4493 f.NewLine();
4494 numSteps++;
4495 }
4496 if (dataSize >= dict64)
4497 break;
4498 }
4499
4500 if (numSteps != 0)
4501 {
4502 f.Print("Avg:");
4503 for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4504 {
4505 if (ti != 0)
4506 Print_Delimiter(f);
4507 for (unsigned i = 0; i < numColumns; i++)
4508 Bench_BW_Print_Usage_Speed(f,
4509 usageTotals.Values[ti * numColumns + i] / numSteps,
4510 speedTotals.Values[ti * numColumns + i] / numSteps);
4511 }
4512 f.NewLine();
4513 }
4514
4515 return S_OK;
4516 }
4517
4518 bool use2Columns = false;
4519
4520 bool totalBenchMode = false;
4521 bool onlyHashBench = false;
4522 if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4523 {
4524 onlyHashBench = true;
4525 methodName = "*";
4526 totalBenchMode = true;
4527 }
4528 else if (methodName.Find('*') >= 0)
4529 totalBenchMode = true;
4530
4531 // ---------- Threads loop ----------
4532 for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4533 {
4534
4535 UInt32 numThreads = numThreadsSpecified;
4536
4537 if (!multiThreadTests)
4538 {
4539 if (threadsPassIndex != 0)
4540 break;
4541 }
4542 else
4543 {
4544 numThreads = 1;
4545 if (threadsPassIndex != 0)
4546 {
4547 if (numCPUs < 2)
4548 break;
4549 numThreads = numCPUs;
4550 if (threadsPassIndex == 1)
4551 {
4552 if (numCPUs >= 4)
4553 numThreads = numCPUs / 2;
4554 }
4555 else if (numCPUs < 4)
4556 break;
4557 }
4558 }
4559
4560 IBenchPrintCallback &f = *printCallback;
4561
4562 if (threadsPassIndex > 0)
4563 {
4564 f.NewLine();
4565 f.NewLine();
4566 }
4567
4568 if (!dictIsDefined && !onlyHashBench)
4569 {
4570 const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4571 unsigned dicSizeLog = dicSizeLog_Main;
4572
4573 #ifdef UNDER_CE
4574 dicSizeLog = (UInt64)1 << 20;
4575 #endif
4576
4577 if (ramSize_Defined)
4578 for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4579 if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4580 break;
4581
4582 dict = (UInt64)1 << dicSizeLog;
4583
4584 if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4585 {
4586 f.Print("Dictionary reduced to: ");
4587 PrintNumber(f, dicSizeLog, 1);
4588 f.NewLine();
4589 }
4590 }
4591
4592 Print_Usage_and_Threads(f,
4593 onlyHashBench ?
4594 GetBenchMemoryUsage_Hash(numThreads, dict) :
4595 GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4596 numThreads);
4597
4598 f.NewLine();
4599
4600 f.NewLine();
4601
4602 if (totalBenchMode)
4603 {
4604 callback.NameFieldSize = kFieldSize_Name;
4605 use2Columns = false;
4606 }
4607 else
4608 {
4609 callback.NameFieldSize = kFieldSize_SmallName;
4610 use2Columns = true;
4611 }
4612 callback.Use2Columns = use2Columns;
4613
4614 bool showFreq = false;
4615 UInt64 cpuFreq = 0;
4616
4617 if (totalBenchMode)
4618 {
4619 showFreq = true;
4620 }
4621
4622 unsigned fileldSize = kFieldSize_TotalSize;
4623 if (showFreq)
4624 fileldSize += kFieldSize_EUAndEffec;
4625
4626 if (use2Columns)
4627 {
4628 PrintSpaces(f, callback.NameFieldSize);
4629 PrintRight(f, "Compressing", fileldSize);
4630 f.Print(kSep);
4631 PrintRight(f, "Decompressing", fileldSize);
4632 }
4633 f.NewLine();
4634 PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4635
4636 int j;
4637
4638 for (j = 0; j < 2; j++)
4639 {
4640 PrintRight(f, "Speed", kFieldSize_Speed + 1);
4641 PrintRight(f, "Usage", kFieldSize_Usage + 1);
4642 PrintRight(f, "R/U", kFieldSize_RU + 1);
4643 PrintRight(f, "Rating", kFieldSize_Rating + 1);
4644 if (showFreq)
4645 {
4646 PrintRight(f, "E/U", kFieldSize_EU + 1);
4647 PrintRight(f, "Effec", kFieldSize_Effec + 1);
4648 }
4649 if (!use2Columns)
4650 break;
4651 if (j == 0)
4652 f.Print(kSep);
4653 }
4654
4655 f.NewLine();
4656 PrintSpaces(f, callback.NameFieldSize);
4657
4658 for (j = 0; j < 2; j++)
4659 {
4660 PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4661 PrintRight(f, "%", kFieldSize_Usage + 1);
4662 PrintRight(f, "MIPS", kFieldSize_RU + 1);
4663 PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4664 if (showFreq)
4665 {
4666 PrintRight(f, "%", kFieldSize_EU + 1);
4667 PrintRight(f, "%", kFieldSize_Effec + 1);
4668 }
4669 if (!use2Columns)
4670 break;
4671 if (j == 0)
4672 f.Print(kSep);
4673 }
4674
4675 f.NewLine();
4676 f.NewLine();
4677
4678 if (specifiedFreq != 0)
4679 cpuFreq = specifiedFreq;
4680
4681 // bool showTotalSpeed = false;
4682
4683 if (totalBenchMode)
4684 {
4685 for (UInt32 i = 0; i < numIterations; i++)
4686 {
4687 if (i != 0)
4688 printCallback->NewLine();
4689
4690 const unsigned kNumCpuTests = 3;
4691 for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4692 {
4693 PrintLeft(f, "CPU", kFieldSize_Name);
4694
4695 // UInt32 resVal;
4696
4697 CFreqBench fb;
4698 fb.complexInCommands = complexInCommands;
4699 fb.numThreads = numThreads;
4700 // showFreq;
4701 fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4702 fb.specifiedFreq = specifiedFreq;
4703
4704 const HRESULT res = fb.FreqBench(printCallback
4705 #ifndef Z7_ST
4706 , &affinityMode
4707 #endif
4708 );
4709 RINOK(res)
4710
4711 cpuFreq = fb.CpuFreqRes;
4712 callback.NewLine();
4713
4714 if (specifiedFreq != 0)
4715 cpuFreq = specifiedFreq;
4716
4717 if (testTimeMs >= 1000)
4718 if (freqTest == kNumCpuTests - 1)
4719 {
4720 // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4721 }
4722 }
4723 callback.NewLine();
4724
4725 // return S_OK; // change it
4726
4727 callback.SetFreq(true, cpuFreq);
4728
4729 if (!onlyHashBench)
4730 {
4731 size_t dataSize = (size_t)dict;
4732 if (use_fileData)
4733 {
4734 dataSize = fileDataBuffer.Size();
4735 if (dictIsDefined && dataSize > dict)
4736 dataSize = (size_t)dict;
4737 }
4738
4739 const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4740 method, complexInCommands,
4741 #ifndef Z7_ST
4742 numThreads,
4743 &affinityMode,
4744 #endif
4745 dictIsDefined || use_fileData, // forceUnpackSize
4746 dataSize,
4747 (const Byte *)fileDataBuffer,
4748 printCallback, &callback);
4749 RINOK(res)
4750 }
4751
4752 {
4753 size_t dataSize = (size_t)1 << kNumHashDictBits;
4754 if (dictIsDefined)
4755 {
4756 dataSize = (size_t)dict;
4757 if (dataSize != dict)
4758 return E_OUTOFMEMORY;
4759 }
4760 if (use_fileData)
4761 {
4762 dataSize = fileDataBuffer.Size();
4763 if (dictIsDefined && dataSize > dict)
4764 dataSize = (size_t)dict;
4765 }
4766
4767 const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4768 method, complexInCommands,
4769 numThreads,
4770 dataSize, (const Byte *)fileDataBuffer,
4771 printCallback, &callback,
4772 #ifndef Z7_ST
4773 &affinityMode,
4774 #endif
4775 &callback.EncodeRes, true, cpuFreq);
4776 RINOK(res)
4777 }
4778
4779 callback.NewLine();
4780 {
4781 PrintLeft(f, "CPU", kFieldSize_Name);
4782
4783 CFreqBench fb;
4784 fb.complexInCommands = complexInCommands;
4785 fb.numThreads = numThreads;
4786 // showFreq;
4787 fb.showFreq = (specifiedFreq != 0);
4788 fb.specifiedFreq = specifiedFreq;
4789
4790 const HRESULT res = fb.FreqBench(printCallback
4791 #ifndef Z7_ST
4792 , &affinityMode
4793 #endif
4794 );
4795 RINOK(res)
4796 callback.NewLine();
4797 }
4798 }
4799 }
4800 else
4801 {
4802 needSetComplexity = true;
4803 if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4804 {
4805 unsigned i;
4806 for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4807 {
4808 const CBenchMethod &h = g_Bench[i];
4809 AString benchMethod (h.Name);
4810 AString benchProps;
4811 const int propPos = benchMethod.Find(':');
4812 if (propPos >= 0)
4813 {
4814 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4815 benchMethod.DeleteFrom((unsigned)propPos);
4816 }
4817
4818 if (AreSameMethodNames(benchMethod, methodName))
4819 {
4820 if (benchProps.IsEmpty()
4821 || (benchProps == "x5" && method.PropsString.IsEmpty())
4822 || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4823 {
4824 callback.BenchProps.EncComplex = h.EncComplex;
4825 callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4826 callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4827 needSetComplexity = false;
4828 break;
4829 }
4830 }
4831 }
4832 /*
4833 if (i == Z7_ARRAY_SIZE(g_Bench))
4834 return E_NOTIMPL;
4835 */
4836 }
4837 if (needSetComplexity)
4838 callback.BenchProps.SetLzmaCompexity();
4839
4840 if (startDicLog < kBenchMinDicLogSize)
4841 startDicLog = kBenchMinDicLogSize;
4842
4843 for (unsigned i = 0; i < numIterations; i++)
4844 {
4845 unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4846 if (!multiDict)
4847 pow = 32;
4848 while (GetDictSizeFromLog(pow) > dict && pow > 0)
4849 pow--;
4850 for (; GetDictSizeFromLog(pow) <= dict; pow++)
4851 {
4852 Print_Pow(f, pow);
4853 callback.DictSize = (UInt64)1 << pow;
4854
4855 COneMethodInfo method2 = method;
4856
4857 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4858 {
4859 // We add dictionary size property.
4860 // method2 can have two different dictionary size properties.
4861 // And last property is main.
4862 NCOM::CPropVariant propVariant = (UInt32)pow;
4863 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4864 }
4865
4866 size_t uncompressedDataSize;
4867 if (use_fileData)
4868 {
4869 uncompressedDataSize = fileDataBuffer.Size();
4870 }
4871 else
4872 {
4873 uncompressedDataSize = (size_t)callback.DictSize;
4874 if (uncompressedDataSize != callback.DictSize)
4875 return E_OUTOFMEMORY;
4876 if (uncompressedDataSize >= (1 << 18))
4877 uncompressedDataSize += kAdditionalSize;
4878 }
4879
4880 const HRESULT res = MethodBench(
4881 EXTERNAL_CODECS_LOC_VARS
4882 complexInCommands,
4883 #ifndef Z7_ST
4884 true, numThreads,
4885 &affinityMode,
4886 #endif
4887 method2,
4888 uncompressedDataSize, (const Byte *)fileDataBuffer,
4889 kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4890 f.NewLine();
4891 RINOK(res)
4892 if (!multiDict)
4893 break;
4894 }
4895 }
4896 }
4897
4898 PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4899
4900 if (use2Columns)
4901 {
4902 f.Print(kSep);
4903 PrintChars(f, '-', fileldSize);
4904 }
4905
4906 f.NewLine();
4907
4908 if (use2Columns)
4909 {
4910 PrintLeft(f, "Avr:", callback.NameFieldSize);
4911 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4912 f.Print(kSep);
4913 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4914 f.NewLine();
4915 }
4916
4917 PrintLeft(f, "Tot:", callback.NameFieldSize);
4918 CTotalBenchRes midRes;
4919 midRes = callback.EncodeRes;
4920 midRes.Update_With_Res(callback.DecodeRes);
4921
4922 // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4923 PrintTotals(f, showFreq, cpuFreq, false, midRes);
4924 f.NewLine();
4925
4926 }
4927 return S_OK;
4928 }
4929