// Bench.cpp #include "StdAfx.h" #include "../../../../C/CpuArch.h" // #include #ifndef _WIN32 #define USE_POSIX_TIME #define USE_POSIX_TIME2 #endif // _WIN32 #ifdef USE_POSIX_TIME #include #include #ifdef USE_POSIX_TIME2 #include #include #endif #endif // USE_POSIX_TIME #ifdef _WIN32 #define USE_ALLOCA #endif #ifdef USE_ALLOCA #ifdef _WIN32 #include #else #include #endif #endif #include "../../../../C/7zCrc.h" #include "../../../../C/RotateDefs.h" #ifndef Z7_ST #include "../../../Windows/Synchronization.h" #include "../../../Windows/Thread.h" #endif #include "../../../Windows/FileFind.h" #include "../../../Windows/FileIO.h" #include "../../../Windows/SystemInfo.h" #include "../../../Common/MyBuffer2.h" #include "../../../Common/IntToString.h" #include "../../../Common/StringConvert.h" #include "../../../Common/StringToInt.h" #include "../../../Common/Wildcard.h" #include "../../Common/MethodProps.h" #include "../../Common/StreamObjects.h" #include "../../Common/StreamUtils.h" #include "Bench.h" using namespace NWindows; #ifndef Z7_ST static const UInt32 k_LZMA = 0x030101; #endif static const UInt64 kComplexInCommands = (UInt64)1 << #ifdef UNDER_CE 31; #else 34; #endif static const UInt32 kComplexInMs = 4000; static void SetComplexCommandsMs(UInt32 complexInMs, bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands) { complexInCommands = kComplexInCommands; const UInt64 kMinFreq = (UInt64)1000000 * 4; const UInt64 kMaxFreq = (UInt64)1000000 * 20000; if (cpuFreq < kMinFreq && !isSpecifiedFreq) cpuFreq = kMinFreq; if (cpuFreq < kMaxFreq || isSpecifiedFreq) { if (complexInMs != 0) complexInCommands = complexInMs * cpuFreq / 1000; else complexInCommands = cpuFreq >> 2; } } // const UInt64 kBenchmarkUsageMult = 1000000; // for debug static const unsigned kBenchmarkUsageMultBits = 16; static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits; UInt64 Benchmark_GetUsage_Percents(UInt64 usage) { return (100 * usage + kBenchmarkUsageMult / 2) / 
kBenchmarkUsageMult; } static const unsigned kNumHashDictBits = 17; static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test static const unsigned kOldLzmaDictBits = 32; // static const size_t kAdditionalSize = (size_t)1 << 32; // for debug static const size_t kAdditionalSize = (size_t)1 << 16; static const UInt32 kCompressedAdditionalSize = (1 << 10); static const UInt32 kMaxMethodPropSize = (1 << 6); #define ALLOC_WITH_HRESULT(_buffer_, _size_) \ { (_buffer_)->Alloc(_size_); \ if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; } class CBaseRandomGenerator { UInt32 A1; UInt32 A2; UInt32 Salt; public: CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); } void Init() { A1 = 362436069; A2 = 521288629;} Z7_FORCE_INLINE UInt32 GetRnd() { return Salt ^ ( ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) + ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) ) ); } }; Z7_NO_INLINE static void RandGen(Byte *buf, size_t size) { CBaseRandomGenerator RG; const size_t size4 = size & ~((size_t)3); size_t i; for (i = 0; i < size4; i += 4) { const UInt32 v = RG.GetRnd(); SetUi32(buf + i, v) } UInt32 v = RG.GetRnd(); for (; i < size; i++) { buf[i] = (Byte)v; v >>= 8; } } class CBenchRandomGenerator: public CMidAlignedBuffer { static UInt32 GetVal(UInt32 &res, unsigned numBits) { UInt32 val = res & (((UInt32)1 << numBits) - 1); res >>= numBits; return val; } static UInt32 GetLen(UInt32 &r) { UInt32 len = GetVal(r, 2); return GetVal(r, 1 + len); } public: void GenerateSimpleRandom(UInt32 salt) { CBaseRandomGenerator rg(salt); const size_t bufSize = Size(); Byte *buf = (Byte *)*this; for (size_t i = 0; i < bufSize; i++) buf[i] = (Byte)rg.GetRnd(); } void GenerateLz(unsigned dictBits, UInt32 salt) { CBaseRandomGenerator rg(salt); size_t pos = 0; size_t rep0 = 1; const size_t bufSize = Size(); Byte *buf = (Byte *)*this; unsigned posBits = 1; // printf("\n dictBits = %d\n", (UInt32)dictBits); // printf("\n bufSize = 0x%p\n", (const void *)bufSize); while (pos 
< bufSize) { /* if (pos >= ((UInt32)1 << 31)) printf(" %x\n", pos); */ UInt32 r = rg.GetRnd(); if (GetVal(r, 1) == 0 || pos < 1024) buf[pos++] = (Byte)(r & 0xFF); else { UInt32 len; len = 1 + GetLen(r); if (GetVal(r, 3) != 0) { len += GetLen(r); while (((size_t)1 << posBits) < pos) posBits++; unsigned numBitsMax = dictBits; if (numBitsMax > posBits) numBitsMax = posBits; const unsigned kAddBits = 6; unsigned numLogBits = 5; if (numBitsMax <= (1 << 4) - 1 + kAddBits) numLogBits = 4; for (;;) { const UInt32 ppp = GetVal(r, numLogBits) + kAddBits; r = rg.GetRnd(); if (ppp > numBitsMax) continue; // rep0 = GetVal(r, ppp); rep0 = r & (((size_t)1 << ppp) - 1); if (rep0 < pos) break; r = rg.GetRnd(); } rep0++; } // len *= 300; // for debug { const size_t rem = bufSize - pos; if (len > rem) len = (UInt32)rem; } Byte *dest = buf + pos; const Byte *src = dest - rep0; pos += len; for (UInt32 i = 0; i < len; i++) *dest++ = *src++; } } // printf("\n CRC = %x\n", CrcCalc(buf, bufSize)); } }; Z7_CLASS_IMP_NOQIB_1( CBenchmarkInStream , ISequentialInStream ) const Byte *Data; size_t Pos; size_t Size; public: void Init(const Byte *data, size_t size) { Data = data; Size = size; Pos = 0; } bool WasFinished() const { return Pos == Size; } }; Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)) { const UInt32 kMaxBlockSize = (1 << 20); if (size > kMaxBlockSize) size = kMaxBlockSize; const size_t remain = Size - Pos; if (size > remain) size = (UInt32)remain; if (size != 0) memcpy(data, Data + Pos, size); Pos += size; if (processedSize) *processedSize = size; return S_OK; } class CBenchmarkOutStream Z7_final: public ISequentialOutStream, public CMyUnknownImp, public CMidAlignedBuffer { Z7_COM_UNKNOWN_IMP_0 Z7_IFACE_COM7_IMP(ISequentialOutStream) // bool _overflow; public: size_t Pos; bool RealCopy; bool CalcCrc; UInt32 Crc; // CBenchmarkOutStream(): _overflow(false) {} void Init(bool realCopy, bool calcCrc) { Crc = CRC_INIT_VAL; RealCopy = realCopy; 
CalcCrc = calcCrc; // _overflow = false; Pos = 0; } void InitCrc() { Crc = CRC_INIT_VAL; } void Calc(const void *data, size_t size) { Crc = CrcUpdate(Crc, data, size); } size_t GetPos() const { return Pos; } // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); } }; Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)) { size_t curSize = Size() - Pos; if (curSize > size) curSize = size; if (curSize != 0) { if (RealCopy) memcpy(((Byte *)*this) + Pos, data, curSize); if (CalcCrc) Calc(data, curSize); Pos += curSize; } if (processedSize) *processedSize = (UInt32)curSize; if (curSize != size) { // _overflow = true; return E_FAIL; } return S_OK; } Z7_CLASS_IMP_NOQIB_1( CCrcOutStream , ISequentialOutStream ) public: bool CalcCrc; UInt32 Crc; UInt64 Pos; CCrcOutStream(): CalcCrc(true) {} void Init() { Crc = CRC_INIT_VAL; Pos = 0; } void Calc(const void *data, size_t size) { Crc = CrcUpdate(Crc, data, size); } }; Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)) { if (CalcCrc) Calc(data, size); Pos += size; if (processedSize) *processedSize = size; return S_OK; } // #include "../../../../C/My_sys_time.h" static UInt64 GetTimeCount() { #ifdef USE_POSIX_TIME #ifdef USE_POSIX_TIME2 timeval v; if (gettimeofday(&v, NULL) == 0) return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec; return (UInt64)time(NULL) * 1000000; #else return time(NULL); #endif #else LARGE_INTEGER value; if (::QueryPerformanceCounter(&value)) return (UInt64)value.QuadPart; return GetTickCount(); #endif } static UInt64 GetFreq() { #ifdef USE_POSIX_TIME #ifdef USE_POSIX_TIME2 return 1000000; #else return 1; #endif #else LARGE_INTEGER value; if (::QueryPerformanceFrequency(&value)) return (UInt64)value.QuadPart; return 1000; #endif } #ifdef USE_POSIX_TIME struct CUserTime { UInt64 Sum; clock_t Prev; void Init() { // Prev = clock(); Sum = 0; Prev = 0; Update(); Sum = 0; } void Update() { tms t; /* clock_t 
res = */ times(&t); clock_t newVal = t.tms_utime + t.tms_stime; Sum += (UInt64)(newVal - Prev); Prev = newVal; /* clock_t v = clock(); if (v != -1) { Sum += v - Prev; Prev = v; } */ } UInt64 GetUserTime() { Update(); return Sum; } }; #else struct CUserTime { bool UseTick; DWORD Prev_Tick; UInt64 Prev; UInt64 Sum; void Init() { UseTick = false; Prev_Tick = 0; Prev = 0; Sum = 0; Update(); Sum = 0; } UInt64 GetUserTime() { Update(); return Sum; } void Update(); }; static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; } void CUserTime::Update() { DWORD new_Tick = GetTickCount(); FILETIME creationTime, exitTime, kernelTime, userTime; if (!UseTick && #ifdef UNDER_CE ::GetThreadTimes(::GetCurrentThread() #else ::GetProcessTimes(::GetCurrentProcess() #endif , &creationTime, &exitTime, &kernelTime, &userTime)) { UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime); Sum += newVal - Prev; Prev = newVal; } else { UseTick = true; Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000; } Prev_Tick = new_Tick; } #endif static UInt64 GetUserFreq() { #ifdef USE_POSIX_TIME // return CLOCKS_PER_SEC; return (UInt64)sysconf(_SC_CLK_TCK); #else return 10000000; #endif } class CBenchProgressStatus Z7_final { #ifndef Z7_ST NSynchronization::CCriticalSection CS; #endif public: HRESULT Res; bool EncodeMode; void SetResult(HRESULT res) { #ifndef Z7_ST NSynchronization::CCriticalSectionLock lock(CS); #endif Res = res; } HRESULT GetResult() { #ifndef Z7_ST NSynchronization::CCriticalSectionLock lock(CS); #endif return Res; } }; struct CBenchInfoCalc { CBenchInfo BenchInfo; CUserTime UserTime; void SetStartTime(); void SetFinishTime(CBenchInfo &dest); }; void CBenchInfoCalc::SetStartTime() { BenchInfo.GlobalFreq = GetFreq(); BenchInfo.UserFreq = GetUserFreq(); BenchInfo.GlobalTime = ::GetTimeCount(); BenchInfo.UserTime = 0; UserTime.Init(); } void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest) { dest = BenchInfo; dest.GlobalTime = 
::GetTimeCount() - BenchInfo.GlobalTime; dest.UserTime = UserTime.GetUserTime(); } class CBenchProgressInfo Z7_final: public ICompressProgressInfo, public CMyUnknownImp, public CBenchInfoCalc { Z7_COM_UNKNOWN_IMP_0 Z7_IFACE_COM7_IMP(ICompressProgressInfo) public: CBenchProgressStatus *Status; IBenchCallback *Callback; CBenchProgressInfo(): Callback(NULL) {} }; Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)) { HRESULT res = Status->GetResult(); if (res != S_OK) return res; if (!Callback) return res; /* static UInt64 inSizePrev = 0; static UInt64 outSizePrev = 0; UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0; if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; } if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; } UInt64 percents = delta2 * 1000; if (delta1 != 0) percents /= delta1; printf("=== %7d %7d %7d %7d ratio = %4d\n", (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10), (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10), (unsigned)percents); */ CBenchInfo info; SetFinishTime(info); if (Status->EncodeMode) { info.UnpackSize = BenchInfo.UnpackSize + *inSize; info.PackSize = BenchInfo.PackSize + *outSize; res = Callback->SetEncodeResult(info, false); } else { info.PackSize = BenchInfo.PackSize + *inSize; info.UnpackSize = BenchInfo.UnpackSize + *outSize; res = Callback->SetDecodeResult(info, false); } if (res != S_OK) Status->SetResult(res); return res; } static const unsigned kSubBits = 8; static unsigned GetLogSize(UInt64 size) { unsigned i = 0; for (;;) { i++; size >>= 1; if (size == 0) break; } return i; } static UInt32 GetLogSize_Sub(UInt64 size) { if (size <= 1) return 0; const unsigned i = GetLogSize(size) - 1; UInt32 v; if (i <= kSubBits) v = (UInt32)(size) << (kSubBits - i); else v = (UInt32)(size >> (i - kSubBits)); return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1)); } static UInt64 Get_UInt64_from_double(double v) { const UInt64 kMaxVal 
= (UInt64)1 << 62; if (v > (double)(Int64)kMaxVal) return kMaxVal; return (UInt64)v; } static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d) { if (d == 0) d = 1; const double v = (double)(Int64)m1 * (double)(Int64)m2 / (double)(Int64)d; return Get_UInt64_from_double(v); /* unsigned n1 = GetLogSize(m1); unsigned n2 = GetLogSize(m2); while (n1 + n2 > 64) { if (n1 >= n2) { m1 >>= 1; n1--; } else { m2 >>= 1; n2--; } d >>= 1; } if (d == 0) d = 1; return m1 * m2 / d; */ } UInt64 CBenchInfo::GetUsage() const { UInt64 userTime = UserTime; UInt64 userFreq = UserFreq; UInt64 globalTime = GlobalTime; UInt64 globalFreq = GlobalFreq; if (userFreq == 0) userFreq = 1; if (globalTime == 0) globalTime = 1; const double v = ((double)(Int64)userTime / (double)(Int64)userFreq) * ((double)(Int64)globalFreq / (double)(Int64)globalTime) * (double)(Int64)kBenchmarkUsageMult; return Get_UInt64_from_double(v); /* return MyMultDiv64( MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq), globalFreq, globalTime); */ } UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const { if (UserTime == 0) { return 0; // userTime = 1; } UInt64 globalFreq = GlobalFreq; if (globalFreq == 0) globalFreq = 1; const double v = ((double)(Int64)GlobalTime / (double)(Int64)globalFreq) * ((double)(Int64)UserFreq / (double)(Int64)UserTime) * (double)(Int64)rating; return Get_UInt64_from_double(v); /* return MyMultDiv64( MyMultDiv64(rating, UserFreq, UserTime), GlobalTime, globalFreq); */ } UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const { return MyMultDiv64(numUnits, GlobalFreq, GlobalTime); } static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity) { return complexity >= 0 ? 
size * (UInt32)complexity : size / (UInt32)(-complexity); } struct CBenchProps { bool LzmaRatingMode; Int32 EncComplex; Int32 DecComplexCompr; Int32 DecComplexUnc; unsigned KeySize; CBenchProps(): LzmaRatingMode(false), KeySize(0) {} void SetLzmaCompexity(); UInt64 GetNumCommands_Enc(UInt64 unpackSize) const { const UInt32 kMinSize = 100; if (unpackSize < kMinSize) unpackSize = kMinSize; return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex); } UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const { return GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) + GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc); } UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const; UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const; }; void CBenchProps::SetLzmaCompexity() { EncComplex = 1200; DecComplexUnc = 4; DecComplexCompr = 190; LzmaRatingMode = true; } UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const { if (dictSize < (1 << kBenchMinDicLogSize)) dictSize = (1 << kBenchMinDicLogSize); Int32 encComplex = EncComplex; if (LzmaRatingMode) { /* for (UInt64 uu = 0; uu < (UInt64)0xf << 60;) { unsigned rr = GetLogSize_Sub(uu); printf("\n%16I64x , log = %4x", uu, rr); uu += 1; uu += uu / 50; } */ // throw 1; const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits); encComplex = 870 + ((t * t * 5) >> (2 * kSubBits)); } const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex); return MyMultDiv64(numCommands, freq, elapsedTime); } UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const { const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations; return MyMultDiv64(numCommands, freq, elapsedTime); } UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 
dictSize) const { CBenchProps props; props.SetLzmaCompexity(); return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations); } UInt64 CBenchInfo::GetRating_LzmaDec() const { CBenchProps props; props.SetLzmaCompexity(); return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations); } #ifndef Z7_ST #define NUM_CPU_LEVELS_MAX 3 struct CAffinityMode { unsigned NumBundleThreads; unsigned NumLevels; unsigned NumCoreThreads; unsigned NumCores; // unsigned DivideNum; UInt32 Sizes[NUM_CPU_LEVELS_MAX]; void SetLevels(unsigned numCores, unsigned numCoreThreads); DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const; bool NeedAffinity() const { return NumBundleThreads != 0; } WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const { if (NeedAffinity()) { CCpuSet cpuSet; GetAffinityMask(bundleIndex, &cpuSet); return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet); } return thread.Create(startAddress, parameter); } CAffinityMode(): NumBundleThreads(0), NumLevels(0), NumCoreThreads(1) // DivideNum(1) {} }; void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads) { NumCores = numCores; NumCoreThreads = numCoreThreads; NumLevels = 0; if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0) return; UInt32 c = numCores / numCoreThreads; UInt32 c2 = 1; while ((c & 1) == 0) { c >>= 1; c2 <<= 1; } if (c2 != 1) Sizes[NumLevels++] = c2; if (c != 1) Sizes[NumLevels++] = c; if (numCoreThreads != 1) Sizes[NumLevels++] = numCoreThreads; if (NumLevels == 0) Sizes[NumLevels++] = 1; /* printf("\n Cores:"); for (unsigned i = 0; i < NumLevels; i++) { printf(" %d", Sizes[i]); } printf("\n"); */ } DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const { CpuSet_Zero(cpuSet); if (NumLevels == 0) return 0; // printf("\n%2d", bundleIndex); /* UInt32 low = 0; if (DivideNum != 1) { low 
= bundleIndex % DivideNum; bundleIndex /= DivideNum; } */ UInt32 numGroups = NumCores / NumBundleThreads; UInt32 m = bundleIndex % numGroups; UInt32 v = 0; for (unsigned i = 0; i < NumLevels; i++) { UInt32 size = Sizes[i]; while ((size & 1) == 0) { v *= 2; v |= (m & 1); m >>= 1; size >>= 1; } v *= size; v += m % size; m /= size; } // UInt32 nb = NumBundleThreads / DivideNum; UInt32 nb = NumBundleThreads; DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1; // v += low; mask <<= v; // printf(" %2d %8x \n ", v, (unsigned)mask); #ifdef _WIN32 *cpuSet = mask; #else { for (unsigned k = 0; k < nb; k++) CpuSet_Set(cpuSet, v + k); } #endif return mask; } struct CBenchSyncCommon { bool ExitMode; NSynchronization::CManualResetEvent StartEvent; CBenchSyncCommon(): ExitMode(false) {} }; #endif enum E_CheckCrcMode { k_CheckCrcMode_Never = 0, k_CheckCrcMode_Always = 1, k_CheckCrcMode_FirstPass = 2 }; class CEncoderInfo; class CEncoderInfo Z7_final { Z7_CLASS_NO_COPY(CEncoderInfo) public: #ifndef Z7_ST NWindows::CThread thread[2]; NSynchronization::CManualResetEvent ReadyEvent; UInt32 NumDecoderSubThreads; CBenchSyncCommon *Common; UInt32 EncoderIndex; UInt32 NumEncoderInternalThreads; CAffinityMode AffinityMode; bool IsGlobalMtMode; // if more than one benchmark encoder threads #endif CMyComPtr _encoder; CMyComPtr _encoderFilter; CBenchProgressInfo *progressInfoSpec[2]; CMyComPtr progressInfo[2]; UInt64 NumIterations; UInt32 Salt; #ifdef USE_ALLOCA size_t AllocaSize; #endif unsigned KeySize; Byte _key[32]; Byte _iv[16]; HRESULT Set_Key_and_IV(ICryptoProperties *cp) { RINOK(cp->SetKey(_key, KeySize)) return cp->SetInitVector(_iv, sizeof(_iv)); } Byte _psw[16]; bool CheckCrc_Enc; /* = 1, if we want to check packed data crcs after each pass used for filter and usual coders */ bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass used only for filter */ E_CheckCrcMode CheckCrcMode_Dec; struct CDecoderInfo { CEncoderInfo *Encoder; UInt32 DecoderIndex; bool 
CallbackMode; #ifdef USE_ALLOCA size_t AllocaSize; #endif }; CDecoderInfo decodersInfo[2]; CMyComPtr _decoders[2]; CMyComPtr _decoderFilter; HRESULT Results[2]; CBenchmarkOutStream *outStreamSpec; CMyComPtr outStream; IBenchCallback *callback; IBenchPrintCallback *printCallback; UInt32 crc; size_t kBufferSize; size_t compressedSize; const Byte *uncompressedDataPtr; const Byte *fileData; CBenchRandomGenerator rg; CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!! // CBenchmarkOutStream *propStreamSpec; Byte propsData[kMaxMethodPropSize]; CBufPtrSeqOutStream *propStreamSpec; CMyComPtr propStream; unsigned generateDictBits; COneMethodInfo _method; // for decode size_t _uncompressedDataSize; HRESULT Generate(); HRESULT Encode(); HRESULT Decode(UInt32 decoderIndex); CEncoderInfo(): #ifndef Z7_ST Common(NULL), IsGlobalMtMode(true), #endif Salt(0), KeySize(0), CheckCrc_Enc(true), UseRealData_Enc(true), CheckCrcMode_Dec(k_CheckCrcMode_Always), outStreamSpec(NULL), callback(NULL), printCallback(NULL), fileData(NULL), propStreamSpec(NULL) {} #ifndef Z7_ST static THREAD_FUNC_DECL EncodeThreadFunction(void *param) { HRESULT res; CEncoderInfo *encoder = (CEncoderInfo *)param; try { #ifdef USE_ALLOCA alloca(encoder->AllocaSize); #endif res = encoder->Encode(); } catch(...) 
{ res = E_FAIL; } encoder->Results[0] = res; if (res != S_OK) encoder->progressInfoSpec[0]->Status->SetResult(res); encoder->ReadyEvent.Set(); return THREAD_FUNC_RET_ZERO; } static THREAD_FUNC_DECL DecodeThreadFunction(void *param) { CDecoderInfo *decoder = (CDecoderInfo *)param; #ifdef USE_ALLOCA alloca(decoder->AllocaSize); #endif CEncoderInfo *encoder = decoder->Encoder; encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex); return THREAD_FUNC_RET_ZERO; } HRESULT CreateEncoderThread() { WRes res = 0; if (!ReadyEvent.IsCreated()) res = ReadyEvent.Create(); if (res == 0) res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this, EncoderIndex); return HRESULT_FROM_WIN32(res); } HRESULT CreateDecoderThread(unsigned index, bool callbackMode #ifdef USE_ALLOCA , size_t allocaSize #endif ) { CDecoderInfo &decoder = decodersInfo[index]; decoder.DecoderIndex = index; decoder.Encoder = this; #ifdef USE_ALLOCA decoder.AllocaSize = allocaSize; #endif decoder.CallbackMode = callbackMode; WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder, // EncoderIndex * NumEncoderInternalThreads + index EncoderIndex ); return HRESULT_FROM_WIN32(res); } #endif }; static size_t GetBenchCompressedSize(size_t bufferSize) { return kCompressedAdditionalSize + bufferSize + bufferSize / 16; // kBufferSize / 2; } HRESULT CEncoderInfo::Generate() { const COneMethodInfo &method = _method; // we need extra space, if input data is already compressed const size_t kCompressedBufferSize = _encoderFilter ? 
kBufferSize : GetBenchCompressedSize(kBufferSize); if (kCompressedBufferSize < kBufferSize) return E_FAIL; uncompressedDataPtr = fileData; if (fileData) { #if !defined(Z7_ST) if (IsGlobalMtMode) { /* we copy the data to local buffer of thread to eliminate using of shared buffer by different threads */ ALLOC_WITH_HRESULT(&rg, kBufferSize) memcpy((Byte *)rg, fileData, kBufferSize); uncompressedDataPtr = (const Byte *)rg; } #endif } else { ALLOC_WITH_HRESULT(&rg, kBufferSize) // DWORD ttt = GetTickCount(); if (generateDictBits == 0) rg.GenerateSimpleRandom(Salt); else { if (generateDictBits >= sizeof(size_t) * 8 && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1))) return E_INVALIDARG; rg.GenerateLz(generateDictBits, Salt); // return E_ABORT; // for debug } // printf("\n%d\n ", GetTickCount() - ttt); crc = CrcCalc((const Byte *)rg, rg.Size()); uncompressedDataPtr = (const Byte *)rg; } if (!outStream) { outStreamSpec = new CBenchmarkOutStream; outStream = outStreamSpec; } ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize) if (_encoderFilter) { /* we try to reduce the number of memcpy() in main encoding loop. 
so we copy data to temp buffers here */ ALLOC_WITH_HRESULT(&rgCopy, kBufferSize) memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize); memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize); } if (!propStream) { propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream; propStream = propStreamSpec; } // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize); // propStreamSpec->Init(true, false); propStreamSpec->Init(propsData, sizeof(propsData)); CMyComPtr coder; if (_encoderFilter) coder = _encoderFilter; else coder = _encoder; { CMyComPtr scp; coder.QueryInterface(IID_ICompressSetCoderProperties, &scp); if (scp) { const UInt64 reduceSize = kBufferSize; /* in posix new thread uses same affinity as parent thread, so we don't need to send affinity to coder in posix */ UInt64 affMask; #if !defined(Z7_ST) && defined(_WIN32) { CCpuSet cpuSet; affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); } #else affMask = 0; #endif // affMask <<= 3; // debug line: to test no affinity in coder; // affMask = 0; RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL))) } else { if (method.AreThereNonOptionalProps()) return E_INVALIDARG; } CMyComPtr writeCoderProps; coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps); if (writeCoderProps) { RINOK(writeCoderProps->WriteCoderProperties(propStream)) } { CMyComPtr sp; coder.QueryInterface(IID_ICryptoSetPassword, &sp); if (sp) { RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw))) // we must call encoding one time to calculate password key for key cache. // it must be after WriteCoderProperties! 
Byte temp[16]; memset(temp, 0, sizeof(temp)); if (_encoderFilter) { _encoderFilter->Init(); _encoderFilter->Filter(temp, sizeof(temp)); } else { CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; CMyComPtr inStream = inStreamSpec; inStreamSpec->Init(temp, sizeof(temp)); CCrcOutStream *crcStreamSpec = new CCrcOutStream; CMyComPtr crcStream = crcStreamSpec; crcStreamSpec->Init(); RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL)) } } } } return S_OK; } static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc) { while (size != 0) { UInt32 cur = crc ? 1 << 17 : 1 << 24; if (cur > size) cur = (UInt32)size; UInt32 processed = filter->Filter(data, cur); /* if (processed > size) (in AES filter), we must fill last block with zeros. but it is not important for benchmark. So we just copy that data without filtering. if (processed == 0) then filter can't process more */ if (processed > size || processed == 0) processed = (UInt32)size; if (crc) *crc = CrcUpdate(*crc, data, processed); data += processed; size -= processed; } } HRESULT CEncoderInfo::Encode() { // printf("\nCEncoderInfo::Generate\n"); RINOK(Generate()) // printf("\n2222\n"); #ifndef Z7_ST if (Common) { Results[0] = S_OK; WRes wres = ReadyEvent.Set(); if (wres == 0) wres = Common->StartEvent.Lock(); if (wres != 0) return HRESULT_FROM_WIN32(wres); if (Common->ExitMode) return S_OK; } else #endif { CBenchProgressInfo *bpi = progressInfoSpec[0]; bpi->SetStartTime(); } CBenchInfo &bi = progressInfoSpec[0]->BenchInfo; bi.UnpackSize = 0; bi.PackSize = 0; CMyComPtr cp; CMyComPtr coder; if (_encoderFilter) coder = _encoderFilter; else coder = _encoder; coder.QueryInterface(IID_ICryptoProperties, &cp); CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; CMyComPtr inStream = inStreamSpec; if (cp) { RINOK(Set_Key_and_IV(cp)) } compressedSize = 0; if (_encoderFilter) compressedSize = kBufferSize; // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec; UInt64 prev = 
0; const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF); const bool useCrc = (mask < NumIterations); bool crcPrev_defined = false; UInt32 crcPrev = 0; bool useRealData_Enc = UseRealData_Enc; bool data_Was_Changed = false; if (useRealData_Enc) { /* we want memcpy() for each iteration including first iteration. So results will be equal for different number of iterations */ data_Was_Changed = true; } const UInt64 numIterations = NumIterations; UInt64 i = numIterations; // printCallback->NewLine(); while (i != 0) { i--; if (printCallback && bi.UnpackSize - prev >= (1 << 26)) { prev = bi.UnpackSize; RINOK(printCallback->CheckBreak()) } /* CBenchInfo info; progressInfoSpec[0]->SetStartTime(); */ bool calcCrc = false; if (useCrc) calcCrc = (((UInt32)i & mask) == 0); if (_encoderFilter) { Byte *filterData = rgCopy; if (i == numIterations - 1 || calcCrc || useRealData_Enc) { filterData = (Byte *)*outStreamSpec; if (data_Was_Changed) memcpy(filterData, uncompressedDataPtr, kBufferSize); data_Was_Changed = true; } _encoderFilter->Init(); if (calcCrc) outStreamSpec->InitCrc(); My_FilterBench(_encoderFilter, filterData, kBufferSize, calcCrc ? 
&outStreamSpec->Crc : NULL); } else { outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations inStreamSpec->Init(uncompressedDataPtr, kBufferSize); RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0])) if (!inStreamSpec->WasFinished()) return E_FAIL; if (compressedSize != outStreamSpec->Pos) { if (compressedSize != 0) return E_FAIL; compressedSize = outStreamSpec->Pos; } } // outStreamSpec->Print(); if (calcCrc) { const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc); if (crcPrev_defined && crcPrev != crc2) return E_FAIL; crcPrev = crc2; crcPrev_defined = true; } bi.UnpackSize += kBufferSize; bi.PackSize += compressedSize; /* { progressInfoSpec[0]->SetFinishTime(info); info.UnpackSize = 0; info.PackSize = 0; info.NumIterations = 1; info.UnpackSize = kBufferSize; info.PackSize = compressedSize; // printf("\n%7d\n", encoder.compressedSize); RINOK(callback->SetEncodeResult(info, true)) printCallback->NewLine(); } */ } _encoder.Release(); _encoderFilter.Release(); return S_OK; } HRESULT CEncoderInfo::Decode(UInt32 decoderIndex) { CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; CMyComPtr inStream = inStreamSpec; CMyComPtr &decoder = _decoders[decoderIndex]; CMyComPtr coder; if (_decoderFilter) { if (decoderIndex != 0) return E_FAIL; coder = _decoderFilter; } else coder = decoder; CMyComPtr setDecProps; coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps); if (!setDecProps && propStreamSpec->GetPos() != 0) return E_FAIL; CCrcOutStream *crcOutStreamSpec = new CCrcOutStream; CMyComPtr crcOutStream = crcOutStreamSpec; CBenchProgressInfo *pi = progressInfoSpec[decoderIndex]; pi->BenchInfo.UnpackSize = 0; pi->BenchInfo.PackSize = 0; #ifndef Z7_ST { CMyComPtr setCoderMt; coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt); if (setCoderMt) { RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads)) } } #endif CMyComPtr scp; 
coder.QueryInterface(IID_ICompressSetCoderProperties, &scp); if (scp) { const UInt64 reduceSize = _uncompressedDataSize; RINOK(_method.SetCoderProps(scp, &reduceSize)) } CMyComPtr cp; coder.QueryInterface(IID_ICryptoProperties, &cp); if (setDecProps) { RINOK(setDecProps->SetDecoderProperties2( /* (const Byte *)*propStreamSpec, */ propsData, (UInt32)propStreamSpec->GetPos())) } { CMyComPtr sp; coder.QueryInterface(IID_ICryptoSetPassword, &sp); if (sp) { RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw))) } } UInt64 prev = 0; if (cp) { RINOK(Set_Key_and_IV(cp)) } CMyComPtr setFinishMode; if (_decoderFilter) { if (compressedSize > rgCopy.Size()) return E_FAIL; } else { decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode); } const UInt64 numIterations = NumIterations; const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec; for (UInt64 i = 0; i < numIterations; i++) { if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26)) { RINOK(printCallback->CheckBreak()) prev = pi->BenchInfo.UnpackSize; } const UInt64 outSize = kBufferSize; bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never); crcOutStreamSpec->Init(); if (_decoderFilter) { Byte *filterData = (Byte *)*outStreamSpec; if (calcCrc) { calcCrc = (i == 0); if (checkCrcMode == k_CheckCrcMode_Always) { calcCrc = true; memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize); filterData = rgCopy; } } _decoderFilter->Init(); My_FilterBench(_decoderFilter, filterData, compressedSize, calcCrc ? 
&crcOutStreamSpec->Crc : NULL); } else { crcOutStreamSpec->CalcCrc = calcCrc; inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize); if (setFinishMode) { RINOK(setFinishMode->SetFinishMode(BoolToUInt(true))) } RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex])) if (setFinishMode) { if (!inStreamSpec->WasFinished()) return S_FALSE; CMyComPtr getInStreamProcessedSize; decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize); if (getInStreamProcessedSize) { UInt64 processed; RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed)) if (processed != compressedSize) return S_FALSE; } } if (crcOutStreamSpec->Pos != outSize) return S_FALSE; } if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc) return S_FALSE; pi->BenchInfo.UnpackSize += kBufferSize; pi->BenchInfo.PackSize += compressedSize; } decoder.Release(); _decoderFilter.Release(); return S_OK; } static const UInt32 kNumThreadsMax = (1 << 12); struct CBenchEncoders { CEncoderInfo *encoders; CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; } ~CBenchEncoders() { delete []encoders; } }; static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands) { if (numCommands < (1 << 4)) numCommands = (1 << 4); UInt64 res = complexInCommands / numCommands; return (res == 0 ? 
1 : res); } #ifndef Z7_ST // ---------- CBenchThreadsFlusher ---------- struct CBenchThreadsFlusher { CBenchEncoders *EncodersSpec; CBenchSyncCommon Common; unsigned NumThreads; bool NeedClose; CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {} ~CBenchThreadsFlusher() { StartAndWait(true); } WRes StartAndWait(bool exitMode = false); }; WRes CBenchThreadsFlusher::StartAndWait(bool exitMode) { if (!NeedClose) return 0; Common.ExitMode = exitMode; WRes res = Common.StartEvent.Set(); for (unsigned i = 0; i < NumThreads; i++) { NWindows::CThread &t = EncodersSpec->encoders[i].thread[0]; if (t.IsCreated()) { WRes res2 = t.Wait_Close(); if (res == 0) res = res2; } } NeedClose = false; return res; } #endif // Z7_ST static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue) { for (size_t i = 0; i < size; i++) { data[i] = (Byte)startValue; startValue++; } } static HRESULT MethodBench( DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands, #ifndef Z7_ST bool oldLzmaBenchMode, UInt32 numThreads, const CAffinityMode *affinityMode, #endif const COneMethodInfo &method2, size_t uncompressedDataSize, const Byte *fileData, unsigned generateDictBits, IBenchPrintCallback *printCallback, IBenchCallback *callback, CBenchProps *benchProps) { COneMethodInfo method = method2; UInt64 methodId; UInt32 numStreams; bool isFilter; const int codecIndex = FindMethod_Index( EXTERNAL_CODECS_LOC_VARS method.MethodName, true, methodId, numStreams, isFilter); if (codecIndex < 0) return E_NOTIMPL; if (numStreams != 1) return E_INVALIDARG; UInt32 numEncoderThreads = 1; UInt32 numSubDecoderThreads = 1; #ifndef Z7_ST numEncoderThreads = numThreads; if (oldLzmaBenchMode) if (methodId == k_LZMA) { if (numThreads == 1 && method.Get_NumThreads() < 0) method.AddProp_NumThreads(1); const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads(); if (numThreads > 1 && numLzmaThreads > 1) { numEncoderThreads = (numThreads + 1) / 2; // 20.03 numSubDecoderThreads = 2; } } const bool mtEncMode = 
(numEncoderThreads > 1) || affinityMode->NeedAffinity();

  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // ----- create coders and set the per-encoder options -----

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // only the first encoder reports results to (callback)
    encoder.callback = (i == 0) ? callback : NULL;
    encoder.printCallback = printCallback;

    #ifndef Z7_ST
    encoder.EncoderIndex = i;
    encoder.NumEncoderInternalThreads = numSubDecoderThreads;
    encoder.AffinityMode = *affinityMode;

    /*
    if (numSubDecoderThreads > 1)
    if (encoder.AffinityMode.NeedAffinity()
        && encoder.AffinityMode.NumBundleThreads == 1)
    {
      // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
      if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
        encoder.AffinityMode.NumBundleThreads *= 2;
    }
    */

    #endif

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // fixed (non-random) iv / key / password data
    SetPseudoRand(encoder._iv, sizeof(encoder._iv), 17);
    SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
    SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }

    // CRC checks are reduced for methods with low complexity values (<= 30)
    encoder.UseRealData_Enc =
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;

    encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    if (benchProps->DecComplexCompr +
        benchProps->DecComplexUnc <= 30)
      encoder.CheckCrcMode_Dec =
          k_CheckCrcMode_FirstPass; // for filters
          // k_CheckCrcMode_Never; // for debug
          // k_CheckCrcMode_Always; // for debug
    // data from a file is always used as is and always checked at decoding
    if (fileData)
    {
      encoder.UseRealData_Enc = true;
      encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    }
  }

  // reference CRC of the file data (0 if there is no file data)
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef Z7_ST
  // encoderFlusher is declared after encodersSpec, so its destructor
  // (that waits for the threads) runs before the encoders array is deleted
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  // ----- Encode: set iterations / salt / progress objects, create threads -----

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
    // encoder.NumIterations = 3;
    encoder.Salt = g_CrcTable[i & 0xFF];
    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf(" %8x", encoder.Salt);

    encoder.KeySize = benchProps->KeySize;

    // two progress objects per encoder; both refer to the shared (status)
    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef Z7_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      // different alloca size for each thread
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      encoder.Common = &encoderFlusher.Common;
      encoder.IsGlobalMtMode = numEncoderThreads > 1;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak())
  }

  #ifndef Z7_ST
  if (mtEncMode)
  {
    // wait until every encoder thread sets its ReadyEvent
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      const WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0])
    }

    // the timer is started just before the threads are released
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    const WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode())
  }

  RINOK(status.Res)

  CBenchInfo info;

  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  // sizes are summed over all encoders
  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }

  RINOK(callback->SetEncodeResult(info, true))


  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
  #ifndef Z7_ST
  const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    /*
    #ifndef Z7_ST
    // encoder.affinityMode = *affinityMode;
    if (encoder.NumEncoderInternalThreads != 1)
      encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
    #endif
    */

    // the number of iterations is calculated for the first encoder
    // and is reused for all other encoders
    if (i == 0)
    {
      encoder.NumIterations = GetNumIterations(
          benchProps->GetNumCommands_Dec(
              encoder.compressedSize,
              encoder.kBufferSize),
          complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef Z7_ST
    {
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
    }
    if (mtDecoderMode)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res)
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0))
    }
  }

  #ifndef Z7_ST
  if (mtDecoderMode)
  {
    // all threads are waited for (and closed) before any error is returned;
    // the first thread error and the first decoder error are kept
    WRes wres = 0;
    HRESULT res = S_OK;
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        const WRes wres2 = encoder.thread[j].
            // Wait(); // later we can get thread times from thread in UNDER_CE
            Wait_Close();
        if (wres == 0 && wres2 != 0)
          wres = wres2;
        const HRESULT res2 = encoder.Results[j];
        if (res == 0 && res2 != 0)
          res = res2;
      }
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    RINOK(res)
  }
  #endif // Z7_ST

  RINOK(status.Res)
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  /*
  #ifndef Z7_ST
  #ifdef UNDER_CE
  if (mtDecoderMode)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif
  */

  info.UnpackSize = 0;
  info.PackSize = 0;
  // each encoder's data is decoded by (numSubDecoderThreads) decoders
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
RINOK(callback->SetDecodeResult(info, true))

  return S_OK;
}


// Returns (2 ^ dictSizeLog) as a 64-bit value.
// NOTE(review): (dictSizeLog) must be less than 64; a larger shift count is undefined behavior.
static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
{
  /*
  if (dictSizeLog < 32)
    return (UInt32)1 << dictSizeLog;
  else
    return (UInt32)(Int32)-1;
  */
  return (UInt64)1 << dictSizeLog;
}


// it's limit of current LZMA implementation that can be changed later
#define kLzmaMaxDictSize ((UInt32)15 << 28)

/*
  GetLZMAUsage() returns a number of bytes calculated from the dictionary size:
    (hs + son) * 4 + blockSize + 1 MiB (+ 6 MiB if multiThread)
  where
    dict      : (dict) clamped to the range [1, kLzmaMaxDictSize]
    hs        : value derived from (dict - 1) by bit smearing, at least 0x10000,
                plus (1 << 16)
    son       : dict (or dict * 2, if btMode != 0)
    blockSize : dict + 64 KiB (+ 1 MiB if multiThread), increased by 1/2
                (or by 1/4, if it is 1 GiB or larger), limited by kBlockSizeMax.
  NOTE(review): presumably this mirrors the memory allocation of the LZMA encoder's
  match finder; confirm against the encoder code.
*/
static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
{
  if (dict == 0)
    dict = 1;
  if (dict > kLzmaMaxDictSize)
    dict = kLzmaMaxDictSize;
  UInt32 hs = (UInt32)dict - 1;
  // propagate the highest set bit to lower bits
  hs |= (hs >> 1);
  hs |= (hs >> 2);
  hs |= (hs >> 4);
  hs |= (hs >> 8);
  hs >>= 1;
  hs |= 0xFFFF;
  if (hs > (1 << 24))
    hs >>= 1;
  hs++;
  hs += (1 << 16);

  // 2^32 - 2^16
  const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
  UInt64 blockSize = (UInt64)dict + (1 << 16)
      + (multiThread ? (1 << 20) : 0);
  blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
  if (blockSize >= kBlockSizeMax)
    blockSize = kBlockSizeMax;

  UInt64 son = (UInt64)dict;
  if (btMode)
    son *= 2;
  const UInt64 v = (hs + son) * 4 + blockSize +
      (1 << 20) + (multiThread ? (6 << 20) : 0);

  // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
  // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
  return v;
}


/*
  GetBenchMemoryUsage() returns the memory usage value for the benchmark with
  (numThreads) threads and the specified (dictionary) size.
  A negative (level) is treated as level 5.
  Levels below 5 use (btMode = 0); levels 5 and above use (btMode = 1).
*/
UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
{
  const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
  const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
  if (level < 0)
    level = 5;
  const int algo = (level < 5 ? 0 : 1);
  const int btMode = (algo == 0 ?
0 : 1); UInt32 numBigThreads = numThreads; bool lzmaMt = (totalBench || (numThreads > 1 && btMode)); if (btMode) { if (!totalBench && lzmaMt) numBigThreads /= 2; } return ((UInt64)kBufferSize + kCompressedBufferSize + GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads; } static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary) { // dictionary += (dictionary >> 9); // for page tables (virtual memory) return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20); } // ---------- CRC and HASH ---------- struct CCrcInfo_Base { CMidAlignedBuffer Buffer; const Byte *Data; size_t Size; bool CreateLocalBuf; UInt32 CheckSum_Res; CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {} HRESULT Generate(const Byte *data, size_t size); HRESULT CrcProcess(UInt64 numIterations, const UInt32 *checkSum, IHasher *hf, IBenchPrintCallback *callback); }; HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size) { Size = size; Data = data; if (!data || CreateLocalBuf) { ALLOC_WITH_HRESULT(&Buffer, size) Data = Buffer; } if (!data) RandGen(Buffer, size); else if (CreateLocalBuf && size != 0) memcpy(Buffer, data, size); return S_OK; } HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations, const UInt32 *checkSum, IHasher *hf, IBenchPrintCallback *callback) { MY_ALIGN(16) Byte hash[64]; memset(hash, 0, sizeof(hash)); CheckSum_Res = 0; const UInt32 hashSize = hf->GetDigestSize(); if (hashSize > sizeof(hash)) return S_FALSE; const Byte *buf = Data; const size_t size = Size; UInt32 checkSum_Prev = 0; UInt64 prev = 0; UInt64 cur = 0; for (UInt64 i = 0; i < numIterations; i++) { hf->Init(); size_t pos = 0; do { const size_t rem = size - pos; const UInt32 kStep = ((UInt32)1 << 31); const UInt32 curSize = (rem < kStep) ? 
(UInt32)rem : kStep;
      hf->Update(buf + pos, curSize);
      pos += curSize;
    }
    while (pos != size);

    hf->Final(hash);
    // fold the digest to a 32-bit sum
    UInt32 sum = 0;
    for (UInt32 j = 0; j < hashSize; j += 4)
    {
      sum = rotlFixed(sum, 11);
      sum += GetUi32(hash + j);
    }
    if (checkSum)
    {
      if (sum != *checkSum)
        return S_FALSE;
    }
    else
    {
      // the first sum becomes the reference value for the next iterations
      checkSum_Prev = sum;
      checkSum = &checkSum_Prev;
    }
    if (callback)
    {
      // CheckBreak() is called after each 1 GiB of processed data
      cur += size;
      if (cur - prev >= ((UInt32)1 << 30))
      {
        prev = cur;
        RINOK(callback->CheckBreak())
      }
    }
  }
  // (checkSum_Prev) remains 0 if the caller has provided (checkSum)
  CheckSum_Res = checkSum_Prev;
  return S_OK;
}

extern
UInt32 g_BenchCpuFreqTemp; // we need non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;

// YY1 is 2 operations. YY7 expands to (4 * 4 * 4 = 64) copies of YY1,
// so it is 128 operations: that is the value of kNumFreqCommands.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
static const UInt32 kNumFreqCommands = 128;

EXTERN_C_BEGIN

// Executes (num) iterations of the YY7 sequence on (sum) with (val),
// and returns the final (sum). It is the measured loop of the frequency benchmark.
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}

EXTERN_C_END


#ifndef Z7_ST

// Common part of the worker thread items: the thread object, the callback
// that the thread can call, and the result of that callback.
struct CBaseThreadInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback;
  HRESULT CallbackRes;

  // waits for the thread and closes it; returns 0 if the thread was not created
  WRes Wait_If_Created()
  {
    if (!Thread.IsCreated())
      return 0;
    return Thread.Wait_Close();
  }
};

// Parameters and result of one thread of the frequency benchmark.
struct CFreqInfo: public CBaseThreadInfo
{
  UInt32 ValRes;          // out: final sum
  UInt32 Size;            // in: (num) argument for CountCpuFreq()
  UInt64 NumIterations;   // in: number of CountCpuFreq() calls
};

// Thread function: calls CountCpuFreq() (NumIterations) times.
// The loop stops early if Callback->CheckBreak() returns an error;
// that error is stored in (CallbackRes).
static THREAD_FUNC_DECL FreqThreadFunction(void *param)
{
  CFreqInfo *p = (CFreqInfo *)param;

  UInt32 sum = g_BenchCpuFreqTemp;
  for (UInt64 k = p->NumIterations; k > 0; k--)
  {
    if (p->Callback)
    {
      p->CallbackRes = p->Callback->CheckBreak();
      if (p->CallbackRes != S_OK)
        break;
    }
    sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
  }
  p->ValRes = sum;
  return THREAD_FUNC_RET_ZERO;
}

// Owner of the CFreqInfo array. The destructor waits for all threads
// that are counted in (NumThreads) and then deletes the array.
struct CFreqThreads
{
  CFreqInfo *Items;
  UInt32 NumThreads;

  CFreqThreads(): Items(NULL), NumThreads(0) {}

  // waits for (NumThreads) threads; returns the first error; sets (NumThreads) to 0
  WRes WaitAll()
  {
    WRes wres = 0;
    for (UInt32 i = 0; i < NumThreads; i++)
    {
      WRes wres2 = Items[i].Wait_If_Created();
      if (wres == 0 && wres2 != 0)
        wres = wres2;
    }
    NumThreads = 0;
    return wres;
  }

  ~CFreqThreads()
  {
    WaitAll();
    delete []Items;
  }
};


static THREAD_FUNC_DECL
CrcThreadFunction(void *param);

// State of one thread of the hash benchmark.
// Only (Res) is initialized by the constructor (to E_FAIL);
// all other fields must be set by the code that creates the item.
struct CCrcInfo: public CBaseThreadInfo
{
  const Byte *Data;       // in: source data; passed to crcib.Generate()
  size_t Size;            // in: size of the data
  UInt64 NumIterations;   // in: number of hashing iterations
  bool CheckSumDefined;   // in: true, if (CheckSum) is the expected check sum
  UInt32 CheckSum;
  CMyComPtr Hasher;
  HRESULT Res;            // out: result of Process()
  UInt32 CheckSum_Res;    // out: copy of crcib.CheckSum_Res

  #ifndef Z7_ST
  NSynchronization::CManualResetEvent ReadyEvent;   // is set by the thread after Generate()
  UInt32 ThreadIndex;
  CBenchSyncCommon *Common;
  CAffinityMode AffinityMode;
  #endif

  // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
  // so we use non-local CCrcInfo_Base.
  CCrcInfo_Base crcib;

  // creates (ReadyEvent), if it was not created, and starts the thread
  // that executes CrcThreadFunction() for this object
  HRESULT CreateThread()
  {
    WRes res = 0;
    if (!ReadyEvent.IsCreated())
      res = ReadyEvent.Create();
    if (res == 0)
      res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
          ThreadIndex);
    return HRESULT_FROM_WIN32(res);
  }

  #ifdef USE_ALLOCA
  size_t AllocaSize;
  #endif

  void Process();

  CCrcInfo(): Res(E_FAIL) {}
};

static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
// static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test

/*
  Process() is the body of a hash benchmark thread:
    1) it prepares the data with crcib.Generate(),
    2) it sets (ReadyEvent), so the main thread can see that the data is ready,
    3) it waits for (Common->StartEvent),
    4) it returns without hashing if (Common->ExitMode) is set;
       otherwise it runs crcib.CrcProcess() and stores the results.
  Any error is stored in (Res).
*/
void CCrcInfo::Process()
{
  crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
  // we can use additional Generate() passes to reduce some time effects for new page allocation
  // for (unsigned y = 0; y < 10; y++)
  Res = crcib.Generate(Data, Size);

  // if (Common)
  {
    // ReadyEvent is set even if Generate() failed,
    // so the waiting thread is not blocked
    WRes wres = ReadyEvent.Set();
    if (wres != 0)
    {
      if (Res == 0)
        Res = HRESULT_FROM_WIN32(wres);
      return;
    }
    if (Res != 0)
      return;

    wres = Common->StartEvent.Lock();

    if (wres != 0)
    {
      Res = HRESULT_FROM_WIN32(wres);
      return;
    }
    if (Common->ExitMode)
      return;
  }

  Res = crcib.CrcProcess(NumIterations,
      CheckSumDefined ? &CheckSum : NULL, Hasher,
      Callback);
  CheckSum_Res = crcib.CheckSum_Res;
  /*
  We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
  to time of benchmark.
So we don't free Buffer here */ // crcib.Buffer.Free(); } static THREAD_FUNC_DECL CrcThreadFunction(void *param) { CCrcInfo *p = (CCrcInfo *)param; #ifdef USE_ALLOCA alloca(p->AllocaSize); #endif p->Process(); return THREAD_FUNC_RET_ZERO; } struct CCrcThreads { CCrcInfo *Items; unsigned NumThreads; CBenchSyncCommon Common; bool NeedClose; CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {} WRes StartAndWait(bool exitMode = false); ~CCrcThreads() { StartAndWait(true); delete []Items; } }; WRes CCrcThreads::StartAndWait(bool exitMode) { if (!NeedClose) return 0; Common.ExitMode = exitMode; WRes wres = Common.StartEvent.Set(); for (unsigned i = 0; i < NumThreads; i++) { WRes wres2 = Items[i].Wait_If_Created(); if (wres == 0 && wres2 != 0) wres = wres2; } NumThreads = 0; NeedClose = false; return wres; } #endif static UInt32 CrcCalc1(const Byte *buf, size_t size) { UInt32 crc = CRC_INIT_VAL; for (size_t i = 0; i < size; i++) crc = CRC_UPDATE_BYTE(crc, buf[i]); return CRC_GET_DIGEST(crc); } /* static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG) { RandGen(buf, size, RG); return CrcCalc1(buf, size); } */ static bool CrcInternalTest() { CAlignedBuffer buffer; const size_t kBufferSize0 = (1 << 8); const size_t kBufferSize1 = (1 << 10); const unsigned kCheckSize = (1 << 5); buffer.Alloc(kBufferSize0 + kBufferSize1); if (!buffer.IsAllocated()) return false; Byte *buf = (Byte *)buffer; size_t i; for (i = 0; i < kBufferSize0; i++) buf[i] = (Byte)i; UInt32 crc1 = CrcCalc1(buf, kBufferSize0); if (crc1 != 0x29058C73) return false; RandGen(buf + kBufferSize0, kBufferSize1); for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++) for (unsigned j = 0; j < kCheckSize; j++) if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j)) return false; return true; } struct CBenchMethod { unsigned Weight; unsigned DictBits; Int32 EncComplex; Int32 DecComplexCompr; Int32 DecComplexUnc; const char *Name; // unsigned KeySize; }; // #define USE_SW_CMPLX #ifdef 
USE_SW_CMPLX #define CMPLX(x) ((x) * 1000) #else #define CMPLX(x) (x) #endif static const CBenchMethod g_Bench[] = { // { 40, 17, 357, 145, 20, "LZMA:x1" }, // { 20, 18, 360, 145, 20, "LZMA2:x1:mt2" }, { 20, 18, 360, 145, 20, "LZMA:x1" }, { 20, 22, 600, 145, 20, "LZMA:x3" }, { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" }, { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" }, { 10, 16, 124, 40, 14, "Deflate:x1" }, { 20, 16, 376, 40, 14, "Deflate:x5" }, { 10, 16, 1082, 40, 14, "Deflate:x7" }, { 10, 17, 422, 40, 14, "Deflate64:x5" }, { 10, 15, 590, 69, 69, "BZip2:x1" }, { 20, 19, 815, 122, 122, "BZip2:x5" }, { 10, 19, 815, 122, 122, "BZip2:x5:mt2" }, { 10, 19, 2530, 122, 122, "BZip2:x7" }, // { 10, 18, 1010, 0, 1150, "PPMDZip:x1" }, { 10, 18, 1010, 0, 1150, "PPMD:x1" }, // { 10, 22, 1655, 0, 1830, "PPMDZip:x5" }, { 10, 22, 1655, 0, 1830, "PPMD:x5" }, // { 2, 0, -16, 0, -16, "Swap2" }, { 2, 0, -16, 0, -16, "Swap4" }, // { 2, 0, 3, 0, 4, "Delta:1" }, // { 2, 0, 3, 0, 4, "Delta:2" }, // { 2, 0, 3, 0, 4, "Delta:3" }, { 2, 0, 3, 0, 4, "Delta:4" }, // { 2, 0, 3, 0, 4, "Delta:8" }, // { 2, 0, 3, 0, 4, "Delta:32" }, { 2, 0, 2, 0, 2, "BCJ" }, { 2, 0, 1, 0, 1, "ARM64" }, // { 10, 0, 18, 0, 18, "AES128CBC:1" }, // { 10, 0, 21, 0, 21, "AES192CBC:1" }, { 10, 0, 24, 0, 24, "AES256CBC:1" }, // { 10, 0, 18, 0, 18, "AES128CTR:1" }, // { 10, 0, 21, 0, 21, "AES192CTR:1" }, // { 10, 0, 24, 0, 24, "AES256CTR:1" }, // { 2, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" }, // { 2, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" }, { 2, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" }, // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" }, // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" }, // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" }, // { 1, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:3" }, // { 1, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:3" }, { 1, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:3" } // { 1, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:3" }, // { 1, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:3" }, // { 1, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:3" 
}, }; struct CBenchHash { unsigned Weight; UInt32 Complex; UInt32 CheckSum; const char *Name; }; // #define ARM_CRC_MUL 100 #define ARM_CRC_MUL 1 #define k_Hash_Complex_Mult 256 static const CBenchHash g_Hash[] = { // { 1, 1820, 0x21e207bb, "CRC32:1" }, // { 10, 558, 0x21e207bb, "CRC32:4" }, { 20, 339, 0x21e207bb, "CRC32:8" } , { 2, 128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" }, { 2, 64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" }, { 10, 512, 0x41b901d1, "CRC64" }, { 10, 5100, 0x7913ba03, "SHA256:1" }, { 2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" }, { 10, 2340, 0xff769021, "SHA1:1" }, { 2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" }, { 2, 5500, 0x85189d02, "BLAKE2sp" } }; static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size) { char s[128]; unsigned startPos = (unsigned)sizeof(s) - 32; memset(s, ' ', startPos); ConvertUInt64ToString(value, s + startPos); // if (withSpace) { startPos--; size++; } unsigned len = (unsigned)strlen(s + startPos); if (size > len) { size -= len; if (startPos < size) startPos = 0; else startPos -= size; } f.Print(s + startPos); } static const unsigned kFieldSize_Name = 12; static const unsigned kFieldSize_SmallName = 4; static const unsigned kFieldSize_Speed = 9; static const unsigned kFieldSize_Usage = 5; static const unsigned kFieldSize_RU = 6; static const unsigned kFieldSize_Rating = 6; static const unsigned kFieldSize_EU = 5; static const unsigned kFieldSize_Effec = 5; static const unsigned kFieldSize_CrcSpeed = 8; static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating; static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec; static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size) { PrintNumber(f, (rating + 500000) / 1000000, size); } static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size) { UInt64 v = 0; if (divider != 0) v = (val * 100 + divider / 2) 
/ divider; PrintNumber(f, v, size); } static void PrintChars(IBenchPrintCallback &f, char c, unsigned size) { char s[256]; memset(s, (Byte)c, size); s[size] = 0; f.Print(s); } static void PrintSpaces(IBenchPrintCallback &f, unsigned size) { PrintChars(f, ' ', size); } static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size) { PrintNumber(f, Benchmark_GetUsage_Percents(usage), size); } static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq) { PrintUsage(f, usage, kFieldSize_Usage); PrintRating(f, rpu, kFieldSize_RU); PrintRating(f, rating, kFieldSize_Rating); if (showFreq) { if (cpuFreq == 0) PrintSpaces(f, kFieldSize_EUAndEffec); else { PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU); PrintPercents(f, rating, cpuFreq, kFieldSize_Effec); } } } void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info) { Speed = info.GetUnpackSizeSpeed(); Usage = info.GetUsage(); RPU = info.GetRatingPerUsage(Rating); } void CTotalBenchRes::Mult_For_Weight(unsigned weight) { NumIterations2 *= weight; RPU *= weight; Rating *= weight; Usage *= weight; Speed *= weight; } void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r) { Rating += r.Rating; Usage += r.Usage; RPU += r.RPU; Speed += r.Speed; // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1); NumIterations2 += r.NumIterations2; } static void PrintResults(IBenchPrintCallback *f, const CBenchInfo &info, unsigned weight, UInt64 rating, bool showFreq, UInt64 cpuFreq, CTotalBenchRes *res) { CTotalBenchRes t; t.Rating = rating; t.NumIterations2 = 1; t.Generate_From_BenchInfo(info); if (f) { if (t.Speed != 0) PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed); else PrintSpaces(*f, 1 + kFieldSize_Speed); } if (f) { PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq); } if (res) { // res->NumIterations1++; t.Mult_For_Weight(weight); res->Update_With_Res(t); } } static void 
PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res) { const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1; const UInt64 speed = res.Speed / numIterations2; if (showSpeed && speed != 0) PrintNumber(f, speed / 1024, kFieldSize_Speed); else PrintSpaces(f, 1 + kFieldSize_Speed); // PrintSpaces(f, 1 + kFieldSize_Speed); // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1; PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq); } static void PrintHex(AString &s, UInt64 v) { char temp[32]; ConvertUInt64ToHex(v, temp); s += temp; } AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) { AString s; // s.Add_UInt32(ti.numProcessThreads); unsigned numSysThreads = ti.GetNumSystemThreads(); if (ti.GetNumProcessThreads() != numSysThreads) { // if (ti.numProcessThreads != ti.numSysThreads) { s += " / "; s.Add_UInt32(numSysThreads); } s += " : "; #ifdef _WIN32 PrintHex(s, ti.processAffinityMask); s += " / "; PrintHex(s, ti.systemAffinityMask); #else unsigned i = (numSysThreads + 3) & ~(unsigned)3; if (i == 0) i = 4; for (; i >= 4; ) { i -= 4; unsigned val = 0; for (unsigned k = 0; k < 4; k++) { const unsigned bit = (ti.IsCpuSet(i + k) ? 
1 : 0);
        val += (bit << k);
      }
      PrintHex(s, val);
    }
    #endif
  }
  return s;
}


#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

// Appends the large pages information "(LP-<size>[-1G][-NA])" to the string (s):
//   "-1G" is added, if CPU_IsSupported_PageGB() returns true (x86 / x64 only),
//   "-NA" is added, if the large pages mode is not active.
// Nothing is added in non-Windows builds.
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32
  if (g_LargePagesMode || g_LargePageSize != 0)
  {
    s.Add_OptSpaced("(LP-");
    PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
    #ifdef MY_CPU_X86_OR_AMD64
    if (CPU_IsSupported_PageGB())
      s += "-1G";
    #endif
    if (!g_LargePagesMode)
      s += "-NA";
    s += ")";
  }
  #else
    s += "";
  #endif
}

#endif



// Prints the requirements line: RAM size in MB (or '?' if the size is
// not defined), the large pages information, and the number of threads.
static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
    bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
{
  f.Print("RAM ");
  f.Print(sizeString);
  if (size_Defined)
    PrintNumber(f, (size >> 20), 6);
  else
    f.Print(" ?");
  f.Print(" MB");

  #ifdef Z7_LARGE_PAGES
  {
    AString s;
    Add_LargePages_String(s);
    f.Print(s);
  }
  #endif

  f.Print(", # ");
  f.Print(threadsString);
  PrintNumber(f, numThreads, 3);
}



/*
  CBenchCallbackToPrint: IBenchCallback implementation that prints the results
  of each method to (_file) and accumulates the totals in (EncodeRes / DecodeRes).

  NOTE: the constructor does not initialize (DictSize) and (_file);
  they must be set by the caller before the first callback call.
*/
struct CBenchCallbackToPrint Z7_final: public IBenchCallback
{
  bool NeedPrint;
  bool Use2Columns;           // decoding results are printed in the same line as encoding results
  bool ShowFreq;
  unsigned NameFieldSize;

  unsigned EncodeWeight;
  unsigned DecodeWeight;

  UInt64 CpuFreq;
  UInt64 DictSize;

  IBenchPrintCallback *_file;
  CBenchProps BenchProps;
  CTotalBenchRes EncodeRes;
  CTotalBenchRes DecodeRes;

  // final results: [0] - encoding, [1] - decoding
  CBenchInfo BenchInfo_Results[2];

  CBenchCallbackToPrint():
      NeedPrint(true),
      Use2Columns(false),
      ShowFreq(false),
      NameFieldSize(0),
      EncodeWeight(1),
      DecodeWeight(1),
      CpuFreq(0)
      {}

  void Init() { EncodeRes.Init(); DecodeRes.Init(); }
  void Print(const char *s);
  void NewLine();

  HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
  HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
  HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
};

// Stores the frequency options that are used when the results are printed.
HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
{
  ShowFreq = showFreq;
  CpuFreq = cpuFreq;
  return S_OK;
}

// Called with the results of encoding. Only the (final) results are stored
// in BenchInfo_Results[0] and printed (if NeedPrint is set).
HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
{
  RINOK(_file->CheckBreak())
  if (final)
BenchInfo_Results[0] = info; if (final) if (NeedPrint) { const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations); PrintResults(_file, info, EncodeWeight, rating, ShowFreq, CpuFreq, &EncodeRes); if (!Use2Columns) _file->NewLine(); } return S_OK; } static const char * const kSep = " | "; HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final) { RINOK(_file->CheckBreak()) if (final) BenchInfo_Results[1] = info; if (final) if (NeedPrint) { const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations); if (Use2Columns) _file->Print(kSep); else PrintSpaces(*_file, NameFieldSize); CBenchInfo info2 = info; info2.UnpackSize *= info2.NumIterations; info2.PackSize *= info2.NumIterations; info2.NumIterations = 1; PrintResults(_file, info2, DecodeWeight, rating, ShowFreq, CpuFreq, &DecodeRes); } return S_OK; } void CBenchCallbackToPrint::Print(const char *s) { _file->Print(s); } void CBenchCallbackToPrint::NewLine() { _file->NewLine(); } static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size) { f.Print(s); int numSpaces = (int)size - (int)MyStringLen(s); if (numSpaces > 0) PrintSpaces(f, (unsigned)numSpaces); } static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size) { int numSpaces = (int)size - (int)MyStringLen(s); if (numSpaces > 0) PrintSpaces(f, (unsigned)numSpaces); f.Print(s); } static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name) { UString wildc = GetUnicodeString(mask); UString bname = GetUnicodeString(name); wildc.MakeLower_Ascii(); bname.MakeLower_Ascii(); return DoesWildcardMatchName(wildc, bname); } static HRESULT TotalBench( DECL_EXTERNAL_CODECS_LOC_VARS const COneMethodInfo &methodMask, UInt64 complexInCommands, #ifndef Z7_ST UInt32 numThreads, const CAffinityMode *affinityMode, #endif bool forceUnpackSize, size_t unpackSize, 
const Byte *fileData, IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback) { for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) { const CBenchMethod &bench = g_Bench[i]; if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name)) continue; PrintLeft(*callback->_file, bench.Name, kFieldSize_Name); { unsigned keySize = 32; if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16; else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24; callback->BenchProps.KeySize = keySize; } callback->BenchProps.DecComplexUnc = bench.DecComplexUnc; callback->BenchProps.DecComplexCompr = bench.DecComplexCompr; callback->BenchProps.EncComplex = bench.EncComplex; COneMethodInfo method; NCOM::CPropVariant propVariant; propVariant = bench.Name; RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant)) size_t unpackSize2 = unpackSize; if (!forceUnpackSize && bench.DictBits == 0) unpackSize2 = kFilterUnpackSize; callback->EncodeWeight = bench.Weight; callback->DecodeWeight = bench.Weight; const HRESULT res = MethodBench( EXTERNAL_CODECS_LOC_VARS complexInCommands, #ifndef Z7_ST false, numThreads, affinityMode, #endif method, unpackSize2, fileData, bench.DictBits, printCallback, callback, &callback->BenchProps); if (res == E_NOTIMPL) { // callback->Print(" ---"); // we need additional empty line as line for decompression results if (!callback->Use2Columns) callback->NewLine(); } else { RINOK(res) } callback->NewLine(); } return S_OK; } struct CFreqBench { // in: UInt64 complexInCommands; UInt32 numThreads; bool showFreq; UInt64 specifiedFreq; // out: UInt64 CpuFreqRes; UInt64 UsageRes; UInt32 res; CFreqBench() {} HRESULT FreqBench(IBenchPrintCallback *_file #ifndef Z7_ST , const CAffinityMode *affinityMode #endif ); }; HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file #ifndef Z7_ST , const CAffinityMode *affinityMode #endif ) { res = 0; CpuFreqRes = 0; UsageRes = 0; if (numThreads == 0) numThreads = 1; #ifdef Z7_ST 
numThreads = 1; #endif const UInt32 complexity = kNumFreqCommands; UInt64 numIterations = complexInCommands / complexity; UInt32 numIterations2 = 1 << 30; if (numIterations > numIterations2) numIterations /= numIterations2; else { numIterations2 = (UInt32)numIterations; numIterations = 1; } CBenchInfoCalc progressInfoSpec; #ifndef Z7_ST bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity(); if (mtMode) { CFreqThreads threads; threads.Items = new CFreqInfo[numThreads]; UInt32 i; for (i = 0; i < numThreads; i++) { CFreqInfo &info = threads.Items[i]; info.Callback = _file; info.CallbackRes = S_OK; info.NumIterations = numIterations; info.Size = numIterations2; } progressInfoSpec.SetStartTime(); for (i = 0; i < numThreads; i++) { // Sleep(10); CFreqInfo &info = threads.Items[i]; WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i); if (info.Thread.IsCreated()) threads.NumThreads++; if (wres != 0) return HRESULT_FROM_WIN32(wres); } WRes wres = threads.WaitAll(); if (wres != 0) return HRESULT_FROM_WIN32(wres); for (i = 0; i < numThreads; i++) { RINOK(threads.Items[i].CallbackRes) } } else #endif { progressInfoSpec.SetStartTime(); UInt32 sum = g_BenchCpuFreqTemp; for (UInt64 k = numIterations; k > 0; k--) { sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp); if (_file) { RINOK(_file->CheckBreak()) } } res += sum; } if (res == 0x12345678) if (_file) { RINOK(_file->CheckBreak()) } CBenchInfo info; progressInfoSpec.SetFinishTime(info); info.UnpackSize = 0; info.PackSize = 0; info.NumIterations = 1; const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity; const UInt64 rating = info.GetSpeed(numCommands); CpuFreqRes = rating / numThreads; UsageRes = info.GetUsage(); if (_file) { PrintResults(_file, info, 0, // weight rating, showFreq, showFreq ? (specifiedFreq != 0 ? 
specifiedFreq : CpuFreqRes) : 0, NULL); RINOK(_file->CheckBreak()) } return S_OK; } static HRESULT CrcBench( DECL_EXTERNAL_CODECS_LOC_VARS UInt64 complexInCommands, UInt32 numThreads, const size_t bufferSize, const Byte *fileData, UInt64 &speed, UInt64 &usage, UInt32 complexity, unsigned benchWeight, const UInt32 *checkSum, const COneMethodInfo &method, IBenchPrintCallback *_file, #ifndef Z7_ST const CAffinityMode *affinityMode, #endif bool showRating, CTotalBenchRes *encodeRes, bool showFreq, UInt64 cpuFreq) { if (numThreads == 0) numThreads = 1; #ifdef Z7_ST numThreads = 1; #endif const AString &methodName = method.MethodName; // methodName.RemoveChar(L'-'); CMethodId hashID; if (!FindHashMethod( EXTERNAL_CODECS_LOC_VARS methodName, hashID)) return E_NOTIMPL; /* // if will generate random data in each thread, instead of global data CMidAlignedBuffer buffer; if (!fileData) { ALLOC_WITH_HRESULT(&buffer, bufferSize) RandGen(buffer, bufferSize); fileData = buffer; } */ const size_t bsize = (bufferSize == 0 ? 
1 : bufferSize); UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize; if (numIterations == 0) numIterations = 1; CBenchInfoCalc progressInfoSpec; CBenchInfo info; #ifndef Z7_ST bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity(); if (mtEncMode) { CCrcThreads threads; threads.Items = new CCrcInfo[numThreads]; { WRes wres = threads.Common.StartEvent.Create(); if (wres != 0) return HRESULT_FROM_WIN32(wres); threads.NeedClose = true; } UInt32 i; for (i = 0; i < numThreads; i++) { CCrcInfo &ci = threads.Items[i]; AString name; RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher)) if (!ci.Hasher) return E_NOTIMPL; CMyComPtr scp; ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp); if (scp) { RINOK(method.SetCoderProps(scp)) } ci.Callback = _file; ci.Data = fileData; ci.NumIterations = numIterations; ci.Size = bufferSize; ci.CheckSumDefined = false; if (checkSum) { ci.CheckSum = *checkSum; ci.CheckSumDefined = true; } #ifdef USE_ALLOCA ci.AllocaSize = (i * 16 * 21) & 0x7FF; #endif } for (i = 0; i < numThreads; i++) { CCrcInfo &ci = threads.Items[i]; ci.ThreadIndex = i; ci.Common = &threads.Common; ci.AffinityMode = *affinityMode; HRESULT hres = ci.CreateThread(); if (ci.Thread.IsCreated()) threads.NumThreads++; if (hres != 0) return hres; } for (i = 0; i < numThreads; i++) { CCrcInfo &ci = threads.Items[i]; WRes wres = ci.ReadyEvent.Lock(); if (wres != 0) return HRESULT_FROM_WIN32(wres); RINOK(ci.Res) } progressInfoSpec.SetStartTime(); WRes wres = threads.StartAndWait(); if (wres != 0) return HRESULT_FROM_WIN32(wres); progressInfoSpec.SetFinishTime(info); for (i = 0; i < numThreads; i++) { RINOK(threads.Items[i].Res) if (i != 0) if (threads.Items[i].CheckSum_Res != threads.Items[i - 1].CheckSum_Res) return S_FALSE; } } else #endif { CMyComPtr hasher; AString name; RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher)) if (!hasher) return E_NOTIMPL; CMyComPtr scp; 
hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp); if (scp) { RINOK(method.SetCoderProps(scp)) } CCrcInfo_Base crcib; crcib.CreateLocalBuf = false; RINOK(crcib.Generate(fileData, bufferSize)) progressInfoSpec.SetStartTime(); RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file)) progressInfoSpec.SetFinishTime(info); } UInt64 unpSize = numIterations * bufferSize; UInt64 unpSizeThreads = unpSize * numThreads; info.UnpackSize = unpSizeThreads; info.PackSize = unpSizeThreads; info.NumIterations = 1; if (_file) { if (showRating) { UInt64 unpSizeThreads2 = unpSizeThreads; if (unpSizeThreads2 == 0) unpSizeThreads2 = numIterations * 1 * numThreads; const UInt64 numCommands = unpSizeThreads2 * complexity / 256; const UInt64 rating = info.GetSpeed(numCommands); PrintResults(_file, info, benchWeight, rating, showFreq, cpuFreq, encodeRes); } RINOK(_file->CheckBreak()) } speed = info.GetSpeed(unpSizeThreads); usage = info.GetUsage(); return S_OK; } static HRESULT TotalBench_Hash( DECL_EXTERNAL_CODECS_LOC_VARS const COneMethodInfo &methodMask, UInt64 complexInCommands, UInt32 numThreads, size_t bufSize, const Byte *fileData, IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback, #ifndef Z7_ST const CAffinityMode *affinityMode, #endif CTotalBenchRes *encodeRes, bool showFreq, UInt64 cpuFreq) { for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++) { const CBenchHash &bench = g_Hash[i]; if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name)) continue; PrintLeft(*callback->_file, bench.Name, kFieldSize_Name); // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc; // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr; // callback->BenchProps.EncComplex = bench.EncComplex; COneMethodInfo method; NCOM::CPropVariant propVariant; propVariant = bench.Name; RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant)) UInt64 speed, usage; const HRESULT res = CrcBench( EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads, 
bufSize, fileData, speed, usage, bench.Complex, bench.Weight, (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL, method, printCallback, #ifndef Z7_ST affinityMode, #endif true, // showRating encodeRes, showFreq, cpuFreq); if (res == E_NOTIMPL) { // callback->Print(" ---"); } else { RINOK(res) } callback->NewLine(); } return S_OK; } struct CTempValues { UInt64 *Values; CTempValues(): Values(NULL) {} void Alloc(UInt32 num) { Values = new UInt64[num]; } ~CTempValues() { delete []Values; } }; static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop) { const wchar_t *end; UInt64 result = ConvertStringToUInt64(s, &end); if (*end != 0 || s.IsEmpty()) prop = s; else if (result <= (UInt32)0xFFFFFFFF) prop = (UInt32)result; else prop = result; } static bool AreSameMethodNames(const char *fullName, const char *shortName) { return StringsAreEqualNoCase_Ascii(fullName, shortName); } static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads) { PrintRequirements(f, "usage:", true, usage, "Benchmark threads: ", threads); } static void Print_Delimiter(IBenchPrintCallback &f) { f.Print(" |"); } static void Print_Pow(IBenchPrintCallback &f, unsigned pow) { char s[16]; ConvertUInt32ToString(pow, s); unsigned pos = MyStringLen(s); s[pos++] = ':'; s[pos] = 0; PrintLeft(f, s, kFieldSize_SmallName); // 4 } static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f, UInt64 usage, UInt64 speed) { PrintUsage(f, usage, kFieldSize_Usage); PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed); } HRESULT Bench( DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback *printCallback, IBenchCallback *benchCallback, const CObjectVector &props, UInt32 numIterations, bool multiDict, IBenchFreqCallback *freqCallback) { if (!CrcInternalTest()) return E_FAIL; UInt32 numCPUs = 1; UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29; NSystem::CProcessAffinity threadsInfo; threadsInfo.InitST(); #ifndef Z7_ST if (threadsInfo.Get() && 
threadsInfo.GetNumProcessThreads() != 0) numCPUs = threadsInfo.GetNumProcessThreads(); else numCPUs = NSystem::GetNumberOfProcessors(); #endif // numCPUs = 24; /* { DWORD_PTR mask = (1 << 0); DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask); old = old; DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask); old2 = old2; return 0; } */ bool ramSize_Defined = NSystem::GetRamSize(ramSize); UInt32 numThreadsSpecified = numCPUs; bool needSetComplexity = false; UInt32 testTimeMs = kComplexInMs; UInt32 startDicLog = 22; bool startDicLog_Defined = false; UInt64 specifiedFreq = 0; bool multiThreadTests = false; UInt64 complexInCommands = kComplexInCommands; UInt32 numThreads_Start = 1; #ifndef Z7_ST CAffinityMode affinityMode; #endif COneMethodInfo method; CMidAlignedBuffer fileDataBuffer; bool use_fileData = false; bool isFixedDict = false; { unsigned i; if (printCallback) { for (i = 0; i < props.Size(); i++) { const CProperty &property = props[i]; printCallback->Print(" "); printCallback->Print(GetAnsiString(property.Name)); if (!property.Value.IsEmpty()) { printCallback->Print("="); printCallback->Print(GetAnsiString(property.Value)); } } if (!props.IsEmpty()) printCallback->NewLine(); } for (i = 0; i < props.Size(); i++) { const CProperty &property = props[i]; UString name (property.Name); name.MakeLower_Ascii(); if (name.IsEqualTo("file")) { if (property.Value.IsEmpty()) return E_INVALIDARG; NFile::NIO::CInFile file; if (!file.Open(us2fs(property.Value))) return GetLastError_noZero_HRESULT(); size_t len; { UInt64 len64; if (!file.GetLength(len64)) return GetLastError_noZero_HRESULT(); if (printCallback) { printCallback->Print("file size ="); PrintNumber(*printCallback, len64, 0); printCallback->NewLine(); } len = (size_t)len64; if (len != len64) return E_INVALIDARG; } // (len == 0) is allowed. 
Also it's allowed if Alloc(0) returns NULL here ALLOC_WITH_HRESULT(&fileDataBuffer, len) use_fileData = true; { size_t processed; if (!file.ReadFull((Byte *)fileDataBuffer, len, processed)) return GetLastError_noZero_HRESULT(); if (processed != len) return E_FAIL; } continue; } NCOM::CPropVariant propVariant; if (!property.Value.IsEmpty()) ParseNumberString(property.Value, propVariant); if (name.IsEqualTo("time")) { RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs)) needSetComplexity = true; testTimeMs *= 1000; continue; } if (name.IsEqualTo("timems")) { RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs)) needSetComplexity = true; continue; } if (name.IsEqualTo("tic")) { UInt32 v; RINOK(ParsePropToUInt32(UString(), propVariant, v)) if (v >= 64) return E_INVALIDARG; complexInCommands = (UInt64)1 << v; continue; } const bool isCurrent_fixedDict = name.IsEqualTo("df"); if (isCurrent_fixedDict) isFixedDict = true; if (isCurrent_fixedDict || name.IsEqualTo("ds")) { RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog)) if (startDicLog > 32) return E_INVALIDARG; startDicLog_Defined = true; continue; } if (name.IsEqualTo("mts")) { RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start)) continue; } if (name.IsEqualTo("af")) { UInt32 bundle; RINOK(ParsePropToUInt32(UString(), propVariant, bundle)) if (bundle > 0 && bundle < numCPUs) { #ifndef Z7_ST affinityMode.SetLevels(numCPUs, 2); affinityMode.NumBundleThreads = bundle; #endif } continue; } if (name.IsEqualTo("freq")) { UInt32 freq32 = 0; RINOK(ParsePropToUInt32(UString(), propVariant, freq32)) if (freq32 == 0) return E_INVALIDARG; specifiedFreq = (UInt64)freq32 * 1000000; if (printCallback) { printCallback->Print("freq="); PrintNumber(*printCallback, freq32, 0); printCallback->NewLine(); } continue; } if (name.IsPrefixedBy_Ascii_NoCase("mt")) { const UString s = name.Ptr(2); if (s.IsEqualTo("*") || (s.IsEmpty() && propVariant.vt == VT_BSTR && 
StringsAreEqual_Ascii(propVariant.bstrVal, "*"))) { multiThreadTests = true; continue; } #ifndef Z7_ST RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified)) #endif continue; } RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant)) } } if (printCallback) { AString s; #ifndef _WIN32 s += "Compiler: "; GetCompiler(s); printCallback->Print(s); printCallback->NewLine(); s.Empty(); #endif GetSystemInfoText(s); printCallback->Print(s); printCallback->NewLine(); } if (printCallback) { printCallback->Print("1T CPU Freq (MHz):"); } if (printCallback || freqCallback) { UInt64 numMilCommands = 1 << 6; if (specifiedFreq != 0) { while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000)) numMilCommands >>= 1; } for (int jj = 0;; jj++) { if (printCallback) RINOK(printCallback->CheckBreak()) UInt64 start = ::GetTimeCount(); UInt32 sum = (UInt32)start; sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp); if (sum == 0xF1541213) if (printCallback) printCallback->Print(""); const UInt64 realDelta = ::GetTimeCount() - start; start = realDelta; if (start == 0) start = 1; if (start > (UInt64)1 << 61) start = 1; const UInt64 freq = GetFreq(); // mips is constant in some compilers const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start); const UInt64 mipsVal = numMilCommands * freq / start; if (printCallback) { if (realDelta == 0) { printCallback->Print(" -"); } else { // PrintNumber(*printCallback, start, 0); PrintNumber(*printCallback, mipsVal, 5); } } if (freqCallback) { RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult)) } if (jj >= 1) { bool needStop = (numMilCommands >= (1 << #ifdef _DEBUG 7 #else 11 #endif )); if (start >= freq * 16) { printCallback->Print(" (Cmplx)"); if (!freqCallback) // we don't want complexity change for old gui lzma benchmark { needSetComplexity = true; } needStop = true; } if (needSetComplexity) SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, 
complexInCommands); if (needStop) break; numMilCommands <<= 1; } } if (freqCallback) { RINOK(freqCallback->FreqsFinished(1)) } } if (numThreadsSpecified >= 2) if (printCallback || freqCallback) { if (printCallback) printCallback->NewLine(); /* it can show incorrect frequency for HT threads. so we reduce freq test to (numCPUs / 2) */ UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified; if (numThreads < 1) numThreads = 1; if (printCallback) { char s[128]; ConvertUInt64ToString(numThreads, s); printCallback->Print(s); printCallback->Print("T CPU Freq (MHz):"); } UInt64 numMilCommands = 1 << #ifdef _DEBUG 7; #else 10; #endif if (specifiedFreq != 0) { while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000)) numMilCommands >>= 1; } // for (int jj = 0;; jj++) for (;;) { if (printCallback) RINOK(printCallback->CheckBreak()) { // PrintLeft(f, "CPU", kFieldSize_Name); // UInt32 resVal; CFreqBench fb; fb.complexInCommands = numMilCommands * 1000000; fb.numThreads = numThreads; // showFreq; // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); fb.showFreq = true; fb.specifiedFreq = 1; const HRESULT res = fb.FreqBench(NULL /* printCallback */ #ifndef Z7_ST , &affinityMode #endif ); RINOK(res) if (freqCallback) { RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes)) } if (printCallback) { /* if (realDelta == 0) { printCallback->Print(" -"); } else */ { // PrintNumber(*printCallback, start, 0); PrintUsage(*printCallback, fb.UsageRes, 3); printCallback->Print("%"); PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0); printCallback->Print(" "); // PrintNumber(*printCallback, fb.UsageRes, 5); } } } // if (jj >= 1) { const bool needStop = (numMilCommands >= (1 << #ifdef _DEBUG 7 #else 11 #endif )); if (needStop) break; numMilCommands <<= 1; } } if (freqCallback) { RINOK(freqCallback->FreqsFinished(numThreads)) } } if (printCallback) { printCallback->NewLine(); printCallback->NewLine(); 
PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs); printCallback->Print(GetProcessThreadsInfo(threadsInfo)); printCallback->NewLine(); } if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax) return E_INVALIDARG; UInt64 dict = (UInt64)1 << startDicLog; const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict)); const unsigned level = method.GetLevel(); AString &methodName = method.MethodName; const AString original_MethodName = methodName; if (methodName.IsEmpty()) methodName = "LZMA"; if (benchCallback) { CBenchProps benchProps; benchProps.SetLzmaCompexity(); const UInt64 dictSize = method.Get_Lzma_DicSize(); size_t uncompressedDataSize; if (use_fileData) { uncompressedDataSize = fileDataBuffer.Size(); } else { uncompressedDataSize = kAdditionalSize + (size_t)dictSize; if (uncompressedDataSize < dictSize) return E_INVALIDARG; } return MethodBench( EXTERNAL_CODECS_LOC_VARS complexInCommands, #ifndef Z7_ST true, numThreadsSpecified, &affinityMode, #endif method, uncompressedDataSize, (const Byte *)fileDataBuffer, kOldLzmaDictBits, printCallback, benchCallback, &benchProps); } if (methodName.IsEqualTo_Ascii_NoCase("CRC")) methodName = "crc32"; CMethodId hashID; const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID); int codecIndex = -1; bool isFilter = false; if (!isHashMethod) { UInt32 numStreams; codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName, true, // encode hashID, numStreams, isFilter); // we can allow non filter for BW tests if (!isFilter) codecIndex = -1; } CBenchCallbackToPrint callback; callback.Init(); callback._file = printCallback; if (isHashMethod || codecIndex != -1) { if (!printCallback) return S_FALSE; IBenchPrintCallback &f = *printCallback; UInt64 dict64 = dict; if (!dictIsDefined) dict64 = (1 << 27); if (use_fileData) { if (!dictIsDefined) dict64 = fileDataBuffer.Size(); else if (dict64 > fileDataBuffer.Size()) 
dict64 = fileDataBuffer.Size(); } for (;;) { const int index = method.FindProp(NCoderPropID::kDictionarySize); if (index < 0) break; method.Props.Delete((unsigned)index); } // methodName.RemoveChar(L'-'); Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method const UInt32 *checkSum = NULL; int benchIndex = -1; if (isHashMethod) { for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++) { const CBenchHash &h = g_Hash[i]; AString benchMethod (h.Name); AString benchProps; const int propPos = benchMethod.Find(':'); if (propPos >= 0) { benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); benchMethod.DeleteFrom((unsigned)propPos); } if (AreSameMethodNames(benchMethod, methodName)) { const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps); /* bool isMainMethod = method.PropsString.IsEmpty(); if (isMainMethod) isMainMethod = !checkSum || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8")); if (sameProps || isMainMethod) */ { complexity = (Int32)h.Complex; checkSum = &h.CheckSum; if (sameProps) break; /* if property. is not specified, we use the complexity for latest fastest method (crc32:64) */ } } } // if (!checkSum) return E_NOTIMPL; } else { for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) { const CBenchMethod &bench = g_Bench[i]; AString benchMethod (bench.Name); AString benchProps; const int propPos = benchMethod.Find(':'); if (propPos >= 0) { benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); benchMethod.DeleteFrom((unsigned)propPos); } if (AreSameMethodNames(benchMethod, methodName)) { const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps); // bool isMainMethod = method.PropsString.IsEmpty(); // if (sameProps || isMainMethod) { benchIndex = (int)i; if (sameProps) break; } } } // if (benchIndex < 0) return E_NOTIMPL; } { /* we count usage only for crc and filter. 
non-filters are not supported */ UInt64 usage = (1 << 20); UInt64 bufSize = dict64; UInt32 numBlocks = isHashMethod ? 1 : 3; if (use_fileData) { usage += fileDataBuffer.Size(); if (bufSize > fileDataBuffer.Size()) bufSize = fileDataBuffer.Size(); if (isHashMethod) { numBlocks = 0; #ifndef Z7_ST if (numThreadsSpecified != 1) numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0); #endif } } usage += numThreadsSpecified * bufSize * numBlocks; Print_Usage_and_Threads(f, usage, numThreadsSpecified); } CUIntVector numThreadsVector; { unsigned nt = numThreads_Start; for (;;) { if (nt > numThreadsSpecified) break; numThreadsVector.Add(nt); const unsigned next = nt * 2; const UInt32 ntHalf= numThreadsSpecified / 2; if (ntHalf > nt && ntHalf < next) numThreadsVector.Add(ntHalf); if (numThreadsSpecified > nt && numThreadsSpecified < next) numThreadsVector.Add(numThreadsSpecified); nt = next; } } unsigned numColumns = isHashMethod ? 1 : 2; CTempValues speedTotals; CTempValues usageTotals; { const unsigned numItems = numThreadsVector.Size() * numColumns; speedTotals.Alloc(numItems); usageTotals.Alloc(numItems); for (unsigned i = 0; i < numItems; i++) { speedTotals.Values[i] = 0; usageTotals.Values[i] = 0; } } f.NewLine(); for (unsigned line = 0; line < 3; line++) { f.NewLine(); f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size"); FOR_VECTOR (ti, numThreadsVector) { if (ti != 0) Print_Delimiter(f); if (line == 0) { PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1)); PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed); } else { for (unsigned c = 0; c < numColumns; c++) { PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1); PrintRight(f, line == 1 ? 
"BW" : "MB/s", kFieldSize_CrcSpeed + 1); } } } } f.NewLine(); UInt64 numSteps = 0; // for (UInt32 iter = 0; iter < numIterations; iter++) // { unsigned pow = 10; // kNumHashDictBits if (startDicLog_Defined) pow = startDicLog; // #define NUM_SUB_BITS 2 // pow <<= NUM_SUB_BITS; for (;; pow++) { const UInt64 bufSize = (UInt64)1 << pow; // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS); // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS); size_t dataSize = fileDataBuffer.Size(); if (dataSize > bufSize || !use_fileData) dataSize = (size_t)bufSize; for (UInt32 iter = 0; iter < numIterations; iter++) { Print_Pow(f, pow); // PrintNumber(f, bufSize >> 10, 4); FOR_VECTOR (ti, numThreadsVector) { RINOK(f.CheckBreak()) const UInt32 numThreads = numThreadsVector[ti]; if (isHashMethod) { UInt64 speed = 0; UInt64 usage = 0; const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads, dataSize, (const Byte *)fileDataBuffer, speed, usage, (UInt32)complexity, 1, // benchWeight, (pow == kNumHashDictBits && !use_fileData) ? 
checkSum : NULL, method, &f, #ifndef Z7_ST &affinityMode, #endif false, // showRating NULL, false, 0); RINOK(res) if (ti != 0) Print_Delimiter(f); Bench_BW_Print_Usage_Speed(f, usage, speed); speedTotals.Values[ti] += speed; usageTotals.Values[ti] += usage; } else { { unsigned keySize = 32; if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16; else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24; callback.BenchProps.KeySize = keySize; } COneMethodInfo method2 = method; unsigned bench_DictBits; if (benchIndex >= 0) { const CBenchMethod &bench = g_Bench[benchIndex]; callback.BenchProps.EncComplex = bench.EncComplex; callback.BenchProps.DecComplexUnc = bench.DecComplexUnc; callback.BenchProps.DecComplexCompr = bench.DecComplexCompr; bench_DictBits = bench.DictBits; // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug } else { bench_DictBits = kOldLzmaDictBits; // = 32 default if (isFilter) { const unsigned k_UnknownCoderComplexity = 4; callback.BenchProps.EncComplex = k_UnknownCoderComplexity; callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity; } else { callback.BenchProps.EncComplex = 1 << 10; callback.BenchProps.DecComplexUnc = 1 << 6; } callback.BenchProps.DecComplexCompr = 0; } callback.NeedPrint = false; if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA")) { const NCOM::CPropVariant propVariant = (UInt32)pow; RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant)) } const HRESULT res = MethodBench( EXTERNAL_CODECS_LOC_VARS complexInCommands, #ifndef Z7_ST false, // oldLzmaBenchMode numThreadsVector[ti], &affinityMode, #endif method2, dataSize, (const Byte *)fileDataBuffer, bench_DictBits, printCallback, &callback, &callback.BenchProps); RINOK(res) if (ti != 0) Print_Delimiter(f); for (unsigned i = 0; i < 2; i++) { const CBenchInfo &bi = callback.BenchInfo_Results[i]; const UInt64 usage = bi.GetUsage(); const UInt64 speed = bi.GetUnpackSizeSpeed(); usageTotals.Values[ti * 2 + i] += usage; 
speedTotals.Values[ti * 2 + i] += speed; Bench_BW_Print_Usage_Speed(f, usage, speed); } } } f.NewLine(); numSteps++; } if (dataSize >= dict64) break; } if (numSteps != 0) { f.Print("Avg:"); for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++) { if (ti != 0) Print_Delimiter(f); for (unsigned i = 0; i < numColumns; i++) Bench_BW_Print_Usage_Speed(f, usageTotals.Values[ti * numColumns + i] / numSteps, speedTotals.Values[ti * numColumns + i] / numSteps); } f.NewLine(); } return S_OK; } bool use2Columns = false; bool totalBenchMode = false; bool onlyHashBench = false; if (methodName.IsEqualTo_Ascii_NoCase("hash")) { onlyHashBench = true; methodName = "*"; totalBenchMode = true; } else if (methodName.Find('*') >= 0) totalBenchMode = true; // ---------- Threads loop ---------- for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++) { UInt32 numThreads = numThreadsSpecified; if (!multiThreadTests) { if (threadsPassIndex != 0) break; } else { numThreads = 1; if (threadsPassIndex != 0) { if (numCPUs < 2) break; numThreads = numCPUs; if (threadsPassIndex == 1) { if (numCPUs >= 4) numThreads = numCPUs / 2; } else if (numCPUs < 4) break; } } IBenchPrintCallback &f = *printCallback; if (threadsPassIndex > 0) { f.NewLine(); f.NewLine(); } if (!dictIsDefined && !onlyHashBench) { const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25); unsigned dicSizeLog = dicSizeLog_Main; #ifdef UNDER_CE dicSizeLog = (UInt64)1 << 20; #endif if (ramSize_Defined) for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--) if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize) break; dict = (UInt64)1 << dicSizeLog; if (totalBenchMode && dicSizeLog != dicSizeLog_Main) { f.Print("Dictionary reduced to: "); PrintNumber(f, dicSizeLog, 1); f.NewLine(); } } Print_Usage_and_Threads(f, onlyHashBench ? 
GetBenchMemoryUsage_Hash(numThreads, dict) : GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode), numThreads); f.NewLine(); f.NewLine(); if (totalBenchMode) { callback.NameFieldSize = kFieldSize_Name; use2Columns = false; } else { callback.NameFieldSize = kFieldSize_SmallName; use2Columns = true; } callback.Use2Columns = use2Columns; bool showFreq = false; UInt64 cpuFreq = 0; if (totalBenchMode) { showFreq = true; } unsigned fileldSize = kFieldSize_TotalSize; if (showFreq) fileldSize += kFieldSize_EUAndEffec; if (use2Columns) { PrintSpaces(f, callback.NameFieldSize); PrintRight(f, "Compressing", fileldSize); f.Print(kSep); PrintRight(f, "Decompressing", fileldSize); } f.NewLine(); PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize); int j; for (j = 0; j < 2; j++) { PrintRight(f, "Speed", kFieldSize_Speed + 1); PrintRight(f, "Usage", kFieldSize_Usage + 1); PrintRight(f, "R/U", kFieldSize_RU + 1); PrintRight(f, "Rating", kFieldSize_Rating + 1); if (showFreq) { PrintRight(f, "E/U", kFieldSize_EU + 1); PrintRight(f, "Effec", kFieldSize_Effec + 1); } if (!use2Columns) break; if (j == 0) f.Print(kSep); } f.NewLine(); PrintSpaces(f, callback.NameFieldSize); for (j = 0; j < 2; j++) { PrintRight(f, "KiB/s", kFieldSize_Speed + 1); PrintRight(f, "%", kFieldSize_Usage + 1); PrintRight(f, "MIPS", kFieldSize_RU + 1); PrintRight(f, "MIPS", kFieldSize_Rating + 1); if (showFreq) { PrintRight(f, "%", kFieldSize_EU + 1); PrintRight(f, "%", kFieldSize_Effec + 1); } if (!use2Columns) break; if (j == 0) f.Print(kSep); } f.NewLine(); f.NewLine(); if (specifiedFreq != 0) cpuFreq = specifiedFreq; // bool showTotalSpeed = false; if (totalBenchMode) { for (UInt32 i = 0; i < numIterations; i++) { if (i != 0) printCallback->NewLine(); const unsigned kNumCpuTests = 3; for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++) { PrintLeft(f, "CPU", kFieldSize_Name); // UInt32 resVal; CFreqBench fb; fb.complexInCommands = complexInCommands; fb.numThreads = 
numThreads; // showFreq; fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); fb.specifiedFreq = specifiedFreq; const HRESULT res = fb.FreqBench(printCallback #ifndef Z7_ST , &affinityMode #endif ); RINOK(res) cpuFreq = fb.CpuFreqRes; callback.NewLine(); if (specifiedFreq != 0) cpuFreq = specifiedFreq; if (testTimeMs >= 1000) if (freqTest == kNumCpuTests - 1) { // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands); } } callback.NewLine(); // return S_OK; // change it callback.SetFreq(true, cpuFreq); if (!onlyHashBench) { size_t dataSize = (size_t)dict; if (use_fileData) { dataSize = fileDataBuffer.Size(); if (dictIsDefined && dataSize > dict) dataSize = (size_t)dict; } const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS method, complexInCommands, #ifndef Z7_ST numThreads, &affinityMode, #endif dictIsDefined || use_fileData, // forceUnpackSize dataSize, (const Byte *)fileDataBuffer, printCallback, &callback); RINOK(res) } { size_t dataSize = (size_t)1 << kNumHashDictBits; if (dictIsDefined) { dataSize = (size_t)dict; if (dataSize != dict) return E_OUTOFMEMORY; } if (use_fileData) { dataSize = fileDataBuffer.Size(); if (dictIsDefined && dataSize > dict) dataSize = (size_t)dict; } const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS method, complexInCommands, numThreads, dataSize, (const Byte *)fileDataBuffer, printCallback, &callback, #ifndef Z7_ST &affinityMode, #endif &callback.EncodeRes, true, cpuFreq); RINOK(res) } callback.NewLine(); { PrintLeft(f, "CPU", kFieldSize_Name); CFreqBench fb; fb.complexInCommands = complexInCommands; fb.numThreads = numThreads; // showFreq; fb.showFreq = (specifiedFreq != 0); fb.specifiedFreq = specifiedFreq; const HRESULT res = fb.FreqBench(printCallback #ifndef Z7_ST , &affinityMode #endif ); RINOK(res) callback.NewLine(); } } } else { needSetComplexity = true; if (!methodName.IsEqualTo_Ascii_NoCase("LZMA")) { unsigned i; for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) { const 
CBenchMethod &h = g_Bench[i]; AString benchMethod (h.Name); AString benchProps; const int propPos = benchMethod.Find(':'); if (propPos >= 0) { benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); benchMethod.DeleteFrom((unsigned)propPos); } if (AreSameMethodNames(benchMethod, methodName)) { if (benchProps.IsEmpty() || (benchProps == "x5" && method.PropsString.IsEmpty()) || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps)) { callback.BenchProps.EncComplex = h.EncComplex; callback.BenchProps.DecComplexCompr = h.DecComplexCompr; callback.BenchProps.DecComplexUnc = h.DecComplexUnc; needSetComplexity = false; break; } } } /* if (i == Z7_ARRAY_SIZE(g_Bench)) return E_NOTIMPL; */ } if (needSetComplexity) callback.BenchProps.SetLzmaCompexity(); if (startDicLog < kBenchMinDicLogSize) startDicLog = kBenchMinDicLogSize; for (unsigned i = 0; i < numIterations; i++) { unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog; if (!multiDict) pow = 32; while (GetDictSizeFromLog(pow) > dict && pow > 0) pow--; for (; GetDictSizeFromLog(pow) <= dict; pow++) { Print_Pow(f, pow); callback.DictSize = (UInt64)1 << pow; COneMethodInfo method2 = method; if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA")) { // We add dictionary size property. // method2 can have two different dictionary size properties. // And last property is main. 
NCOM::CPropVariant propVariant = (UInt32)pow; RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant)) } size_t uncompressedDataSize; if (use_fileData) { uncompressedDataSize = fileDataBuffer.Size(); } else { uncompressedDataSize = (size_t)callback.DictSize; if (uncompressedDataSize != callback.DictSize) return E_OUTOFMEMORY; if (uncompressedDataSize >= (1 << 18)) uncompressedDataSize += kAdditionalSize; } const HRESULT res = MethodBench( EXTERNAL_CODECS_LOC_VARS complexInCommands, #ifndef Z7_ST true, numThreads, &affinityMode, #endif method2, uncompressedDataSize, (const Byte *)fileDataBuffer, kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps); f.NewLine(); RINOK(res) if (!multiDict) break; } } } PrintChars(f, '-', callback.NameFieldSize + fileldSize); if (use2Columns) { f.Print(kSep); PrintChars(f, '-', fileldSize); } f.NewLine(); if (use2Columns) { PrintLeft(f, "Avr:", callback.NameFieldSize); PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes); f.Print(kSep); PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes); f.NewLine(); } PrintLeft(f, "Tot:", callback.NameFieldSize); CTotalBenchRes midRes; midRes = callback.EncodeRes; midRes.Update_With_Res(callback.DecodeRes); // midRes.SetSum(callback.EncodeRes, callback.DecodeRes); PrintTotals(f, showFreq, cpuFreq, false, midRes); f.NewLine(); } return S_OK; }