X-Git-Url: http://vcs.maemo.org/git/?a=blobdiff_plain;f=lzma%2FCPP%2F7zip%2FCompress%2FLZMA_Alone%2FLzmaBench.cpp;fp=lzma%2FCPP%2F7zip%2FCompress%2FLZMA_Alone%2FLzmaBench.cpp;h=4cd2d63e9f806e5f556853f85ff09f1b80960383;hb=1a2e54b98aaab3669eebd38facb83687c4ac7baf;hp=0000000000000000000000000000000000000000;hpb=0b9bdac28929054558b5d7f315403fe3399a1413;p=physicsfs diff --git a/lzma/CPP/7zip/Compress/LZMA_Alone/LzmaBench.cpp b/lzma/CPP/7zip/Compress/LZMA_Alone/LzmaBench.cpp new file mode 100644 index 0000000..4cd2d63 --- /dev/null +++ b/lzma/CPP/7zip/Compress/LZMA_Alone/LzmaBench.cpp @@ -0,0 +1,1024 @@ +// LzmaBench.cpp + +#include "StdAfx.h" + +#include "LzmaBench.h" + +#ifndef _WIN32 +#define USE_POSIX_TIME +#define USE_POSIX_TIME2 +#endif + +#ifdef USE_POSIX_TIME +#include +#ifdef USE_POSIX_TIME2 +#include +#endif +#endif + +#ifdef _WIN32 +#define USE_ALLOCA +#endif + +#ifdef USE_ALLOCA +#ifdef _WIN32 +#include +#else +#include +#endif +#endif + +extern "C" +{ +#include "../../../../C/Alloc.h" +#include "../../../../C/7zCrc.h" +} +#include "../../../Common/MyCom.h" +#include "../../ICoder.h" + +#ifdef BENCH_MT +#include "../../../Windows/Thread.h" +#include "../../../Windows/Synchronization.h" +#endif + +#ifdef EXTERNAL_LZMA +#include "../../../Windows/PropVariant.h" +#else +#include "../LZMA/LZMADecoder.h" +#include "../LZMA/LZMAEncoder.h" +#endif + +static const UInt32 kUncompressMinBlockSize = 1 << 26; +static const UInt32 kAdditionalSize = (1 << 16); +static const UInt32 kCompressedAdditionalSize = (1 << 10); +static const UInt32 kMaxLzmaPropSize = 5; + +class CBaseRandomGenerator +{ + UInt32 A1; + UInt32 A2; +public: + CBaseRandomGenerator() { Init(); } + void Init() { A1 = 362436069; A2 = 521288629;} + UInt32 GetRnd() + { + return + ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) + + ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) ); + } +}; + +class CBenchBuffer +{ +public: + size_t BufferSize; + Byte *Buffer; + CBenchBuffer(): Buffer(0) {} + virtual ~CBenchBuffer() { Free(); } + void Free() + { + ::MidFree(Buffer); + Buffer = 0; + } + bool Alloc(size_t bufferSize) + { + if (Buffer != 0 && BufferSize == bufferSize) + return true; + Free(); + Buffer = (Byte *)::MidAlloc(bufferSize); + BufferSize = bufferSize; + return (Buffer != 0); + } +}; + +class CBenchRandomGenerator: public CBenchBuffer +{ + CBaseRandomGenerator *RG; +public: + void Set(CBaseRandomGenerator *rg) { RG = rg; } + UInt32 GetVal(UInt32 &res, int numBits) + { + UInt32 val = res & (((UInt32)1 << numBits) - 1); + res >>= numBits; + return val; + } + UInt32 GetLen(UInt32 &res) + { + UInt32 len = GetVal(res, 2); + return GetVal(res, 1 + len); + } + void Generate() + { + UInt32 pos = 0; + UInt32 rep0 = 1; + while (pos < BufferSize) + { + UInt32 res = RG->GetRnd(); + res >>= 1; + if (GetVal(res, 1) == 0 || pos < 1024) + Buffer[pos++] = (Byte)(res & 0xFF); + else + { + UInt32 len; + len = 1 + GetLen(res); + if (GetVal(res, 3) != 0) + { + len += GetLen(res); + do + { + UInt32 ppp = GetVal(res, 5) + 6; + res = RG->GetRnd(); + if (ppp > 30) + continue; + rep0 = /* (1 << ppp) +*/ GetVal(res, ppp); + res = RG->GetRnd(); + } + while (rep0 >= pos); + rep0++; + } + + for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++) + Buffer[pos] = Buffer[pos - rep0]; + } + } + } +}; + + +class CBenchmarkInStream: + public ISequentialInStream, + public CMyUnknownImp +{ + const Byte *Data; + size_t Pos; + size_t Size; +public: + MY_UNKNOWN_IMP + void Init(const Byte *data, size_t size) + { + Data = data; + Size = size; + Pos = 0; + } + STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize); +}; + +STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize) +{ + size_t remain = Size - Pos; + UInt32 kMaxBlockSize = (1 << 20); + if (size > kMaxBlockSize) + size = kMaxBlockSize; + if (size > remain) + size = (UInt32)remain; + for (UInt32 i = 0; i < size; i++) + ((Byte *)data)[i] = Data[Pos + i]; + Pos += size; + if(processedSize != NULL) + *processedSize = size; + return S_OK; +} + +class CBenchmarkOutStream: + public ISequentialOutStream, + public CBenchBuffer, + public CMyUnknownImp +{ + // bool _overflow; +public: + UInt32 Pos; + // CBenchmarkOutStream(): _overflow(false) {} + void Init() + { + // _overflow = false; + Pos = 0; + } + MY_UNKNOWN_IMP + STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize); +}; + +STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize) +{ + size_t curSize = BufferSize - Pos; + if (curSize > size) + curSize = size; + memcpy(Buffer + Pos, data, curSize); + Pos += (UInt32)curSize; + if(processedSize != NULL) + *processedSize = (UInt32)curSize; + if (curSize != size) + { + // _overflow = true; + return E_FAIL; + } + return S_OK; +} + +class CCrcOutStream: + public ISequentialOutStream, + public CMyUnknownImp +{ +public: + UInt32 Crc; + MY_UNKNOWN_IMP + void Init() { Crc = CRC_INIT_VAL; } + STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize); +}; + +STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize) +{ + Crc = CrcUpdate(Crc, data, size); + if (processedSize != NULL) + *processedSize = size; + return S_OK; +} + +static UInt64 GetTimeCount() +{ + #ifdef USE_POSIX_TIME + #ifdef USE_POSIX_TIME2 + timeval v; + if (gettimeofday(&v, 0) == 0) + return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec; + return (UInt64)time(NULL) * 1000000; + #else + return time(NULL); + #endif + #else + /* + LARGE_INTEGER value; + if (::QueryPerformanceCounter(&value)) + return value.QuadPart; + */ + return GetTickCount(); + #endif +} + +static UInt64 GetFreq() +{ + #ifdef USE_POSIX_TIME + #ifdef USE_POSIX_TIME2 + return 1000000; + #else + return 1; + #endif + #else + /* + LARGE_INTEGER value; + if (::QueryPerformanceFrequency(&value)) + return value.QuadPart; + */ + return 1000; + #endif +} + +#ifndef USE_POSIX_TIME +static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; } +#endif +static UInt64 GetUserTime() +{ + #ifdef USE_POSIX_TIME + return clock(); + #else + FILETIME creationTime, exitTime, kernelTime, userTime; + if (::GetProcessTimes(::GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime) != 0) + return GetTime64(userTime) + GetTime64(kernelTime); + return (UInt64)GetTickCount() * 10000; + #endif +} + +static UInt64 GetUserFreq() +{ + #ifdef USE_POSIX_TIME + return CLOCKS_PER_SEC; + #else + return 10000000; + #endif +} + +class CBenchProgressStatus +{ + #ifdef BENCH_MT + NWindows::NSynchronization::CCriticalSection CS; + #endif +public: + HRESULT Res; + bool EncodeMode; + void SetResult(HRESULT res) + { + #ifdef BENCH_MT + NWindows::NSynchronization::CCriticalSectionLock lock(CS); + #endif + Res = res; + } + HRESULT GetResult() + { + #ifdef BENCH_MT + NWindows::NSynchronization::CCriticalSectionLock lock(CS); + #endif + return Res; + } +}; + +class CBenchProgressInfo: + public ICompressProgressInfo, + public CMyUnknownImp +{ +public: + CBenchProgressStatus *Status; + CBenchInfo BenchInfo; + HRESULT Res; + IBenchCallback *callback; + CBenchProgressInfo(): callback(0) {} + MY_UNKNOWN_IMP + STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize); +}; + +void SetStartTime(CBenchInfo &bi) +{ + bi.GlobalFreq = GetFreq(); + bi.UserFreq = GetUserFreq(); + bi.GlobalTime = ::GetTimeCount(); + bi.UserTime = ::GetUserTime(); +} + +void SetFinishTime(const CBenchInfo &biStart, CBenchInfo &dest) +{ + dest.GlobalFreq = GetFreq(); + dest.UserFreq = GetUserFreq(); + dest.GlobalTime = ::GetTimeCount() - biStart.GlobalTime; + dest.UserTime = ::GetUserTime() - biStart.UserTime; +} + +STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize) +{ + HRESULT res = Status->GetResult(); + if (res != S_OK) + return res; + if (!callback) + return res; + CBenchInfo info = BenchInfo; + SetFinishTime(BenchInfo, info); + if (Status->EncodeMode) + { + info.UnpackSize = *inSize; + info.PackSize = *outSize; + res = callback->SetEncodeResult(info, false); + } + else + { + info.PackSize = BenchInfo.PackSize + *inSize; + info.UnpackSize = BenchInfo.UnpackSize + *outSize; + res = callback->SetDecodeResult(info, false); + } + if (res != S_OK) + Status->SetResult(res); + return res; +} + +static const int kSubBits = 8; + +static UInt32 GetLogSize(UInt32 size) +{ + for (int i = kSubBits; i < 32; i++) + for (UInt32 j = 0; j < (1 << kSubBits); j++) + if (size <= (((UInt32)1) << i) + (j << (i - kSubBits))) + return (i << kSubBits) + j; + return (32 << kSubBits); +} + +static void NormalizeVals(UInt64 &v1, UInt64 &v2) +{ + while (v1 > 1000000) + { + v1 >>= 1; + v2 >>= 1; + } +} + +UInt64 GetUsage(const CBenchInfo &info) +{ + UInt64 userTime = info.UserTime; + UInt64 userFreq = info.UserFreq; + UInt64 globalTime = info.GlobalTime; + UInt64 globalFreq = info.GlobalFreq; + NormalizeVals(userTime, userFreq); + NormalizeVals(globalFreq, globalTime); + if (userFreq == 0) + userFreq = 1; + if (globalTime == 0) + globalTime = 1; + return userTime * globalFreq * 1000000 / userFreq / globalTime; +} + +UInt64 GetRatingPerUsage(const CBenchInfo &info, UInt64 rating) +{ + UInt64 userTime = info.UserTime; + UInt64 userFreq = info.UserFreq; + UInt64 globalTime = info.GlobalTime; + UInt64 globalFreq = info.GlobalFreq; + NormalizeVals(userFreq, userTime); + NormalizeVals(globalTime, globalFreq); + if (globalFreq == 0) + globalFreq = 1; + if (userTime == 0) + userTime = 1; + return userFreq * globalTime / globalFreq * rating / userTime; +} + +static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq) +{ + UInt64 elTime = elapsedTime; + NormalizeVals(freq, elTime); + if (elTime == 0) + elTime = 1; + return value * freq / elTime; +} + +UInt64 GetCompressRating(UInt32 dictionarySize, UInt64 elapsedTime, UInt64 freq, UInt64 size) +{ + UInt64 t = GetLogSize(dictionarySize) - (kBenchMinDicLogSize << kSubBits); + // UInt64 numCommandsForOne = 1000 + ((t * t * 7) >> (2 * kSubBits)); // AMD K8 + UInt64 numCommandsForOne = 870 + ((t * t * 5) >> (2 * kSubBits)); // Intel Core2 + + UInt64 numCommands = (UInt64)(size) * numCommandsForOne; + return MyMultDiv64(numCommands, elapsedTime, freq); +} + +UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt32 numIterations) +{ + // UInt64 numCommands = (inSize * 216 + outSize * 14) * numIterations; // AMD K8 + UInt64 numCommands = (inSize * 220 + outSize * 8) * numIterations; // Intel Core2 + return MyMultDiv64(numCommands, elapsedTime, freq); +} + +#ifdef EXTERNAL_LZMA +typedef UInt32 (WINAPI * CreateObjectPointer)(const GUID *clsID, + const GUID *interfaceID, void **outObject); +#endif + +struct CEncoderInfo; + +struct CEncoderInfo +{ + #ifdef BENCH_MT + NWindows::CThread thread[2]; + #endif + CMyComPtr encoder; + CBenchProgressInfo *progressInfoSpec[2]; + CMyComPtr progressInfo[2]; + UInt32 NumIterations; + #ifdef USE_ALLOCA + size_t AllocaSize; + #endif + + struct CDecoderInfo + { + CEncoderInfo *Encoder; + UInt32 DecoderIndex; + #ifdef USE_ALLOCA + size_t AllocaSize; + #endif + bool CallbackMode; + }; + CDecoderInfo decodersInfo[2]; + + CMyComPtr decoders[2]; + HRESULT Results[2]; + CBenchmarkOutStream *outStreamSpec; + CMyComPtr outStream; + IBenchCallback *callback; + UInt32 crc; + UInt32 kBufferSize; + UInt32 compressedSize; + CBenchRandomGenerator rg; + CBenchmarkOutStream *propStreamSpec; + CMyComPtr propStream; + HRESULT Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rg); + HRESULT Encode(); + HRESULT Decode(UInt32 decoderIndex); + + CEncoderInfo(): outStreamSpec(0), callback(0), propStreamSpec(0) {} + + #ifdef BENCH_MT + static THREAD_FUNC_DECL EncodeThreadFunction(void *param) + { + CEncoderInfo *encoder = (CEncoderInfo *)param; + #ifdef USE_ALLOCA + alloca(encoder->AllocaSize); + #endif + HRESULT res = encoder->Encode(); + encoder->Results[0] = res; + if (res != S_OK) + encoder->progressInfoSpec[0]->Status->SetResult(res); + + return 0; + } + static THREAD_FUNC_DECL DecodeThreadFunction(void *param) + { + CDecoderInfo *decoder = (CDecoderInfo *)param; + #ifdef USE_ALLOCA + alloca(decoder->AllocaSize); + #endif + CEncoderInfo *encoder = decoder->Encoder; + encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex); + return 0; + } + + HRESULT CreateEncoderThread() + { + return thread[0].Create(EncodeThreadFunction, this); + } + + HRESULT CreateDecoderThread(int index, bool callbackMode + #ifdef USE_ALLOCA + , size_t allocaSize + #endif + ) + { + CDecoderInfo &decoder = decodersInfo[index]; + decoder.DecoderIndex = index; + decoder.Encoder = this; + #ifdef USE_ALLOCA + decoder.AllocaSize = allocaSize; + #endif + decoder.CallbackMode = callbackMode; + return thread[index].Create(DecodeThreadFunction, &decoder); + } + #endif +}; + +HRESULT CEncoderInfo::Init(UInt32 dictionarySize, UInt32 numThreads, CBaseRandomGenerator *rgLoc) +{ + rg.Set(rgLoc); + kBufferSize = dictionarySize + kAdditionalSize; + UInt32 kCompressedBufferSize = (kBufferSize / 2) + kCompressedAdditionalSize; + if (!rg.Alloc(kBufferSize)) + return E_OUTOFMEMORY; + rg.Generate(); + crc = CrcCalc(rg.Buffer, rg.BufferSize); + + outStreamSpec = new CBenchmarkOutStream; + if (!outStreamSpec->Alloc(kCompressedBufferSize)) + return E_OUTOFMEMORY; + + outStream = outStreamSpec; + + propStreamSpec = 0; + if (!propStream) + { + propStreamSpec = new CBenchmarkOutStream; + propStream = propStreamSpec; + } + if (!propStreamSpec->Alloc(kMaxLzmaPropSize)) + return E_OUTOFMEMORY; + propStreamSpec->Init(); + + PROPID propIDs[] = + { + NCoderPropID::kDictionarySize, + NCoderPropID::kMultiThread + }; + const int kNumProps = sizeof(propIDs) / sizeof(propIDs[0]); + PROPVARIANT properties[kNumProps]; + properties[0].vt = VT_UI4; + properties[0].ulVal = (UInt32)dictionarySize; + + properties[1].vt = VT_BOOL; + properties[1].boolVal = (numThreads > 1) ? VARIANT_TRUE : VARIANT_FALSE; + + { + CMyComPtr setCoderProperties; + RINOK(encoder.QueryInterface(IID_ICompressSetCoderProperties, &setCoderProperties)); + if (!setCoderProperties) + return E_FAIL; + RINOK(setCoderProperties->SetCoderProperties(propIDs, properties, kNumProps)); + + CMyComPtr writeCoderProperties; + encoder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProperties); + if (writeCoderProperties) + { + RINOK(writeCoderProperties->WriteCoderProperties(propStream)); + } + } + return S_OK; +} + +HRESULT CEncoderInfo::Encode() +{ + CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; + CMyComPtr inStream = inStreamSpec; + inStreamSpec->Init(rg.Buffer, rg.BufferSize); + outStreamSpec->Init(); + + RINOK(encoder->Code(inStream, outStream, 0, 0, progressInfo[0])); + compressedSize = outStreamSpec->Pos; + encoder.Release(); + return S_OK; +} + +HRESULT CEncoderInfo::Decode(UInt32 decoderIndex) +{ + CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; + CMyComPtr inStream = inStreamSpec; + CMyComPtr &decoder = decoders[decoderIndex]; + + CMyComPtr compressSetDecoderProperties; + decoder.QueryInterface(IID_ICompressSetDecoderProperties2, &compressSetDecoderProperties); + if (!compressSetDecoderProperties) + return E_FAIL; + + CCrcOutStream *crcOutStreamSpec = new CCrcOutStream; + CMyComPtr crcOutStream = crcOutStreamSpec; + + CBenchProgressInfo *pi = progressInfoSpec[decoderIndex]; + pi->BenchInfo.UnpackSize = 0; + pi->BenchInfo.PackSize = 0; + + for (UInt32 j = 0; j < NumIterations; j++) + { + inStreamSpec->Init(outStreamSpec->Buffer, compressedSize); + crcOutStreamSpec->Init(); + + RINOK(compressSetDecoderProperties->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos)); + UInt64 outSize = kBufferSize; + RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex])); + if (CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc) + return S_FALSE; + pi->BenchInfo.UnpackSize += kBufferSize; + pi->BenchInfo.PackSize += compressedSize; + } + decoder.Release(); + return S_OK; +} + +static const UInt32 kNumThreadsMax = (1 << 16); + +struct CBenchEncoders +{ + CEncoderInfo *encoders; + CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; } + ~CBenchEncoders() { delete []encoders; } +}; + +HRESULT LzmaBench( + #ifdef EXTERNAL_LZMA + CCodecs *codecs, + #endif + UInt32 numThreads, UInt32 dictionarySize, IBenchCallback *callback) +{ + UInt32 numEncoderThreads = + #ifdef BENCH_MT + (numThreads > 1 ? numThreads / 2 : 1); + #else + 1; + #endif + UInt32 numSubDecoderThreads = + #ifdef BENCH_MT + (numThreads > 1 ? 2 : 1); + #else + 1; + #endif + if (dictionarySize < (1 << kBenchMinDicLogSize) || numThreads < 1 || numEncoderThreads > kNumThreadsMax) + { + return E_INVALIDARG; + } + + CBenchEncoders encodersSpec(numEncoderThreads); + CEncoderInfo *encoders = encodersSpec.encoders; + + #ifdef EXTERNAL_LZMA + UString name = L"LZMA"; + #endif + + UInt32 i; + for (i = 0; i < numEncoderThreads; i++) + { + CEncoderInfo &encoder = encoders[i]; + encoder.callback = (i == 0) ? callback : 0; + + #ifdef EXTERNAL_LZMA + RINOK(codecs->CreateCoder(name, true, encoder.encoder)); + #else + encoder.encoder = new NCompress::NLZMA::CEncoder; + #endif + for (UInt32 j = 0; j < numSubDecoderThreads; j++) + { + #ifdef EXTERNAL_LZMA + RINOK(codecs->CreateCoder(name, false, encoder.decoders[j])); + #else + encoder.decoders[j] = new NCompress::NLZMA::CDecoder; + #endif + } + } + + CBaseRandomGenerator rg; + rg.Init(); + for (i = 0; i < numEncoderThreads; i++) + { + RINOK(encoders[i].Init(dictionarySize, numThreads, &rg)); + } + + CBenchProgressStatus status; + status.Res = S_OK; + status.EncodeMode = true; + + for (i = 0; i < numEncoderThreads; i++) + { + CEncoderInfo &encoder = encoders[i]; + for (int j = 0; j < 2; j++) + { + encoder.progressInfo[j] = encoder.progressInfoSpec[j] = new CBenchProgressInfo; + encoder.progressInfoSpec[j]->Status = &status; + } + if (i == 0) + { + encoder.progressInfoSpec[0]->callback = callback; + encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numEncoderThreads; + SetStartTime(encoder.progressInfoSpec[0]->BenchInfo); + } + + #ifdef BENCH_MT + if (numEncoderThreads > 1) + { + #ifdef USE_ALLOCA + encoder.AllocaSize = (i * 16 * 21) & 0x7FF; + #endif + RINOK(encoder.CreateEncoderThread()) + } + else + #endif + { + RINOK(encoder.Encode()); + } + } + #ifdef BENCH_MT + if (numEncoderThreads > 1) + for (i = 0; i < numEncoderThreads; i++) + encoders[i].thread[0].Wait(); + #endif + + RINOK(status.Res); + + CBenchInfo info; + + SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info); + info.UnpackSize = 0; + info.PackSize = 0; + info.NumIterations = 1; // progressInfoSpec->NumIterations; + for (i = 0; i < numEncoderThreads; i++) + { + CEncoderInfo &encoder = encoders[i]; + info.UnpackSize += encoder.kBufferSize; + info.PackSize += encoder.compressedSize; + } + RINOK(callback->SetEncodeResult(info, true)); + + + status.Res = S_OK; + status.EncodeMode = false; + + UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads; + for (i = 0; i < numEncoderThreads; i++) + { + CEncoderInfo &encoder = encoders[i]; + encoder.NumIterations = 2 + kUncompressMinBlockSize / encoder.kBufferSize; + + if (i == 0) + { + encoder.progressInfoSpec[0]->callback = callback; + encoder.progressInfoSpec[0]->BenchInfo.NumIterations = numDecoderThreads; + SetStartTime(encoder.progressInfoSpec[0]->BenchInfo); + } + + #ifdef BENCH_MT + if (numDecoderThreads > 1) + { + for (UInt32 j = 0; j < numSubDecoderThreads; j++) + { + size_t allocaSize = ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF; + HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0) + #ifdef USE_ALLOCA + , allocaSize + #endif + ); + RINOK(res); + } + } + else + #endif + { + RINOK(encoder.Decode(0)); + } + } + #ifdef BENCH_MT + HRESULT res = S_OK; + if (numDecoderThreads > 1) + for (i = 0; i < numEncoderThreads; i++) + for (UInt32 j = 0; j < numSubDecoderThreads; j++) + { + CEncoderInfo &encoder = encoders[i]; + encoder.thread[j].Wait(); + if (encoder.Results[j] != S_OK) + res = encoder.Results[j]; + } + RINOK(res); + #endif + RINOK(status.Res); + SetFinishTime(encoders[0].progressInfoSpec[0]->BenchInfo, info); + info.UnpackSize = 0; + info.PackSize = 0; + info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations; + for (i = 0; i < numEncoderThreads; i++) + { + CEncoderInfo &encoder = encoders[i]; + info.UnpackSize += encoder.kBufferSize; + info.PackSize += encoder.compressedSize; + } + RINOK(callback->SetDecodeResult(info, false)); + RINOK(callback->SetDecodeResult(info, true)); + return S_OK; +} + + +inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary) +{ + UInt32 hs = dictionary - 1; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + hs |= 0xFFFF; + if (hs > (1 << 24)) + hs >>= 1; + hs++; + return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 + + (1 << 20) + (multiThread ? (6 << 20) : 0); +} + +UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary) +{ + const UInt32 kBufferSize = dictionary; + const UInt32 kCompressedBufferSize = (kBufferSize / 2); + UInt32 numSubThreads = (numThreads > 1) ? 2 : 1; + UInt32 numBigThreads = numThreads / numSubThreads; + return (kBufferSize + kCompressedBufferSize + + GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads; +} + +static bool CrcBig(const void *data, UInt32 size, UInt32 numCycles, UInt32 crcBase) +{ + for (UInt32 i = 0; i < numCycles; i++) + if (CrcCalc(data, size) != crcBase) + return false; + return true; +} + +#ifdef BENCH_MT +struct CCrcInfo +{ + NWindows::CThread Thread; + const Byte *Data; + UInt32 Size; + UInt32 NumCycles; + UInt32 Crc; + bool Res; + void Wait() + { + Thread.Wait(); + Thread.Close(); + } +}; + +static THREAD_FUNC_DECL CrcThreadFunction(void *param) +{ + CCrcInfo *p = (CCrcInfo *)param; + p->Res = CrcBig(p->Data, p->Size, p->NumCycles, p->Crc); + return 0; +} + +struct CCrcThreads +{ + UInt32 NumThreads; + CCrcInfo *Items; + CCrcThreads(): Items(0), NumThreads(0) {} + void WaitAll() + { + for (UInt32 i = 0; i < NumThreads; i++) + Items[i].Wait(); + NumThreads = 0; + } + ~CCrcThreads() + { + WaitAll(); + delete []Items; + } +}; +#endif + +static UInt32 CrcCalc1(const Byte *buf, UInt32 size) +{ + UInt32 crc = CRC_INIT_VAL;; + for (UInt32 i = 0; i < size; i++) + crc = CRC_UPDATE_BYTE(crc, buf[i]); + return CRC_GET_DIGEST(crc); +} + +static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG) +{ + for (UInt32 i = 0; i < size; i++) + buf[i] = (Byte)RG.GetRnd(); +} + +static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG) +{ + RandGen(buf, size, RG); + return CrcCalc1(buf, size); +} + +bool CrcInternalTest() +{ + CBenchBuffer buffer; + const UInt32 kBufferSize0 = (1 << 8); + const UInt32 kBufferSize1 = (1 << 10); + const UInt32 kCheckSize = (1 << 5); + if (!buffer.Alloc(kBufferSize0 + kBufferSize1)) + return false; + Byte *buf = buffer.Buffer; + UInt32 i; + for (i = 0; i < kBufferSize0; i++) + buf[i] = (Byte)i; + UInt32 crc1 = CrcCalc1(buf, kBufferSize0); + if (crc1 != 0x29058C73) + return false; + CBaseRandomGenerator RG; + RandGen(buf + kBufferSize0, kBufferSize1, RG); + for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++) + for (UInt32 j = 0; j < kCheckSize; j++) + if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j)) + return false; + return true; +} + +HRESULT CrcBench(UInt32 numThreads, UInt32 bufferSize, UInt64 &speed) +{ + if (numThreads == 0) + numThreads = 1; + + CBenchBuffer buffer; + size_t totalSize = (size_t)bufferSize * numThreads; + if (totalSize / numThreads != bufferSize) + return E_OUTOFMEMORY; + if (!buffer.Alloc(totalSize)) + return E_OUTOFMEMORY; + + Byte *buf = buffer.Buffer; + CBaseRandomGenerator RG; + UInt32 numCycles = ((UInt32)1 << 30) / ((bufferSize >> 2) + 1) + 1; + + UInt64 timeVal; + #ifdef BENCH_MT + CCrcThreads threads; + if (numThreads > 1) + { + threads.Items = new CCrcInfo[numThreads]; + UInt32 i; + for (i = 0; i < numThreads; i++) + { + CCrcInfo &info = threads.Items[i]; + Byte *data = buf + (size_t)bufferSize * i; + info.Data = data; + info.NumCycles = numCycles; + info.Size = bufferSize; + info.Crc = RandGenCrc(data, bufferSize, RG); + } + timeVal = GetTimeCount(); + for (i = 0; i < numThreads; i++) + { + CCrcInfo &info = threads.Items[i]; + RINOK(info.Thread.Create(CrcThreadFunction, &info)); + threads.NumThreads++; + } + threads.WaitAll(); + for (i = 0; i < numThreads; i++) + if (!threads.Items[i].Res) + return S_FALSE; + } + else + #endif + { + UInt32 crc = RandGenCrc(buf, bufferSize, RG); + timeVal = GetTimeCount(); + if (!CrcBig(buf, bufferSize, numCycles, crc)) + return S_FALSE; + } + timeVal = GetTimeCount() - timeVal; + if (timeVal == 0) + timeVal = 1; + + UInt64 size = (UInt64)numCycles * totalSize; + speed = MyMultDiv64(size, timeVal, GetFreq()); + return S_OK; +} +