/*------------------------------------------------------------------------- * drawElements Quality Program OpenGL ES 3.0 Module * ------------------------------------------------- * * Copyright 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *//*! * \file * \brief Buffer data upload performance tests. *//*--------------------------------------------------------------------*/ #include "es3pBufferDataUploadTests.hpp" #include "glsCalibration.hpp" #include "tcuTestLog.hpp" #include "tcuVectorUtil.hpp" #include "tcuSurface.hpp" #include "tcuCPUWarmup.hpp" #include "tcuRenderTarget.hpp" #include "gluRenderContext.hpp" #include "gluShaderProgram.hpp" #include "gluStrUtil.hpp" #include "gluPixelTransfer.hpp" #include "gluObjectWrapper.hpp" #include "glwFunctions.hpp" #include "glwEnums.hpp" #include "deClock.h" #include "deMath.h" #include "deStringUtil.hpp" #include "deRandom.hpp" #include "deMemory.h" #include "deThread.h" #include "deMeta.hpp" #include #include #include namespace deqp { namespace gles3 { namespace Performance { namespace { using de::meta::EnableIf; using de::meta::Not; using gls::LineParametersWithConfidence; using gls::theilSenSiegelLinearRegression; static const char *const s_minimalVertexShader = "#version 300 es\n" "in highp vec4 a_position;\n" "void main (void)\n" "{\n" " gl_Position = a_position;\n" "}\n"; static const char *const s_minimalFragnentShader = "#version 300 es\n" "layout(location = 0) out mediump vec4 
dEQP_FragColor;\n"
    "void main (void)\n"
    "{\n"
    " dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
    "}\n";

// GLSL ES 3.00 vertex shader: writes a_position to gl_Position and forwards a_color as v_color.
static const char *const s_colorVertexShader = "#version 300 es\n"
    "in highp vec4 a_position;\n"
    "in highp vec4 a_color;\n"
    "out highp vec4 v_color;\n"
    "void main (void)\n"
    "{\n"
    " gl_Position = a_position;\n"
    " v_color = a_color;\n"
    "}\n";

// GLSL ES 3.00 fragment shader: outputs the interpolated v_color unchanged.
static const char *const s_colorFragmentShader = "#version 300 es\n"
    "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
    "in mediump vec4 v_color;\n"
    "void main (void)\n"
    "{\n"
    " dEQP_FragColor = v_color;\n"
    "}\n";

// The structs below are per-sample timing records. Every record carries a
// fitResponseDuration member, which is the value the line fitting helpers
// (fitLineToSamples) and calculateBasicStatistics read. calculateBasicStatistics
// converts that value to seconds by dividing by 1000 twice, i.e. it is in microseconds.
// NOTE(review): the other members are presumably microseconds as well (they are logged
// with the "us" unit) - confirm against the code that fills them in.

// Timing record with only a total and the fitted response.
struct SingleOperationDuration
{
    uint64_t totalDuration;
    uint64_t fitResponseDuration; // used for fitting
};

// Timing record with separate map / unmap / write / alloc parts.
struct MapBufferRangeDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

// Same as MapBufferRangeDuration but without an allocDuration member.
struct MapBufferRangeDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

// Timing record with map / unmap / write / flush / alloc parts.
struct MapBufferRangeFlushDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

// Same as MapBufferRangeFlushDuration but without an allocDuration member.
struct MapBufferRangeFlushDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

// Timing record with render and read parts.
struct RenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

// Field-for-field identical to RenderReadDuration; kept as a distinct type so that
// overloads and SampleTypeTraits specializations can tell the two apart.
struct UnrelatedUploadRenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

// Timing record with upload, render and read parts.
struct UploadRenderReadDuration
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration;
};

struct
UploadRenderReadDurationWithUnrelatedUploadSize { uint64_t uploadDuration; uint64_t renderDuration; uint64_t readDuration; uint64_t totalDuration; uint64_t renderReadDuration; uint64_t fitResponseDuration; }; struct RenderUploadRenderReadDuration { uint64_t firstRenderDuration; uint64_t uploadDuration; uint64_t secondRenderDuration; uint64_t readDuration; uint64_t totalDuration; uint64_t renderReadDuration; uint64_t fitResponseDuration; }; template struct UploadSampleResult { typedef SampleT SampleType; int bufferSize; int allocatedSize; int writtenSize; SampleType duration; }; template struct RenderSampleResult { typedef SampleT SampleType; int uploadedDataSize; int renderDataSize; int unrelatedDataSize; int numVertices; SampleT duration; }; struct SingleOperationStatistics { float minTime; float maxTime; float medianTime; float min2DecileTime; // !< minimum value in the 2nd decile float max9DecileTime; // !< maximum value in the 9th decile }; struct SingleCallStatistics { SingleOperationStatistics result; float medianRate; float maxDiffTime; float maxDiff9DecileTime; float medianDiffTime; float maxRelDiffTime; float max9DecileRelDiffTime; float medianRelDiffTime; }; struct MapCallStatistics { SingleOperationStatistics map; SingleOperationStatistics unmap; SingleOperationStatistics write; SingleOperationStatistics alloc; SingleOperationStatistics result; float medianRate; float maxDiffTime; float maxDiff9DecileTime; float medianDiffTime; float maxRelDiffTime; float max9DecileRelDiffTime; float medianRelDiffTime; }; struct MapFlushCallStatistics { SingleOperationStatistics map; SingleOperationStatistics unmap; SingleOperationStatistics write; SingleOperationStatistics flush; SingleOperationStatistics alloc; SingleOperationStatistics result; float medianRate; float maxDiffTime; float maxDiff9DecileTime; float medianDiffTime; float maxRelDiffTime; float max9DecileRelDiffTime; float medianRelDiffTime; }; struct RenderReadStatistics { SingleOperationStatistics render; 
SingleOperationStatistics read; SingleOperationStatistics result; SingleOperationStatistics total; float medianRate; float maxDiffTime; float maxDiff9DecileTime; float medianDiffTime; float maxRelDiffTime; float max9DecileRelDiffTime; float medianRelDiffTime; }; struct UploadRenderReadStatistics { SingleOperationStatistics upload; SingleOperationStatistics render; SingleOperationStatistics read; SingleOperationStatistics result; SingleOperationStatistics total; float medianRate; float maxDiffTime; float maxDiff9DecileTime; float medianDiffTime; float maxRelDiffTime; float max9DecileRelDiffTime; float medianRelDiffTime; }; struct RenderUploadRenderReadStatistics { SingleOperationStatistics firstRender; SingleOperationStatistics upload; SingleOperationStatistics secondRender; SingleOperationStatistics read; SingleOperationStatistics result; SingleOperationStatistics total; float medianRate; float maxDiffTime; float maxDiff9DecileTime; float medianDiffTime; float maxRelDiffTime; float max9DecileRelDiffTime; float medianRelDiffTime; }; template struct SampleTypeTraits { }; template <> struct SampleTypeTraits { typedef SingleCallStatistics StatsType; enum { HAS_MAP_STATS = 0 }; enum { HAS_UNMAP_STATS = 0 }; enum { HAS_WRITE_STATS = 0 }; enum { HAS_FLUSH_STATS = 0 }; enum { HAS_ALLOC_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 0 }; }; template <> struct SampleTypeTraits { typedef MapCallStatistics StatsType; enum { HAS_MAP_STATS = 1 }; enum { HAS_UNMAP_STATS = 1 }; enum { HAS_WRITE_STATS = 1 }; enum { HAS_FLUSH_STATS = 0 }; enum { HAS_ALLOC_STATS = 1 }; enum { LOG_CONTRIBUTIONS = 1 }; }; template <> struct SampleTypeTraits { typedef MapCallStatistics StatsType; enum { HAS_MAP_STATS = 1 }; enum { HAS_UNMAP_STATS = 1 }; enum { HAS_WRITE_STATS = 1 }; enum { HAS_FLUSH_STATS = 0 }; enum { HAS_ALLOC_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 1 }; }; template <> struct SampleTypeTraits { typedef MapFlushCallStatistics StatsType; enum { HAS_MAP_STATS = 1 }; enum { HAS_UNMAP_STATS = 1 
}; enum { HAS_WRITE_STATS = 1 }; enum { HAS_FLUSH_STATS = 1 }; enum { HAS_ALLOC_STATS = 1 }; enum { LOG_CONTRIBUTIONS = 1 }; }; template <> struct SampleTypeTraits { typedef MapFlushCallStatistics StatsType; enum { HAS_MAP_STATS = 1 }; enum { HAS_UNMAP_STATS = 1 }; enum { HAS_WRITE_STATS = 1 }; enum { HAS_FLUSH_STATS = 1 }; enum { HAS_ALLOC_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 1 }; }; template <> struct SampleTypeTraits { typedef RenderReadStatistics StatsType; enum { HAS_RENDER_STATS = 1 }; enum { HAS_READ_STATS = 1 }; enum { HAS_UPLOAD_STATS = 0 }; enum { HAS_TOTAL_STATS = 1 }; enum { HAS_FIRST_RENDER_STATS = 0 }; enum { HAS_SECOND_RENDER_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 1 }; }; template <> struct SampleTypeTraits { typedef RenderReadStatistics StatsType; enum { HAS_RENDER_STATS = 1 }; enum { HAS_READ_STATS = 1 }; enum { HAS_UPLOAD_STATS = 0 }; enum { HAS_TOTAL_STATS = 1 }; enum { HAS_FIRST_RENDER_STATS = 0 }; enum { HAS_SECOND_RENDER_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 1 }; }; template <> struct SampleTypeTraits { typedef UploadRenderReadStatistics StatsType; enum { HAS_RENDER_STATS = 1 }; enum { HAS_READ_STATS = 1 }; enum { HAS_UPLOAD_STATS = 1 }; enum { HAS_TOTAL_STATS = 1 }; enum { HAS_FIRST_RENDER_STATS = 0 }; enum { HAS_SECOND_RENDER_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 1 }; enum { LOG_UNRELATED_UPLOAD_SIZE = 0 }; }; template <> struct SampleTypeTraits { typedef UploadRenderReadStatistics StatsType; enum { HAS_RENDER_STATS = 1 }; enum { HAS_READ_STATS = 1 }; enum { HAS_UPLOAD_STATS = 1 }; enum { HAS_TOTAL_STATS = 1 }; enum { HAS_FIRST_RENDER_STATS = 0 }; enum { HAS_SECOND_RENDER_STATS = 0 }; enum { LOG_CONTRIBUTIONS = 1 }; enum { LOG_UNRELATED_UPLOAD_SIZE = 1 }; }; template <> struct SampleTypeTraits { typedef RenderUploadRenderReadStatistics StatsType; enum { HAS_RENDER_STATS = 0 }; enum { HAS_READ_STATS = 1 }; enum { HAS_UPLOAD_STATS = 1 }; enum { HAS_TOTAL_STATS = 1 }; enum { HAS_FIRST_RENDER_STATS = 1 }; enum { HAS_SECOND_RENDER_STATS = 
1 }; enum { LOG_CONTRIBUTIONS = 1 }; enum { LOG_UNRELATED_UPLOAD_SIZE = 1 }; }; struct UploadSampleAnalyzeResult { float transferRateMedian; float transferRateAtRange; float transferRateAtInfinity; }; struct RenderSampleAnalyzeResult { float renderRateMedian; float renderRateAtRange; float renderRateAtInfinity; }; class UnmapFailureError : public std::exception { public: UnmapFailureError(void) : std::exception() { } }; static std::string getHumanReadableByteSize(int numBytes) { std::ostringstream buf; if (numBytes < 1024) buf << numBytes << " byte(s)"; else if (numBytes < 1024 * 1024) buf << de::floatToString((float)numBytes / 1024.0f, 1) << " KiB"; else buf << de::floatToString((float)numBytes / 1024.0f / 1024.0f, 1) << " MiB"; return buf.str(); } static uint64_t medianTimeMemcpy(void *dst, const void *src, int numBytes) { // Time used by memcpy is assumed to be asymptotically linear // With large numBytes, the probability of context switch or other random // event is high. Apply memcpy in parts and report how much time would // memcpy have used with the median transfer rate. 
// Less than 1MiB, no need to do anything special if (numBytes < 1048576) { uint64_t startTime; uint64_t endTime; deYield(); startTime = deGetMicroseconds(); deMemcpy(dst, src, numBytes); endTime = deGetMicroseconds(); return endTime - startTime; } else { // Do memcpy in multiple parts const int numSections = 5; const int sectionAlign = 16; int sectionStarts[numSections + 1]; int sectionLens[numSections]; uint64_t sectionTimes[numSections]; uint64_t medianTime; uint64_t bestTime = 0; for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx) sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign); sectionStarts[numSections] = numBytes; for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx) sectionLens[sectionNdx] = sectionStarts[sectionNdx + 1] - sectionStarts[sectionNdx]; // Memcpy is usually called after mapbuffer range which may take // a lot of time. To prevent power management from kicking in during // copy, warm up more. { deYield(); tcu::warmupCPU(); deYield(); } for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx) { uint64_t startTime; uint64_t endTime; startTime = deGetMicroseconds(); deMemcpy((uint8_t *)dst + sectionStarts[sectionNdx], (const uint8_t *)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]); endTime = deGetMicroseconds(); sectionTimes[sectionNdx] = endTime - startTime; if (!bestTime || sectionTimes[sectionNdx] < bestTime) bestTime = sectionTimes[sectionNdx]; // Detect if write takes 50% longer than it should, and warm up if that happened if (sectionNdx != numSections - 1 && (float)sectionTimes[sectionNdx] > 1.5f * (float)bestTime) { deYield(); tcu::warmupCPU(); deYield(); } } std::sort(sectionTimes, sectionTimes + numSections); if ((numSections % 2) == 0) medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2; else medianTime = sectionTimes[numSections / 2]; return medianTime * numSections; } } static float busyworkCalculation(float initial, int 
workSize) { float a = initial; int b = 123; for (int ndx = 0; ndx < workSize; ++ndx) { a = deFloatCos(a + (float)b); b = (b + 63) % 107 + de::abs((int)(a * 10.0f)); } return a + (float)b; } static void busyWait(int microseconds) { const uint64_t maxSingleWaitTime = 1000; // 1ms const uint64_t endTime = deGetMicroseconds() + microseconds; float unused = *tcu::warmupCPUInternal::g_unused.m_v; int workSize = 500; // exponentially increase work, cap to 1ms while (deGetMicroseconds() < endTime) { const uint64_t startTime = deGetMicroseconds(); uint64_t totalTime; unused = busyworkCalculation(unused, workSize); totalTime = deGetMicroseconds() - startTime; if (totalTime >= maxSingleWaitTime) break; else workSize *= 2; } // "wait" while (deGetMicroseconds() < endTime) unused = busyworkCalculation(unused, workSize); *tcu::warmupCPUInternal::g_unused.m_v = unused; } // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1] template static float linearSample(const std::vector &values, float position) { DE_ASSERT(position >= 0.0f); DE_ASSERT(position <= 1.0f); const float floatNdx = (float)(values.size() - 1) * position; const int lowerNdx = (int)deFloatFloor(floatNdx); const int higherNdx = lowerNdx + 1; const float interpolationFactor = floatNdx - (float)lowerNdx; DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size()); DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size()); DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f); return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor); } template SingleOperationStatistics calculateSingleOperationStatistics(const std::vector &samples, uint64_t T::SampleType::*target) { SingleOperationStatistics stats; std::vector values(samples.size()); for (int ndx = 0; ndx < (int)samples.size(); ++ndx) values[ndx] = samples[ndx].duration.*target; std::sort(values.begin(), values.end()); stats.minTime = (float)values.front(); 
stats.maxTime = (float)values.back(); stats.medianTime = linearSample(values, 0.5f); stats.min2DecileTime = linearSample(values, 0.1f); stats.max9DecileTime = linearSample(values, 0.9f); return stats; } template void calculateBasicStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit, const std::vector &samples, int SampleType::*predictor) { std::vector values(samples.size()); for (int ndx = 0; ndx < (int)samples.size(); ++ndx) values[ndx] = samples[ndx].duration.fitResponseDuration; // median rate { std::vector processingRates(samples.size()); for (int ndx = 0; ndx < (int)samples.size(); ++ndx) { const float timeInSeconds = (float)values[ndx] / 1000.0f / 1000.0f; processingRates[ndx] = (float)(samples[ndx].*predictor) / timeInSeconds; } std::sort(processingRates.begin(), processingRates.end()); stats.medianRate = linearSample(processingRates, 0.5f); } // results compared to the approximation { std::vector timeDiffs(samples.size()); for (int ndx = 0; ndx < (int)samples.size(); ++ndx) { const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset; const float actual = (float)values[ndx]; timeDiffs[ndx] = actual - prediction; } std::sort(timeDiffs.begin(), timeDiffs.end()); stats.maxDiffTime = timeDiffs.back(); stats.maxDiff9DecileTime = linearSample(timeDiffs, 0.9f); stats.medianDiffTime = linearSample(timeDiffs, 0.5f); } // relative comparison to the approximation { std::vector relativeDiffs(samples.size()); for (int ndx = 0; ndx < (int)samples.size(); ++ndx) { const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset; const float actual = (float)values[ndx]; // Ignore cases where we predict negative times, or if // ratio would be (nearly) infinite: ignore if predicted // time is less than 1 microsecond if (prediction < 1.0f) relativeDiffs[ndx] = 0.0f; else relativeDiffs[ndx] = (actual - prediction) / prediction; } std::sort(relativeDiffs.begin(), relativeDiffs.end()); 
stats.maxRelDiffTime = relativeDiffs.back(); stats.max9DecileRelDiffTime = linearSample(relativeDiffs, 0.9f); stats.medianRelDiffTime = linearSample(relativeDiffs, 0.5f); } // values calculated using sorted timings std::sort(values.begin(), values.end()); stats.result.minTime = (float)values.front(); stats.result.maxTime = (float)values.back(); stats.result.medianTime = linearSample(values, 0.5f); stats.result.min2DecileTime = linearSample(values, 0.1f); stats.result.max9DecileTime = linearSample(values, 0.9f); } template void calculateBasicTransferStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit, const std::vector &samples) { calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize); } template void calculateBasicRenderStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit, const std::vector &samples) { calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize); } static SingleCallStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { SingleCallStatistics stats; calculateBasicTransferStatistics(stats, fit, samples); return stats; } static MapCallStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { MapCallStatistics stats; calculateBasicTransferStatistics(stats, fit, samples); stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration); stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration); stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration); stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration); return stats; } static MapFlushCallStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { MapFlushCallStatistics stats; calculateBasicTransferStatistics(stats, fit, 
samples); stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration); stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration); stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration); stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration); stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration); return stats; } static MapCallStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { MapCallStatistics stats; calculateBasicTransferStatistics(stats, fit, samples); stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration); stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration); stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration); return stats; } static MapFlushCallStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { MapFlushCallStatistics stats; calculateBasicTransferStatistics(stats, fit, samples); stats.map = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration); stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration); stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration); stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration); return stats; } static RenderReadStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { RenderReadStatistics stats; calculateBasicRenderStatistics(stats, fit, samples); stats.render = 
calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration); stats.read = calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration); stats.total = calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration); return stats; } static RenderReadStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { RenderReadStatistics stats; calculateBasicRenderStatistics(stats, fit, samples); stats.render = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration); stats.read = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration); stats.total = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration); return stats; } static UploadRenderReadStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { UploadRenderReadStatistics stats; calculateBasicRenderStatistics(stats, fit, samples); stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration); stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration); stats.read = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration); stats.total = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration); return stats; } static UploadRenderReadStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { UploadRenderReadStatistics stats; calculateBasicRenderStatistics(stats, fit, samples); stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration); stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration); stats.read = 
calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration); stats.total = calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration); return stats; } static RenderUploadRenderReadStatistics calculateSampleStatistics( const LineParametersWithConfidence &fit, const std::vector> &samples) { RenderUploadRenderReadStatistics stats; calculateBasicRenderStatistics(stats, fit, samples); stats.firstRender = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration); stats.upload = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration); stats.secondRender = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration); stats.read = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration); stats.total = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration); return stats; } template static LineParametersWithConfidence fitLineToSamples( const std::vector> &samples, int beginNdx, int endNdx, int step, uint64_t DurationType::*target = &DurationType::fitResponseDuration) { std::vector samplePoints; for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step) { tcu::Vec2 point; point.x() = (float)(samples[sampleNdx].writtenSize); point.y() = (float)(samples[sampleNdx].duration.*target); samplePoints.push_back(point); } return theilSenSiegelLinearRegression(samplePoints, 0.6f); } template static LineParametersWithConfidence fitLineToSamples( const std::vector> &samples, int beginNdx, int endNdx, int step, uint64_t DurationType::*target = &DurationType::fitResponseDuration) { std::vector samplePoints; for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step) { tcu::Vec2 point; point.x() = (float)(samples[sampleNdx].renderDataSize); point.y() = 
(float)(samples[sampleNdx].duration.*target); samplePoints.push_back(point); } return theilSenSiegelLinearRegression(samplePoints, 0.6f); } template static LineParametersWithConfidence fitLineToSamples( const std::vector &samples, int beginNdx, int endNdx, uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration) { return fitLineToSamples(samples, beginNdx, endNdx, 1, target); } template static LineParametersWithConfidence fitLineToSamples( const std::vector &samples, uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration) { return fitLineToSamples(samples, 0, (int)samples.size(), target); } static float getAreaBetweenLines(float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient) { const float lineAMin = lineAOffset + lineACoefficient * xmin; const float lineAMax = lineAOffset + lineACoefficient * xmax; const float lineBMin = lineBOffset + lineBCoefficient * xmin; const float lineBMax = lineBOffset + lineBCoefficient * xmax; const bool aOverBAtBegin = (lineAMin > lineBMin); const bool aOverBAtEnd = (lineAMax > lineBMax); if (aOverBAtBegin == aOverBAtEnd) { // lines do not intersect const float midpoint = (xmin + xmax) / 2.0f; const float width = (xmax - xmin); const float lineAHeight = lineAOffset + lineACoefficient * midpoint; const float lineBHeight = lineBOffset + lineBCoefficient * midpoint; return width * de::abs(lineAHeight - lineBHeight); } else { // lines intersect const float approachCoeffient = de::abs(lineACoefficient - lineBCoefficient); const float epsilon = 0.0001f; const float leftHeight = de::abs(lineAMin - lineBMin); const float rightHeight = de::abs(lineAMax - lineBMax); if (approachCoeffient < epsilon) return 0.0f; return (0.5f * leftHeight * (leftHeight / approachCoeffient)) + (0.5f * rightHeight * (rightHeight / approachCoeffient)); } } template static float calculateSampleFitLinearity(const std::vector &samples, int T::*predictor) { // Compare the fitted line of 
first half of the samples to the fitted line of // the second half of the samples. Calculate a AABB that fully contains every // sample's x component and both fit lines in this range. Calculate the ratio // of the area between the lines and the AABB. const float epsilon = 1.e-6f; const int midPoint = (int)samples.size() / 2; const LineParametersWithConfidence startApproximation = fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration); const LineParametersWithConfidence endApproximation = fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration); const float aabbMinX = (float)(samples.front().*predictor); const float aabbMinY = de::min(startApproximation.offset + startApproximation.coefficient * aabbMinX, endApproximation.offset + endApproximation.coefficient * aabbMinX); const float aabbMaxX = (float)(samples.back().*predictor); const float aabbMaxY = de::max(startApproximation.offset + startApproximation.coefficient * aabbMaxX, endApproximation.offset + endApproximation.coefficient * aabbMaxX); const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY); const float areaBetweenLines = getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient); const float errorAreaRatio = (aabbArea < epsilon) ? 
(1.0f) : (areaBetweenLines / aabbArea); return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f); } template static float calculateSampleFitLinearity(const std::vector> &samples) { return calculateSampleFitLinearity(samples, &UploadSampleResult::writtenSize); } template static float calculateSampleFitLinearity(const std::vector> &samples) { return calculateSampleFitLinearity(samples, &RenderSampleResult::renderDataSize); } template static float calculateSampleTemporalStability(const std::vector &samples, int T::*predictor) { // Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order) // Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully // contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between // the lines and the AABB. const float epsilon = 1.e-6f; const LineParametersWithConfidence evenApproximation = fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration); const LineParametersWithConfidence oddApproximation = fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration); const float aabbMinX = (float)(samples.front().*predictor); const float aabbMinY = de::min(evenApproximation.offset + evenApproximation.coefficient * aabbMinX, oddApproximation.offset + oddApproximation.coefficient * aabbMinX); const float aabbMaxX = (float)(samples.back().*predictor); const float aabbMaxY = de::max(evenApproximation.offset + evenApproximation.coefficient * aabbMaxX, oddApproximation.offset + oddApproximation.coefficient * aabbMaxX); const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY); const float areaBetweenLines = getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient); const float errorAreaRatio = (aabbArea < epsilon) ? 
(1.0f) : (areaBetweenLines / aabbArea); return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f); } template static float calculateSampleTemporalStability(const std::vector> &samples) { return calculateSampleTemporalStability(samples, &UploadSampleResult::writtenSize); } template static float calculateSampleTemporalStability(const std::vector> &samples) { return calculateSampleTemporalStability(samples, &RenderSampleResult::renderDataSize); } template static void bucketizeSamplesUniformly(const std::vector> &samples, std::vector> *buckets, int numBuckets, int &minBufferSize, int &maxBufferSize) { minBufferSize = 0; maxBufferSize = 0; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { DE_ASSERT(samples[sampleNdx].allocatedSize != 0); if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize) minBufferSize = samples[sampleNdx].allocatedSize; if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize) maxBufferSize = samples[sampleNdx].allocatedSize; } for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float bucketNdxFloat = (float)(samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * (float)numBuckets; const int bucketNdx = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets - 1); buckets[bucketNdx].push_back(samples[sampleNdx]); } } template static typename EnableIf::HAS_MAP_STATS>::Type logMapRangeStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { log << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime) << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime) << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime) << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime) << tcu::TestLog::Float("MapRangeMedian", 
"MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime); } template static typename EnableIf::HAS_UNMAP_STATS>::Type logUnmapStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { log << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime) << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime) << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime) << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime) << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime); } template static typename EnableIf::HAS_WRITE_STATS>::Type logWriteStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { log << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime) << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime) << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime) << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime) << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime); } template static typename EnableIf::HAS_FLUSH_STATS>::Type logFlushStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { log << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime) << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime) << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime) << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime) << 
tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime); } template static typename EnableIf::HAS_ALLOC_STATS>::Type logAllocStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { log << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime) << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime) << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime) << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime) << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime); } template static typename EnableIf::HAS_MAP_STATS>::Value>::Type logMapRangeStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(stats); } template static typename EnableIf::HAS_UNMAP_STATS>::Value>::Type logUnmapStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(stats); } template static typename EnableIf::HAS_WRITE_STATS>::Value>::Type logWriteStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(stats); } template static typename EnableIf::HAS_FLUSH_STATS>::Value>::Type logFlushStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(stats); } template static typename EnableIf::HAS_ALLOC_STATS>::Value>::Type logAllocStats( tcu::TestLog &log, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(stats); } template static typename EnableIf::HAS_MAP_STATS>::Type logMapContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, 
&SampleType::mapDuration); log << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime); } template static typename EnableIf::HAS_UNMAP_STATS>::Type logUnmapContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration); log << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime); } template static typename EnableIf::HAS_WRITE_STATS>::Type logWriteContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration); log << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime); } template static typename EnableIf::HAS_FLUSH_STATS>::Type logFlushContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) 
{ const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration); log << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime); } template static typename EnableIf::HAS_ALLOC_STATS>::Type logAllocContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration); log << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime); } template static typename EnableIf::HAS_RENDER_STATS>::Type logRenderContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration); log << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime); } template static typename EnableIf::HAS_READ_STATS>::Type 
logReadContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration); log << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime); } template static typename EnableIf::HAS_UPLOAD_STATS>::Type logUploadContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration); log << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime); } template static typename EnableIf::HAS_TOTAL_STATS>::Type logTotalContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration); log << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", 
QP_KEY_TAG_TIME, stats.total.medianTime); } template static typename EnableIf::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration); log << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime); } template static typename EnableIf::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration); log << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset) << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime); } template static typename EnableIf::HAS_MAP_STATS>::Value>::Type logMapContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution( tcu::TestLog &log, const std::vector> &samples, const typename 
SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_WRITE_STATS>::Value>::Type logWriteContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_FLUSH_STATS>::Value>::Type logFlushContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_ALLOC_STATS>::Value>::Type logAllocContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_RENDER_STATS>::Value>::Type logRenderContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_READ_STATS>::Value>::Type logReadContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_TOTAL_STATS>::Value>::Type logTotalContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { 
DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } template static typename EnableIf::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution( tcu::TestLog &log, const std::vector> &samples, const typename SampleTypeTraits::StatsType &stats) { DE_UNREF(log); DE_UNREF(samples); DE_UNREF(stats); } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("UploadTime", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize); log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize << (int)samples[sampleNdx].duration.totalDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << 
tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize); log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < 
(int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize); log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize); log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize << 
(int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration << (int)samples[sampleNdx].duration.flushDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize); log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration << (int)samples[sampleNdx].duration.flushDuration << fitResidual << tcu::TestLog::EndSample; } log << 
tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize); log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("TotalTime", 
"Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize); log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices << samples[sampleNdx].unrelatedDataSize << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << 
tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize); log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << 
tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize); log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize << samples[sampleNdx].numVertices << samples[sampleNdx].unrelatedDataSize << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting, const std::vector> &samples) { log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("DrawReadTime", "Second draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FirstDrawCallTime", "First draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("SecondDrawCallTime", "Second draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", 
QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx) { const float fitResidual = (float)samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize); log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.firstRenderDuration << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.secondRenderDuration << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample; } log << tcu::TestLog::EndSampleList; } template static UploadSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log, const std::vector> &samples, bool logBucketPerformance) { // Assume data is linear with some outliers, fit a line const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples); const typename SampleTypeTraits::StatsType resultStats = calculateSampleStatistics(theilSenFitting, samples); float approximatedTransferRate; float approximatedTransferRateNoConstant; // Output raw samples { const tcu::ScopedLogSection section(log, "Samples", "Samples"); logSampleList(log, theilSenFitting, samples); } // Calculate results for different ranges if (logBucketPerformance) { const int numBuckets = 4; int minBufferSize = 0; int maxBufferSize = 0; std::vector> buckets[numBuckets]; bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize); for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx) { if (buckets[bucketNdx].empty()) continue; // Print a nice result summary const int bucketRangeMin = minBufferSize + 
(int)(((float)bucketNdx / (float)numBuckets) * (float)(maxBufferSize - minBufferSize)); const int bucketRangeMax = minBufferSize + (int)(((float)(bucketNdx + 1) / (float)numBuckets) * (float)(maxBufferSize - minBufferSize)); const typename SampleTypeTraits::StatsType stats = calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]); const tcu::ScopedLogSection section( log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [") .append(getHumanReadableByteSize(bucketRangeMin) .append(", ") .append(getHumanReadableByteSize(bucketRangeMax).append("]")))); logMapRangeStats(log, stats); logUnmapStats(log, stats); logWriteStats(log, stats); logFlushStats(log, stats); logAllocStats(log, stats); log << tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime) << tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime) << tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime) << tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime) << tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime) << tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f) << tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime) << tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime) << tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime) << tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f) << tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 
100.0f) << tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f); } } // Contributions if (SampleTypeTraits::LOG_CONTRIBUTIONS) { const tcu::ScopedLogSection section(log, "Contribution", "Contributions"); logMapContribution(log, samples, resultStats); logUnmapContribution(log, samples, resultStats); logWriteContribution(log, samples, resultStats); logFlushContribution(log, samples, resultStats); logAllocContribution(log, samples, resultStats); } // Print results { const tcu::ScopedLogSection section(log, "Results", "Results"); const int medianBufferSize = (samples.front().bufferSize + samples.back().bufferSize) / 2; const float approximatedTransferTime = (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f; const float approximatedTransferTimeNoConstant = (theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f; const float sampleLinearity = calculateSampleFitLinearity(samples); const float sampleTemporalStability = calculateSampleTemporalStability(samples); approximatedTransferRateNoConstant = (float)medianBufferSize / approximatedTransferTimeNoConstant; approximatedTransferRate = (float)medianBufferSize / approximatedTransferTime; log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f) << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f) << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset) << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower) << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", 
"us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper) << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f) << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f) << tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f) << tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f) << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime) << tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f); } // return approximated transfer rate { UploadSampleAnalyzeResult result; result.transferRateMedian = resultStats.medianRate; result.transferRateAtRange = approximatedTransferRate; result.transferRateAtInfinity = approximatedTransferRateNoConstant; return result; } } template static RenderSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log, const std::vector> &samples) { // Assume data is linear with some outliers, fit a line const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples); const typename SampleTypeTraits::StatsType resultStats = calculateSampleStatistics(theilSenFitting, samples); float approximatedProcessingRate; float approximatedProcessingRateNoConstant; // output raw 
samples { const tcu::ScopedLogSection section(log, "Samples", "Samples"); logSampleList(log, theilSenFitting, samples); } // Contributions if (SampleTypeTraits::LOG_CONTRIBUTIONS) { const tcu::ScopedLogSection section(log, "Contribution", "Contributions"); logFirstRenderContribution(log, samples, resultStats); logUploadContribution(log, samples, resultStats); logRenderContribution(log, samples, resultStats); logSecondRenderContribution(log, samples, resultStats); logReadContribution(log, samples, resultStats); logTotalContribution(log, samples, resultStats); } // print results { const tcu::ScopedLogSection section(log, "Results", "Results"); const int medianDataSize = (samples.front().renderDataSize + samples.back().renderDataSize) / 2; const float approximatedRenderTime = (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f; const float approximatedRenderTimeNoConstant = (theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f; const float sampleLinearity = calculateSampleFitLinearity(samples); const float sampleTemporalStability = calculateSampleTemporalStability(samples); approximatedProcessingRateNoConstant = (float)medianDataSize / approximatedRenderTimeNoConstant; approximatedProcessingRate = (float)medianDataSize / approximatedRenderTime; log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f) << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f) << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset) << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower) << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% 
confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper) << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f) << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f) << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f) << tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f) << tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f) << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime) << tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f); } // return approximated render rate { RenderSampleAnalyzeResult result; result.renderRateMedian = resultStats.medianRate; result.renderRateAtRange = approximatedProcessingRate; result.renderRateAtInfinity = approximatedProcessingRateNoConstant; return result; } return RenderSampleAnalyzeResult(); } static void generateTwoPassRandomIterationOrder(std::vector &iterationOrder, int numSamples) { de::Random rnd(0xabc); const int midPoint = (numSamples + 1) / 2; // !< ceil(m_numSamples / 2) DE_ASSERT((int)iterationOrder.size() == numSamples); // Two "passes" over range, randomize order in both passes // This allows to us detect if iterations are not independent // (first run and later run 
samples differ significantly?) for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx) iterationOrder[sampleNdx] = sampleNdx * 2; for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx) iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1; for (int ndx = 0; ndx < midPoint; ++ndx) std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]); for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx) std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size() - 1)]); } template class BasicBufferCase : public TestCase { public: enum Flags { FLAG_ALLOCATE_LARGER_BUFFER = 0x01, }; BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags); ~BasicBufferCase(void); virtual void init(void); virtual void deinit(void); protected: IterateResult iterate(void); virtual bool runSample(int iteration, UploadSampleResult &sample) = 0; virtual void logAndSetTestResult(const std::vector> &results) = 0; void disableGLWarmup(void); void waitGLResults(void); enum { UNUSED_RENDER_AREA_SIZE = 32 }; glu::ShaderProgram *m_minimalProgram; int32_t m_minimalProgramPosLoc; uint32_t m_bufferID; const int m_numSamples; const int m_bufferSizeMin; const int m_bufferSizeMax; const bool m_allocateLargerBuffer; private: int m_iteration; std::vector m_iterationOrder; std::vector> m_results; bool m_useGL; int m_bufferRandomizerTimer; }; template BasicBufferCase::BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags) : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, desc) , m_minimalProgram(nullptr) , m_minimalProgramPosLoc(-1) , m_bufferID(0) , m_numSamples(numSamples) , m_bufferSizeMin(bufferSizeMin) , m_bufferSizeMax(bufferSizeMax) , m_allocateLargerBuffer((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0) , m_iteration(0) , m_iterationOrder(numSamples) , m_results(numSamples) 
, m_useGL(true)
    , m_bufferRandomizerTimer(0)
{
    // "randomize" iteration order. Deterministic, patternless
    generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);

    // choose buffer sizes
    // Sizes are spread linearly over (bufferSizeMin, bufferSizeMax] and rounded up to a
    // multiple of 16 bytes. When the larger-buffer flag is set the allocation is 1.5x the
    // written size, again rounded up to 16.
    for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
    {
        const int rawBufferSize =
            (int)deFloatFloor((float)bufferSizeMin + (float)(bufferSizeMax - bufferSizeMin) *
                                                         ((float)(sampleNdx + 1) / (float)m_numSamples));
        const int bufferSize = deAlign32(rawBufferSize, 16);
        const int allocatedBufferSize =
            deAlign32((m_allocateLargerBuffer) ? ((int)((float)bufferSize * 1.5f)) : (bufferSize), 16);

        m_results[sampleNdx].bufferSize    = bufferSize;
        m_results[sampleNdx].allocatedSize = allocatedBufferSize;
        m_results[sampleNdx].writtenSize   = -1; // set by the derived class when the sample is run
    }
}

// Releases whatever init() and the samples left behind.
template
BasicBufferCase::~BasicBufferCase(void)
{
    deinit();
}

// Builds the minimal shader program used for the warm-up and "use buffer" draws.
// Does nothing when GL usage has been disabled with disableGLWarmup().
// Throws tcu::TestError if the program fails to build or a_position has no location.
template
void BasicBufferCase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    if (!m_useGL)
        return;

    // \note Viewport size is not checked, it won't matter if the render target actually is smaller than UNUSED_RENDER_AREA_SIZE

    // minimal shader

    m_minimalProgram =
        new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources()
                                                                 << glu::VertexSource(s_minimalVertexShader)
                                                                 << glu::FragmentSource(s_minimalFragnentShader));
    if (!m_minimalProgram->isOk())
    {
        m_testCtx.getLog() << *m_minimalProgram;
        throw tcu::TestError("failed to build shader program");
    }

    m_minimalProgramPosLoc = gl.getAttribLocation(m_minimalProgram->getProgram(), "a_position");
    if (m_minimalProgramPosLoc == -1)
        throw tcu::TestError("a_position location was -1");
}

// Deletes the test buffer (if one is still alive) and the shader program.
// Safe to call repeatedly: both members are reset after release.
template
void BasicBufferCase::deinit(void)
{
    if (m_bufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
        m_bufferID = 0;
    }

    delete m_minimalProgram;
    m_minimalProgram = nullptr;
}

// Runs one step of the test. Each call does exactly one of:
//  1. a one-time warm-up that creates, optionally draws from, and deletes random buffers,
//  2. a periodic "randomizer" pass (taken whenever the call counter is a multiple of 8)
//     that performs unrelated buffer uploads and mappings, or
//  3. one actual sample via runSample().
// Returns STOP after the last sample has been logged, CONTINUE otherwise.
template
TestCase::IterateResult BasicBufferCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    // Function-local static: this flag is not per-object state.
    static bool buffersWarmedUp = false;

    static const uint32_t usages[] = {
        GL_STREAM_DRAW, GL_STREAM_READ,  GL_STREAM_COPY,  GL_STATIC_DRAW,  GL_STATIC_READ,
        GL_STATIC_COPY, GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
    };

    // Allocate some random sized buffers and remove them to
    // make sure the first samples too have some buffers removed
    // just before their allocation. This is only needed by the
    // first test.

    if (m_useGL && !buffersWarmedUp)
    {
        const int numRandomBuffers = 6;
        const int numRepeats       = 10;
        const int maxBufferSize    = 16777216;
        const std::vector zeroData(maxBufferSize, 0x00);
        de::Random rnd(0x1234);
        uint32_t bufferIDs[numRandomBuffers] = {0};

        gl.useProgram(m_minimalProgram->getProgram());
        gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
        gl.enableVertexAttribArray(m_minimalProgramPosLoc);

        for (int ndx = 0; ndx < numRepeats; ++ndx)
        {
            // Create buffer and maybe draw from it
            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

                if (rnd.getBool())
                {
                    // Draw the first and the last vec4 of the buffer
                    gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, nullptr);
                    gl.drawArrays(GL_POINTS, 0, 1);
                    gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
                }
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

            waitGLResults();
            GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

            m_testCtx.touchWatchdog();
        }

        buffersWarmedUp = true;
        return CONTINUE;
    }
    else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
    {
        // Do some random buffer operations every now and then
        // to make sure the previous test iterations won't affect
        // following test runs.

        const int numRandomBuffers = 3;
        const int maxBufferSize    = 16777216;
        const std::vector zeroData(maxBufferSize, 0x00);
        // Seed depends on the (already incremented) timer, so each pass differs but stays deterministic
        de::Random rnd(0x1234 + 0xabc * m_bufferRandomizerTimer);

        // BufferData
        {
            uint32_t bufferIDs[numRandomBuffers] = {0};

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");

        // Do some memory mappings
        {
            uint32_t bufferIDs[numRandomBuffers] = {0};

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];
                void *ptr;

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

                // NOTE(review): unlike the warm-up path, no program/viewport/attrib-array state is
                // set here; presumably it relies on state left by earlier calls - confirm.
                gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, nullptr);
                gl.drawArrays(GL_POINTS, 0, 1);
                gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);

                if (rnd.getBool())
                    waitGLResults();

                ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
                if (ptr)
                {
                    medianTimeMemcpy(ptr, &zeroData[0], randomSize);
                    gl.unmapBuffer(GL_ARRAY_BUFFER);
                }
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

            waitGLResults();
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
        return CONTINUE;
    }
    else
    {
        const int currentIteration =
m_iteration; const int sampleNdx = m_iterationOrder[currentIteration]; const bool sampleRunSuccessful = runSample(currentIteration, m_results[sampleNdx]); GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()"); // Retry failed samples if (!sampleRunSuccessful) return CONTINUE; if (++m_iteration >= m_numSamples) { logAndSetTestResult(m_results); return STOP; } else return CONTINUE; } } template void BasicBufferCase::disableGLWarmup(void) { m_useGL = false; } template void BasicBufferCase::waitGLResults(void) { tcu::Surface unusedSurface(UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE); glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess()); } template class BasicUploadCase : public BasicBufferCase { public: enum CaseType { CASE_NO_BUFFERS = 0, CASE_NEW_BUFFER, CASE_UNSPECIFIED_BUFFER, CASE_SPECIFIED_BUFFER, CASE_USED_BUFFER, CASE_USED_LARGER_BUFFER, CASE_LAST }; enum CaseFlags { FLAG_DONT_LOG_BUFFER_INFO = 0x01, FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT = 0x02, }; enum ResultType { RESULT_MEDIAN_TRANSFER_RATE = 0, RESULT_ASYMPTOTIC_TRANSFER_RATE, }; BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, int numSamples, uint32_t bufferUsage, CaseType caseType, ResultType resultType, int flags = 0); ~BasicUploadCase(void); virtual void init(void); virtual void deinit(void); private: bool runSample(int iteration, UploadSampleResult &sample); void createBuffer(int bufferSize, int iteration); void deleteBuffer(int bufferSize); void useBuffer(int bufferSize); virtual void testBufferUpload(UploadSampleResult &result, int writeSize) = 0; void logAndSetTestResult(const std::vector> &results); uint32_t m_unusedBufferID; protected: const CaseType m_caseType; const ResultType m_resultType; const uint32_t m_bufferUsage; const bool m_logBufferInfo; const bool m_bufferUnspecifiedContent; std::vector m_zeroData; using BasicBufferCase::m_testCtx; using BasicBufferCase::m_context; using 
BasicBufferCase::UNUSED_RENDER_AREA_SIZE; using BasicBufferCase::m_minimalProgram; using BasicBufferCase::m_minimalProgramPosLoc; using BasicBufferCase::m_bufferID; using BasicBufferCase::m_numSamples; using BasicBufferCase::m_bufferSizeMin; using BasicBufferCase::m_bufferSizeMax; using BasicBufferCase::m_allocateLargerBuffer; }; template BasicUploadCase::BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, int numSamples, uint32_t bufferUsage, CaseType caseType, ResultType resultType, int flags) : BasicBufferCase( context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase::FLAG_ALLOCATE_LARGER_BUFFER) : (0)) , m_unusedBufferID(0) , m_caseType(caseType) , m_resultType(resultType) , m_bufferUsage(bufferUsage) , m_logBufferInfo((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0) , m_bufferUnspecifiedContent((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0) , m_zeroData() { DE_ASSERT(m_caseType < CASE_LAST); } template BasicUploadCase::~BasicUploadCase(void) { deinit(); } template void BasicUploadCase::init(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); BasicBufferCase::init(); // zero buffer as upload source m_zeroData.resize(m_bufferSizeMax, 0x00); // unused buffer gl.genBuffers(1, &m_unusedBufferID); GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf"); // log basic info m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are " "tested before samples at odd positions.\n" << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]." 
<< tcu::TestLog::EndMessage; if (m_logBufferInfo) { switch (m_caseType) { case CASE_NO_BUFFERS: break; case CASE_NEW_BUFFER: m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage; break; case CASE_UNSPECIFIED_BUFFER: m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage; break; case CASE_SPECIFIED_BUFFER: m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage; break; case CASE_USED_BUFFER: m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage; break; case CASE_USED_LARGER_BUFFER: m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage; break; default: DE_ASSERT(false); break; } } if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE) m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage; else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." 
<< tcu::TestLog::EndMessage;
    else
        DE_ASSERT(false);
}

// Deletes the secondary buffer, releases the upload source storage and then
// deinitializes the base class.
template
void BasicUploadCase::deinit(void)
{
    if (m_unusedBufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_unusedBufferID);
        m_unusedBufferID = 0;
    }

    // Assigning a fresh vector (rather than clear()) to drop the old storage
    m_zeroData = std::vector();

    BasicBufferCase::deinit();
}

// Runs one sample: prepares the target buffer (allocated size), performs the timed
// upload (written size) and finally draws from and deletes the buffer.
// Always returns true, i.e. this implementation never asks for a retry.
template
bool BasicUploadCase::runSample(int iteration, UploadSampleResult &sample)
{
    const glw::Functions &gl      = m_context.getRenderContext().getFunctions();
    const int allocatedBufferSize = sample.allocatedSize;
    const int bufferSize          = sample.bufferSize;

    if (m_caseType != CASE_NO_BUFFERS)
        createBuffer(iteration, allocatedBufferSize);

    // warmup CPU before the test to make sure the power management governor
    // keeps us in the "high performance" mode
    {
        deYield();
        tcu::warmupCPU();
        deYield();
    }

    testBufferUpload(sample, bufferSize);
    GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");

    if (m_caseType != CASE_NO_BUFFERS)
        deleteBuffer(bufferSize);

    return true;
}

// Creates m_bufferID and brings it to the state required by m_caseType.
//
// \param iteration  Iteration number; only used to seed the decoy upload size
//                   in the CASE_NEW_BUFFER path.
// \param bufferSize Size in bytes used when the buffer is specified.
template
void BasicUploadCase::createBuffer(int iteration, int bufferSize)
{
    DE_ASSERT(!m_bufferID);
    DE_ASSERT(m_caseType != CASE_NO_BUFFERS);

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // create buffer

    // Runtime guard mirroring the assert above
    if (m_caseType == CASE_NO_BUFFERS)
        return;

    // create empty buffer

    gl.genBuffers(1, &m_bufferID);
    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
    GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

    if (m_caseType == CASE_NEW_BUFFER)
    {
        // upload something else first, this should reduce noise in samples

        de::Random rng(0xbadc * iteration);
        const int sizeDelta = rng.getInt(0, 2097140);
        const int unusedUploadSize =
            deAlign32(1048576 + sizeDelta, 4 * 4); // Vary buffer size to make sure it is always reallocated
        const std::vector unusedData(unusedUploadSize, 0x20);

        // Note: this leaves m_unusedBufferID, not m_bufferID, bound to GL_ARRAY_BUFFER
        gl.bindBuffer(GL_ARRAY_BUFFER, m_unusedBufferID);
        gl.bufferData(GL_ARRAY_BUFFER, unusedUploadSize, &unusedData[0], m_bufferUsage);

        // make sure upload won't interfere with the test
        useBuffer(unusedUploadSize);

        // don't kill the buffer so that the following upload cannot potentially reuse the buffer

        return;
    }

    // specify it

    if (m_caseType == CASE_UNSPECIFIED_BUFFER)
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, nullptr, m_bufferUsage);
    else
    {
        const std::vector unusedData(bufferSize, 0x20);
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);
    }

    if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
        return;

    // use it and make sure it is uploaded

    useBuffer(bufferSize);
    DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
}

// Draws from the test buffer and then deletes it.
//
// \param bufferSize Size in bytes of the range that is drawn from (and, when the
//                   buffer may hold unspecified content, re-specified first).
template
void BasicUploadCase::deleteBuffer(int bufferSize)
{
    DE_ASSERT(m_bufferID);
    DE_ASSERT(m_caseType != CASE_NO_BUFFERS);

    // render from the buffer to make sure it actually made it to the gpu. This is to
    // make sure that if the upload actually happens later or is happening right now in
    // the background, it will not interfere with further test runs

    // if buffer contains unspecified content, sourcing data from it results in undefined
    // results, possibly including program termination. Specify all data to prevent such
    // case from happening

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    if (m_bufferUnspecifiedContent)
    {
        const std::vector unusedData(bufferSize, 0x20);
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);

        GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
    }

    useBuffer(bufferSize);

    gl.deleteBuffers(1, &m_bufferID);
    m_bufferID = 0;
}

// Draws the first and the last vec4 of the buffer currently bound to GL_ARRAY_BUFFER
// and waits for the result.
//
// \param bufferSize Size in bytes of the bound buffer's used range; must be a
//                   multiple of sizeof(float[4]).
template
void BasicUploadCase::useBuffer(int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.useProgram(m_minimalProgram->getProgram());

    gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
    gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, nullptr);
    gl.enableVertexAttribArray(m_minimalProgramPosLoc);

    // use whole buffer to make sure buffer is uploaded by drawing first and last
    DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
    gl.drawArrays(GL_POINTS, 0, 1);
    gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);

    BasicBufferCase::waitGLResults();
}

// Analyzes and logs all collected samples, then sets the test result to the
// transfer rate selected by m_resultType.
template
void BasicUploadCase::logAndSetTestResult(const std::vector> &results)
{
    const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, true);

    // with small buffers, report the median transfer rate of the samples
    // with large buffers, report the expected performance of infinitely large buffers
    const float rate = (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ?
(analysis.transferRateAtInfinity) : (analysis.transferRateMedian);

    if (rate == std::numeric_limits::infinity())
    {
        // sample times are 1) invalid or 2) timer resolution too low
        // report speed 0 bytes / s since real value cannot be determined
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
    }
    else
    {
        // report transfer rate in MB / s
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
    }
}

// Reference case: measures a plain memory copy into a host-side buffer instead
// of a GL upload. It uses CASE_NO_BUFFERS and disables the base class GL warm-up.
class ReferenceMemcpyCase : public BasicUploadCase
{
public:
    ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                        int numSamples, bool largeBuffersCase);
    ~ReferenceMemcpyCase(void);

    void init(void);
    void deinit(void);

private:
    void testBufferUpload(UploadSampleResult &result, int bufferSize);

    std::vector m_dstBuf; // copy destination, sized to m_bufferSizeMax in init()
};

// largeBuffersCase selects the asymptotic transfer rate as the reported result;
// otherwise the median transfer rate is reported.
ReferenceMemcpyCase::ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                         int maxBufferSize, int numSamples, bool largeBuffersCase)
    : BasicUploadCase(
          ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS,
          (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
    , m_dstBuf()
{
    disableGLWarmup();
}

ReferenceMemcpyCase::~ReferenceMemcpyCase(void)
{
}

// Logs the test description, allocates the destination buffer and initializes the base class.
void ReferenceMemcpyCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()."
                       << tcu::TestLog::EndMessage;

    m_dstBuf.resize(m_bufferSizeMax, 0x00);

    BasicUploadCase::init();
}

// Releases the destination buffer storage and deinitializes the base class.
void ReferenceMemcpyCase::deinit(void)
{
    m_dstBuf = std::vector();
    BasicUploadCase::deinit();
}

// Times a copy of bufferSize bytes from the zero-filled source into m_dstBuf using
// medianTimeMemcpy() and stores the measured duration and written size in result.
void ReferenceMemcpyCase::testBufferUpload(UploadSampleResult &result, int bufferSize)
{
    // write
    result.duration.totalDuration       = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
    result.duration.fitResponseDuration = result.duration.totalDuration;

    result.writtenSize = bufferSize;
}

// Measures the duration of a glBufferData() call that uploads the whole buffer.
// The reported result is always the median transfer rate.
class BufferDataUploadCase : public BasicUploadCase
{
public:
    BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                         int numSamples, uint32_t bufferUsage, CaseType caseType);
    ~BufferDataUploadCase(void);

    void init(void);

private:
    void testBufferUpload(UploadSampleResult &result, int bufferSize);
};

BufferDataUploadCase::BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                           int maxBufferSize, int numSamples, uint32_t bufferUsage,
                                           CaseType caseType)
    : BasicUploadCase(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType,
                      RESULT_MEDIAN_TRANSFER_RATE)
{
}

BufferDataUploadCase::~BufferDataUploadCase(void)
{
}

// Logs the test description and initializes the base class.
void BufferDataUploadCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function."
<< tcu::TestLog::EndMessage;

    BasicUploadCase::init();
}

// Binds the test buffer and times a single glBufferData() call that uploads
// bufferSize bytes from the zero-filled source. Timestamps come from
// deGetMicroseconds(); no explicit GL synchronization is done inside the timed region.
void BufferDataUploadCase::testBufferUpload(UploadSampleResult &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    // upload
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
        endTime = deGetMicroseconds();

        result.duration.totalDuration       = endTime - startTime;
        result.duration.fitResponseDuration = result.duration.totalDuration;
        result.writtenSize                  = bufferSize;
    }
}

// Measures the duration of a glBufferSubData() call that updates either the whole
// buffer or its middle half, optionally after orphaning the buffer with
// glBufferData(..., NULL).
class BufferSubDataUploadCase : public BasicUploadCase
{
public:
    enum Flags
    {
        FLAG_FULL_UPLOAD           = 0x01, // update the whole buffer
        FLAG_PARTIAL_UPLOAD        = 0x02, // update half of the buffer; exclusive with FLAG_FULL_UPLOAD
        FLAG_INVALIDATE_BEFORE_USE = 0x04, // call glBufferData(..., NULL) before the timed upload
    };

    BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                            int numSamples, uint32_t bufferUsage, CaseType parentCase, int flags);
    ~BufferSubDataUploadCase(void);

    void init(void);

private:
    void testBufferUpload(UploadSampleResult &result, int bufferSize);

    const bool m_fullUpload;
    const bool m_invalidateBeforeUse;
};

// Exactly one of FLAG_FULL_UPLOAD / FLAG_PARTIAL_UPLOAD must be given (asserted below).
BufferSubDataUploadCase::BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc,
                                                 int minBufferSize, int maxBufferSize, int numSamples,
                                                 uint32_t bufferUsage, CaseType parentCase, int flags)
    : BasicUploadCase(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase,
                      RESULT_MEDIAN_TRANSFER_RATE)
    , m_fullUpload((flags & FLAG_FULL_UPLOAD) != 0)
    , m_invalidateBeforeUse((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
{
    DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
    DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
}

BufferSubDataUploadCase::~BufferSubDataUploadCase(void)
{
}

// Logs the test description and initializes the base class.
void BufferSubDataUploadCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferSubData() function call performance. "
                       << ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") :
                                            ("Half of the buffer data is updated with glBufferSubData. "))
                       << ((m_invalidateBeforeUse) ?
                               ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") :
                               (""))
                       << "\n"
                       << tcu::TestLog::EndMessage;

    BasicUploadCase::init();
}

// Times a single glBufferSubData() call. The optional invalidation happens before
// the timed region. A partial upload writes bufferSize / 2 bytes starting at
// offset bufferSize / 4; writtenSize reflects the number of bytes actually written.
void BufferSubDataUploadCase::testBufferUpload(UploadSampleResult &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    // "invalidate", upload null
    if (m_invalidateBeforeUse)
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, nullptr, m_bufferUsage);

    // upload
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();

        if (m_fullUpload)
            gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
        else
        {
            // upload to buffer center
            gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
        }

        endTime = deGetMicroseconds();

        result.duration.totalDuration       = endTime - startTime;
        result.duration.fitResponseDuration = result.duration.totalDuration;

        if (m_fullUpload)
            result.writtenSize = bufferSize;
        else
            result.writtenSize = bufferSize / 2;
    }
}

// Measures glMapBufferRange() / glUnmapBuffer() based uploads. The base case type
// and base flags are derived from caseFlags and mapFlags by the static helpers.
class MapBufferRangeCase : public BasicUploadCase
{
public:
    enum Flags
    {
        FLAG_PARTIAL                       = 0x01, // map half of the buffer instead of all of it
        FLAG_MANUAL_INVALIDATION           = 0x02, // clear with glBufferData(..., NULL) before mapping
        FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04, // buffer not used before mapping, unspecified contents
        FLAG_USE_UNUSED_SPECIFIED_BUFFER   = 0x08, // buffer not used before mapping, specified contents
    };

    MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                       int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
    ~MapBufferRangeCase(void);

    void init(void);

private:
    static CaseType getBaseCaseType(int caseFlags);
    static int getBaseFlags(uint32_t mapFlags, int caseFlags);

    void testBufferUpload(UploadSampleResult &result, int bufferSize);
    void attemptBufferMap(UploadSampleResult &result, int bufferSize);

    const bool m_manualInvalidation;
    const bool m_fullUpload;
    const bool m_useUnusedUnspecifiedBuffer;
    const bool m_useUnusedSpecifiedBuffer;
    const uint32_t m_mapFlags;
    int m_unmapFailures;
};

// The two "unused buffer" flags are mutually exclusive, and neither may be
// combined with manual invalidation (asserted below).
MapBufferRangeCase::MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                       int maxBufferSize, int numSamples, uint32_t bufferUsage, uint32_t mapFlags,
                                       int caseFlags)
    : BasicUploadCase(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
                      getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
    , m_manualInvalidation((caseFlags & FLAG_MANUAL_INVALIDATION) != 0)
    , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
    , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
    , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
    , m_mapFlags(mapFlags)
    , m_unmapFailures(0)
{
    DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
    DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
}

MapBufferRangeCase::~MapBufferRangeCase(void)
{
}

void MapBufferRangeCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog()
        << tcu::TestLog::Message
        << "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
        << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
        << ((m_useUnusedUnspecifiedBuffer) ?
                ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
                (""))
        << ((m_useUnusedSpecifiedBuffer) ?
                ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
                (""))
        << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
                ("The buffer has previously been used in a drawing operation.\n") :
                (""))
        << ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") :
                                     (""))
        << "Map bits:\n"
        << ((m_mapFlags & GL_MAP_WRITE_BIT) ?
("\tGL_MAP_WRITE_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) << tcu::TestLog::EndMessage; BasicUploadCase::init(); } MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType(int caseFlags) { if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0) return CASE_USED_BUFFER; else return CASE_NEW_BUFFER; } int MapBufferRangeCase::getBaseFlags(uint32_t mapFlags, int caseFlags) { int flags = FLAG_DONT_LOG_BUFFER_INFO; // If buffer contains unspecified data when it is sourced (i.e drawn) // results are undefined, and system errors may occur. Signal parent // class to take this into account if (caseFlags & FLAG_PARTIAL) { if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_MANUAL_INVALIDATION) != 0 || (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0) { flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT; } } return flags; } void MapBufferRangeCase::testBufferUpload(UploadSampleResult &result, int bufferSize) { const int unmapFailureThreshold = 4; for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures) { try { attemptBufferMap(result, bufferSize); return; } catch (UnmapFailureError &) { } } throw tcu::TestError("Unmapping failures exceeded limit"); } void MapBufferRangeCase::attemptBufferMap(UploadSampleResult &result, int bufferSize) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); if (m_fullUpload) result.writtenSize = bufferSize; else result.writtenSize = bufferSize / 2; // Create unused buffer if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer) { uint64_t startTime; uint64_t endTime; // 
"invalidate" or allocate, upload null startTime = deGetMicroseconds(); gl.bufferData(GL_ARRAY_BUFFER, bufferSize, nullptr, m_bufferUsage); endTime = deGetMicroseconds(); result.duration.allocDuration = endTime - startTime; } else if (m_useUnusedSpecifiedBuffer) { uint64_t startTime; uint64_t endTime; // Specify buffer contents startTime = deGetMicroseconds(); gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); endTime = deGetMicroseconds(); result.duration.allocDuration = endTime - startTime; } else { // No alloc, no time result.duration.allocDuration = 0; } // upload { void *mapPtr; // Map { uint64_t startTime; uint64_t endTime; startTime = deGetMicroseconds(); if (m_fullUpload) mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags); else { // upload to buffer center mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags); } endTime = deGetMicroseconds(); if (!mapPtr) throw tcu::Exception("MapBufferRange returned NULL"); result.duration.mapDuration = endTime - startTime; } // Write { result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize); } // Unmap { uint64_t startTime; uint64_t endTime; glw::GLboolean unmapSuccessful; startTime = deGetMicroseconds(); unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER); endTime = deGetMicroseconds(); // if unmapping fails, just try again later if (!unmapSuccessful) throw UnmapFailureError(); result.duration.unmapDuration = endTime - startTime; } result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration; result.duration.fitResponseDuration = result.duration.totalDuration; } } class MapBufferRangeFlushCase : public BasicUploadCase { public: enum Flags { FLAG_PARTIAL = 0x01, FLAG_FLUSH_IN_PARTS = 0x02, FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04, FLAG_USE_UNUSED_SPECIFIED_BUFFER = 0x08, FLAG_FLUSH_PARTIAL = 0x10, }; 
MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize, int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags); ~MapBufferRangeFlushCase(void); void init(void); private: static CaseType getBaseCaseType(int caseFlags); static int getBaseFlags(uint32_t mapFlags, int caseFlags); void testBufferUpload(UploadSampleResult &result, int bufferSize); void attemptBufferMap(UploadSampleResult &result, int bufferSize); const bool m_fullUpload; const bool m_flushInParts; const bool m_flushPartial; const bool m_useUnusedUnspecifiedBuffer; const bool m_useUnusedSpecifiedBuffer; const uint32_t m_mapFlags; int m_unmapFailures; }; MapBufferRangeFlushCase::MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize, int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags) : BasicUploadCase(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags)) , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0) , m_flushInParts((caseFlags & FLAG_FLUSH_IN_PARTS) != 0) , m_flushPartial((caseFlags & FLAG_FLUSH_PARTIAL) != 0) , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0) , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0) , m_mapFlags(mapFlags) , m_unmapFailures(0) { DE_ASSERT(!(m_flushPartial && m_flushInParts)); DE_ASSERT(!(m_flushPartial && !m_fullUpload)); } MapBufferRangeFlushCase::~MapBufferRangeFlushCase(void) { } void MapBufferRangeFlushCase::init(void) { // Describe what the test tries to do m_testCtx.getLog() << tcu::TestLog::Message << "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n" << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n" << ((m_flushInParts) ? 
("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") : (m_flushPartial) ? ("Half of the buffer range is flushed.") : ("The whole mapped range is flushed in one flush call.")) << "\n" << ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : ("")) << ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : ("")) << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : ("")) << "Map bits:\n" << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : ("")) << tcu::TestLog::EndMessage; BasicUploadCase::init(); } MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType(int caseFlags) { if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0) return CASE_USED_BUFFER; else return CASE_NEW_BUFFER; } int MapBufferRangeFlushCase::getBaseFlags(uint32_t mapFlags, int caseFlags) { int flags = FLAG_DONT_LOG_BUFFER_INFO; // If buffer contains unspecified data when it is sourced (i.e drawn) // results are undefined, and system errors may occur. 
Signal parent // class to take this into account if (caseFlags & FLAG_PARTIAL) { if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0 || (caseFlags & FLAG_FLUSH_PARTIAL) != 0) { flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT; } } return flags; } void MapBufferRangeFlushCase::testBufferUpload(UploadSampleResult &result, int bufferSize) { const int unmapFailureThreshold = 4; for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures) { try { attemptBufferMap(result, bufferSize); return; } catch (UnmapFailureError &) { } } throw tcu::TestError("Unmapping failures exceeded limit"); } void MapBufferRangeFlushCase::attemptBufferMap(UploadSampleResult &result, int bufferSize) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const int mappedSize = (m_fullUpload) ? (bufferSize) : (bufferSize / 2); if (m_fullUpload && !m_flushPartial) result.writtenSize = bufferSize; else result.writtenSize = bufferSize / 2; gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); // Create unused buffer if (m_useUnusedUnspecifiedBuffer) { uint64_t startTime; uint64_t endTime; // Don't specify contents startTime = deGetMicroseconds(); gl.bufferData(GL_ARRAY_BUFFER, bufferSize, nullptr, m_bufferUsage); endTime = deGetMicroseconds(); result.duration.allocDuration = endTime - startTime; } else if (m_useUnusedSpecifiedBuffer) { uint64_t startTime; uint64_t endTime; // Specify buffer contents startTime = deGetMicroseconds(); gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); endTime = deGetMicroseconds(); result.duration.allocDuration = endTime - startTime; } else { // No alloc, no time result.duration.allocDuration = 0; } // upload { void *mapPtr; // Map { uint64_t startTime; uint64_t endTime; startTime = deGetMicroseconds(); if (m_fullUpload) mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags); else { // upload to buffer center mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 
bufferSize / 4, mappedSize, m_mapFlags); } endTime = deGetMicroseconds(); if (!mapPtr) throw tcu::Exception("MapBufferRange returned NULL"); result.duration.mapDuration = endTime - startTime; } // Write { if (!m_flushPartial) result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize); else result.duration.writeDuration = medianTimeMemcpy((uint8_t *)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize); } // Flush { uint64_t startTime; uint64_t endTime; startTime = deGetMicroseconds(); if (m_flushPartial) gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize / 4, mappedSize / 2); else if (!m_flushInParts) gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize); else { const int p1 = 0; const int p2 = mappedSize / 3; const int p3 = mappedSize / 2; const int p4 = mappedSize * 2 / 4; const int p5 = mappedSize; // flush in mixed order gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2, p3 - p2); gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1, p2 - p1); gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4, p5 - p4); gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3, p4 - p3); } endTime = deGetMicroseconds(); result.duration.flushDuration = endTime - startTime; } // Unmap { uint64_t startTime; uint64_t endTime; glw::GLboolean unmapSuccessful; startTime = deGetMicroseconds(); unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER); endTime = deGetMicroseconds(); // if unmapping fails, just try again later if (!unmapSuccessful) throw UnmapFailureError(); result.duration.unmapDuration = endTime - startTime; } result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration; result.duration.fitResponseDuration = result.duration.totalDuration; } } template class ModifyAfterBasicCase : public BasicBufferCase { public: ModifyAfterBasicCase(Context &context, const char *name, const char *description, int bufferSizeMin, int bufferSizeMax, 
uint32_t usage, bool bufferUnspecifiedAfterTest); ~ModifyAfterBasicCase(void); void init(void); void deinit(void); protected: void drawBufferRange(int begin, int end); private: enum { NUM_SAMPLES = 20, }; bool runSample(int iteration, UploadSampleResult &sample); bool prepareAndRunTest(int iteration, UploadSampleResult &result, int bufferSize); void logAndSetTestResult(const std::vector> &results); virtual void testWithBufferSize(UploadSampleResult &result, int bufferSize) = 0; int m_unmappingErrors; protected: const bool m_bufferUnspecifiedAfterTest; const uint32_t m_bufferUsage; std::vector m_zeroData; using BasicBufferCase::m_testCtx; using BasicBufferCase::m_context; using BasicBufferCase::UNUSED_RENDER_AREA_SIZE; using BasicBufferCase::m_minimalProgram; using BasicBufferCase::m_minimalProgramPosLoc; using BasicBufferCase::m_bufferID; using BasicBufferCase::m_numSamples; using BasicBufferCase::m_bufferSizeMin; using BasicBufferCase::m_bufferSizeMax; using BasicBufferCase::m_allocateLargerBuffer; }; template ModifyAfterBasicCase::ModifyAfterBasicCase(Context &context, const char *name, const char *description, int bufferSizeMin, int bufferSizeMax, uint32_t usage, bool bufferUnspecifiedAfterTest) : BasicBufferCase(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0) , m_unmappingErrors(0) , m_bufferUnspecifiedAfterTest(bufferUnspecifiedAfterTest) , m_bufferUsage(usage) , m_zeroData() { } template ModifyAfterBasicCase::~ModifyAfterBasicCase(void) { BasicBufferCase::deinit(); } template void ModifyAfterBasicCase::init(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); // init parent BasicBufferCase::init(); // upload source m_zeroData.resize(m_bufferSizeMax, 0x00); // log basic info m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. 
All samples at even positions (first = 0) are " "tested before samples at odd positions.\n" << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage; // log which transfer rate is the test result and buffer info m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples.\n" << "Buffer usage = " << glu::getUsageName(m_bufferUsage) << tcu::TestLog::EndMessage; // Set state for drawing so that we don't have to change these during the iteration { gl.useProgram(m_minimalProgram->getProgram()); gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE); gl.enableVertexAttribArray(m_minimalProgramPosLoc); } } template void ModifyAfterBasicCase::deinit(void) { m_zeroData = std::vector(); BasicBufferCase::deinit(); } template void ModifyAfterBasicCase::drawBufferRange(int begin, int end) { DE_ASSERT(begin % (int)sizeof(float[4]) == 0); DE_ASSERT(end % (int)sizeof(float[4]) == 0); const glw::Functions &gl = m_context.getRenderContext().getFunctions(); // use given range gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1); gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1); } template bool ModifyAfterBasicCase::runSample(int iteration, UploadSampleResult &sample) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const int bufferSize = sample.bufferSize; bool testOk; testOk = prepareAndRunTest(iteration, sample, bufferSize); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample"); if (!testOk) { const int unmapFailureThreshold = 4; // only unmapping error can cause iteration failure if (++m_unmappingErrors >= unmapFailureThreshold) throw tcu::TestError("Too many unmapping errors, cannot continue."); // just try again return false; } return true; } template bool ModifyAfterBasicCase::prepareAndRunTest(int iteration, UploadSampleResult &result, int bufferSize) { 
DE_UNREF(iteration); DE_ASSERT(!m_bufferID); DE_ASSERT(deIsAligned32(bufferSize, 4 * 4)); // aligned to vec4 const glw::Functions &gl = m_context.getRenderContext().getFunctions(); bool testRunOk = true; bool unmappingFailed = false; // Upload initial buffer to the GPU... gl.genBuffers(1, &m_bufferID); gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID); gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); // ...use it... gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, nullptr); drawBufferRange(0, bufferSize); // ..and make sure it is uploaded BasicBufferCase::waitGLResults(); // warmup CPU before the test to make sure the power management governor // keeps us in the "high performance" mode { deYield(); tcu::warmupCPU(); deYield(); } // test try { // buffer is uploaded to the GPU. Draw from it. drawBufferRange(0, bufferSize); // and test upload testWithBufferSize(result, bufferSize); } catch (UnmapFailureError &) { testRunOk = false; unmappingFailed = true; } // clean up: make sure buffer is not in upload queue and delete it // sourcing unspecified data causes undefined results, possibly program termination if (m_bufferUnspecifiedAfterTest || unmappingFailed) gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage); drawBufferRange(0, bufferSize); BasicBufferCase::waitGLResults(); gl.deleteBuffers(1, &m_bufferID); m_bufferID = 0; return testRunOk; } template void ModifyAfterBasicCase::logAndSetTestResult(const std::vector> &results) { const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false); // Return median transfer rate of the samples if (analysis.transferRateMedian == std::numeric_limits::infinity()) { // sample times are 1) invalid or 2) timer resolution too low // report speed 0 bytes / s since real value cannot be determined m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str()); } else { // report transfer rate in MB / s 
m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str()); } } class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase { public: enum CaseFlags { FLAG_RESPECIFY_SIZE = 0x1, FLAG_UPLOAD_REPEATED = 0x2, }; ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags); ~ModifyAfterWithBufferDataCase(void); void init(void); void deinit(void); private: void testWithBufferSize(UploadSampleResult &result, int bufferSize); enum { NUM_REPEATS = 2 }; const bool m_respecifySize; const bool m_repeatedUpload; const float m_sizeDifferenceFactor; }; ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags) : ModifyAfterBasicCase(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false) , m_respecifySize((flags & FLAG_RESPECIFY_SIZE) != 0) , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0) , m_sizeDifferenceFactor(1.3f) { DE_ASSERT(!(m_repeatedUpload && m_respecifySize)); } ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase(void) { deinit(); } void ModifyAfterWithBufferDataCase::init(void) { // Log the purpose of the test if (m_repeatedUpload) m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw " "buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage; else m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from " "the target buffer." << tcu::TestLog::EndMessage; m_testCtx.getLog() << tcu::TestLog::Message << ((m_respecifySize) ? 
("Buffer size is increased and contents are modified with BufferData().\n") : ("Buffer contents are modified with BufferData().\n")) << tcu::TestLog::EndMessage; // init parent ModifyAfterBasicCase::init(); // make sure our zeroBuffer is large enough if (m_respecifySize) { const int largerBufferSize = deAlign32((int)((float)m_bufferSizeMax * m_sizeDifferenceFactor), 4 * 4); m_zeroData.resize(largerBufferSize, 0x00); } } void ModifyAfterWithBufferDataCase::deinit(void) { ModifyAfterBasicCase::deinit(); } void ModifyAfterWithBufferDataCase::testWithBufferSize(UploadSampleResult &result, int bufferSize) { // always draw the same amount to make compares between cases sensible const int drawStart = deAlign32(bufferSize / 4, 4 * 4); const int drawEnd = deAlign32(bufferSize * 3 / 4, 4 * 4); const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const int largerBufferSize = deAlign32((int)((float)bufferSize * m_sizeDifferenceFactor), 4 * 4); const int newBufferSize = (m_respecifySize) ? 
(largerBufferSize) : (bufferSize); uint64_t startTime; uint64_t endTime; // repeat upload-draw if (m_repeatedUpload) { for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx) { gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage); drawBufferRange(drawStart, drawEnd); } } // test upload startTime = deGetMicroseconds(); gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage); endTime = deGetMicroseconds(); result.duration.totalDuration = endTime - startTime; result.duration.fitResponseDuration = result.duration.totalDuration; result.writtenSize = newBufferSize; } class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase { public: enum CaseFlags { FLAG_PARTIAL = 0x1, FLAG_UPLOAD_REPEATED = 0x2, }; ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags); ~ModifyAfterWithBufferSubDataCase(void); void init(void); void deinit(void); private: void testWithBufferSize(UploadSampleResult &result, int bufferSize); enum { NUM_REPEATS = 2 }; const bool m_partialUpload; const bool m_repeatedUpload; }; ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags) : ModifyAfterBasicCase(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false) , m_partialUpload((flags & FLAG_PARTIAL) != 0) , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0) { } ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase(void) { deinit(); } void ModifyAfterWithBufferSubDataCase::init(void) { // Log the purpose of the test if (m_repeatedUpload) m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw " "buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." 
<< tcu::TestLog::EndMessage; else m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data " "from the target buffer." << tcu::TestLog::EndMessage; m_testCtx.getLog() << tcu::TestLog::Message << ((m_partialUpload) ? ("Half of the buffer contents are modified.\n") : ("Buffer contents are fully respecified.\n")) << tcu::TestLog::EndMessage; ModifyAfterBasicCase::init(); } void ModifyAfterWithBufferSubDataCase::deinit(void) { ModifyAfterBasicCase::deinit(); } void ModifyAfterWithBufferSubDataCase::testWithBufferSize(UploadSampleResult &result, int bufferSize) { // always draw the same amount to make compares between cases sensible const int drawStart = deAlign32(bufferSize / 4, 4 * 4); const int drawEnd = deAlign32(bufferSize * 3 / 4, 4 * 4); const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4); const int subdataSize = deAlign32((m_partialUpload) ? 
(bufferSize / 2) : (bufferSize), 4 * 4); uint64_t startTime; uint64_t endTime; // make upload-draw stream if (m_repeatedUpload) { for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx) { gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]); drawBufferRange(drawStart, drawEnd); } } // test upload startTime = deGetMicroseconds(); gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]); endTime = deGetMicroseconds(); result.duration.totalDuration = endTime - startTime; result.duration.fitResponseDuration = result.duration.totalDuration; result.writtenSize = subdataSize; } class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase { public: enum CaseFlags { FLAG_PARTIAL = 0x1, }; ModifyAfterWithMapBufferRangeCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags); ~ModifyAfterWithMapBufferRangeCase(void); void init(void); void deinit(void); private: static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags); void testWithBufferSize(UploadSampleResult &result, int bufferSize); const bool m_partialUpload; const uint32_t m_mapFlags; }; ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags) : ModifyAfterBasicCase(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags)) , m_partialUpload((flags & FLAG_PARTIAL) != 0) , m_mapFlags(glMapFlags) { } ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase(void) { deinit(); } void ModifyAfterWithMapBufferRangeCase::init(void) { // Log the purpose of the test m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of MapBufferRange() command after a draw command that sources data from " "the target buffer.\n" << ((m_partialUpload) ? 
("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n")) << "Map bits:\n" << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : ("")) << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : ("")) << tcu::TestLog::EndMessage; ModifyAfterBasicCase::init(); } void ModifyAfterWithMapBufferRangeCase::deinit(void) { ModifyAfterBasicCase::deinit(); } bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags) { if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0)) return true; return false; } void ModifyAfterWithMapBufferRangeCase::testWithBufferSize(UploadSampleResult &result, int bufferSize) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4); const int subdataSize = deAlign32((m_partialUpload) ? 
(bufferSize / 2) : (bufferSize), 4 * 4); void *mapPtr; // map { uint64_t startTime; uint64_t endTime; startTime = deGetMicroseconds(); mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags); endTime = deGetMicroseconds(); if (!mapPtr) throw tcu::TestError("mapBufferRange returned null"); result.duration.mapDuration = endTime - startTime; } // write { result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize); } // unmap { uint64_t startTime; uint64_t endTime; glw::GLboolean unmapSucceeded; startTime = deGetMicroseconds(); unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER); endTime = deGetMicroseconds(); if (unmapSucceeded != GL_TRUE) throw UnmapFailureError(); result.duration.unmapDuration = endTime - startTime; } result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration; result.duration.fitResponseDuration = result.duration.totalDuration; result.writtenSize = subdataSize; } class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase { public: enum CaseFlags { FLAG_PARTIAL = 0x1, }; ModifyAfterWithMapBufferFlushCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags); ~ModifyAfterWithMapBufferFlushCase(void); void init(void); void deinit(void); private: static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags); void testWithBufferSize(UploadSampleResult &result, int bufferSize); const bool m_partialUpload; const uint32_t m_mapFlags; }; ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags) : ModifyAfterBasicCase(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags)) , m_partialUpload((flags & FLAG_PARTIAL) != 0) , 
m_mapFlags(glMapFlags)
{
}

// Runs deinit() so that resources are released even if the framework did not call it.
ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase(void)
{
    deinit();
}

// Logs the purpose of the test and the map bits in use, then initializes the base class.
void ModifyAfterWithMapBufferFlushCase::init(void)
{
    // Log the purpose of the test
    // NOTE(review): the message only mentions MapBufferRange() although this case also times
    // flushMappedBufferRange -- confirm whether the text should mention the flush as well.
    m_testCtx.getLog()
        << tcu::TestLog::Message
        << "Testing performance of MapBufferRange() command after a draw command that sources data from "
           "the target buffer.\n"
        << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
        << "Map bits:\n"
        << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
        << tcu::TestLog::EndMessage;

    ModifyAfterBasicCase::init();
}

// Forwards deinitialization to the base class.
void ModifyAfterWithMapBufferFlushCase::deinit(void)
{
    ModifyAfterBasicCase::deinit();
}

// Returns true only when FLAG_PARTIAL is set in flags and GL_MAP_INVALIDATE_BUFFER_BIT is set in mapFlags.
bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
{
    if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
        return true;

    return false;
}

// Maps a range of the GL_ARRAY_BUFFER binding, writes zero data, flushes the written range and unmaps,
// storing the duration of each step in result.
void ModifyAfterWithMapBufferFlushCase::testWithBufferSize(
    UploadSampleResult &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    // Partial uploads start at a quarter of the buffer and cover half of it; both values are passed
    // through deAlign32 with an alignment of 4 * 4.
    const int subdataOffset = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
    const int subdataSize = deAlign32((m_partialUpload) ?
(bufferSize / 2) : (bufferSize), 4 * 4);
    void *mapPtr;

    // map
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();
        mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
        endTime = deGetMicroseconds();

        if (!mapPtr)
            throw tcu::TestError("mapBufferRange returned null");

        result.duration.mapDuration = endTime - startTime;
    }

    // write
    {
        result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
    }

    // flush
    {
        uint64_t startTime;
        uint64_t endTime;

        // The flush starts at offset 0 and covers subdataSize bytes, i.e. the same size that was mapped.
        startTime = deGetMicroseconds();
        gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
        endTime = deGetMicroseconds();

        result.duration.flushDuration = endTime - startTime;
    }

    // unmap
    {
        uint64_t startTime;
        uint64_t endTime;
        glw::GLboolean unmapSucceeded;

        startTime = deGetMicroseconds();
        unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
        endTime = deGetMicroseconds();

        if (unmapSucceeded != GL_TRUE)
            throw UnmapFailureError();

        result.duration.unmapDuration = endTime - startTime;
    }

    // The total (also used as the fit response) is the sum of all four measured steps.
    result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
                                    result.duration.unmapDuration + result.duration.flushDuration;
    result.duration.fitResponseDuration = result.duration.totalDuration;
    result.writtenSize = subdataSize;
}

// Draw call used for rendering. *_LAST is the number of valid values and is used in range asserts.
enum DrawMethod
{
    DRAWMETHOD_DRAW_ARRAYS = 0,
    DRAWMETHOD_DRAW_ELEMENTS,

    DRAWMETHOD_LAST
};

// Buffer that is the target of the measured upload.
enum TargetBuffer
{
    TARGETBUFFER_VERTEX = 0,
    TARGETBUFFER_INDEX,

    TARGETBUFFER_LAST
};

// Whether the target buffer is new or already existing when the upload is made.
enum BufferState
{
    BUFFERSTATE_NEW = 0,
    BUFFERSTATE_EXISTING,

    BUFFERSTATE_LAST
};

// GL command used for uploading buffer data.
enum UploadMethod
{
    UPLOADMETHOD_BUFFER_DATA = 0,
    UPLOADMETHOD_BUFFER_SUB_DATA,
    UPLOADMETHOD_MAP_BUFFER_RANGE,

    UPLOADMETHOD_LAST
};

// Type of the unrelated buffer uploaded next to the measured work, if any.
enum UnrelatedBufferType
{
    UNRELATEDBUFFERTYPE_NONE = 0,
    UNRELATEDBUFFERTYPE_VERTEX,

    UNRELATEDBUFFERTYPE_LAST
};

// Whether the whole buffer or only a part of it is uploaded.
enum UploadRange
{
    UPLOADRANGE_FULL = 0,
    UPLOADRANGE_PARTIAL,

    UPLOADRANGE_LAST
};

// Dimensions of the rendered workload: gridLayers layers of gridWidth x gridHeight cells.
struct LayeredGridSpec
{
    int gridWidth;
    int gridHeight;
    int gridLayers;
};

// Returns the number of vertices in the scene: six per cell in every layer.
static int getLayeredGridNumVertices(const LayeredGridSpec &scene)
{
return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
}

// Fills vertexData with interleaved (color, position) tcu::Vec4 pairs for every cell of the layered grid.
// Each cell is written as two triangles, i.e. 6 vertices or 12 Vec4 entries. Positions span [-1, 1] in
// x and y and always use z = 0, so all layers of a cell share the same position.
static void generateLayeredGridVertexAttribData4C4V(std::vector &vertexData, const LayeredGridSpec &scene)
{
    // interleave color & vertex data
    const tcu::Vec4 green(0.0f, 1.0f, 0.0f, 0.7f);
    const tcu::Vec4 yellow(1.0f, 1.0f, 0.0f, 0.8f);

    // Two Vec4 entries (color + position) per vertex.
    vertexData.resize(getLayeredGridNumVertices(scene) * 2);

    for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
        for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
            for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
            {
                // Checkerboard coloring that alternates over x, y and layer.
                const tcu::Vec4 color = (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
                const float cellLeft = (float(cellX) / (float)scene.gridWidth - 0.5f) * 2.0f;
                const float cellRight = (float(cellX + 1) / (float)scene.gridWidth - 0.5f) * 2.0f;
                const float cellTop = (float(cellY + 1) / (float)scene.gridHeight - 0.5f) * 2.0f;
                const float cellBottom = (float(cellY) / (float)scene.gridHeight - 0.5f) * 2.0f;

                // First triangle: (left, top), (left, bottom), (right, bottom).
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 0] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 1] =
                    tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 2] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 3] =
                    tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 4] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 5] =
                    tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);

                // Second triangle: (left, top), (right, bottom), (right, top).
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 6] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 7] =
                    tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth
* scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 8] = color; vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f); vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color; vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f); } } static void generateLayeredGridIndexData(std::vector &indexData, const LayeredGridSpec &scene) { indexData.resize(getLayeredGridNumVertices(scene) * 2); for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx) indexData[ndx] = ndx; } class RenderPerformanceTestBase : public TestCase { public: RenderPerformanceTestBase(Context &context, const char *name, const char *description); ~RenderPerformanceTestBase(void); protected: void init(void); void deinit(void); void waitGLResults(void) const; void setupVertexAttribs(void) const; enum { RENDER_AREA_SIZE = 128 }; private: glu::ShaderProgram *m_renderProgram; int m_colorLoc; int m_positionLoc; }; RenderPerformanceTestBase::RenderPerformanceTestBase(Context &context, const char *name, const char *description) : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description) , m_renderProgram(nullptr) , m_colorLoc(0) , m_positionLoc(0) { } RenderPerformanceTestBase::~RenderPerformanceTestBase(void) { deinit(); } void RenderPerformanceTestBase::init(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader)); if (!m_renderProgram->isOk()) { m_testCtx.getLog() << *m_renderProgram; throw tcu::TestError("could not build program"); } m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color"); 
m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");

    if (m_colorLoc == -1)
        throw tcu::TestError("Location of attribute a_color was -1");
    if (m_positionLoc == -1)
        throw tcu::TestError("Location of attribute a_position was -1");
}

// Deletes the shader program. Safe to call repeatedly because the pointer is reset to nullptr.
void RenderPerformanceTestBase::deinit(void)
{
    delete m_renderProgram;
    m_renderProgram = nullptr;
}

// Enables the color and position attribute arrays, points them at the interleaved layout
// (stride of 8 floats; color at offset 0, position one tcu::Vec4 later) and binds the render program.
// The attribute pointers are given as buffer offsets, so the vertex buffer must already be bound.
void RenderPerformanceTestBase::setupVertexAttribs(void) const
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // buffers are bound

    gl.enableVertexAttribArray(m_colorLoc);
    gl.enableVertexAttribArray(m_positionLoc);
    gl.vertexAttribPointer(m_colorLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
                           glu::BufferOffsetAsPointer(0 * sizeof(tcu::Vec4)));
    gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
                           glu::BufferOffsetAsPointer(1 * sizeof(tcu::Vec4)));

    gl.useProgram(m_renderProgram->getProgram());

    GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
}

// Reads back the render area into a throw-away surface; used to wait for pending GL work.
void RenderPerformanceTestBase::waitGLResults(void) const
{
    tcu::Surface unusedSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
}

// Base class for cases that run one timed sample per iterate() call over a set of grid workloads.
// NOTE(review): the template parameter lists in this file appear to have been stripped
// ("template class", "std::vector &"); restore them before building.
template class RenderCase : public RenderPerformanceTestBase
{
public:
    RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);
    ~RenderCase(void);

protected:
    void init(void);
    void deinit(void);

private:
    IterateResult iterate(void);

protected:
    // One workload (scene) together with the measurements taken for it.
    struct SampleResult
    {
        LayeredGridSpec scene;
        RenderSampleResult result;
    };

    int getMinWorkloadSize(void) const;
    int getMaxWorkloadSize(void) const;
    int getMinWorkloadDataSize(void) const;
    int getMaxWorkloadDataSize(void) const;
    int getVertexDataSize(void) const;
    int getNumSamples(void) const;
    void uploadScene(const LayeredGridSpec &scene);

    // Implemented by subclasses: performs and times a single sample.
    virtual void runSample(SampleResult &sample) = 0;
    virtual void logAndSetTestResult(const std::vector &results);

    void mapResultsToRenderRateFormat(std::vector> &dst, const
std::vector &src) const;

    const DrawMethod m_drawMethod;

private:
    glw::GLuint m_attributeBufferID; // vertex buffer created by uploadScene(), 0 when not created
    glw::GLuint m_indexBufferID;     // index buffer created by uploadScene(), 0 when not created
    int m_iterationNdx;              // number of samples completed so far
    std::vector m_iterationOrder;    // order in which the entries of m_results are sampled
    std::vector m_results;           // one entry per workload
    int m_numUnmapFailures;          // number of samples aborted by UnmapFailureError
};

template RenderCase::RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod)
    : RenderPerformanceTestBase(context, name, description)
    , m_drawMethod(drawMethod)
    , m_attributeBufferID(0)
    , m_indexBufferID(0)
    , m_iterationNdx(0)
    , m_numUnmapFailures(0)
{
    DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
}

// Runs deinit() so that GL objects are released even if the framework did not call it.
template RenderCase::~RenderCase(void)
{
    deinit();
}

// Initializes the base class, checks the render target size, sets up blending and generates the list
// of workloads together with their sampling order.
// Throws tcu::NotSupportedError if the render target is smaller than RENDER_AREA_SIZE in either dimension.
template void RenderCase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    RenderPerformanceTestBase::init();

    // requirements

    if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
        m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
        throw tcu::NotSupportedError("Test case requires " + de::toString(RENDER_AREA_SIZE) + "x" +
                                     de::toString(RENDER_AREA_SIZE) + " render target");

    // gl state

    gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);

    // enable blending to prevent grid layers from being discarded
    gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    gl.blendEquation(GL_FUNC_ADD);
    gl.enable(GL_BLEND);

    // generate iterations

    {
        // Square grids of these side lengths, each with 5 layers.
        const int gridSizes[] = {20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80, 86, 92, 98, 104, 110, 116, 122, 128};

        for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
        {
            m_results.push_back(SampleResult());

            m_results.back().scene.gridHeight = gridSizes[gridNdx];
            m_results.back().scene.gridWidth = gridSizes[gridNdx];
            m_results.back().scene.gridLayers = 5;

            m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);

            // test cases set these, initialize to unused values
            m_results.back().result.renderDataSize = -1;
            m_results.back().result.uploadedDataSize = -1;
            m_results.back().result.unrelatedDataSize = -1;
        }
    }

    // randomize iteration order
    {
m_iterationOrder.resize(m_results.size()); generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size()); } } template void RenderCase::deinit(void) { RenderPerformanceTestBase::deinit(); if (m_attributeBufferID) { m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID); m_attributeBufferID = 0; } if (m_indexBufferID) { m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID); m_indexBufferID = 0; } } template typename RenderCase::IterateResult RenderCase::iterate(void) { const int unmapFailureThreshold = 3; const int currentIteration = m_iterationNdx; const int currentConfigNdx = m_iterationOrder[currentIteration]; SampleResult ¤tSample = m_results[currentConfigNdx]; try { runSample(currentSample); ++m_iterationNdx; } catch (const UnmapFailureError &ex) { DE_UNREF(ex); ++m_numUnmapFailures; } if (m_numUnmapFailures > unmapFailureThreshold) throw tcu::TestError("Got too many unmap errors"); if (m_iterationNdx < (int)m_iterationOrder.size()) return CONTINUE; logAndSetTestResult(m_results); return STOP; } template int RenderCase::getMinWorkloadSize(void) const { int result = getLayeredGridNumVertices(m_results[0].scene); for (int ndx = 1; ndx < (int)m_results.size(); ++ndx) { const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene); result = de::min(result, workloadSize); } return result; } template int RenderCase::getMaxWorkloadSize(void) const { int result = getLayeredGridNumVertices(m_results[0].scene); for (int ndx = 1; ndx < (int)m_results.size(); ++ndx) { const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene); result = de::max(result, workloadSize); } return result; } template int RenderCase::getMinWorkloadDataSize(void) const { return getMinWorkloadSize() * getVertexDataSize(); } template int RenderCase::getMaxWorkloadDataSize(void) const { return getMaxWorkloadSize() * getVertexDataSize(); } template int RenderCase::getVertexDataSize(void) const { const 
int numVectors = 2; // one color and one position vector per vertex
    const int vec4Size = 4 * sizeof(float);

    return numVectors * vec4Size;
}

// Returns the number of workloads (samples) of the case.
template int RenderCase::getNumSamples(void) const
{
    return (int)m_results.size();
}

// Generates the data of the scene and uploads it with GL_STATIC_DRAW usage into the case's own vertex
// buffer and, when drawing with drawElements, index buffer. The buffers are created on first use and
// are left bound.
template void RenderCase::uploadScene(const LayeredGridSpec &scene)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // vertex buffer
    {
        std::vector vertexData;

        generateLayeredGridVertexAttribData4C4V(vertexData, scene);

        if (m_attributeBufferID == 0)
            gl.genBuffers(1, &m_attributeBufferID);
        gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
        gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
    }

    // index buffer
    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        std::vector indexData;

        generateLayeredGridIndexData(indexData, scene);

        if (m_indexBufferID == 0)
            gl.genBuffers(1, &m_indexBufferID);
        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STATIC_DRAW);
    }

    GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
}

// Analyzes the collected samples and sets the test result to PASS with the render rate as description.
template void RenderCase::logAndSetTestResult(const std::vector &results)
{
    std::vector> mappedResults;

    mapResultsToRenderRateFormat(mappedResults, results);

    {
        const RenderSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), mappedResults);
        const float rate = analysis.renderRateAtRange;

        if (rate == std::numeric_limits::infinity())
        {
            // sample times are 1) invalid or 2) timer resolution too low
            m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
        }
        else
        {
            // report transfer rate in MiB/s (the rate is divided by 1024 * 1024; presumably it is given
            // in bytes/s -- TODO confirm against analyzeSampleResults)
            m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
        }
    }
}

// Copies the measurement part of every sample into dst, dropping the scene description.
template void RenderCase::mapResultsToRenderRateFormat(std::vector> &dst, const std::vector &src) const
{
    dst.resize(src.size());

    for (int ndx = 0; ndx < (int)src.size(); ++ndx)
        dst[ndx] = src[ndx].result;
}

// Reference case: measures drawing and reading back without any upload in the timed section.
class ReferenceRenderTimeCase : public
RenderCase
{
public:
    ReferenceRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);

private:
    void init(void);
    void runSample(SampleResult &sample);
};

ReferenceRenderTimeCase::ReferenceRenderTimeCase(Context &context, const char *name, const char *description,
                                                 DrawMethod drawMethod)
    : RenderCase(context, name, description, drawMethod)
{
}

// Initializes the parent class and logs a description of what the case measures.
void ReferenceRenderTimeCase::init(void)
{
    const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");

    // init parent
    // The workload list queried by the log message below is generated by the parent's init().
    RenderCase::init();

    // log
    m_testCtx.getLog()
        << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
        << " and readPixels call with different rendering workloads.\n"
        << getNumSamples() << " test samples. Sample order is randomized.\n"
        << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
        << "Generated workload is multiple viewport-covering grids with varying number of cells, each "
           "cell is two separate triangles.\n"
        << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
        << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
        << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
        << "Test result is the approximated total processing rate in MiB / s.\n"
        << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
                ("Note that index array size is not included in the processed size.\n") :
                (""))
        << "Note! Test result should only be used as a baseline reference result for "
           "buffer.data_upload.* test group results."
<< tcu::TestLog::EndMessage;
}

// Uploads the scene into temporary buffers, renders it once untimed, and then times a second draw call
// and the following readPixels call separately.
void ReferenceRenderTimeCase::runSample(SampleResult &sample)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    const int numVertices = getLayeredGridNumVertices(sample.scene);
    const glu::Buffer arrayBuffer(m_context.getRenderContext());
    const glu::Buffer indexBuffer(m_context.getRenderContext());
    std::vector vertexData;
    std::vector indexData;
    uint64_t startTime;
    uint64_t endTime;

    // generate and upload buffers
    generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);

    gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
    gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);

    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        generateLayeredGridIndexData(indexData, sample.scene);

        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STATIC_DRAW);
    }

    setupVertexAttribs();

    // make sure data is uploaded
    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr);
    else
        DE_ASSERT(false);
    waitGLResults();

    // Clear and wait again so that the timed draw below starts from a cleared target.
    gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);
    waitGLResults();

    tcu::warmupCPU();

    // Measure both draw and associated readpixels
    {
        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.renderDuration = endTime - startTime;
    }

    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();
sample.result.duration.readDuration = endTime - startTime; } sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices; sample.result.uploadedDataSize = 0; sample.result.unrelatedDataSize = 0; sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; } class UnrelatedUploadRenderTimeCase : public RenderCase { public: UnrelatedUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod); private: void init(void); void runSample(SampleResult &sample); const UploadMethod m_unrelatedUploadMethod; }; UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod) : RenderCase(context, name, description, drawMethod) , m_unrelatedUploadMethod(unrelatedUploadMethod) { DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST); } void UnrelatedUploadRenderTimeCase::init(void) { const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements"); tcu::MessageBuilder message(&m_testCtx.getLog()); // init parent RenderCase::init(); // log message << "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n" << "Uploading an unrelated buffer just before issuing the rendering command with " << ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : (m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange") : (nullptr)) << ".\n" << getNumSamples() << " test samples. 
Sample order is randomized.\n" << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two " "separate triangles.\n" << "Workload sizes are in the range [" << getMinWorkloadSize() << ", " << getMaxWorkloadSize() << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << "," << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n" << "Unrelated upload sizes are in the range [" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ", " << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n" << "Test result is the approximated total processing rate in MiB / s.\n" << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : ("")) << "Note that the data size and the time used in the unrelated upload is not included in the results.\n" << "Note! Test result may not be useful as is but instead should be compared against the reference.* group " "and upload_and_draw.*_and_unrelated_upload group results.\n" << tcu::TestLog::EndMessage; } void UnrelatedUploadRenderTimeCase::runSample(SampleResult &sample) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE); const int numVertices = getLayeredGridNumVertices(sample.scene); const glu::Buffer arrayBuffer(m_context.getRenderContext()); const glu::Buffer indexBuffer(m_context.getRenderContext()); const glu::Buffer unrelatedBuffer(m_context.getRenderContext()); int unrelatedUploadSize = -1; int renderUploadSize; std::vector vertexData; std::vector indexData; uint64_t startTime; uint64_t endTime; // generate and upload buffers generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); 
gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);

    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        generateLayeredGridIndexData(indexData, sample.scene);

        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STATIC_DRAW);
    }

    setupVertexAttribs();

    // make sure data is uploaded
    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr);
    else
        DE_ASSERT(false);
    waitGLResults();

    gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);
    waitGLResults();

    tcu::warmupCPU();

    // Unrelated upload
    // The same vertex data is uploaded into a separate buffer that the draw call does not source from.
    if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
    {
        unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
    }
    else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
    {
        unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        // Storage is allocated first without data, then filled with bufferSubData.
        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, nullptr, GL_STATIC_DRAW);
        gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
    }
    else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
    {
        void *mapPtr;
        glw::GLboolean unmapSuccessful;

        unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        // Storage is allocated first without data, then filled through a mapping.
        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, nullptr, GL_STATIC_DRAW);

        mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize,
                                   GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
                                       GL_MAP_UNSYNCHRONIZED_BIT);
        if (!mapPtr)
            throw tcu::Exception("MapBufferRange returned NULL");

        deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);

        // if unmapping fails, just try again later
        unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
        if (!unmapSuccessful)
            throw UnmapFailureError();
    }
    else
        DE_ASSERT(false);

    DE_ASSERT(unrelatedUploadSize != -1);

    // Measure both draw and associated readpixels
    {
        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.renderDuration = endTime - startTime;
    }

    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();

        sample.result.duration.readDuration = endTime - startTime;
    }

    // The unrelated upload is recorded by size only; its duration is not measured.
    sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
    sample.result.uploadedDataSize = renderUploadSize;
    sample.result.unrelatedDataSize = unrelatedUploadSize;
    sample.result.duration.renderReadDuration =
        sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
}

// Reference case: measures the time of a single readPixels call over a fixed number of samples.
class ReferenceReadPixelsTimeCase : public TestCase
{
public:
    ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description);

private:
    void init(void);
    IterateResult iterate(void);
    void logAndSetTestResult(void);

    enum
    {
        RENDER_AREA_SIZE = 128 // width and height of the area that is read back
    };

    const int m_numSamples;  // number of samples to take
    int m_sampleNdx;         // index of the next sample
    std::vector m_samples;   // measured readPixels times in microseconds
};

// Takes 20 samples. m_samples is sized from m_numSamples, which is declared (and therefore
// initialized) before it.
ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description)
    : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
    , m_numSamples(20)
    , m_sampleNdx(0)
    , m_samples(m_numSamples)
{
}

// Logs a description of what the case measures.
void ReferenceReadPixelsTimeCase::init(void)
{
    m_testCtx.getLog() <<
tcu::TestLog::Message << "Measuring the time used in a single readPixels call with " << m_numSamples
                       << " test samples.\n"
                       << "Test result is the median of the samples in microseconds.\n"
                       << "Note! Test result should only be used as a baseline reference result for "
                          "buffer.data_upload.* test group results."
                       << tcu::TestLog::EndMessage;
}

// Takes one sample: clears the target, waits for the clear with an untimed readPixels and then times a
// second readPixels call. Returns CONTINUE until m_numSamples samples have been taken, then logs the
// results and returns STOP.
ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    uint64_t startTime;
    uint64_t endTime;

    deYield();
    tcu::warmupCPU();
    deYield();

    // "Render" something and wait for it
    // The blue channel depends on the sample index, so every sample clears to a different color.
    gl.clearColor(0.0f, 1.0f, float(m_sampleNdx) / float(m_numSamples), 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);

    // wait for results
    glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());

    // measure time used in readPixels
    startTime = deGetMicroseconds();
    glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
    endTime = deGetMicroseconds();

    m_samples[m_sampleNdx] = (int)(endTime - startTime);

    if (++m_sampleNdx < m_numSamples)
        return CONTINUE;

    logAndSetTestResult();
    return STOP;
}

// Logs every sample, then the median and the 0.2 / 0.8 sample points, and sets the test result to PASS
// with the median as description. Note that m_samples is sorted in place.
void ReferenceReadPixelsTimeCase::logAndSetTestResult(void)
{
    // Log sample list
    {
        m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
                           << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us",
                                                      QP_SAMPLE_VALUE_TAG_RESPONSE)
                           << tcu::TestLog::EndSampleInfo;

        for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
            m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx] << tcu::TestLog::EndSample;

        m_testCtx.getLog() << tcu::TestLog::EndSampleList;
    }

    // Log median
    {
        float median;
        float limit60Low;
        float limit60Up;

        // The samples are logged above in measurement order before being sorted here.
        std::sort(m_samples.begin(), m_samples.end());
        median = linearSample(m_samples, 0.5f);
        limit60Low = linearSample(m_samples, 0.2f);
        limit60Up = linearSample(m_samples, 0.8f);

        m_testCtx.getLog() <<
tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median) << tcu::TestLog::Message
                           << "60 % of samples within range:\n"
                           << tcu::TestLog::EndMessage
                           << tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
                           << tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);

        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
    }
}

// Measures drawing and reading back when the buffer sourced by the draw call is uploaded just before
// it. The target buffer, upload method, buffer state, upload range and an optional unrelated upload
// are configurable.
// NOTE(review): the template parameter lists in this file appear to have been stripped
// ("template class", "RenderCase::"); restore them before building.
template class GenericUploadRenderTimeCase : public RenderCase
{
public:
    typedef typename RenderCase::SampleResult SampleResult;

    GenericUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
                                TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState,
                                UploadRange uploadRange, UnrelatedBufferType unrelatedBufferType);

private:
    void init(void);
    void runSample(SampleResult &sample);

    using RenderCase::RENDER_AREA_SIZE;

    const TargetBuffer m_targetBuffer;
    const BufferState m_bufferState;
    const UploadMethod m_uploadMethod;
    const UnrelatedBufferType m_unrelatedBufferType;
    const UploadRange m_uploadRange;

    // Members of the base class are brought into scope explicitly.
    using RenderCase::m_context;
    using RenderCase::m_testCtx;
    using RenderCase::m_drawMethod;
};

// Stores the configuration. The initializer order follows the member declaration order, which differs
// from the parameter order.
template GenericUploadRenderTimeCase::GenericUploadRenderTimeCase(Context &context, const char *name,
                                                                  const char *description, DrawMethod method,
                                                                  TargetBuffer targetBuffer, UploadMethod uploadMethod,
                                                                  BufferState bufferState, UploadRange uploadRange,
                                                                  UnrelatedBufferType unrelatedBufferType)
    : RenderCase(context, name, description, method)
    , m_targetBuffer(targetBuffer)
    , m_bufferState(bufferState)
    , m_uploadMethod(uploadMethod)
    , m_unrelatedBufferType(unrelatedBufferType)
    , m_uploadRange(uploadRange)
{
    DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
    DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
    DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
    DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
    DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
}

// Initializes the parent class and logs a description of what the case measures.
template void GenericUploadRenderTimeCase::init(void)
{
    // init parent
RenderCase::init();

    // log
    {
        const char *const targetFunctionName =
            (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
        // Bytes uploaded per vertex: one index for the index buffer, a color/position pair otherwise.
        const int perVertexSize =
            (m_targetBuffer == TARGETBUFFER_INDEX) ? ((int)sizeof(uint32_t)) : ((int)sizeof(tcu::Vec4[2]));
        const int fullMinUploadSize = RenderCase::getMinWorkloadSize() * perVertexSize;
        const int fullMaxUploadSize = RenderCase::getMaxWorkloadSize() * perVertexSize;
        // A partial upload is logged as half of the full size, passed through deAlign32 with alignment 4.
        const int minUploadSize =
            (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize / 2, 4));
        const int maxUploadSize =
            (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize / 2, 4));
        const int minUnrelatedUploadSize = RenderCase::getMinWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
        const int maxUnrelatedUploadSize = RenderCase::getMaxWorkloadSize() * (int)sizeof(tcu::Vec4[2]);

        // The (nullptr) alternatives below are only reached for values outside the enums, which the
        // constructor asserts against.
        m_testCtx.getLog()
            << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
            << " and readPixels call with different rendering workloads.\n"
            << "The " << ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib")) << " buffer "
            << ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents ")) << "sourced by the rendering command "
            << ((m_bufferState == BUFFERSTATE_NEW)       ? ("is uploaded ") :
                (m_uploadRange == UPLOADRANGE_FULL)      ? ("are specified ") :
                (m_uploadRange == UPLOADRANGE_PARTIAL)   ? ("are updated (partial upload) ") :
                                                           (nullptr))
            << "just before issuing the rendering command.\n"
            << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") :
                                                          ("The buffer is generated just before uploading.\n"))
            << "Buffer "
            << ((m_bufferState == BUFFERSTATE_NEW)       ? ("is uploaded") :
                (m_uploadRange == UPLOADRANGE_FULL)      ? ("contents are specified") :
                (m_uploadRange == UPLOADRANGE_PARTIAL)   ? ("contents are partially updated") :
                                                           (nullptr))
            << " with "
            << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") :
                (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ?
("bufferSubData") : ("mapBufferRange")) << " command. Usage of the target buffer is DYNAMIC_DRAW.\n" << ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | " "MAP_UNSYNCHRONIZED_BIT\n") : ("")) << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : ("")) << RenderCase::getNumSamples() << " test samples. Sample order is randomized.\n" << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two " "separate triangles.\n" << "Workload sizes are in the range [" << RenderCase::getMinWorkloadSize() << ", " << RenderCase::getMaxWorkloadSize() << "] vertices " << "([" << getHumanReadableByteSize(RenderCase::getMinWorkloadDataSize()) << "," << getHumanReadableByteSize(RenderCase::getMaxWorkloadDataSize()) << "] to be processed).\n" << "Upload sizes are in the range [" << getHumanReadableByteSize(minUploadSize) << "," << getHumanReadableByteSize(maxUploadSize) << "].\n" << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") : ("")) << "Test result is the approximated processing rate in MiB / s.\n" << "Note that while upload time is measured, the time used is not included in the results.\n" << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the " "results.\n") : ("")) << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : ("")) << "Note! 
Test result may not be useful as is but instead should be compared against the reference.* group " "and other upload_and_draw.* group results.\n" << tcu::TestLog::EndMessage; } } template void GenericUploadRenderTimeCase::runSample(SampleResult &sample) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const glu::Buffer arrayBuffer(m_context.getRenderContext()); const glu::Buffer indexBuffer(m_context.getRenderContext()); const glu::Buffer unrelatedBuffer(m_context.getRenderContext()); const int numVertices = getLayeredGridNumVertices(sample.scene); tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE); uint64_t startTime; uint64_t endTime; std::vector vertexData; std::vector indexData; // create data generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) generateLayeredGridIndexData(indexData, sample.scene); gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); RenderCase::setupVertexAttribs(); // target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING) { gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW); gl.drawArrays(GL_TRIANGLES, 0, numVertices); } else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW) { // do not touch the vertex buffer } else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING) { // hint that the target buffer will be modified soon const glw::GLenum vertexDataUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW); const glw::GLenum indexDataUsage = (m_targetBuffer == TARGETBUFFER_INDEX) ? 
(GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0], indexDataUsage); gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); } else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW) { if (m_targetBuffer == TARGETBUFFER_VERTEX) { // make the index buffer present on the gpu // use another vertex buffer to keep original buffer in unused state const glu::Buffer vertexCopyBuffer(m_context.getRenderContext()); gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer); RenderCase::setupVertexAttribs(); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0], GL_STATIC_DRAW); gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); // restore original state gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); RenderCase::setupVertexAttribs(); } else if (m_targetBuffer == TARGETBUFFER_INDEX) { // make the vertex buffer present on the gpu gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW); gl.drawArrays(GL_TRIANGLES, 0, numVertices); } else DE_ASSERT(false); } else DE_ASSERT(false); RenderCase::waitGLResults(); GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare"); gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f); gl.clear(GL_COLOR_BUFFER_BIT); RenderCase::waitGLResults(); tcu::warmupCPU(); // upload { glw::GLenum target; glw::GLsizeiptr size; glw::GLintptr offset = 0; const void *source; if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL) { target = GL_ARRAY_BUFFER; size = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)); source = &vertexData[0]; 
} else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL) { target = GL_ELEMENT_ARRAY_BUFFER; size = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)); source = &indexData[0]; } else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL) { DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING); target = GL_ARRAY_BUFFER; size = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4); offset = (glw::GLintptr)deAlign32((int)size / 2, 4); source = (const uint8_t *)&vertexData[0] + offset; } else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL) { DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING); // upload to 25% - 75% range target = GL_ELEMENT_ARRAY_BUFFER; size = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4); offset = (glw::GLintptr)deAlign32((int)size / 2, 4); source = (const uint8_t *)&indexData[0] + offset; } else { DE_ASSERT(false); return; } startTime = deGetMicroseconds(); if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) gl.bufferData(target, size, source, GL_DYNAMIC_DRAW); else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) { // create buffer storage if (m_bufferState == BUFFERSTATE_NEW) gl.bufferData(target, size, nullptr, GL_DYNAMIC_DRAW); gl.bufferSubData(target, offset, size, source); } else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) { void *mapPtr; glw::GLboolean unmapSuccessful; // create buffer storage if (m_bufferState == BUFFERSTATE_NEW) gl.bufferData(target, size, nullptr, GL_DYNAMIC_DRAW); mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT); if (!mapPtr) throw tcu::Exception("MapBufferRange returned NULL"); deMemcpy(mapPtr, source, (int)size); // if unmapping fails, just try again later unmapSuccessful = gl.unmapBuffer(target); if (!unmapSuccessful) throw UnmapFailureError(); } else 
DE_ASSERT(false); endTime = deGetMicroseconds(); sample.result.uploadedDataSize = (int)size; sample.result.duration.uploadDuration = endTime - startTime; } // unrelated if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) { const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4)); gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer); gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW); // Attibute pointers are not modified, no need restore state sample.result.unrelatedDataSize = unrelatedUploadSize; } // draw { startTime = deGetMicroseconds(); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) gl.drawArrays(GL_TRIANGLES, 0, numVertices); else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); else DE_ASSERT(false); endTime = deGetMicroseconds(); sample.result.duration.renderDuration = endTime - startTime; } // read { startTime = deGetMicroseconds(); glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); endTime = deGetMicroseconds(); sample.result.duration.readDuration = endTime - startTime; } // set results sample.result.renderDataSize = RenderCase::getVertexDataSize() * sample.result.numVertices; sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration; sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration; sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; } class BufferInUseRenderTimeCase : public RenderCase { public: enum MapFlags { MAPFLAG_NONE = 0, MAPFLAG_INVALIDATE_BUFFER, MAPFLAG_INVALIDATE_RANGE, MAPFLAG_LAST }; enum UploadBufferTarget { UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0, UPLOADBUFFERTARGET_SAME_BUFFER, UPLOADBUFFERTARGET_LAST }; BufferInUseRenderTimeCase(Context &context, const char *name, const char *description, 
DrawMethod method, MapFlags mapFlags, TargetBuffer targetBuffer, UploadMethod uploadMethod, UploadRange uploadRange, UploadBufferTarget uploadTarget); private: void init(void); void runSample(SampleResult &sample); const TargetBuffer m_targetBuffer; const UploadMethod m_uploadMethod; const UploadRange m_uploadRange; const MapFlags m_mapFlags; const UploadBufferTarget m_uploadBufferTarget; }; BufferInUseRenderTimeCase::BufferInUseRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method, MapFlags mapFlags, TargetBuffer targetBuffer, UploadMethod uploadMethod, UploadRange uploadRange, UploadBufferTarget uploadTarget) : RenderCase(context, name, description, method) , m_targetBuffer(targetBuffer) , m_uploadMethod(uploadMethod) , m_uploadRange(uploadRange) , m_mapFlags(mapFlags) , m_uploadBufferTarget(uploadTarget) { DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST); DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST); DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST); DE_ASSERT(m_mapFlags < MAPFLAG_LAST); DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST); } void BufferInUseRenderTimeCase::init(void) { RenderCase::init(); // log { const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements"); const char *const uploadFunctionName = (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? 
("bufferSubData") : ("mapBufferRange"); const bool isReferenceCase = (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER); tcu::MessageBuilder message(&m_testCtx.getLog()); message << "Measuring the time used in " << targetFunctionName << " call, a buffer upload, " << targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n"; if (isReferenceCase) message << "Rendering:\n" << " before test: create and use buffers B and C\n" << " first draw: render using buffer B\n" << ((m_uploadRange == UPLOADRANGE_FULL) ? (" upload: respecify buffer C contents\n") : (m_uploadRange == UPLOADRANGE_PARTIAL) ? (" upload: modify buffer C contents\n") : (nullptr)) << " second draw: render using buffer C\n" << " read: readPixels\n"; else message << "Rendering:\n" << " before test: create and use buffer B\n" << " first draw: render using buffer B\n" << ((m_uploadRange == UPLOADRANGE_FULL) ? (" upload: respecify buffer B contents\n") : (m_uploadRange == UPLOADRANGE_PARTIAL) ? (" upload: modify buffer B contents\n") : (nullptr)) << " second draw: render using buffer B\n" << " read: readPixels\n"; message << "Uploading using " << uploadFunctionName << ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT") : (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT") : (m_mapFlags == MAPFLAG_NONE) ? ("") : (nullptr)) << "\n" << getNumSamples() << " test samples. 
Sample order is randomized.\n" << "All samples at even positions (first = 0) are tested before samples at odd positions.\n" << "Workload sizes are in the range [" << getMinWorkloadSize() << ", " << getMaxWorkloadSize() << "] vertices " << "([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << "," << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n" << "Test result is the approximated processing rate in MiB / s of the second draw call and the " "readPixels call.\n"; if (isReferenceCase) message << "Note! Test result should only be used as a baseline reference result for " "buffer.render_after_upload.draw_modify_draw test group results."; else message << "Note! Test result may not be useful as is but instead should be compared against the " "buffer.render_after_upload.reference.draw_upload_draw group results.\n"; message << tcu::TestLog::EndMessage; } } void BufferInUseRenderTimeCase::runSample(SampleResult &sample) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const glu::Buffer arrayBuffer(m_context.getRenderContext()); const glu::Buffer indexBuffer(m_context.getRenderContext()); const glu::Buffer alternativeUploadBuffer(m_context.getRenderContext()); const int numVertices = getLayeredGridNumVertices(sample.scene); tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE); uint64_t startTime; uint64_t endTime; std::vector vertexData; std::vector indexData; // create data generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene); if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) generateLayeredGridIndexData(indexData, sample.scene); // make buffers used gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); setupVertexAttribs(); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) { gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW); gl.drawArrays(GL_TRIANGLES, 0, numVertices); } else if 
(m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) { gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0], GL_STREAM_DRAW); gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); } else DE_ASSERT(false); // another pair of buffers for reference case if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER) { if (m_targetBuffer == TARGETBUFFER_VERTEX) { gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW); setupVertexAttribs(); gl.drawArrays(GL_TRIANGLES, 0, numVertices); } else if (m_targetBuffer == TARGETBUFFER_INDEX) { gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0], GL_STREAM_DRAW); gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); } else DE_ASSERT(false); // restore state gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer); gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer); setupVertexAttribs(); } waitGLResults(); GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare"); gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f); gl.clear(GL_COLOR_BUFFER_BIT); waitGLResults(); tcu::warmupCPU(); // first draw { startTime = deGetMicroseconds(); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) gl.drawArrays(GL_TRIANGLES, 0, numVertices); else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); else DE_ASSERT(false); endTime = deGetMicroseconds(); sample.result.duration.firstRenderDuration = endTime - startTime; } // upload { glw::GLenum target; glw::GLsizeiptr size; glw::GLintptr offset = 0; const void *source; if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == 
UPLOADRANGE_FULL) { target = GL_ARRAY_BUFFER; size = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)); source = &vertexData[0]; } else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL) { target = GL_ELEMENT_ARRAY_BUFFER; size = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)); source = &indexData[0]; } else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL) { target = GL_ARRAY_BUFFER; size = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4); offset = (glw::GLintptr)deAlign32((int)size / 2, 4); source = (const uint8_t *)&vertexData[0] + offset; } else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL) { // upload to 25% - 75% range target = GL_ELEMENT_ARRAY_BUFFER; size = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4); offset = (glw::GLintptr)deAlign32((int)size / 2, 4); source = (const uint8_t *)&indexData[0] + offset; } else { DE_ASSERT(false); return; } // reference case? don't modify the buffer in use if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER) gl.bindBuffer(target, *alternativeUploadBuffer); startTime = deGetMicroseconds(); if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) gl.bufferData(target, size, source, GL_STREAM_DRAW); else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) gl.bufferSubData(target, offset, size, source); else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) { const int mapFlags = (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT) : (m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? 
(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT) : (-1); void *mapPtr; glw::GLboolean unmapSuccessful; mapPtr = gl.mapBufferRange(target, offset, size, mapFlags); if (!mapPtr) throw tcu::Exception("MapBufferRange returned NULL"); deMemcpy(mapPtr, source, (int)size); // if unmapping fails, just try again later unmapSuccessful = gl.unmapBuffer(target); if (!unmapSuccessful) throw UnmapFailureError(); } else DE_ASSERT(false); endTime = deGetMicroseconds(); sample.result.uploadedDataSize = (int)size; sample.result.duration.uploadDuration = endTime - startTime; } // second draw { // Source vertex data from alternative buffer in refernce case if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX) setupVertexAttribs(); startTime = deGetMicroseconds(); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) gl.drawArrays(GL_TRIANGLES, 0, numVertices); else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, nullptr); else DE_ASSERT(false); endTime = deGetMicroseconds(); sample.result.duration.secondRenderDuration = endTime - startTime; } // read { startTime = deGetMicroseconds(); glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); endTime = deGetMicroseconds(); sample.result.duration.readDuration = endTime - startTime; } // set results sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices; sample.result.duration.renderReadDuration = sample.result.duration.secondRenderDuration + sample.result.duration.readDuration; sample.result.duration.totalDuration = sample.result.duration.firstRenderDuration + sample.result.duration.uploadDuration + sample.result.duration.secondRenderDuration + sample.result.duration.readDuration; sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration; } class UploadWaitDrawCase : public RenderPerformanceTestBase { public: struct Sample { int numFrames; uint64_t 
uploadCallEndTime; }; struct Result { uint64_t uploadDuration; uint64_t renderDuration; uint64_t readDuration; uint64_t renderReadDuration; uint64_t timeBeforeUse; }; UploadWaitDrawCase(Context &context, const char *name, const char *description, DrawMethod drawMethod, TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState); ~UploadWaitDrawCase(void); private: void init(void); void deinit(void); IterateResult iterate(void); void uploadBuffer(Sample &sample, Result &result); void drawFromBuffer(Sample &sample, Result &result); void reuseAndDeleteBuffer(void); void logAndSetTestResult(void); void logSamples(void); void drawMisc(void); int findStabilizationSample(uint64_t Result::*target, const char *description); bool checkSampleTemporalStability(uint64_t Result::*target, const char *description); const DrawMethod m_drawMethod; const TargetBuffer m_targetBuffer; const UploadMethod m_uploadMethod; const BufferState m_bufferState; const int m_numSamplesPerSwap; const int m_numMaxSwaps; int m_frameNdx; int m_sampleNdx; int m_numVertices; std::vector m_vertexData; std::vector m_indexData; std::vector m_samples; std::vector m_results; std::vector m_iterationOrder; uint32_t m_vertexBuffer; uint32_t m_indexBuffer; uint32_t m_miscBuffer; int m_numMiscVertices; }; UploadWaitDrawCase::UploadWaitDrawCase(Context &context, const char *name, const char *description, DrawMethod drawMethod, TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState) : RenderPerformanceTestBase(context, name, description) , m_drawMethod(drawMethod) , m_targetBuffer(targetBuffer) , m_uploadMethod(uploadMethod) , m_bufferState(bufferState) , m_numSamplesPerSwap(10) , m_numMaxSwaps(4) , m_frameNdx(0) , m_sampleNdx(0) , m_numVertices(-1) , m_vertexBuffer(0) , m_indexBuffer(0) , m_miscBuffer(0) , m_numMiscVertices(-1) { } UploadWaitDrawCase::~UploadWaitDrawCase(void) { deinit(); } void UploadWaitDrawCase::init(void) { const glw::Functions &gl = 
m_context.getRenderContext().getFunctions(); const int vertexAttribSize = (int)sizeof(tcu::Vec4) * 2; // color4, position4 const int vertexIndexSize = (int)sizeof(uint32_t); const int vertexUploadDataSize = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize); RenderPerformanceTestBase::init(); // requirements if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE || m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE) throw tcu::NotSupportedError("Test case requires " + de::toString(RENDER_AREA_SIZE) + "x" + de::toString(RENDER_AREA_SIZE) + " render target"); // gl state gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE); // enable bleding to prevent grid layers from being discarded gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); gl.blendEquation(GL_FUNC_ADD); gl.enable(GL_BLEND); // scene { LayeredGridSpec scene; // create ~8MB workload with similar characteristics as in the other test // => makes comparison to other results more straightforward scene.gridWidth = 93; scene.gridHeight = 93; scene.gridLayers = 5; generateLayeredGridVertexAttribData4C4V(m_vertexData, scene); generateLayeredGridIndexData(m_indexData, scene); m_numVertices = getLayeredGridNumVertices(scene); } // buffers if (m_bufferState == BUFFERSTATE_NEW) { if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) { // reads from two buffers, prepare the static buffer if (m_targetBuffer == TARGETBUFFER_VERTEX) { // index buffer is static, use another vertex buffer to keep original buffer in unused state const glu::Buffer vertexCopyBuffer(m_context.getRenderContext()); gl.genBuffers(1, &m_indexBuffer); gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer); gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)), &m_indexData[0], GL_STATIC_DRAW); 
setupVertexAttribs(); gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, nullptr); } else if (m_targetBuffer == TARGETBUFFER_INDEX) { // vertex buffer is static gl.genBuffers(1, &m_vertexBuffer); gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW); setupVertexAttribs(); gl.drawArrays(GL_TRIANGLES, 0, m_numVertices); } else DE_ASSERT(false); } } else if (m_bufferState == BUFFERSTATE_EXISTING) { const glw::GLenum vertexUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW); const glw::GLenum indexUsage = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW); gl.genBuffers(1, &m_vertexBuffer); gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage); if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) { gl.genBuffers(1, &m_indexBuffer); gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)), &m_indexData[0], indexUsage); } setupVertexAttribs(); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) gl.drawArrays(GL_TRIANGLES, 0, m_numVertices); else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, nullptr); else DE_ASSERT(false); } else DE_ASSERT(false); // misc draw buffer { std::vector vertexData; LayeredGridSpec scene; // create ~1.5MB workload with similar characteristics scene.gridWidth = 40; scene.gridHeight = 40; scene.gridLayers = 5; generateLayeredGridVertexAttribData4C4V(vertexData, scene); gl.genBuffers(1, &m_miscBuffer); gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer); gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW); m_numMiscVertices = 
getLayeredGridNumVertices(scene); } // iterations { m_samples.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap); m_results.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap); for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps) for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx) { const int index = numSwaps * m_numSamplesPerSwap + sampleNdx; m_samples[index].numFrames = numSwaps; } m_iterationOrder.resize(m_samples.size()); generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size()); } // log m_testCtx.getLog() << tcu::TestLog::Message << "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n" << "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n" << "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n" << "Uploading using " << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | " "GL_MAP_UNSYNCHRONIZED_BIT") : (nullptr)) << "\n" << "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n" << ((m_bufferState == BUFFERSTATE_EXISTING) ? 
("All test samples use the same buffer object.\n") : ("")) << "Test result is the number of frames (swaps) required for the render time to stabilize.\n" << "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n" << tcu::TestLog::EndMessage; } void UploadWaitDrawCase::deinit(void) { RenderPerformanceTestBase::deinit(); if (m_vertexBuffer) { m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer); m_vertexBuffer = 0; } if (m_indexBuffer) { m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer); m_indexBuffer = 0; } if (m_miscBuffer) { m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer); m_miscBuffer = 0; } } UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const int betweenIterationFrameCount = 5; // draw misc between test samples const int frameNdx = m_frameNdx++; const int currentSampleNdx = m_iterationOrder[m_sampleNdx]; // Simulate work for about 8ms busyWait(8000); // Busywork rendering during unused frames if (frameNdx != m_samples[currentSampleNdx].numFrames) { // draw similar from another buffer drawMisc(); } if (frameNdx == 0) { // upload and start the clock uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]); } if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0 { // draw using the uploaded buffer drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]); // re-use buffer for something else to make sure test iteration do not affect each other if (m_bufferState == BUFFERSTATE_NEW) reuseAndDeleteBuffer(); } else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationFrameCount) { // next sample ++m_sampleNdx; m_frameNdx = 0; } GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate"); if (m_sampleNdx < (int)m_samples.size()) return 
CONTINUE; logAndSetTestResult(); return STOP; } void UploadWaitDrawCase::uploadBuffer(Sample &sample, Result &result) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); uint64_t startTime; uint64_t endTime; glw::GLenum target; glw::GLsizeiptr size; const void *source; // data source if (m_targetBuffer == TARGETBUFFER_VERTEX) { DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW)); target = GL_ARRAY_BUFFER; size = (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)); source = &m_vertexData[0]; } else if (m_targetBuffer == TARGETBUFFER_INDEX) { DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW)); target = GL_ELEMENT_ARRAY_BUFFER; size = (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)); source = &m_indexData[0]; } else { DE_ASSERT(false); return; } // gen buffer if (m_bufferState == BUFFERSTATE_NEW) { if (m_targetBuffer == TARGETBUFFER_VERTEX) { gl.genBuffers(1, &m_vertexBuffer); gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); } else if (m_targetBuffer == TARGETBUFFER_INDEX) { gl.genBuffers(1, &m_indexBuffer); gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); } else DE_ASSERT(false); if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA || m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) { gl.bufferData(target, size, nullptr, GL_STATIC_DRAW); } } else if (m_bufferState == BUFFERSTATE_EXISTING) { if (m_targetBuffer == TARGETBUFFER_VERTEX) gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); else if (m_targetBuffer == TARGETBUFFER_INDEX) gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); else DE_ASSERT(false); } else DE_ASSERT(false); // upload startTime = deGetMicroseconds(); if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) gl.bufferData(target, size, source, GL_STATIC_DRAW); else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) gl.bufferSubData(target, 0, size, source); else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) { void *mapPtr; glw::GLboolean unmapSuccessful; mapPtr = 
gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT); if (!mapPtr) throw tcu::Exception("MapBufferRange returned NULL"); deMemcpy(mapPtr, source, (int)size); // if unmapping fails, just try again later unmapSuccessful = gl.unmapBuffer(target); if (!unmapSuccessful) throw UnmapFailureError(); } else DE_ASSERT(false); endTime = deGetMicroseconds(); sample.uploadCallEndTime = endTime; result.uploadDuration = endTime - startTime; } void UploadWaitDrawCase::drawFromBuffer(Sample &sample, Result &result) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE); uint64_t startTime; uint64_t endTime; DE_ASSERT(m_vertexBuffer != 0); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) DE_ASSERT(m_indexBuffer == 0); else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) DE_ASSERT(m_indexBuffer != 0); else DE_ASSERT(false); // draw { gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer); if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer); setupVertexAttribs(); // microseconds passed since return from upload call result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime; startTime = deGetMicroseconds(); if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) gl.drawArrays(GL_TRIANGLES, 0, m_numVertices); else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, nullptr); else DE_ASSERT(false); endTime = deGetMicroseconds(); result.renderDuration = endTime - startTime; } // read { startTime = deGetMicroseconds(); glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess()); endTime = deGetMicroseconds(); result.readDuration = endTime - startTime; } result.renderReadDuration = result.renderDuration + result.readDuration; } void UploadWaitDrawCase::reuseAndDeleteBuffer(void) { const glw::Functions &gl = 
m_context.getRenderContext().getFunctions(); if (m_targetBuffer == TARGETBUFFER_INDEX) { // respecify and delete index buffer static const uint32_t indices[3] = {1, 3, 8}; DE_ASSERT(m_indexBuffer != 0); gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, nullptr); gl.deleteBuffers(1, &m_indexBuffer); m_indexBuffer = 0; } else if (m_targetBuffer == TARGETBUFFER_VERTEX) { // respecify and delete vertex buffer static const tcu::Vec4 coloredTriangle[6] = { tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f, 0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(0.8f, -0.1f, 0.0f, 1.0f), }; DE_ASSERT(m_vertexBuffer != 0); gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW); gl.drawArrays(GL_TRIANGLES, 0, 3); gl.deleteBuffers(1, &m_vertexBuffer); m_vertexBuffer = 0; } waitGLResults(); } void UploadWaitDrawCase::logAndSetTestResult(void) { int uploadStabilization; int renderReadStabilization; int renderStabilization; int readStabilization; bool temporallyStable; { const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples"); logSamples(); } { const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability"); // log stabilization points renderReadStabilization = findStabilizationSample(&Result::renderReadDuration, "Combined draw and read"); uploadStabilization = findStabilizationSample(&Result::uploadDuration, "Upload time"); renderStabilization = findStabilizationSample(&Result::renderDuration, "Draw call time"); readStabilization = findStabilizationSample(&Result::readDuration, "ReadPixels time"); temporallyStable = true; temporallyStable &= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read"); temporallyStable &= checkSampleTemporalStability(&Result::uploadDuration, "Upload time"); 
temporallyStable &= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
        temporallyStable &= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
    }

    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");

        // Check result sanity
        // findStabilizationSample() returns 0 only when every swap count had the same
        // distribution, so any other value (including -1) means upload times varied.
        if (uploadStabilization != 0)
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Warning! Upload times are not stable, test result may not be accurate."
                               << tcu::TestLog::EndMessage;

        if (!temporallyStable)
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Warning! Time samples do not seem to be temporally stable, sample times seem to "
                                  "drift to one direction during test execution."
                               << tcu::TestLog::EndMessage;

        // render & read
        // -1 is the "never stabilized" sentinel of findStabilizationSample()
        if (renderReadStabilization == -1)
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Combined time used in draw call and ReadPixels did not stabilize."
                               << tcu::TestLog::EndMessage;
        else
            m_testCtx.getLog() << tcu::TestLog::Integer(
                "RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time",
                "frames", QP_KEY_TAG_TIME, renderReadStabilization);

        // draw call
        if (renderStabilization == -1)
            m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize."
                               << tcu::TestLog::EndMessage;
        else
            m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint",
                                                        "Draw call time stabilization time", "frames",
                                                        QP_KEY_TAG_TIME, renderStabilization);

        // readpixels
        if (readStabilization == -1)
            m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize."
<< tcu::TestLog::EndMessage; else m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization); // Report renderReadStabilization if (renderReadStabilization != -1) m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str()); else m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1 } } void UploadWaitDrawCase::logSamples(void) { // Inverse m_iterationOrder std::vector runOrder(m_iterationOrder.size()); for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx) runOrder[m_iterationOrder[ndx]] = ndx; // Log samples m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo << tcu::TestLog::ValueInfo("NumSwaps", "SwapBuffers before use", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("Delay", "Time before use", "us", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("RunOrder", "Sample run order", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << tcu::TestLog::EndSampleInfo; for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx) m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx].numFrames << (int)m_results[sampleNdx].timeBeforeUse << runOrder[sampleNdx] << (int)m_results[sampleNdx].renderReadDuration << (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration) << 
(int)m_results[sampleNdx].uploadDuration << (int)m_results[sampleNdx].renderDuration << (int)m_results[sampleNdx].readDuration << tcu::TestLog::EndSample; m_testCtx.getLog() << tcu::TestLog::EndSampleList; } void UploadWaitDrawCase::drawMisc(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer); setupVertexAttribs(); gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices); } struct DistributionCompareResult { bool equal; float standardDeviations; }; template static float sumOfRanks(const std::vector &testSamples, const std::vector &allSamples, const Comparer &comparer) { float sum = 0; for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx) { const uint64_t testSample = testSamples[sampleNdx]; const int lowerIndex = (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin()); const int upperIndex = (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin()); const int lowerRank = lowerIndex + 1; // convert zero-indexed to rank const int upperRank = upperIndex; // convert zero-indexed to rank, upperIndex is last equal + 1 const float rankMidpoint = (float)(lowerRank + upperRank) / 2.0f; sum += rankMidpoint; } return sum; } template static DistributionCompareResult distributionCompare(const std::vector &orderedObservationsA, const std::vector &orderedObservationsB, const Comparer &comparer) { // Mann-Whitney U test const int n1 = (int)orderedObservationsA.size(); const int n2 = (int)orderedObservationsB.size(); std::vector allSamples(n1 + n2); std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin()); std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1); std::sort(allSamples.begin(), allSamples.end()); { const float R1 = sumOfRanks(orderedObservationsA, allSamples, comparer); const float U1 = (float)(n1 * n2 + n1 * (n1 + 1) / 2) 
- R1; const float U2 = (float)(n1 * n2) - U1; const float U = de::min(U1, U2); // \note: sample sizes might not be large enough to expect normal distribution but we do it anyway const float mU = (float)(n1 * n2) / 2.0f; const float sigmaU = deFloatSqrt((float)(n1 * n2 * (n1 + n2 + 1)) / 12.0f); const float z = (U - mU) / sigmaU; DistributionCompareResult result; result.equal = (de::abs(z) <= 1.96f); // accept within 95% confidence interval result.standardDeviations = z; return result; } } template struct ThresholdComparer { float relativeThreshold; T absoluteThreshold; bool operator()(const T &a, const T &b) const { const float diff = de::abs((float)a - (float)b); // thresholds if (diff <= (float)absoluteThreshold) return false; if (diff <= float(a) * relativeThreshold || diff <= float(b) * relativeThreshold) return false; // cmp return a < b; } }; int UploadWaitDrawCase::findStabilizationSample(uint64_t UploadWaitDrawCase::Result::*target, const char *description) { std::vector> sampleObservations(m_numMaxSwaps + 1); ThresholdComparer comparer; comparer.relativeThreshold = 0.15f; // 15% comparer.absoluteThreshold = 100; // (us), assumed sampling precision // get observations and order them for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx) { int insertNdx = 0; sampleObservations[swapNdx].resize(m_numSamplesPerSwap); for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx) if (m_samples[ndx].numFrames == swapNdx) sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target; DE_ASSERT(insertNdx == m_numSamplesPerSwap); std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end()); } // find stabilization point for (int sampleNdx = m_numMaxSwaps - 1; sampleNdx != -1; --sampleNdx) { // Distribution is equal to all following distributions for (int cmpTargetDistribution = sampleNdx + 1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution) { // Stable section ends here? 
const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer); if (!result.equal) { // Last two samples are not equal? Samples never stabilized if (sampleNdx == m_numMaxSwaps - 1) { m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n" << "\tDifference in standard deviations: " << result.standardDeviations << "\n" << "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n" << "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n" << tcu::TestLog::EndMessage; return -1; } else { m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n" << "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n" << "\tDifference in standard deviations: " << result.standardDeviations << "\n" << "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n" << "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n" << tcu::TestLog::EndMessage; return sampleNdx + 1; } } } } m_testCtx.getLog() << tcu::TestLog::Message << description << ": All samples seem to have the same distribution" << tcu::TestLog::EndMessage; // all distributions equal return 0; } bool UploadWaitDrawCase::checkSampleTemporalStability(uint64_t UploadWaitDrawCase::Result::*target, const char *description) { // Try to find correlation with sample order and sample times const int numDataPoints = (int)m_iterationOrder.size(); std::vector dataPoints(m_iterationOrder.size()); LineParametersWithConfidence 
lineFit; for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx) { dataPoints[m_iterationOrder[ndx]].x() = (float)ndx; dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target); } lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f); // Difference of more than 25% of the offset along the whole sample range if (de::abs(lineFit.coefficient) * (float)numDataPoints > de::abs(lineFit.offset) * 0.25f) { m_testCtx.getLog() << tcu::TestLog::Message << description << ": Correlation with data point observation order and result time. Results are not " "temporally stable, observations are not independent.\n" << "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n" << tcu::TestLog::EndMessage; return false; } else return true; } } // namespace BufferDataUploadTests::BufferDataUploadTests(Context &context) : TestCaseGroup(context, "data_upload", "Buffer data upload performance tests") { } BufferDataUploadTests::~BufferDataUploadTests(void) { } void BufferDataUploadTests::init(void) { static const struct BufferUsage { const char *name; uint32_t usage; bool primaryUsage; } bufferUsages[] = { {"stream_draw", GL_STREAM_DRAW, true}, {"stream_read", GL_STREAM_READ, false}, {"stream_copy", GL_STREAM_COPY, false}, {"static_draw", GL_STATIC_DRAW, true}, {"static_read", GL_STATIC_READ, false}, {"static_copy", GL_STATIC_COPY, false}, {"dynamic_draw", GL_DYNAMIC_DRAW, true}, {"dynamic_read", GL_DYNAMIC_READ, false}, {"dynamic_copy", GL_DYNAMIC_COPY, false}, }; tcu::TestCaseGroup *const referenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Reference functions"); tcu::TestCaseGroup *const functionCallGroup = new tcu::TestCaseGroup(m_testCtx, "function_call", "Function call timing"); tcu::TestCaseGroup *const modifyAfterUseGroup = new tcu::TestCaseGroup(m_testCtx, "modify_after_use", "Function call time after buffer has been used"); tcu::TestCaseGroup *const renderAfterUploadGroup = new tcu::TestCaseGroup( m_testCtx, 
"render_after_upload", "Function call time of draw commands after buffer has been modified"); addChild(referenceGroup); addChild(functionCallGroup); addChild(modifyAfterUseGroup); addChild(renderAfterUploadGroup); // .reference { static const struct BufferSizeRange { const char *name; int minBufferSize; int maxBufferSize; int numSamples; bool largeBuffersCase; } sizeRanges[] = { {"small_buffers", 0, 1 << 18, 64, false}, // !< 0kB - 256kB {"large_buffers", 1 << 18, 1 << 24, 32, true}, // !< 256kB - 16MB }; for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx) { referenceGroup->addChild(new ReferenceMemcpyCase( m_context, std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(), "Test memcpy performance", sizeRanges[bufferSizeRangeNdx].minBufferSize, sizeRanges[bufferSizeRangeNdx].maxBufferSize, sizeRanges[bufferSizeRangeNdx].numSamples, sizeRanges[bufferSizeRangeNdx].largeBuffersCase)); } } // .function_call { const int minBufferSize = 0; // !< 0kiB const int maxBufferSize = 1 << 24; // !< 16MiB const int numDataSamples = 25; const int numMapSamples = 25; tcu::TestCaseGroup *const bufferDataMethodGroup = new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData"); tcu::TestCaseGroup *const bufferSubDataMethodGroup = new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData"); tcu::TestCaseGroup *const mapBufferRangeMethodGroup = new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange"); functionCallGroup->addChild(bufferDataMethodGroup); functionCallGroup->addChild(bufferSubDataMethodGroup); functionCallGroup->addChild(mapBufferRangeMethodGroup); // .buffer_data { static const struct TargetCase { tcu::TestCaseGroup *group; BufferDataUploadCase::CaseType caseType; bool allUsages; } targetCases[] = { {new tcu::TestCaseGroup(m_testCtx, "new_buffer", "Target new buffer"), BufferDataUploadCase::CASE_NEW_BUFFER, true}, {new tcu::TestCaseGroup(m_testCtx, 
"unspecified_buffer", "Target new unspecified buffer"), BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER, true}, {new tcu::TestCaseGroup(m_testCtx, "specified_buffer", "Target new specified buffer"), BufferDataUploadCase::CASE_SPECIFIED_BUFFER, true}, {new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Target buffer that was used in draw"), BufferDataUploadCase::CASE_USED_BUFFER, true}, {new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer", "Target larger buffer that was used in draw"), BufferDataUploadCase::CASE_USED_LARGER_BUFFER, false}, }; for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx) { bufferDataMethodGroup->addChild(targetCases[targetNdx].group); for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages) targetCases[targetNdx].group->addChild(new BufferDataUploadCase( m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(), std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage, targetCases[targetNdx].caseType)); } } // .buffer_sub_data { static const struct FlagCase { tcu::TestCaseGroup *group; BufferSubDataUploadCase::CaseType parentCase; bool allUsages; int flags; } flagCases[] = { {new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload", ""), BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_FULL_UPLOAD}, {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload", "Clear buffer with bufferData(...,NULL) before sub data call"), BufferSubDataUploadCase::CASE_USED_BUFFER, false, BufferSubDataUploadCase::FLAG_FULL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE}, {new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload", ""), BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD}, {new 
tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"), BufferSubDataUploadCase::CASE_USED_BUFFER, false, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE}, }; for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx) { bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group); for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages) flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase( m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(), std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage, flagCases[flagNdx].parentCase, flagCases[flagNdx].flags)); } } // .map_buffer_range { static const struct FlagCase { const char *name; bool usefulForUnusedBuffers; bool allUsages; int glFlags; int caseFlags; } flagCases[] = { {"flag_write_full", true, true, GL_MAP_WRITE_BIT, 0}, {"flag_write_partial", true, true, GL_MAP_WRITE_BIT, MapBufferRangeCase::FLAG_PARTIAL}, {"flag_read_write_full", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, 0}, {"flag_read_write_partial", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, MapBufferRangeCase::FLAG_PARTIAL}, {"flag_invalidate_range_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, 0}, {"flag_invalidate_range_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_PARTIAL}, {"flag_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0}, {"flag_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, MapBufferRangeCase::FLAG_PARTIAL}, {"flag_write_full_manual_invalidate_buffer", false, false, GL_MAP_WRITE_BIT | 
GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_MANUAL_INVALIDATION}, {"flag_write_partial_manual_invalidate_buffer", false, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION}, {"flag_unsynchronized_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, 0}, {"flag_unsynchronized_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, MapBufferRangeCase::FLAG_PARTIAL}, {"flag_unsynchronized_and_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0}, {"flag_unsynchronized_and_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, MapBufferRangeCase::FLAG_PARTIAL}, }; static const struct FlushCases { const char *name; int glFlags; int caseFlags; } flushCases[] = { {"flag_flush_explicit_map_full", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, 0}, {"flag_flush_explicit_map_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, MapBufferRangeFlushCase::FLAG_PARTIAL}, {"flag_flush_explicit_map_full_flush_in_parts", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS}, {"flag_flush_explicit_map_full_flush_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL}, }; static const struct MapTestGroup { int flags; bool unusedBufferCase; tcu::TestCaseGroup *group; } groups[] = { { MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER, true, new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"), }, { MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER, true, new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"), }, {0, false, new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")}, }; // we OR same flags to both 
range and flushRange cases, make sure it is legal DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER); DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER); for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx) { tcu::TestCaseGroup *const bufferTypeGroup = groups[groupNdx].group; mapBufferRangeMethodGroup->addChild(bufferTypeGroup); for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx) { if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers) continue; tcu::TestCaseGroup *const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, ""); bufferTypeGroup->addChild(bufferUsageGroup); for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages) bufferUsageGroup->addChild(new MapBufferRangeCase( m_context, bufferUsages[usageNdx].name, std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage, flagCases[caseNdx].glFlags, flagCases[caseNdx].caseFlags | groups[groupNdx].flags)); } for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx) { tcu::TestCaseGroup *const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, ""); bufferTypeGroup->addChild(bufferUsageGroup); for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx) if (bufferUsages[usageNdx].primaryUsage) bufferUsageGroup->addChild(new MapBufferRangeFlushCase( m_context, bufferUsages[usageNdx].name, std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(), minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage, flushCases[caseNdx].glFlags, 
flushCases[caseNdx].caseFlags | groups[groupNdx].flags)); } } } } // .modify_after_use { const int minBufferSize = 0; // !< 0kiB const int maxBufferSize = 1 << 24; // !< 16MiB static const struct Usage { const char *name; const char *description; uint32_t usage; } usages[] = { {"static_draw", "Test with GL_STATIC_DRAW", GL_STATIC_DRAW}, {"dynamic_draw", "Test with GL_DYNAMIC_DRAW", GL_DYNAMIC_DRAW}, {"stream_draw", "Test with GL_STREAM_DRAW", GL_STREAM_DRAW}, }; for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx) { tcu::TestCaseGroup *const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description); modifyAfterUseGroup->addChild(usageGroup); usageGroup->addChild(new ModifyAfterWithBufferDataCase(m_context, "buffer_data", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0)); usageGroup->addChild(new ModifyAfterWithBufferDataCase( m_context, "buffer_data_different_size", "Respecify buffer contents and size after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE)); usageGroup->addChild(new ModifyAfterWithBufferDataCase( m_context, "buffer_data_repeated", "Respecify buffer contents after upload and use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED)); usageGroup->addChild(new ModifyAfterWithBufferSubDataCase( m_context, "buffer_sub_data_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0)); usageGroup->addChild(new ModifyAfterWithBufferSubDataCase( m_context, "buffer_sub_data_partial", "Respecify buffer contents partially use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL)); usageGroup->addChild(new ModifyAfterWithBufferSubDataCase( m_context, "buffer_sub_data_full_repeated", "Respecify buffer contents after upload and use", minBufferSize, 
maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED)); usageGroup->addChild(new ModifyAfterWithBufferSubDataCase( m_context, "buffer_sub_data_partial_repeated", "Respecify buffer contents partially upload and use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_write_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_read_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_read_write_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_invalidate_range_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_invalidate_range_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)); 
usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_invalidate_buffer_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_invalidate_buffer_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_unsynchronized_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase( m_context, "map_flag_unsynchronized_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase( m_context, "map_flag_flush_explicit_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase( m_context, "map_flag_flush_explicit_partial", "Respecify buffer contents partially after use", minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); } } // .render_after_upload { // .reference { tcu::TestCaseGroup *const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results"); renderAfterUploadGroup->addChild(renderReferenceGroup); // .draw { tcu::TestCaseGroup *const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of 
functions with non-modified buffers"); renderReferenceGroup->addChild(drawGroup); // Time consumed by readPixels drawGroup->addChild(new ReferenceReadPixelsTimeCase( m_context, "read_pixels", "Measure time consumed by readPixels() function call")); // Time consumed by rendering drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_arrays", "Measure time consumed by drawArrays() function call", DRAWMETHOD_DRAW_ARRAYS)); drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_elements", "Measure time consumed by drawElements() function call", DRAWMETHOD_DRAW_ELEMENTS)); } // .draw_upload_draw { static const struct { const char *name; const char *description; DrawMethod drawMethod; TargetBuffer targetBuffer; bool partial; } uploadTargets[] = { {"draw_arrays_upload_vertices", "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels " "function calls.", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false}, {"draw_arrays_upload_vertices_partial", "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and " "readPixels function calls.", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true}, {"draw_elements_upload_vertices", "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and " "readPixels function calls.", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false}, {"draw_elements_upload_indices", "Measure time consumed by drawElements, index upload, another drawElements, and readPixels " "function calls.", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false}, {"draw_elements_upload_indices_partial", "Measure time consumed by drawElements, partial index upload, another drawElements, and " "readPixels function calls.", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true}, }; static const struct { const char *name; const char *description; UploadMethod uploadMethod; BufferInUseRenderTimeCase::MapFlags mapFlags; bool supportsPartialUpload; } 
uploadMethods[] = { {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false}, {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, true}, {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true}, {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false}, }; tcu::TestCaseGroup *const drawUploadDrawGroup = new tcu::TestCaseGroup( m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw"); renderReferenceGroup->addChild(drawUploadDrawGroup); for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) { const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload) continue; drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase( m_context, name.c_str(), uploadTargets[uploadTargetNdx].description, uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags, uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod, (uploadTargets[uploadTargetNdx].partial) ? 
(UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER)); } } } // .upload_unrelated_and_draw { static const struct { const char *name; const char *description; DrawMethod drawMethod; } drawMethods[] = { {"draw_arrays", "drawArrays", DRAWMETHOD_DRAW_ARRAYS}, {"draw_elements", "drawElements", DRAWMETHOD_DRAW_ELEMENTS}, }; static const struct { const char *name; UploadMethod uploadMethod; } uploadMethods[] = { {"buffer_data", UPLOADMETHOD_BUFFER_DATA}, {"buffer_sub_data", UPLOADMETHOD_BUFFER_SUB_DATA}, {"map_buffer_range", UPLOADMETHOD_MAP_BUFFER_RANGE}, }; tcu::TestCaseGroup *const uploadUnrelatedGroup = new tcu::TestCaseGroup( m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload"); renderAfterUploadGroup->addChild(uploadUnrelatedGroup); for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx) for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) { const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name; const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload"; // Time consumed by rendering command after an unrelated upload uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase( m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod)); } } // .upload_and_draw { static const struct { const char *name; const char *description; BufferState bufferState; UnrelatedBufferType unrelatedBuffer; bool supportsPartialUpload; } bufferConfigs[] = { {"used_buffer", "Upload to an used buffer", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_NONE, true}, {"new_buffer", "Upload to a new buffer", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_NONE, false}, 
{"used_buffer_and_unrelated_upload", "Upload to an used buffer and an unrelated buffer and then draw", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_VERTEX, true}, {"new_buffer_and_unrelated_upload", "Upload to a new buffer and an unrelated buffer and then draw", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_VERTEX, false}, }; tcu::TestCaseGroup *const uploadAndDrawGroup = new tcu::TestCaseGroup( m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers"); renderAfterUploadGroup->addChild(uploadAndDrawGroup); // .used_buffer // .new_buffer // .used_buffer_and_unrelated_upload // .new_buffer_and_unrelated_upload for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx) { static const struct { const char *name; const char *description; DrawMethod drawMethod; TargetBuffer targetBuffer; bool partial; } uploadTargets[] = { {"draw_arrays_upload_vertices", "Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false}, {"draw_arrays_upload_vertices_partial", "Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function " "calls", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true}, {"draw_elements_upload_vertices", "Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false}, {"draw_elements_upload_indices", "Measure time consumed by index upload, drawElements, and readPixels function calls", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false}, {"draw_elements_upload_indices_partial", "Measure time consumed by partial index upload, drawElements, and readPixels function calls", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true}, }; static const struct { const char *name; const char *description; UploadMethod uploadMethod; bool supportsPartialUpload; } uploadMethods[] = { {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, 
false}, {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, true}, {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, true}, }; tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description); uploadAndDrawGroup->addChild(group); for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) { const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload) continue; if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload) continue; // Don't log unrelated buffer information to samples if there is no such buffer if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE) { typedef UploadRenderReadDuration SampleType; typedef GenericUploadRenderTimeCase TestType; group->addChild(new TestType( m_context, name.c_str(), uploadTargets[uploadTargetNdx].description, uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState, (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), bufferConfigs[stateNdx].unrelatedBuffer)); } else { typedef UploadRenderReadDurationWithUnrelatedUploadSize SampleType; typedef GenericUploadRenderTimeCase TestType; group->addChild(new TestType( m_context, name.c_str(), uploadTargets[uploadTargetNdx].description, uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState, (uploadTargets[uploadTargetNdx].partial) ? 
(UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), bufferConfigs[stateNdx].unrelatedBuffer)); } } } } // .draw_modify_draw { static const struct { const char *name; const char *description; DrawMethod drawMethod; TargetBuffer targetBuffer; bool partial; } uploadTargets[] = { {"draw_arrays_upload_vertices", "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels " "function calls.", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false}, {"draw_arrays_upload_vertices_partial", "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and " "readPixels function calls.", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true}, {"draw_elements_upload_vertices", "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels " "function calls.", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false}, {"draw_elements_upload_indices", "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function " "calls.", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false}, {"draw_elements_upload_indices_partial", "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels " "function calls.", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true}, }; static const struct { const char *name; const char *description; UploadMethod uploadMethod; BufferInUseRenderTimeCase::MapFlags mapFlags; bool supportsPartialUpload; } uploadMethods[] = { {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false}, {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, true}, {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true}, {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, 
BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false}, }; tcu::TestCaseGroup *const drawModifyDrawGroup = new tcu::TestCaseGroup( m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use"); renderAfterUploadGroup->addChild(drawModifyDrawGroup); for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) { const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload) continue; drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase( m_context, name.c_str(), uploadTargets[uploadTargetNdx].description, uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags, uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod, (uploadTargets[uploadTargetNdx].partial) ? 
(UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL), BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER)); } } // .upload_wait_draw { static const struct { const char *name; const char *description; BufferState bufferState; } bufferStates[] = { {"new_buffer", "Uploading to just generated name", BUFFERSTATE_NEW}, {"used_buffer", "Uploading to a used buffer", BUFFERSTATE_EXISTING}, }; static const struct { const char *name; const char *description; DrawMethod drawMethod; TargetBuffer targetBuffer; } uploadTargets[] = { {"draw_arrays_vertices", "Upload vertex data, draw with drawArrays", DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX}, {"draw_elements_vertices", "Upload vertex data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX}, {"draw_elements_indices", "Upload index data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX}, }; static const struct { const char *name; const char *description; UploadMethod uploadMethod; } uploadMethods[] = { {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA}, {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA}, {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE}, }; tcu::TestCaseGroup *const uploadSwapDrawGroup = new tcu::TestCaseGroup( m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago"); renderAfterUploadGroup->addChild(uploadSwapDrawGroup); for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx) { tcu::TestCaseGroup *const bufferGroup = new tcu::TestCaseGroup( m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description); uploadSwapDrawGroup->addChild(bufferGroup); for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx) for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx) { const std::string name = std::string() + 
uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name; bufferGroup->addChild(new UploadWaitDrawCase( m_context, name.c_str(), uploadTargets[uploadTargetNdx].description, uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod, bufferStates[bufferStateNdx].bufferState)); } } } } } } // namespace Performance } // namespace gles3 } // namespace deqp