• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Buffer data upload performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es3pBufferDataUploadTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuVectorUtil.hpp"
28 #include "tcuSurface.hpp"
29 #include "tcuCPUWarmup.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "gluRenderContext.hpp"
32 #include "gluShaderProgram.hpp"
33 #include "gluStrUtil.hpp"
34 #include "gluPixelTransfer.hpp"
35 #include "gluObjectWrapper.hpp"
36 #include "glwFunctions.hpp"
37 #include "glwEnums.hpp"
38 #include "deClock.h"
39 #include "deMath.h"
40 #include "deStringUtil.hpp"
41 #include "deRandom.hpp"
42 #include "deMemory.h"
43 #include "deThread.h"
44 
45 #include <algorithm>
46 #include <iomanip>
47 #include <limits>
48 
49 namespace deqp
50 {
51 namespace gles3
52 {
53 namespace Performance
54 {
55 namespace
56 {
57 
58 using gls::theilSenSiegelLinearRegression;
59 using gls::LineParametersWithConfidence;
60 
// GLSL ES 3.00 vertex shader source that writes the a_position attribute
// straight to gl_Position.
static const char* const s_dummyVertexShader =
	"#version 300 es\n"
	"in highp vec4 a_position;\n"
	"void main (void)\n"
	"{\n"
	"	gl_Position = a_position;\n"
	"}\n";
67 
// GLSL ES 3.00 fragment shader source that writes constant opaque red to
// output location 0. (Identifier spelling kept as-is; it may be referenced
// outside this section.)
static const char* const s_dummyFragnentShader =
	"#version 300 es\n"
	"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
	"void main (void)\n"
	"{\n"
	"	dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
	"}\n";
74 
// GLSL ES 3.00 vertex shader source that forwards a_position to gl_Position
// and passes the a_color attribute through as the v_color varying.
static const char* const s_colorVertexShader =
	"#version 300 es\n"
	"in highp vec4 a_position;\n"
	"in highp vec4 a_color;\n"
	"out highp vec4 v_color;\n"
	"void main (void)\n"
	"{\n"
	"	gl_Position = a_position;\n"
	"	v_color = a_color;\n"
	"}\n";
84 
// GLSL ES 3.00 fragment shader source that writes the interpolated v_color
// varying to output location 0.
static const char* const s_colorFragmentShader =
	"#version 300 es\n"
	"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
	"in mediump vec4 v_color;\n"
	"void main (void)\n"
	"{\n"
	"	dEQP_FragColor = v_color;\n"
	"}\n";
92 
93 template <typename TrueType, int cond>
94 struct EnableIf
95 {
96 	typedef TrueType Type;
97 };
98 
99 template <typename TrueType>
100 struct EnableIf<TrueType, 0>
101 {
102 };
103 
104 template <typename TrueType, int cond>
105 struct EnableIfNot
106 {
107 };
108 
109 template <typename TrueType>
110 struct EnableIfNot<TrueType, 0>
111 {
112 	typedef TrueType Type;
113 };
114 
115 struct SingleOperationDuration
116 {
117 	deUint64 totalDuration;
118 	deUint64 fitResponseDuration; // used for fitting
119 };
120 
121 struct MapBufferRangeDuration
122 {
123 	deUint64 mapDuration;
124 	deUint64 unmapDuration;
125 	deUint64 writeDuration;
126 	deUint64 allocDuration;
127 	deUint64 totalDuration;
128 
129 	deUint64 fitResponseDuration;
130 };
131 
132 struct MapBufferRangeDurationNoAlloc
133 {
134 	deUint64 mapDuration;
135 	deUint64 unmapDuration;
136 	deUint64 writeDuration;
137 	deUint64 totalDuration;
138 
139 	deUint64 fitResponseDuration;
140 };
141 
142 struct MapBufferRangeFlushDuration
143 {
144 	deUint64 mapDuration;
145 	deUint64 unmapDuration;
146 	deUint64 writeDuration;
147 	deUint64 flushDuration;
148 	deUint64 allocDuration;
149 	deUint64 totalDuration;
150 
151 	deUint64 fitResponseDuration;
152 };
153 
154 struct MapBufferRangeFlushDurationNoAlloc
155 {
156 	deUint64 mapDuration;
157 	deUint64 unmapDuration;
158 	deUint64 writeDuration;
159 	deUint64 flushDuration;
160 	deUint64 totalDuration;
161 
162 	deUint64 fitResponseDuration;
163 };
164 
165 struct RenderReadDuration
166 {
167 	deUint64 renderDuration;
168 	deUint64 readDuration;
169 	deUint64 renderReadDuration;
170 	deUint64 totalDuration;
171 
172 	deUint64 fitResponseDuration;
173 };
174 
175 struct UnrelatedUploadRenderReadDuration
176 {
177 	deUint64 renderDuration;
178 	deUint64 readDuration;
179 	deUint64 renderReadDuration;
180 	deUint64 totalDuration;
181 
182 	deUint64 fitResponseDuration;
183 };
184 
185 struct UploadRenderReadDuration
186 {
187 	deUint64 uploadDuration;
188 	deUint64 renderDuration;
189 	deUint64 readDuration;
190 	deUint64 totalDuration;
191 	deUint64 renderReadDuration;
192 
193 	deUint64 fitResponseDuration;
194 };
195 
196 struct UploadRenderReadDurationWithUnrelatedUploadSize
197 {
198 	deUint64 uploadDuration;
199 	deUint64 renderDuration;
200 	deUint64 readDuration;
201 	deUint64 totalDuration;
202 	deUint64 renderReadDuration;
203 
204 	deUint64 fitResponseDuration;
205 };
206 
207 struct RenderUploadRenderReadDuration
208 {
209 	deUint64 firstRenderDuration;
210 	deUint64 uploadDuration;
211 	deUint64 secondRenderDuration;
212 	deUint64 readDuration;
213 	deUint64 totalDuration;
214 	deUint64 renderReadDuration;
215 
216 	deUint64 fitResponseDuration;
217 };
218 
// One upload measurement: three byte-size fields plus a timing record of type
// DurationT. writtenSize is the predictor used by the upload line fit and by
// calculateBasicTransferStatistics().
template <typename DurationT>
struct UploadSampleResult
{
	typedef DurationT SampleType;

	int			bufferSize;
	int			allocatedSize;
	int			writtenSize;
	DurationT	duration;
};
229 
// One render measurement: data-size fields, a vertex count and a timing record
// of type DurationT. renderDataSize is the predictor used by
// calculateBasicRenderStatistics().
template <typename DurationT>
struct RenderSampleResult
{
	typedef DurationT SampleType;

	int			uploadedDataSize;
	int			renderDataSize;
	int			unrelatedDataSize;
	int			numVertices;
	DurationT	duration;
};
241 
// Order statistics over the timings of one operation, as filled in by
// calculateSingleOperationStatistics() from the sorted sample values.
struct SingleOperationStatistics
{
	float	minTime;
	float	maxTime;
	float	medianTime;
	float	min2DecileTime;		// !< minimum value in the 2nd decile (sampled at position 0.1)
	float	max9DecileTime;		// !< maximum value in the 9th decile (sampled at position 0.9)
};
250 
251 struct SingleCallStatistics
252 {
253 	SingleOperationStatistics	result;
254 
255 	float						medianRate;
256 	float						maxDiffTime;
257 	float						maxDiff9DecileTime;
258 	float						medianDiffTime;
259 
260 	float						maxRelDiffTime;
261 	float						max9DecileRelDiffTime;
262 	float						medianRelDiffTime;
263 };
264 
265 struct MapCallStatistics
266 {
267 	SingleOperationStatistics	map;
268 	SingleOperationStatistics	unmap;
269 	SingleOperationStatistics	write;
270 	SingleOperationStatistics	alloc;
271 	SingleOperationStatistics	result;
272 
273 	float						medianRate;
274 	float						maxDiffTime;
275 	float						maxDiff9DecileTime;
276 	float						medianDiffTime;
277 
278 	float						maxRelDiffTime;
279 	float						max9DecileRelDiffTime;
280 	float						medianRelDiffTime;
281 };
282 
283 struct MapFlushCallStatistics
284 {
285 	SingleOperationStatistics	map;
286 	SingleOperationStatistics	unmap;
287 	SingleOperationStatistics	write;
288 	SingleOperationStatistics	flush;
289 	SingleOperationStatistics	alloc;
290 	SingleOperationStatistics	result;
291 
292 	float						medianRate;
293 	float						maxDiffTime;
294 	float						maxDiff9DecileTime;
295 	float						medianDiffTime;
296 
297 	float						maxRelDiffTime;
298 	float						max9DecileRelDiffTime;
299 	float						medianRelDiffTime;
300 };
301 
302 struct RenderReadStatistics
303 {
304 	SingleOperationStatistics	render;
305 	SingleOperationStatistics	read;
306 	SingleOperationStatistics	result;
307 	SingleOperationStatistics	total;
308 
309 	float						medianRate;
310 	float						maxDiffTime;
311 	float						maxDiff9DecileTime;
312 	float						medianDiffTime;
313 
314 	float						maxRelDiffTime;
315 	float						max9DecileRelDiffTime;
316 	float						medianRelDiffTime;
317 };
318 
319 struct UploadRenderReadStatistics
320 {
321 	SingleOperationStatistics	upload;
322 	SingleOperationStatistics	render;
323 	SingleOperationStatistics	read;
324 	SingleOperationStatistics	result;
325 	SingleOperationStatistics	total;
326 
327 	float						medianRate;
328 	float						maxDiffTime;
329 	float						maxDiff9DecileTime;
330 	float						medianDiffTime;
331 
332 	float						maxRelDiffTime;
333 	float						max9DecileRelDiffTime;
334 	float						medianRelDiffTime;
335 };
336 
337 struct RenderUploadRenderReadStatistics
338 {
339 	SingleOperationStatistics	firstRender;
340 	SingleOperationStatistics	upload;
341 	SingleOperationStatistics	secondRender;
342 	SingleOperationStatistics	read;
343 	SingleOperationStatistics	result;
344 	SingleOperationStatistics	total;
345 
346 	float						medianRate;
347 	float						maxDiffTime;
348 	float						maxDiff9DecileTime;
349 	float						medianDiffTime;
350 
351 	float						maxRelDiffTime;
352 	float						max9DecileRelDiffTime;
353 	float						medianRelDiffTime;
354 };
355 
// Primary traits template, intentionally empty: each duration type supplies
// its StatsType and flag enumerators through an explicit specialization.
template <typename DurationT>
struct SampleTypeTraits
{
};
360 
361 template <>
362 struct SampleTypeTraits<SingleOperationDuration>
363 {
364 	typedef SingleCallStatistics StatsType;
365 
366 	enum { HAS_MAP_STATS		= 0	};
367 	enum { HAS_UNMAP_STATS		= 0	};
368 	enum { HAS_WRITE_STATS		= 0	};
369 	enum { HAS_FLUSH_STATS		= 0	};
370 	enum { HAS_ALLOC_STATS		= 0	};
371 	enum { LOG_CONTRIBUTIONS	= 0	};
372 };
373 
374 template <>
375 struct SampleTypeTraits<MapBufferRangeDuration>
376 {
377 	typedef MapCallStatistics StatsType;
378 
379 	enum { HAS_MAP_STATS		= 1	};
380 	enum { HAS_UNMAP_STATS		= 1	};
381 	enum { HAS_WRITE_STATS		= 1	};
382 	enum { HAS_FLUSH_STATS		= 0	};
383 	enum { HAS_ALLOC_STATS		= 1	};
384 	enum { LOG_CONTRIBUTIONS	= 1	};
385 };
386 
387 template <>
388 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
389 {
390 	typedef MapCallStatistics StatsType;
391 
392 	enum { HAS_MAP_STATS		= 1	};
393 	enum { HAS_UNMAP_STATS		= 1	};
394 	enum { HAS_WRITE_STATS		= 1	};
395 	enum { HAS_FLUSH_STATS		= 0	};
396 	enum { HAS_ALLOC_STATS		= 0	};
397 	enum { LOG_CONTRIBUTIONS	= 1	};
398 };
399 
400 template <>
401 struct SampleTypeTraits<MapBufferRangeFlushDuration>
402 {
403 	typedef MapFlushCallStatistics StatsType;
404 
405 	enum { HAS_MAP_STATS		= 1	};
406 	enum { HAS_UNMAP_STATS		= 1	};
407 	enum { HAS_WRITE_STATS		= 1	};
408 	enum { HAS_FLUSH_STATS		= 1	};
409 	enum { HAS_ALLOC_STATS		= 1	};
410 	enum { LOG_CONTRIBUTIONS	= 1	};
411 };
412 
413 template <>
414 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
415 {
416 	typedef MapFlushCallStatistics StatsType;
417 
418 	enum { HAS_MAP_STATS		= 1	};
419 	enum { HAS_UNMAP_STATS		= 1	};
420 	enum { HAS_WRITE_STATS		= 1	};
421 	enum { HAS_FLUSH_STATS		= 1	};
422 	enum { HAS_ALLOC_STATS		= 0	};
423 	enum { LOG_CONTRIBUTIONS	= 1	};
424 };
425 
426 template <>
427 struct SampleTypeTraits<RenderReadDuration>
428 {
429 	typedef RenderReadStatistics StatsType;
430 
431 	enum { HAS_RENDER_STATS			= 1	};
432 	enum { HAS_READ_STATS			= 1	};
433 	enum { HAS_UPLOAD_STATS			= 0	};
434 	enum { HAS_TOTAL_STATS			= 1	};
435 	enum { HAS_FIRST_RENDER_STATS	= 0	};
436 	enum { HAS_SECOND_RENDER_STATS	= 0	};
437 
438 	enum { LOG_CONTRIBUTIONS	= 1	};
439 };
440 
441 template <>
442 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
443 {
444 	typedef RenderReadStatistics StatsType;
445 
446 	enum { HAS_RENDER_STATS			= 1	};
447 	enum { HAS_READ_STATS			= 1	};
448 	enum { HAS_UPLOAD_STATS			= 0	};
449 	enum { HAS_TOTAL_STATS			= 1	};
450 	enum { HAS_FIRST_RENDER_STATS	= 0	};
451 	enum { HAS_SECOND_RENDER_STATS	= 0	};
452 
453 	enum { LOG_CONTRIBUTIONS	= 1	};
454 };
455 
456 template <>
457 struct SampleTypeTraits<UploadRenderReadDuration>
458 {
459 	typedef UploadRenderReadStatistics StatsType;
460 
461 	enum { HAS_RENDER_STATS			= 1	};
462 	enum { HAS_READ_STATS			= 1	};
463 	enum { HAS_UPLOAD_STATS			= 1	};
464 	enum { HAS_TOTAL_STATS			= 1	};
465 	enum { HAS_FIRST_RENDER_STATS	= 0	};
466 	enum { HAS_SECOND_RENDER_STATS	= 0	};
467 
468 	enum { LOG_CONTRIBUTIONS			= 1	};
469 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 0 };
470 };
471 
472 template <>
473 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
474 {
475 	typedef UploadRenderReadStatistics StatsType;
476 
477 	enum { HAS_RENDER_STATS			= 1	};
478 	enum { HAS_READ_STATS			= 1	};
479 	enum { HAS_UPLOAD_STATS			= 1	};
480 	enum { HAS_TOTAL_STATS			= 1	};
481 	enum { HAS_FIRST_RENDER_STATS	= 0	};
482 	enum { HAS_SECOND_RENDER_STATS	= 0	};
483 
484 	enum { LOG_CONTRIBUTIONS			= 1	};
485 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
486 };
487 
488 template <>
489 struct SampleTypeTraits<RenderUploadRenderReadDuration>
490 {
491 	typedef RenderUploadRenderReadStatistics StatsType;
492 
493 	enum { HAS_RENDER_STATS			= 0	};
494 	enum { HAS_READ_STATS			= 1	};
495 	enum { HAS_UPLOAD_STATS			= 1	};
496 	enum { HAS_TOTAL_STATS			= 1	};
497 	enum { HAS_FIRST_RENDER_STATS	= 1	};
498 	enum { HAS_SECOND_RENDER_STATS	= 1	};
499 
500 	enum { LOG_CONTRIBUTIONS			= 1	};
501 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
502 };
503 
// Three transfer-rate figures for a set of upload samples. The code that
// fills them in is outside this section of the file.
struct UploadSampleAnalyzeResult
{
	float	transferRateMedian;
	float	transferRateAtRange;
	float	transferRateAtInfinity;
};
510 
// Three render-rate figures for a set of render samples. The code that fills
// them in is outside this section of the file.
struct RenderSampleAnalyzeResult
{
	float	renderRateMedian;
	float	renderRateAtRange;
	float	renderRateAtInfinity;
};
517 
// Payload-free exception type derived from std::exception.
// NOTE(review): presumably thrown when unmapping a buffer fails; the throw
// sites are outside this section - confirm.
class UnmapFailureError : public std::exception
{
public:
	UnmapFailureError (void)
		: std::exception()
	{
	}
};
523 
getHumanReadableByteSize(int numBytes)524 static std::string getHumanReadableByteSize (int numBytes)
525 {
526 	std::ostringstream buf;
527 
528 	if (numBytes < 1024)
529 		buf << numBytes << " byte(s)";
530 	else if (numBytes < 1024 * 1024)
531 		buf << de::floatToString(numBytes/1024.0f, 1) << " KiB";
532 	else
533 		buf << de::floatToString(numBytes/1024.0f/1024.0f, 1) << " MiB";
534 
535 	return buf.str();
536 }
537 
medianTimeMemcpy(void * dst,const void * src,int numBytes)538 static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes)
539 {
540 	// Time used by memcpy is assumed to be asymptotically linear
541 
542 	// With large numBytes, the probability of context switch or other random
543 	// event is high. Apply memcpy in parts and report how much time would
544 	// memcpy have used with the median transfer rate.
545 
546 	// Less than 1MiB, no need to do anything special
547 	if (numBytes < 1048576)
548 	{
549 		deUint64 startTime;
550 		deUint64 endTime;
551 
552 		deYield();
553 
554 		startTime = deGetMicroseconds();
555 		deMemcpy(dst, src, numBytes);
556 		endTime = deGetMicroseconds();
557 
558 		return endTime - startTime;
559 	}
560 	else
561 	{
562 		// Do memcpy in multiple parts
563 
564 		const int	numSections		= 5;
565 		const int	sectionAlign	= 16;
566 
567 		int			sectionStarts[numSections+1];
568 		int			sectionLens[numSections];
569 		deUint64	sectionTimes[numSections];
570 		deUint64	medianTime;
571 		deUint64	bestTime		= 0;
572 
573 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
574 			sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
575 		sectionStarts[numSections] = numBytes;
576 
577 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
578 			sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx];
579 
580 		// Memcpy is usually called after mapbuffer range which may take
581 		// a lot of time. To prevent power management from kicking in during
582 		// copy, warm up more.
583 		{
584 			deYield();
585 			tcu::warmupCPU();
586 			deYield();
587 		}
588 
589 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
590 		{
591 			deUint64 startTime;
592 			deUint64 endTime;
593 
594 			startTime = deGetMicroseconds();
595 			deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]);
596 			endTime = deGetMicroseconds();
597 
598 			sectionTimes[sectionNdx] = endTime - startTime;
599 
600 			if (!bestTime || sectionTimes[sectionNdx] < bestTime)
601 				bestTime = sectionTimes[sectionNdx];
602 
603 			// Detect if write takes 50% longer than it should, and warm up if that happened
604 			if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * bestTime)
605 			{
606 				deYield();
607 				tcu::warmupCPU();
608 				deYield();
609 			}
610 		}
611 
612 		std::sort(sectionTimes, sectionTimes + numSections);
613 
614 		if ((numSections % 2) == 0)
615 			medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
616 		else
617 			medianTime = sectionTimes[numSections / 2];
618 
619 		return medianTime*numSections;
620 	}
621 }
622 
dummyCalculation(float initial,int workSize)623 static float dummyCalculation (float initial, int workSize)
624 {
625 	float	a = initial;
626 	int		b = 123;
627 
628 	for (int ndx = 0; ndx < workSize; ++ndx)
629 	{
630 		a = deFloatCos(a + (float)b);
631 		b = (b + 63) % 107 + de::abs((int)(a*10.0f));
632 	}
633 
634 	return a + (float)b;
635 }
636 
busyWait(int microseconds)637 static void busyWait (int microseconds)
638 {
639 	const deUint64	maxSingleWaitTime	= 1000; // 1ms
640 	const deUint64	endTime				= deGetMicroseconds() + microseconds;
641 	float			dummy				= *tcu::warmupCPUInternal::g_dummy.m_v;
642 	int				workSize			= 500;
643 
644 	// exponentially increase work, cap to 1ms
645 	while (deGetMicroseconds() < endTime)
646 	{
647 		const deUint64	startTime		= deGetMicroseconds();
648 		deUint64		totalTime;
649 
650 		dummy = dummyCalculation(dummy, workSize);
651 
652 		totalTime = deGetMicroseconds() - startTime;
653 
654 		if (totalTime >= maxSingleWaitTime)
655 			break;
656 		else
657 			workSize *= 2;
658 	}
659 
660 	// "wait"
661 	while (deGetMicroseconds() < endTime)
662 		dummy = dummyCalculation(dummy, workSize);
663 
664 	*tcu::warmupCPUInternal::g_dummy.m_v = dummy;
665 }
666 
667 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
668 template <typename T>
linearSample(const std::vector<T> & values,float position)669 static float linearSample (const std::vector<T>& values, float position)
670 {
671 	DE_ASSERT(position >= 0.0f);
672 	DE_ASSERT(position <= 1.0f);
673 
674 	const float	floatNdx			= ((int)values.size() - 1) * position;
675 	const int	lowerNdx			= (int)deFloatFloor(floatNdx);
676 	const int	higherNdx			= lowerNdx + 1;
677 	const float	interpolationFactor = floatNdx - (float)lowerNdx;
678 
679 	DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
680 	DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
681 	DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);
682 
683 	return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
684 }
685 
686 template <typename T>
calculateSingleOperationStatistics(const std::vector<T> & samples,deUint64 T::SampleType::* target)687 SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target)
688 {
689 	SingleOperationStatistics	stats;
690 	std::vector<deUint64>		values(samples.size());
691 
692 	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
693 		values[ndx] = samples[ndx].duration.*target;
694 
695 	std::sort(values.begin(), values.end());
696 
697 	stats.minTime			= (float)values.front();
698 	stats.maxTime			= (float)values.back();
699 	stats.medianTime		= linearSample(values, 0.5f);
700 	stats.min2DecileTime	= linearSample(values, 0.1f);
701 	stats.max9DecileTime	= linearSample(values, 0.9f);
702 
703 	return stats;
704 }
705 
706 template <typename StatisticsType, typename SampleType>
calculateBasicStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples,int SampleType::* predictor)707 void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor)
708 {
709 	std::vector<deUint64> values(samples.size());
710 
711 	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
712 		values[ndx] = samples[ndx].duration.fitResponseDuration;
713 
714 	// median rate
715 	{
716 		std::vector<float> processingRates(samples.size());
717 
718 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
719 		{
720 			const float timeInSeconds = values[ndx] / 1000.0f / 1000.0f;
721 			processingRates[ndx] = samples[ndx].*predictor / timeInSeconds;
722 		}
723 
724 		std::sort(processingRates.begin(), processingRates.end());
725 
726 		stats.medianRate = linearSample(processingRates, 0.5f);
727 	}
728 
729 	// results compared to the approximation
730 	{
731 		std::vector<float> timeDiffs(samples.size());
732 
733 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
734 		{
735 			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
736 			const float actual		= (float)values[ndx];
737 			timeDiffs[ndx] = actual - prediction;
738 		}
739 		std::sort(timeDiffs.begin(), timeDiffs.end());
740 
741 		stats.maxDiffTime			= timeDiffs.back();
742 		stats.maxDiff9DecileTime	= linearSample(timeDiffs, 0.9f);
743 		stats.medianDiffTime		= linearSample(timeDiffs, 0.5f);
744 	}
745 
746 	// relative comparison to the approximation
747 	{
748 		std::vector<float> relativeDiffs(samples.size());
749 
750 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
751 		{
752 			const float prediction	= samples[ndx].*predictor * fit.coefficient + fit.offset;
753 			const float actual		= (float)values[ndx];
754 
755 			// Ignore cases where we predict negative times, or if
756 			// ratio would be (nearly) infinite: ignore if predicted
757 			// time is less than 1 microsecond
758 			if (prediction < 1.0f)
759 				relativeDiffs[ndx] = 0.0f;
760 			else
761 				relativeDiffs[ndx] = (actual - prediction) / prediction;
762 		}
763 		std::sort(relativeDiffs.begin(), relativeDiffs.end());
764 
765 		stats.maxRelDiffTime		= relativeDiffs.back();
766 		stats.max9DecileRelDiffTime	= linearSample(relativeDiffs, 0.9f);
767 		stats.medianRelDiffTime		= linearSample(relativeDiffs, 0.5f);
768 	}
769 
770 	// values calculated using sorted timings
771 
772 	std::sort(values.begin(), values.end());
773 
774 	stats.result.minTime = (float)values.front();
775 	stats.result.maxTime = (float)values.back();
776 	stats.result.medianTime = linearSample(values, 0.5f);
777 	stats.result.min2DecileTime = linearSample(values, 0.1f);
778 	stats.result.max9DecileTime = linearSample(values, 0.9f);
779 }
780 
781 template <typename StatisticsType, typename SampleType>
calculateBasicTransferStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)782 void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
783 {
784 	calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
785 }
786 
787 template <typename StatisticsType, typename SampleType>
calculateBasicRenderStatistics(StatisticsType & stats,const LineParametersWithConfidence & fit,const std::vector<SampleType> & samples)788 void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
789 {
790 	calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
791 }
792 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)793 static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
794 {
795 	SingleCallStatistics stats;
796 
797 	calculateBasicTransferStatistics(stats, fit, samples);
798 
799 	return stats;
800 }
801 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)802 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
803 {
804 	MapCallStatistics stats;
805 
806 	calculateBasicTransferStatistics(stats, fit, samples);
807 
808 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
809 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
810 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
811 	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);
812 
813 	return stats;
814 }
815 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)816 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
817 {
818 	MapFlushCallStatistics stats;
819 
820 	calculateBasicTransferStatistics(stats, fit, samples);
821 
822 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
823 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
824 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
825 	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
826 	stats.alloc	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);
827 
828 	return stats;
829 }
830 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)831 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
832 {
833 	MapCallStatistics stats;
834 
835 	calculateBasicTransferStatistics(stats, fit, samples);
836 
837 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
838 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
839 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);
840 
841 	return stats;
842 }
843 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)844 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
845 {
846 	MapFlushCallStatistics stats;
847 
848 	calculateBasicTransferStatistics(stats, fit, samples);
849 
850 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
851 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
852 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
853 	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);
854 
855 	return stats;
856 }
857 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)858 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
859 {
860 	RenderReadStatistics stats;
861 
862 	calculateBasicRenderStatistics(stats, fit, samples);
863 
864 	stats.render	= calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
865 	stats.read		= calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
866 	stats.total		= calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);
867 
868 	return stats;
869 }
870 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)871 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
872 {
873 	RenderReadStatistics stats;
874 
875 	calculateBasicRenderStatistics(stats, fit, samples);
876 
877 	stats.render	= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
878 	stats.read		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
879 	stats.total		= calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);
880 
881 	return stats;
882 }
883 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)884 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
885 {
886 	UploadRenderReadStatistics stats;
887 
888 	calculateBasicRenderStatistics(stats, fit, samples);
889 
890 	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
891 	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
892 	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
893 	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);
894 
895 	return stats;
896 }
897 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)898 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
899 {
900 	UploadRenderReadStatistics stats;
901 
902 	calculateBasicRenderStatistics(stats, fit, samples);
903 
904 	stats.upload	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
905 	stats.render	= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
906 	stats.read		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
907 	stats.total		= calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);
908 
909 	return stats;
910 }
911 
calculateSampleStatistics(const LineParametersWithConfidence & fit,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)912 static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
913 {
914 	RenderUploadRenderReadStatistics stats;
915 
916 	calculateBasicRenderStatistics(stats, fit, samples);
917 
918 	stats.firstRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
919 	stats.upload		= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
920 	stats.secondRender	= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
921 	stats.read			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
922 	stats.total			= calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);
923 
924 	return stats;
925 }
926 
927 template <typename DurationType>
fitLineToSamples(const std::vector<UploadSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,deUint64 DurationType::* target=& DurationType::fitResponseDuration)928 static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
929 {
930 	std::vector<tcu::Vec2> samplePoints;
931 
932 	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
933 	{
934 		tcu::Vec2 point;
935 
936 		point.x() = (float)(samples[sampleNdx].writtenSize);
937 		point.y() = (float)(samples[sampleNdx].duration.*target);
938 
939 		samplePoints.push_back(point);
940 	}
941 
942 	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
943 }
944 
945 template <typename DurationType>
fitLineToSamples(const std::vector<RenderSampleResult<DurationType>> & samples,int beginNdx,int endNdx,int step,deUint64 DurationType::* target=& DurationType::fitResponseDuration)946 static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
947 {
948 	std::vector<tcu::Vec2> samplePoints;
949 
950 	for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
951 	{
952 		tcu::Vec2 point;
953 
954 		point.x() = (float)(samples[sampleNdx].renderDataSize);
955 		point.y() = (float)(samples[sampleNdx].duration.*target);
956 
957 		samplePoints.push_back(point);
958 	}
959 
960 	return theilSenSiegelLinearRegression(samplePoints, 0.6f);
961 }
962 
963 template <typename T>
fitLineToSamples(const std::vector<T> & samples,int beginNdx,int endNdx,deUint64 T::SampleType::* target=& T::SampleType::fitResponseDuration)964 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
965 {
966 	return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
967 }
968 
969 template <typename T>
fitLineToSamples(const std::vector<T> & samples,deUint64 T::SampleType::* target=& T::SampleType::fitResponseDuration)970 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
971 {
972 	return fitLineToSamples(samples, 0, (int)samples.size(), target);
973 }
974 
// Returns the area enclosed between two lines over the interval [xmin, xmax].
//
// Line A is y = lineAOffset + lineACoefficient * x, line B likewise. The interval width
// (xmax - xmin) is used as-is, so callers are expected to pass xmin <= xmax.
//
// If the lines cross inside the interval, the area is the sum of the two triangles on either
// side of the crossing. The crossing position is derived from the gaps at the interval ends
// rather than from the slope difference, so nearly parallel lines evaluated over a very wide
// x range (small slope difference, large gaps) still yield their true area.
static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient)
{
	const float lineAMin		= lineAOffset + lineACoefficient * xmin;
	const float lineAMax		= lineAOffset + lineACoefficient * xmax;
	const float lineBMin		= lineBOffset + lineBCoefficient * xmin;
	const float lineBMax		= lineBOffset + lineBCoefficient * xmax;
	const bool	aOverBAtBegin	= (lineAMin > lineBMin);
	const bool	aOverBAtEnd		= (lineAMax > lineBMax);
	const float	width			= (xmax - xmin);

	if (aOverBAtBegin == aOverBAtEnd)
	{
		// lines do not intersect: the region is a trapezoid whose mean height is the gap at the midpoint

		const float midpoint	= (xmin + xmax) / 2.0f;
		const float lineAHeight	= lineAOffset + lineACoefficient * midpoint;
		const float lineBHeight	= lineBOffset + lineBCoefficient * midpoint;
		const float gap			= lineAHeight - lineBHeight;

		return width * ((gap < 0.0f) ? (-gap) : (gap));
	}
	else
	{
		// lines intersect: two triangles sharing the crossing point

		// The ordering flips between the ends, so these gaps are non-negative and
		// at least one of them is strictly positive (the sum below is never zero).
		const float leftHeight		= (aOverBAtBegin) ? (lineAMin - lineBMin) : (lineBMin - lineAMin);
		const float rightHeight		= (aOverBAtEnd) ? (lineAMax - lineBMax) : (lineBMax - lineAMax);

		// The gap changes linearly, so the crossing splits the width in ratio leftHeight : rightHeight.
		const float leftFraction	= leftHeight / (leftHeight + rightHeight);
		const float rightFraction	= 1.0f - leftFraction;

		return 0.5f * width * (leftHeight * leftFraction + rightHeight * rightFraction);
	}
}
1012 
1013 template <typename T>
calculateSampleFitLinearity(const std::vector<T> & samples,int T::* predictor)1014 static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor)
1015 {
1016 	// Compare the fitted line of first half of the samples to the fitted line of
1017 	// the second half of the samples. Calculate a AABB that fully contains every
1018 	// sample's x component and both fit lines in this range. Calculate the ratio
1019 	// of the area between the lines and the AABB.
1020 
1021 	const float				epsilon				= 1.e-6f;
1022 	const int				midPoint			= (int)samples.size() / 2;
1023 	const LineParametersWithConfidence	startApproximation	= fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
1024 	const LineParametersWithConfidence	endApproximation	= fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);
1025 
1026 	const float				aabbMinX			= (float)(samples.front().*predictor);
1027 	const float				aabbMinY			= de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX);
1028 	const float				aabbMaxX			= (float)(samples.back().*predictor);
1029 	const float				aabbMaxY			= de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX);
1030 
1031 	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1032 	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient);
1033 	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1034 
1035 	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1036 }
1037 
1038 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> & samples)1039 static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples)
1040 {
1041 	return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
1042 }
1043 
1044 template <typename DurationType>
calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> & samples)1045 static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples)
1046 {
1047 	return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
1048 }
1049 
1050 template <typename T>
calculateSampleTemporalStability(const std::vector<T> & samples,int T::* predictor)1051 static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor)
1052 {
1053 	// Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
1054 	// Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
1055 	// contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
1056 	// the lines and the AABB.
1057 
1058 	const float				epsilon				= 1.e-6f;
1059 	const LineParametersWithConfidence	evenApproximation	= fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1060 	const LineParametersWithConfidence	oddApproximation	= fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
1061 
1062 	const float				aabbMinX			= (float)(samples.front().*predictor);
1063 	const float				aabbMinY			= de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX);
1064 	const float				aabbMaxX			= (float)(samples.back().*predictor);
1065 	const float				aabbMaxY			= de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX);
1066 
1067 	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
1068 	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient);
1069 	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);
1070 
1071 	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
1072 }
1073 
1074 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> & samples)1075 static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples)
1076 {
1077 	return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
1078 }
1079 
1080 template <typename DurationType>
calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> & samples)1081 static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples)
1082 {
1083 	return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
1084 }
1085 
1086 template <typename DurationType>
bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> & samples,std::vector<UploadSampleResult<DurationType>> * buckets,int numBuckets,int & minBufferSize,int & maxBufferSize)1087 static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize)
1088 {
1089 	minBufferSize = 0;
1090 	maxBufferSize = 0;
1091 
1092 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1093 	{
1094 		DE_ASSERT(samples[sampleNdx].allocatedSize != 0);
1095 
1096 		if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
1097 			minBufferSize = samples[sampleNdx].allocatedSize;
1098 		if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
1099 			maxBufferSize = samples[sampleNdx].allocatedSize;
1100 	}
1101 
1102 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1103 	{
1104 		const float bucketNdxFloat	= (samples[sampleNdx].allocatedSize - minBufferSize) / (float)(maxBufferSize - minBufferSize) * numBuckets;
1105 		const int bucketNdx			= de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1);
1106 
1107 		buckets[bucketNdx].push_back(samples[sampleNdx]);
1108 	}
1109 }
1110 
1111 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1112 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1113 {
1114 	log	<< tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
1115 		<< tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
1116 		<< tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime)
1117 		<< tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime)
1118 		<< tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1119 }
1120 
1121 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1122 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1123 {
1124 	log	<< tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
1125 		<< tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
1126 		<< tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
1127 		<< tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
1128 		<< tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1129 }
1130 
1131 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1132 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1133 {
1134 	log	<< tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
1135 		<< tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
1136 		<< tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
1137 		<< tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
1138 		<< tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1139 }
1140 
1141 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1142 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1143 {
1144 	log	<< tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
1145 		<< tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
1146 		<< tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
1147 		<< tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
1148 		<< tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1149 }
1150 
1151 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1152 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1153 {
1154 	log	<< tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
1155 		<< tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
1156 		<< tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
1157 		<< tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
1158 		<< tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1159 }
1160 
1161 template <typename SampleType>
logMapRangeStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1162 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1163 {
1164 	DE_UNREF(log);
1165 	DE_UNREF(stats);
1166 }
1167 
1168 template <typename SampleType>
logUnmapStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1169 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1170 {
1171 	DE_UNREF(log);
1172 	DE_UNREF(stats);
1173 }
1174 
1175 template <typename SampleType>
logWriteStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1176 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1177 {
1178 	DE_UNREF(log);
1179 	DE_UNREF(stats);
1180 }
1181 
1182 template <typename SampleType>
logFlushStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1183 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1184 {
1185 	DE_UNREF(log);
1186 	DE_UNREF(stats);
1187 }
1188 
1189 template <typename SampleType>
logAllocStats(tcu::TestLog & log,const typename SampleTypeTraits<SampleType>::StatsType & stats)1190 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1191 {
1192 	DE_UNREF(log);
1193 	DE_UNREF(stats);
1194 }
1195 
1196 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1197 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1198 {
1199 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
1200 	log	<< tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1201 		<< tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1202 		<< tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
1203 }
1204 
1205 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1206 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1207 {
1208 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
1209 	log	<< tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1210 		<< tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1211 		<< tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
1212 }
1213 
1214 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1215 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1216 {
1217 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
1218 	log	<< tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1219 		<< tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1220 		<< tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
1221 }
1222 
1223 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1224 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1225 {
1226 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
1227 	log	<< tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1228 		<< tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1229 		<< tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
1230 }
1231 
1232 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1233 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1234 {
1235 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
1236 	log	<< tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1237 		<< tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1238 		<< tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
1239 }
1240 
1241 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1242 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1243 {
1244 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
1245 	log	<< tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1246 		<< tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1247 		<< tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime);
1248 }
1249 
1250 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1251 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1252 {
1253 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
1254 	log	<< tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1255 		<< tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1256 		<< tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
1257 }
1258 
1259 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1260 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1261 {
1262 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
1263 	log	<< tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1264 		<< tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1265 		<< tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime);
1266 }
1267 
1268 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1269 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1270 {
1271 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
1272 	log	<< tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1273 		<< tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1274 		<< tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
1275 }
1276 
1277 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1278 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1279 {
1280 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::firstRenderDuration);
1281 	log	<< tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1282 		<< tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1283 		<< tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime);
1284 }
1285 
1286 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1287 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1288 {
1289 	const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::secondRenderDuration);
1290 	log	<< tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, contributionFitting.offset)
1291 		<< tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
1292 		<< tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime);
1293 }
1294 
1295 template <typename SampleType>
logMapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1296 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1297 {
1298 	DE_UNREF(log);
1299 	DE_UNREF(samples);
1300 	DE_UNREF(stats);
1301 }
1302 
1303 template <typename SampleType>
logUnmapContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1304 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1305 {
1306 	DE_UNREF(log);
1307 	DE_UNREF(samples);
1308 	DE_UNREF(stats);
1309 }
1310 
1311 template <typename SampleType>
logWriteContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1312 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1313 {
1314 	DE_UNREF(log);
1315 	DE_UNREF(samples);
1316 	DE_UNREF(stats);
1317 }
1318 
1319 template <typename SampleType>
logFlushContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1320 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1321 {
1322 	DE_UNREF(log);
1323 	DE_UNREF(samples);
1324 	DE_UNREF(stats);
1325 }
1326 
1327 template <typename SampleType>
logAllocContribution(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1328 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1329 {
1330 	DE_UNREF(log);
1331 	DE_UNREF(samples);
1332 	DE_UNREF(stats);
1333 }
1334 
1335 template <typename SampleType>
logRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1336 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1337 {
1338 	DE_UNREF(log);
1339 	DE_UNREF(samples);
1340 	DE_UNREF(stats);
1341 }
1342 
1343 template <typename SampleType>
logReadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1344 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1345 {
1346 	DE_UNREF(log);
1347 	DE_UNREF(samples);
1348 	DE_UNREF(stats);
1349 }
1350 
1351 template <typename SampleType>
logUploadContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1352 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1353 {
1354 	DE_UNREF(log);
1355 	DE_UNREF(samples);
1356 	DE_UNREF(stats);
1357 }
1358 
1359 template <typename SampleType>
logTotalContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1360 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1361 {
1362 	DE_UNREF(log);
1363 	DE_UNREF(samples);
1364 	DE_UNREF(stats);
1365 }
1366 
1367 template <typename SampleType>
logFirstRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1368 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1369 {
1370 	DE_UNREF(log);
1371 	DE_UNREF(samples);
1372 	DE_UNREF(stats);
1373 }
1374 
1375 template <typename SampleType>
logSecondRenderContribution(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples,const typename SampleTypeTraits<SampleType>::StatsType & stats)1376 static typename EnableIfNot<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
1377 {
1378 	DE_UNREF(log);
1379 	DE_UNREF(samples);
1380 	DE_UNREF(stats);
1381 }
1382 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<SingleOperationDuration>> & samples)1383 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
1384 {
1385 	log << tcu::TestLog::SampleList("Samples", "Samples")
1386 		<< tcu::TestLog::SampleInfo
1387 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1388 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1389 		<< tcu::TestLog::ValueInfo("UploadTime",		"Upload time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1390 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1391 		<< tcu::TestLog::EndSampleInfo;
1392 
1393 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1394 	{
1395 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1396 		log	<< tcu::TestLog::Sample
1397 			<< samples[sampleNdx].writtenSize
1398 			<< samples[sampleNdx].bufferSize
1399 			<< (int)samples[sampleNdx].duration.totalDuration
1400 			<< fitResidual
1401 			<< tcu::TestLog::EndSample;
1402 	}
1403 
1404 	log << tcu::TestLog::EndSampleList;
1405 }
1406 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDuration>> & samples)1407 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
1408 {
1409 	log << tcu::TestLog::SampleList("Samples", "Samples")
1410 		<< tcu::TestLog::SampleInfo
1411 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1412 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1413 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1414 		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1415 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1416 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1417 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1418 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1419 		<< tcu::TestLog::EndSampleInfo;
1420 
1421 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1422 	{
1423 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1424 		log	<< tcu::TestLog::Sample
1425 			<< samples[sampleNdx].writtenSize
1426 			<< samples[sampleNdx].bufferSize
1427 			<< (int)samples[sampleNdx].duration.totalDuration
1428 			<< (int)samples[sampleNdx].duration.allocDuration
1429 			<< (int)samples[sampleNdx].duration.mapDuration
1430 			<< (int)samples[sampleNdx].duration.unmapDuration
1431 			<< (int)samples[sampleNdx].duration.writeDuration
1432 			<< fitResidual
1433 			<< tcu::TestLog::EndSample;
1434 	}
1435 
1436 	log << tcu::TestLog::EndSampleList;
1437 }
1438 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> & samples)1439 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
1440 {
1441 	log << tcu::TestLog::SampleList("Samples", "Samples")
1442 		<< tcu::TestLog::SampleInfo
1443 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1444 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1445 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1446 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1447 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1448 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1449 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1450 		<< tcu::TestLog::EndSampleInfo;
1451 
1452 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1453 	{
1454 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1455 		log	<< tcu::TestLog::Sample
1456 			<< samples[sampleNdx].writtenSize
1457 			<< samples[sampleNdx].bufferSize
1458 			<< (int)samples[sampleNdx].duration.totalDuration
1459 			<< (int)samples[sampleNdx].duration.mapDuration
1460 			<< (int)samples[sampleNdx].duration.unmapDuration
1461 			<< (int)samples[sampleNdx].duration.writeDuration
1462 			<< fitResidual
1463 			<< tcu::TestLog::EndSample;
1464 	}
1465 
1466 	log << tcu::TestLog::EndSampleList;
1467 }
1468 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> & samples)1469 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
1470 {
1471 	log << tcu::TestLog::SampleList("Samples", "Samples")
1472 		<< tcu::TestLog::SampleInfo
1473 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1474 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1475 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1476 		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1477 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1478 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1479 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1480 		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1481 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1482 		<< tcu::TestLog::EndSampleInfo;
1483 
1484 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1485 	{
1486 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1487 		log	<< tcu::TestLog::Sample
1488 			<< samples[sampleNdx].writtenSize
1489 			<< samples[sampleNdx].bufferSize
1490 			<< (int)samples[sampleNdx].duration.totalDuration
1491 			<< (int)samples[sampleNdx].duration.allocDuration
1492 			<< (int)samples[sampleNdx].duration.mapDuration
1493 			<< (int)samples[sampleNdx].duration.unmapDuration
1494 			<< (int)samples[sampleNdx].duration.writeDuration
1495 			<< (int)samples[sampleNdx].duration.flushDuration
1496 			<< fitResidual
1497 			<< tcu::TestLog::EndSample;
1498 	}
1499 
1500 	log << tcu::TestLog::EndSampleList;
1501 }
1502 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> & samples)1503 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
1504 {
1505 	log << tcu::TestLog::SampleList("Samples", "Samples")
1506 		<< tcu::TestLog::SampleInfo
1507 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1508 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1509 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1510 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1511 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1512 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1513 		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1514 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1515 		<< tcu::TestLog::EndSampleInfo;
1516 
1517 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1518 	{
1519 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].writtenSize);
1520 		log	<< tcu::TestLog::Sample
1521 			<< samples[sampleNdx].writtenSize
1522 			<< samples[sampleNdx].bufferSize
1523 			<< (int)samples[sampleNdx].duration.totalDuration
1524 			<< (int)samples[sampleNdx].duration.mapDuration
1525 			<< (int)samples[sampleNdx].duration.unmapDuration
1526 			<< (int)samples[sampleNdx].duration.writeDuration
1527 			<< (int)samples[sampleNdx].duration.flushDuration
1528 			<< fitResidual
1529 			<< tcu::TestLog::EndSample;
1530 	}
1531 
1532 	log << tcu::TestLog::EndSampleList;
1533 }
1534 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderReadDuration>> & samples)1535 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
1536 {
1537 	log << tcu::TestLog::SampleList("Samples", "Samples")
1538 		<< tcu::TestLog::SampleInfo
1539 		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",		"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1540 		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",	"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1541 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1542 		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1543 		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1544 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1545 		<< tcu::TestLog::EndSampleInfo;
1546 
1547 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1548 	{
1549 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1550 		log	<< tcu::TestLog::Sample
1551 			<< samples[sampleNdx].renderDataSize
1552 			<< samples[sampleNdx].numVertices
1553 			<< (int)samples[sampleNdx].duration.renderReadDuration
1554 			<< (int)samples[sampleNdx].duration.renderDuration
1555 			<< (int)samples[sampleNdx].duration.readDuration
1556 			<< fitResidual
1557 			<< tcu::TestLog::EndSample;
1558 	}
1559 
1560 	log << tcu::TestLog::EndSampleList;
1561 }
1562 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> & samples)1563 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
1564 {
1565 	log << tcu::TestLog::SampleList("Samples", "Samples")
1566 		<< tcu::TestLog::SampleInfo
1567 		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1568 		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",		"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1569 		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",	"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1570 		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1571 		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1572 		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1573 		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1574 		<< tcu::TestLog::EndSampleInfo;
1575 
1576 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1577 	{
1578 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1579 		log	<< tcu::TestLog::Sample
1580 			<< samples[sampleNdx].renderDataSize
1581 			<< samples[sampleNdx].numVertices
1582 			<< samples[sampleNdx].unrelatedDataSize
1583 			<< (int)samples[sampleNdx].duration.renderReadDuration
1584 			<< (int)samples[sampleNdx].duration.renderDuration
1585 			<< (int)samples[sampleNdx].duration.readDuration
1586 			<< fitResidual
1587 			<< tcu::TestLog::EndSample;
1588 	}
1589 
1590 	log << tcu::TestLog::EndSampleList;
1591 }
1592 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDuration>> & samples)1593 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
1594 {
1595 	log << tcu::TestLog::SampleList("Samples", "Samples")
1596 		<< tcu::TestLog::SampleInfo
1597 		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1598 		<< tcu::TestLog::ValueInfo("UploadSize",		"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1599 		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1600 		<< tcu::TestLog::ValueInfo("DrawReadTime",		"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1601 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1602 		<< tcu::TestLog::ValueInfo("Upload time",		"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1603 		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1604 		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1605 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1606 		<< tcu::TestLog::EndSampleInfo;
1607 
1608 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1609 	{
1610 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1611 		log	<< tcu::TestLog::Sample
1612 			<< samples[sampleNdx].renderDataSize
1613 			<< samples[sampleNdx].uploadedDataSize
1614 			<< samples[sampleNdx].numVertices
1615 			<< (int)samples[sampleNdx].duration.renderReadDuration
1616 			<< (int)samples[sampleNdx].duration.totalDuration
1617 			<< (int)samples[sampleNdx].duration.uploadDuration
1618 			<< (int)samples[sampleNdx].duration.renderDuration
1619 			<< (int)samples[sampleNdx].duration.readDuration
1620 			<< fitResidual
1621 			<< tcu::TestLog::EndSample;
1622 	}
1623 
1624 	log << tcu::TestLog::EndSampleList;
1625 }
1626 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> & samples)1627 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
1628 {
1629 	log << tcu::TestLog::SampleList("Samples", "Samples")
1630 		<< tcu::TestLog::SampleInfo
1631 		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1632 		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",					"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1633 		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",				"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1634 		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize",	"Unrelated upload size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1635 		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1636 		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1637 		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1638 		<< tcu::TestLog::ValueInfo("DrawCallTime",			"Draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1639 		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1640 		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1641 		<< tcu::TestLog::EndSampleInfo;
1642 
1643 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1644 	{
1645 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1646 		log	<< tcu::TestLog::Sample
1647 			<< samples[sampleNdx].renderDataSize
1648 			<< samples[sampleNdx].uploadedDataSize
1649 			<< samples[sampleNdx].numVertices
1650 			<< samples[sampleNdx].unrelatedDataSize
1651 			<< (int)samples[sampleNdx].duration.renderReadDuration
1652 			<< (int)samples[sampleNdx].duration.totalDuration
1653 			<< (int)samples[sampleNdx].duration.uploadDuration
1654 			<< (int)samples[sampleNdx].duration.renderDuration
1655 			<< (int)samples[sampleNdx].duration.readDuration
1656 			<< fitResidual
1657 			<< tcu::TestLog::EndSample;
1658 	}
1659 
1660 	log << tcu::TestLog::EndSampleList;
1661 }
1662 
logSampleList(tcu::TestLog & log,const LineParametersWithConfidence & theilSenFitting,const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> & samples)1663 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
1664 {
1665 	log << tcu::TestLog::SampleList("Samples", "Samples")
1666 		<< tcu::TestLog::SampleInfo
1667 		<< tcu::TestLog::ValueInfo("DataSize",				"Data processed",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1668 		<< tcu::TestLog::ValueInfo("UploadSize",			"Data uploaded",						"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1669 		<< tcu::TestLog::ValueInfo("VertexCount",			"Number of vertices",					"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
1670 		<< tcu::TestLog::ValueInfo("DrawReadTime",			"Second draw call and ReadPixels time",	"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1671 		<< tcu::TestLog::ValueInfo("TotalTime",				"Total time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1672 		<< tcu::TestLog::ValueInfo("FirstDrawCallTime",		"First draw call time",					"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1673 		<< tcu::TestLog::ValueInfo("Upload time",			"Upload time",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1674 		<< tcu::TestLog::ValueInfo("SecondDrawCallTime",	"Second draw call time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1675 		<< tcu::TestLog::ValueInfo("ReadTime",				"ReadPixels time",						"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1676 		<< tcu::TestLog::ValueInfo("FitResidual",			"Fit residual",							"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
1677 		<< tcu::TestLog::EndSampleInfo;
1678 
1679 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
1680 	{
1681 		const float fitResidual = samples[sampleNdx].duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * samples[sampleNdx].renderDataSize);
1682 		log	<< tcu::TestLog::Sample
1683 			<< samples[sampleNdx].renderDataSize
1684 			<< samples[sampleNdx].uploadedDataSize
1685 			<< samples[sampleNdx].numVertices
1686 			<< (int)samples[sampleNdx].duration.renderReadDuration
1687 			<< (int)samples[sampleNdx].duration.totalDuration
1688 			<< (int)samples[sampleNdx].duration.firstRenderDuration
1689 			<< (int)samples[sampleNdx].duration.uploadDuration
1690 			<< (int)samples[sampleNdx].duration.secondRenderDuration
1691 			<< (int)samples[sampleNdx].duration.readDuration
1692 			<< fitResidual
1693 			<< tcu::TestLog::EndSample;
1694 	}
1695 
1696 	log << tcu::TestLog::EndSampleList;
1697 }
1698 
1699 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<UploadSampleResult<SampleType>> & samples,bool logBucketPerformance)1700 static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance)
1701 {
1702 	// Assume data is linear with some outliers, fit a line
1703 	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1704 	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1705 	float													approximatedTransferRate;
1706 	float													approximatedTransferRateNoConstant;
1707 
1708 	// Output raw samples
1709 	{
1710 		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1711 		logSampleList(log, theilSenFitting, samples);
1712 	}
1713 
1714 	// Calculate results for different ranges
1715 	if (logBucketPerformance)
1716 	{
1717 		const int										numBuckets				= 4;
1718 		int												minBufferSize			= 0;
1719 		int												maxBufferSize			= 0;
1720 		std::vector<UploadSampleResult<SampleType> >	buckets[numBuckets];
1721 
1722 		bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);
1723 
1724 		for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
1725 		{
1726 			if (buckets[bucketNdx].empty())
1727 				continue;
1728 
1729 			// Print a nice result summary
1730 
1731 			const int												bucketRangeMin	= minBufferSize + (int)(( bucketNdx    / (float)numBuckets) * (maxBufferSize - minBufferSize));
1732 			const int												bucketRangeMax	= minBufferSize + (int)(((bucketNdx+1) / (float)numBuckets) * (maxBufferSize - minBufferSize));
1733 			const typename SampleTypeTraits<SampleType>::StatsType	stats			= calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
1734 			const tcu::ScopedLogSection								section			(log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]"))));
1735 
1736 			logMapRangeStats<SampleType>(log, stats);
1737 			logUnmapStats<SampleType>(log, stats);
1738 			logWriteStats<SampleType>(log, stats);
1739 			logFlushStats<SampleType>(log, stats);
1740 			logAllocStats<SampleType>(log, stats);
1741 
1742 			log	<< tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
1743 				<< tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
1744 				<< tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime)
1745 				<< tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime)
1746 				<< tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
1747 				<< tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f)
1748 				<< tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime)
1749 				<< tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime)
1750 				<< tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime)
1751 				<< tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f)
1752 				<< tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
1753 				<< tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
1754 		}
1755 	}
1756 
1757 	// Contributions
1758 	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1759 	{
1760 		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1761 
1762 		logMapContribution(log, samples, resultStats);
1763 		logUnmapContribution(log, samples, resultStats);
1764 		logWriteContribution(log, samples, resultStats);
1765 		logFlushContribution(log, samples, resultStats);
1766 		logAllocContribution(log, samples, resultStats);
1767 	}
1768 
1769 	// Print results
1770 	{
1771 		const tcu::ScopedLogSection	section(log, "Results", "Results");
1772 
1773 		const int	medianBufferSize					= (samples.front().bufferSize + samples.back().bufferSize) / 2;
1774 		const float	approximatedTransferTime			= (theilSenFitting.offset + theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1775 		const float	approximatedTransferTimeNoConstant	= (theilSenFitting.coefficient * medianBufferSize) / 1000.0f / 1000.0f;
1776 		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1777 		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1778 
1779 		approximatedTransferRateNoConstant				= medianBufferSize / approximatedTransferTimeNoConstant;
1780 		approximatedTransferRate						= medianBufferSize / approximatedTransferTime;
1781 
1782 		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1783 			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1784 			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1785 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1786 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1787 			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1788 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1789 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1790 			<< tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
1791 			<< tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
1792 			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1793 			<< tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1794 	}
1795 
1796 	// return approximated transfer rate
1797 	{
1798 		UploadSampleAnalyzeResult result;
1799 
1800 		result.transferRateMedian = resultStats.medianRate;
1801 		result.transferRateAtRange = approximatedTransferRate;
1802 		result.transferRateAtInfinity = approximatedTransferRateNoConstant;
1803 
1804 		return result;
1805 	}
1806 }
1807 
1808 template <typename SampleType>
analyzeSampleResults(tcu::TestLog & log,const std::vector<RenderSampleResult<SampleType>> & samples)1809 static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples)
1810 {
1811 	// Assume data is linear with some outliers, fit a line
1812 	const LineParametersWithConfidence									theilSenFitting						= fitLineToSamples(samples);
1813 	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
1814 	float													approximatedProcessingRate;
1815 	float													approximatedProcessingRateNoConstant;
1816 
1817 	// output raw samples
1818 	{
1819 		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
1820 		logSampleList(log, theilSenFitting, samples);
1821 	}
1822 
1823 	// Contributions
1824 	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
1825 	{
1826 		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");
1827 
1828 		logFirstRenderContribution(log, samples, resultStats);
1829 		logUploadContribution(log, samples, resultStats);
1830 		logRenderContribution(log, samples, resultStats);
1831 		logSecondRenderContribution(log, samples, resultStats);
1832 		logReadContribution(log, samples, resultStats);
1833 		logTotalContribution(log, samples, resultStats);
1834 	}
1835 
1836 	// print results
1837 	{
1838 		const tcu::ScopedLogSection	section(log, "Results", "Results");
1839 
1840 		const int	medianDataSize						= (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
1841 		const float	approximatedRenderTime				= (theilSenFitting.offset + theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1842 		const float	approximatedRenderTimeNoConstant	= (theilSenFitting.coefficient * medianDataSize) / 1000.0f / 1000.0f;
1843 		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
1844 		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);
1845 
1846 		approximatedProcessingRateNoConstant			= medianDataSize / approximatedRenderTimeNoConstant;
1847 		approximatedProcessingRate						= medianDataSize / approximatedRenderTime;
1848 
1849 		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
1850 			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
1851 			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
1852 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
1853 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
1854 			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
1855 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
1856 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
1857 			<< tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
1858 			<< tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
1859 			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
1860 			<< tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
1861 	}
1862 
1863 	// return approximated render rate
1864 	{
1865 		RenderSampleAnalyzeResult result;
1866 
1867 		result.renderRateMedian		= resultStats.medianRate;
1868 		result.renderRateAtRange	= approximatedProcessingRate;
1869 		result.renderRateAtInfinity = approximatedProcessingRateNoConstant;
1870 
1871 		return result;
1872 	}
1873 	return RenderSampleAnalyzeResult();
1874 }
1875 
generateTwoPassRandomIterationOrder(std::vector<int> & iterationOrder,int numSamples)1876 static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples)
1877 {
1878 	de::Random	rnd			(0xabc);
1879 	const int	midPoint	= (numSamples+1) / 2;		// !< ceil(m_numSamples / 2)
1880 
1881 	DE_ASSERT((int)iterationOrder.size() == numSamples);
1882 
1883 	// Two "passes" over range, randomize order in both passes
1884 	// This allows to us detect if iterations are not independent
1885 	// (first run and later run samples differ significantly?)
1886 
1887 	for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
1888 		iterationOrder[sampleNdx] = sampleNdx * 2;
1889 	for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
1890 		iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;
1891 
1892 	for (int ndx = 0; ndx < midPoint; ++ndx)
1893 		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
1894 	for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
1895 		std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size()-1)]);
1896 }
1897 
1898 template <typename SampleType>
1899 class BasicBufferCase : public TestCase
1900 {
1901 public:
1902 
1903 	enum Flags
1904 	{
1905 		FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
1906 	};
1907 							BasicBufferCase		(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags);
1908 							~BasicBufferCase	(void);
1909 
1910 	virtual void			init				(void);
1911 	virtual void			deinit				(void);
1912 
1913 protected:
1914 	IterateResult			iterate				(void);
1915 
1916 	virtual bool			runSample			(int iteration, UploadSampleResult<SampleType>& sample) = 0;
1917 	virtual void			logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results) = 0;
1918 
1919 	void					disableGLWarmup		(void);
1920 	void					waitGLResults		(void);
1921 
1922 	enum
1923 	{
1924 		DUMMY_RENDER_AREA_SIZE = 32
1925 	};
1926 
1927 	glu::ShaderProgram*		m_dummyProgram;
1928 	deInt32					m_dummyProgramPosLoc;
1929 	deUint32				m_bufferID;
1930 
1931 	const int				m_numSamples;
1932 	const int				m_bufferSizeMin;
1933 	const int				m_bufferSizeMax;
1934 	const bool				m_allocateLargerBuffer;
1935 
1936 private:
1937 	int						m_iteration;
1938 	std::vector<int>		m_iterationOrder;
1939 	std::vector<UploadSampleResult<SampleType> > m_results;
1940 
1941 	bool					m_useGL;
1942 	int						m_bufferRandomizerTimer;
1943 };
1944 
1945 template <typename SampleType>
BasicBufferCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,int flags)1946 BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags)
1947 	: TestCase					(context, tcu::NODETYPE_PERFORMANCE, name, desc)
1948 	, m_dummyProgram			(DE_NULL)
1949 	, m_dummyProgramPosLoc		(-1)
1950 	, m_bufferID				(0)
1951 	, m_numSamples				(numSamples)
1952 	, m_bufferSizeMin			(bufferSizeMin)
1953 	, m_bufferSizeMax			(bufferSizeMax)
1954 	, m_allocateLargerBuffer	((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
1955 	, m_iteration				(0)
1956 	, m_iterationOrder			(numSamples)
1957 	, m_results					(numSamples)
1958 	, m_useGL					(true)
1959 	, m_bufferRandomizerTimer	(0)
1960 {
1961 	// "randomize" iteration order. Deterministic, patternless
1962 	generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);
1963 
1964 	// choose buffer sizes
1965 	for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
1966 	{
1967 		const int rawBufferSize			= (int)deFloatFloor(bufferSizeMin + (bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / m_numSamples));
1968 		const int bufferSize			= deAlign32(rawBufferSize, 16);
1969 		const int allocatedBufferSize	= deAlign32((m_allocateLargerBuffer) ? ((int)(bufferSize * 1.5f)) : (bufferSize), 16);
1970 
1971 		m_results[sampleNdx].bufferSize		= bufferSize;
1972 		m_results[sampleNdx].allocatedSize	= allocatedBufferSize;
1973 		m_results[sampleNdx].writtenSize	= -1;
1974 	}
1975 }
1976 
1977 template <typename SampleType>
~BasicBufferCase(void)1978 BasicBufferCase<SampleType>::~BasicBufferCase (void)
1979 {
1980 	deinit();
1981 }
1982 
1983 template <typename SampleType>
init(void)1984 void BasicBufferCase<SampleType>::init (void)
1985 {
1986 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1987 
1988 	if (!m_useGL)
1989 		return;
1990 
1991 	// \note Viewport size is not checked, it won't matter if the render target actually is smaller hhan DUMMY_RENDER_AREA_SIZE
1992 
1993 	// dummy shader
1994 
1995 	m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader));
1996 	if (!m_dummyProgram->isOk())
1997 	{
1998 		m_testCtx.getLog() << *m_dummyProgram;
1999 		throw tcu::TestError("failed to build shader program");
2000 	}
2001 
2002 	m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position");
2003 	if (m_dummyProgramPosLoc == -1)
2004 		throw tcu::TestError("a_position location was -1");
2005 }
2006 
2007 template <typename SampleType>
deinit(void)2008 void BasicBufferCase<SampleType>::deinit (void)
2009 {
2010 	if (m_bufferID)
2011 	{
2012 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2013 		m_bufferID = 0;
2014 	}
2015 
2016 	delete m_dummyProgram;
2017 	m_dummyProgram = DE_NULL;
2018 }
2019 
2020 template <typename SampleType>
iterate(void)2021 TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void)
2022 {
2023 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2024 	static bool				buffersWarmedUp	= false;
2025 
2026 	static const deUint32	usages[] =
2027 	{
2028 		GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY,
2029 		GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY,
2030 		GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
2031 	};
2032 
2033 	// Allocate some random sized buffers and remove them to
2034 	// make sure the first samples too have some buffers removed
2035 	// just before their allocation. This is only needed by the
2036 	// the first test.
2037 
2038 	if (m_useGL && !buffersWarmedUp)
2039 	{
2040 		const int					numRandomBuffers				= 6;
2041 		const int					numRepeats						= 10;
2042 		const int					maxBufferSize					= 16777216;
2043 		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2044 		de::Random					rnd								(0x1234);
2045 		deUint32					bufferIDs[numRandomBuffers]		= {0};
2046 
2047 		gl.useProgram(m_dummyProgram->getProgram());
2048 		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2049 		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2050 
2051 		for (int ndx = 0; ndx < numRepeats; ++ndx)
2052 		{
2053 			// Create buffer and maybe draw from it
2054 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2055 			{
2056 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2057 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2058 
2059 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2060 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2061 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2062 
2063 				if (rnd.getBool())
2064 				{
2065 					gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2066 					gl.drawArrays(GL_POINTS, 0, 1);
2067 					gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2068 				}
2069 			}
2070 
2071 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2072 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2073 
2074 			waitGLResults();
2075 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2076 
2077 			m_testCtx.touchWatchdog();
2078 		}
2079 
2080 		buffersWarmedUp = true;
2081 		return CONTINUE;
2082 	}
2083 	else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
2084 	{
2085 		// Do some random buffer operations to every now and then
2086 		// to make sure the previous test iterations won't affect
2087 		// following test runs.
2088 
2089 		const int					numRandomBuffers				= 3;
2090 		const int					maxBufferSize					= 16777216;
2091 		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
2092 		de::Random					rnd								(0x1234 + 0xabc * m_bufferRandomizerTimer);
2093 
2094 		// BufferData
2095 		{
2096 			deUint32 bufferIDs[numRandomBuffers] = {0};
2097 
2098 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2099 			{
2100 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2101 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2102 
2103 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2104 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2105 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2106 			}
2107 
2108 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2109 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2110 		}
2111 
2112 		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");
2113 
2114 		// Do some memory mappings
2115 		{
2116 			deUint32 bufferIDs[numRandomBuffers] = {0};
2117 
2118 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2119 			{
2120 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
2121 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
2122 				void*			ptr;
2123 
2124 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
2125 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
2126 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
2127 
2128 				gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2129 				gl.drawArrays(GL_POINTS, 0, 1);
2130 				gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
2131 
2132 				if (rnd.getBool())
2133 					waitGLResults();
2134 
2135 				ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
2136 				if (ptr)
2137 				{
2138 					medianTimeMemcpy(ptr, &zeroData[0], randomSize);
2139 					gl.unmapBuffer(GL_ARRAY_BUFFER);
2140 				}
2141 			}
2142 
2143 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
2144 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
2145 
2146 			waitGLResults();
2147 		}
2148 
2149 		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
2150 		return CONTINUE;
2151 	}
2152 	else
2153 	{
2154 		const int	currentIteration	= m_iteration;
2155 		const int	sampleNdx			= m_iterationOrder[currentIteration];
2156 		const bool	sampleRunSuccessful	= runSample(currentIteration, m_results[sampleNdx]);
2157 
2158 		GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");
2159 
2160 		// Retry failed samples
2161 		if (!sampleRunSuccessful)
2162 			return CONTINUE;
2163 
2164 		if (++m_iteration >= m_numSamples)
2165 		{
2166 			logAndSetTestResult(m_results);
2167 			return STOP;
2168 		}
2169 		else
2170 			return CONTINUE;
2171 	}
2172 }
2173 
2174 template <typename SampleType>
disableGLWarmup(void)2175 void BasicBufferCase<SampleType>::disableGLWarmup (void)
2176 {
2177 	m_useGL = false;
2178 }
2179 
2180 template <typename SampleType>
waitGLResults(void)2181 void BasicBufferCase<SampleType>::waitGLResults (void)
2182 {
2183 	tcu::Surface dummySurface(DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2184 	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
2185 }
2186 
2187 template <typename SampleType>
2188 class BasicUploadCase : public BasicBufferCase<SampleType>
2189 {
2190 public:
2191 	enum CaseType
2192 	{
2193 		CASE_NO_BUFFERS = 0,
2194 		CASE_NEW_BUFFER,
2195 		CASE_UNSPECIFIED_BUFFER,
2196 		CASE_SPECIFIED_BUFFER,
2197 		CASE_USED_BUFFER,
2198 		CASE_USED_LARGER_BUFFER,
2199 
2200 		CASE_LAST
2201 	};
2202 
2203 	enum CaseFlags
2204 	{
2205 		FLAG_DONT_LOG_BUFFER_INFO				= 0x01,
2206 		FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT	= 0x02,
2207 	};
2208 
2209 	enum ResultType
2210 	{
2211 		RESULT_MEDIAN_TRANSFER_RATE = 0,
2212 		RESULT_ASYMPTOTIC_TRANSFER_RATE,
2213 	};
2214 
2215 						BasicUploadCase		(Context& context,
2216 											 const char* name,
2217 											 const char* desc,
2218 											 int bufferSizeMin,
2219 											 int bufferSizeMax,
2220 											 int numSamples,
2221 											 deUint32 bufferUsage,
2222 											 CaseType caseType,
2223 											 ResultType resultType,
2224 											 int flags = 0);
2225 
2226 						~BasicUploadCase	(void);
2227 
2228 	virtual void		init				(void);
2229 	virtual void		deinit				(void);
2230 
2231 private:
2232 	bool				runSample			(int iteration, UploadSampleResult<SampleType>& sample);
2233 	void				createBuffer		(int bufferSize, int iteration);
2234 	void				deleteBuffer		(int bufferSize);
2235 	void				useBuffer			(int bufferSize);
2236 
2237 	virtual void		testBufferUpload	(UploadSampleResult<SampleType>& result, int writeSize) = 0;
2238 	void				logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results);
2239 
2240 	deUint32			m_dummyBufferID;
2241 
2242 protected:
2243 	const CaseType		m_caseType;
2244 	const ResultType	m_resultType;
2245 	const deUint32		m_bufferUsage;
2246 	const bool			m_logBufferInfo;
2247 	const bool			m_bufferUnspecifiedContent;
2248 	std::vector<deUint8> m_zeroData;
2249 
2250 	using BasicBufferCase<SampleType>::m_testCtx;
2251 	using BasicBufferCase<SampleType>::m_context;
2252 
2253 	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
2254 	using BasicBufferCase<SampleType>::m_dummyProgram;
2255 	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
2256 	using BasicBufferCase<SampleType>::m_bufferID;
2257 	using BasicBufferCase<SampleType>::m_numSamples;
2258 	using BasicBufferCase<SampleType>::m_bufferSizeMin;
2259 	using BasicBufferCase<SampleType>::m_bufferSizeMax;
2260 	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
2261 };
2262 
2263 template <typename SampleType>
BasicUploadCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,int numSamples,deUint32 bufferUsage,CaseType caseType,ResultType resultType,int flags)2264 BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags)
2265 	: BasicBufferCase<SampleType>	(context, name, desc, bufferSizeMin, bufferSizeMax, numSamples, (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
2266 	, m_dummyBufferID				(0)
2267 	, m_caseType					(caseType)
2268 	, m_resultType					(resultType)
2269 	, m_bufferUsage					(bufferUsage)
2270 	, m_logBufferInfo				((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
2271 	, m_bufferUnspecifiedContent	((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
2272 	, m_zeroData					()
2273 {
2274 	DE_ASSERT(m_caseType < CASE_LAST);
2275 }
2276 
2277 template <typename SampleType>
~BasicUploadCase(void)2278 BasicUploadCase<SampleType>::~BasicUploadCase (void)
2279 {
2280 	deinit();
2281 }
2282 
2283 template <typename SampleType>
init(void)2284 void BasicUploadCase<SampleType>::init (void)
2285 {
2286 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2287 
2288 	BasicBufferCase<SampleType>::init();
2289 
2290 	// zero buffer as upload source
2291 	m_zeroData.resize(m_bufferSizeMax, 0x00);
2292 
2293 	// dummy buffer
2294 
2295 	gl.genBuffers(1, &m_dummyBufferID);
2296 	GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");
2297 
2298 	// log basic info
2299 
2300 	m_testCtx.getLog()
2301 		<< tcu::TestLog::Message
2302 		<< "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
2303 		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
2304 		<< tcu::TestLog::EndMessage;
2305 
2306 	if (m_logBufferInfo)
2307 	{
2308 		switch (m_caseType)
2309 		{
2310 			case CASE_NO_BUFFERS:
2311 				break;
2312 
2313 			case CASE_NEW_BUFFER:
2314 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is generated but not specified (i.e glBufferData() not called)." << tcu::TestLog::EndMessage;
2315 				break;
2316 
2317 			case CASE_UNSPECIFIED_BUFFER:
2318 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)." << tcu::TestLog::EndMessage;
2319 				break;
2320 
2321 			case CASE_SPECIFIED_BUFFER:
2322 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer contents are specified prior testing with glBufferData(data)." << tcu::TestLog::EndMessage;
2323 				break;
2324 
2325 			case CASE_USED_BUFFER:
2326 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing." << tcu::TestLog::EndMessage;
2327 				break;
2328 
2329 			case CASE_USED_LARGER_BUFFER:
2330 				m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is larger and has been used in drawing before testing." << tcu::TestLog::EndMessage;
2331 				break;
2332 
2333 			default:
2334 				DE_ASSERT(false);
2335 				break;
2336 		}
2337 	}
2338 
2339 	if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
2340 		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage;
2341 	else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
2342 		m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage;
2343 	else
2344 		DE_ASSERT(false);
2345 }
2346 
2347 template <typename SampleType>
deinit(void)2348 void BasicUploadCase<SampleType>::deinit (void)
2349 {
2350 	if (m_dummyBufferID)
2351 	{
2352 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_dummyBufferID);
2353 		m_dummyBufferID = 0;
2354 	}
2355 
2356 	m_zeroData.clear();
2357 
2358 	BasicBufferCase<SampleType>::deinit();
2359 }
2360 
2361 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)2362 bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
2363 {
2364 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2365 	const int				allocatedBufferSize	= sample.allocatedSize;
2366 	const int				bufferSize			= sample.bufferSize;
2367 
2368 	if (m_caseType != CASE_NO_BUFFERS)
2369 		createBuffer(iteration, allocatedBufferSize);
2370 
2371 	// warmup CPU before the test to make sure the power management governor
2372 	// keeps us in the "high performance" mode
2373 	{
2374 		deYield();
2375 		tcu::warmupCPU();
2376 		deYield();
2377 	}
2378 
2379 	testBufferUpload(sample, bufferSize);
2380 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
2381 
2382 	if (m_caseType != CASE_NO_BUFFERS)
2383 		deleteBuffer(bufferSize);
2384 
2385 	return true;
2386 }
2387 
2388 template <typename SampleType>
createBuffer(int iteration,int bufferSize)2389 void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize)
2390 {
2391 	DE_ASSERT(!m_bufferID);
2392 	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2393 
2394 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2395 
2396 	// create buffer
2397 
2398 	if (m_caseType == CASE_NO_BUFFERS)
2399 		return;
2400 
2401 	// create empty buffer
2402 
2403 	gl.genBuffers(1, &m_bufferID);
2404 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2405 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");
2406 
2407 	if (m_caseType == CASE_NEW_BUFFER)
2408 	{
2409 		// upload something else first, this should reduce noise in samples
2410 
2411 		de::Random					rng				(0xbadc * iteration);
2412 		const int					sizeDelta		= rng.getInt(0, 2097140);
2413 		const int					dummyUploadSize = deAlign32(1048576 + sizeDelta, 4*4); // Vary buffer size to make sure it is always reallocated
2414 		const std::vector<deUint8>	dummyData		(dummyUploadSize, 0x20);
2415 
2416 		gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID);
2417 		gl.bufferData(GL_ARRAY_BUFFER, dummyUploadSize, &dummyData[0], m_bufferUsage);
2418 
2419 		// make sure upload won't interfere with the test
2420 		useBuffer(dummyUploadSize);
2421 
2422 		// don't kill the buffer so that the following upload cannot potentially reuse the buffer
2423 
2424 		return;
2425 	}
2426 
2427 	// specify it
2428 
2429 	if (m_caseType == CASE_UNSPECIFIED_BUFFER)
2430 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2431 	else
2432 	{
2433 		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2434 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2435 	}
2436 
2437 	if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
2438 		return;
2439 
2440 	// use it and make sure it is uploaded
2441 
2442 	useBuffer(bufferSize);
2443 	DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
2444 }
2445 
2446 template <typename SampleType>
deleteBuffer(int bufferSize)2447 void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize)
2448 {
2449 	DE_ASSERT(m_bufferID);
2450 	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);
2451 
2452 	// render from the buffer to make sure it actually made it to the gpu. This is to
2453 	// make sure that if the upload actually happens later or is happening right now in
2454 	// the background, it will not interfere with further test runs
2455 
2456 	// if buffer contains unspecified content, sourcing data from it results in undefined
2457 	// results, possibly including program termination. Specify all data to prevent such
2458 	// case from happening
2459 
2460 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2461 
2462 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2463 
2464 	if (m_bufferUnspecifiedContent)
2465 	{
2466 		const std::vector<deUint8> dummyData(bufferSize, 0x20);
2467 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &dummyData[0], m_bufferUsage);
2468 
2469 		GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
2470 	}
2471 
2472 	useBuffer(bufferSize);
2473 
2474 	gl.deleteBuffers(1, &m_bufferID);
2475 	m_bufferID = 0;
2476 }
2477 
2478 template <typename SampleType>
useBuffer(int bufferSize)2479 void BasicUploadCase<SampleType>::useBuffer (int bufferSize)
2480 {
2481 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2482 
2483 	gl.useProgram(m_dummyProgram->getProgram());
2484 
2485 	gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
2486 	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
2487 	gl.enableVertexAttribArray(m_dummyProgramPosLoc);
2488 
2489 	// use whole buffer to make sure buffer is uploaded by drawing first and last
2490 	DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
2491 	gl.drawArrays(GL_POINTS, 0, 1);
2492 	gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);
2493 
2494 	BasicBufferCase<SampleType>::waitGLResults();
2495 }
2496 
2497 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)2498 void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
2499 {
2500 	const UploadSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), results, true);
2501 
2502 	// with small buffers, report the median transfer rate of the samples
2503 	// with large buffers, report the expected preformance of infinitely large buffers
2504 	const float						rate		= (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian);
2505 
2506 	if (rate == std::numeric_limits<float>::infinity())
2507 	{
2508 		// sample times are 1) invalid or 2) timer resolution too low
2509 		// report speed 0 bytes / s since real value cannot be determined
2510 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
2511 	}
2512 	else
2513 	{
2514 		// report transfer rate in MB / s
2515 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
2516 	}
2517 }
2518 
2519 class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
2520 {
2521 public:
2522 				ReferenceMemcpyCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase);
2523 				~ReferenceMemcpyCase	(void);
2524 
2525 	void		init					(void);
2526 	void		deinit					(void);
2527 private:
2528 	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2529 
2530 	std::vector<deUint8> m_dstBuf;
2531 };
2532 
ReferenceMemcpyCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,bool largeBuffersCase)2533 ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase)
2534 	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS, (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
2535 	, m_dstBuf									()
2536 {
2537 	disableGLWarmup();
2538 }
2539 
~ReferenceMemcpyCase(void)2540 ReferenceMemcpyCase::~ReferenceMemcpyCase (void)
2541 {
2542 }
2543 
init(void)2544 void ReferenceMemcpyCase::init (void)
2545 {
2546 	// Describe what the test tries to do
2547 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;
2548 
2549 	m_dstBuf.resize(m_bufferSizeMax, 0x00);
2550 
2551 	BasicUploadCase<SingleOperationDuration>::init();
2552 }
2553 
deinit(void)2554 void ReferenceMemcpyCase::deinit (void)
2555 {
2556 	m_dstBuf.clear();
2557 	BasicUploadCase<SingleOperationDuration>::deinit();
2558 }
2559 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2560 void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2561 {
2562 	// write
2563 	result.duration.totalDuration = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
2564 	result.duration.fitResponseDuration = result.duration.totalDuration;
2565 
2566 	result.writtenSize = bufferSize;
2567 }
2568 
2569 class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2570 {
2571 public:
2572 				BufferDataUploadCase	(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType);
2573 				~BufferDataUploadCase	(void);
2574 
2575 	void		init					(void);
2576 private:
2577 	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2578 };
2579 
BufferDataUploadCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,CaseType caseType)2580 BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType)
2581 	: BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, caseType, RESULT_MEDIAN_TRANSFER_RATE)
2582 {
2583 }
2584 
~BufferDataUploadCase(void)2585 BufferDataUploadCase::~BufferDataUploadCase (void)
2586 {
2587 }
2588 
init(void)2589 void BufferDataUploadCase::init (void)
2590 {
2591 	// Describe what the test tries to do
2592 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;
2593 
2594 	BasicUploadCase<SingleOperationDuration>::init();
2595 }
2596 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2597 void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2598 {
2599 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2600 
2601 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2602 
2603 	// upload
2604 	{
2605 		deUint64 startTime;
2606 		deUint64 endTime;
2607 
2608 		startTime = deGetMicroseconds();
2609 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2610 		endTime = deGetMicroseconds();
2611 
2612 		result.duration.totalDuration = endTime - startTime;
2613 		result.duration.fitResponseDuration = result.duration.totalDuration;
2614 		result.writtenSize = bufferSize;
2615 	}
2616 }
2617 
2618 class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
2619 {
2620 public:
2621 	enum Flags
2622 	{
2623 		FLAG_FULL_UPLOAD			= 0x01,
2624 		FLAG_PARTIAL_UPLOAD			= 0x02,
2625 		FLAG_INVALIDATE_BEFORE_USE	= 0x04,
2626 	};
2627 
2628 				BufferSubDataUploadCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags);
2629 				~BufferSubDataUploadCase	(void);
2630 
2631 	void		init						(void);
2632 private:
2633 	void		testBufferUpload			(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
2634 
2635 	const bool	m_fullUpload;
2636 	const bool	m_invalidateBeforeUse;
2637 };
2638 
BufferSubDataUploadCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,CaseType parentCase,int flags)2639 BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags)
2640 	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE)
2641 	, m_fullUpload								((flags & FLAG_FULL_UPLOAD) != 0)
2642 	, m_invalidateBeforeUse						((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
2643 {
2644 	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
2645 	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
2646 }
2647 
~BufferSubDataUploadCase(void)2648 BufferSubDataUploadCase::~BufferSubDataUploadCase (void)
2649 {
2650 }
2651 
init(void)2652 void BufferSubDataUploadCase::init (void)
2653 {
2654 	// Describe what the test tries to do
2655 	m_testCtx.getLog()
2656 		<< tcu::TestLog::Message
2657 		<< "Testing glBufferSubData() function call performance. "
2658 		<< ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") : ("Half of the buffer data is updated with glBufferSubData. "))
2659 		<< ((m_invalidateBeforeUse) ? ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") : ("")) << "\n"
2660 		<< tcu::TestLog::EndMessage;
2661 
2662 	BasicUploadCase<SingleOperationDuration>::init();
2663 }
2664 
testBufferUpload(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)2665 void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
2666 {
2667 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2668 
2669 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2670 
2671 	// "invalidate", upload null
2672 	if (m_invalidateBeforeUse)
2673 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2674 
2675 	// upload
2676 	{
2677 		deUint64 startTime;
2678 		deUint64 endTime;
2679 
2680 		startTime = deGetMicroseconds();
2681 
2682 		if (m_fullUpload)
2683 			gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
2684 		else
2685 		{
2686 			// upload to buffer center
2687 			gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
2688 		}
2689 
2690 		endTime = deGetMicroseconds();
2691 
2692 		result.duration.totalDuration = endTime - startTime;
2693 		result.duration.fitResponseDuration = result.duration.totalDuration;
2694 
2695 		if (m_fullUpload)
2696 			result.writtenSize = bufferSize;
2697 		else
2698 			result.writtenSize = bufferSize / 2;
2699 	}
2700 }
2701 
2702 class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
2703 {
2704 public:
2705 	enum Flags
2706 	{
2707 		FLAG_PARTIAL						= 0x01,
2708 		FLAG_MANUAL_INVALIDATION			= 0x02,
2709 		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2710 		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2711 	};
2712 
2713 					MapBufferRangeCase			(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2714 					~MapBufferRangeCase			(void);
2715 
2716 	void			init						(void);
2717 private:
2718 	static CaseType getBaseCaseType				(int caseFlags);
2719 	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2720 
2721 	void			testBufferUpload			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2722 	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
2723 
2724 	const bool		m_manualInvalidation;
2725 	const bool		m_fullUpload;
2726 	const bool		m_useUnusedUnspecifiedBuffer;
2727 	const bool		m_useUnusedSpecifiedBuffer;
2728 	const deUint32	m_mapFlags;
2729 	int				m_unmapFailures;
2730 };
2731 
MapBufferRangeCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,deUint32 mapFlags,int caseFlags)2732 MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2733 	: BasicUploadCase<MapBufferRangeDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2734 	, m_manualInvalidation						((caseFlags&FLAG_MANUAL_INVALIDATION) != 0)
2735 	, m_fullUpload								((caseFlags&FLAG_PARTIAL) == 0)
2736 	, m_useUnusedUnspecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2737 	, m_useUnusedSpecifiedBuffer				((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2738 	, m_mapFlags								(mapFlags)
2739 	, m_unmapFailures							(0)
2740 {
2741 	DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
2742 	DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
2743 }
2744 
~MapBufferRangeCase(void)2745 MapBufferRangeCase::~MapBufferRangeCase (void)
2746 {
2747 }
2748 
init(void)2749 void MapBufferRangeCase::init (void)
2750 {
2751 	// Describe what the test tries to do
2752 	m_testCtx.getLog()
2753 		<< tcu::TestLog::Message
2754 		<< "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
2755 		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2756 		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2757 		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2758 		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2759 		<< ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
2760 		<< "Map bits:\n"
2761 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2762 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2763 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2764 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2765 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2766 		<< tcu::TestLog::EndMessage;
2767 
2768 	BasicUploadCase<MapBufferRangeDuration>::init();
2769 }
2770 
getBaseCaseType(int caseFlags)2771 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags)
2772 {
2773 	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2774 		return CASE_USED_BUFFER;
2775 	else
2776 		return CASE_NEW_BUFFER;
2777 }
2778 
getBaseFlags(deUint32 mapFlags,int caseFlags)2779 int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2780 {
2781 	int flags = FLAG_DONT_LOG_BUFFER_INFO;
2782 
2783 	// If buffer contains unspecified data when it is sourced (i.e drawn)
2784 	// results are undefined, and system errors may occur. Signal parent
2785 	// class to take this into account
2786 	if (caseFlags & FLAG_PARTIAL)
2787 	{
2788 		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
2789 			(caseFlags & FLAG_MANUAL_INVALIDATION) != 0				||
2790 			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2791 		{
2792 			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
2793 		}
2794 	}
2795 
2796 	return flags;
2797 }
2798 
testBufferUpload(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)2799 void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2800 {
2801 	const int unmapFailureThreshold = 4;
2802 
2803 	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
2804 	{
2805 		try
2806 		{
2807 			attemptBufferMap(result, bufferSize);
2808 			return;
2809 		}
2810 		catch (UnmapFailureError&)
2811 		{
2812 		}
2813 	}
2814 
2815 	throw tcu::TestError("Unmapping failures exceeded limit");
2816 }
2817 
attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> & result,int bufferSize)2818 void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
2819 {
2820 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2821 
2822 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
2823 
2824 	if (m_fullUpload)
2825 		result.writtenSize = bufferSize;
2826 	else
2827 		result.writtenSize = bufferSize / 2;
2828 
2829 	// Create unused buffer
2830 
2831 	if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
2832 	{
2833 		deUint64 startTime;
2834 		deUint64 endTime;
2835 
2836 		// "invalidate" or allocate, upload null
2837 		startTime = deGetMicroseconds();
2838 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
2839 		endTime = deGetMicroseconds();
2840 
2841 		result.duration.allocDuration = endTime - startTime;
2842 	}
2843 	else if (m_useUnusedSpecifiedBuffer)
2844 	{
2845 		deUint64 startTime;
2846 		deUint64 endTime;
2847 
2848 		// Specify buffer contents
2849 		startTime = deGetMicroseconds();
2850 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
2851 		endTime = deGetMicroseconds();
2852 
2853 		result.duration.allocDuration = endTime - startTime;
2854 	}
2855 	else
2856 	{
2857 		// No alloc, no time
2858 		result.duration.allocDuration = 0;
2859 	}
2860 
2861 	// upload
2862 	{
2863 		void* mapPtr;
2864 
2865 		// Map
2866 		{
2867 			deUint64 startTime;
2868 			deUint64 endTime;
2869 
2870 			startTime = deGetMicroseconds();
2871 			if (m_fullUpload)
2872 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
2873 			else
2874 			{
2875 				// upload to buffer center
2876 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
2877 			}
2878 			endTime = deGetMicroseconds();
2879 
2880 			if (!mapPtr)
2881 				throw tcu::Exception("MapBufferRange returned NULL");
2882 
2883 			result.duration.mapDuration = endTime - startTime;
2884 		}
2885 
2886 		// Write
2887 		{
2888 			result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
2889 		}
2890 
2891 		// Unmap
2892 		{
2893 			deUint64		startTime;
2894 			deUint64		endTime;
2895 			glw::GLboolean	unmapSuccessful;
2896 
2897 			startTime = deGetMicroseconds();
2898 			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
2899 			endTime = deGetMicroseconds();
2900 
2901 			// if unmapping fails, just try again later
2902 			if (!unmapSuccessful)
2903 				throw UnmapFailureError();
2904 
2905 			result.duration.unmapDuration = endTime - startTime;
2906 		}
2907 
2908 		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration;
2909 		result.duration.fitResponseDuration = result.duration.totalDuration;
2910 	}
2911 }
2912 
2913 class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
2914 {
2915 public:
2916 	enum Flags
2917 	{
2918 		FLAG_PARTIAL						= 0x01,
2919 		FLAG_FLUSH_IN_PARTS					= 0x02,
2920 		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,
2921 		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,
2922 		FLAG_FLUSH_PARTIAL					= 0x10,
2923 	};
2924 
2925 					MapBufferRangeFlushCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
2926 					~MapBufferRangeFlushCase	(void);
2927 
2928 	void			init						(void);
2929 private:
2930 	static CaseType getBaseCaseType				(int caseFlags);
2931 	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);
2932 
2933 	void			testBufferUpload			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2934 	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
2935 
2936 	const bool		m_fullUpload;
2937 	const bool		m_flushInParts;
2938 	const bool		m_flushPartial;
2939 	const bool		m_useUnusedUnspecifiedBuffer;
2940 	const bool		m_useUnusedSpecifiedBuffer;
2941 	const deUint32	m_mapFlags;
2942 	int				m_unmapFailures;
2943 };
2944 
MapBufferRangeFlushCase(Context & ctx,const char * name,const char * desc,int minBufferSize,int maxBufferSize,int numSamples,deUint32 bufferUsage,deUint32 mapFlags,int caseFlags)2945 MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
2946 	: BasicUploadCase<MapBufferRangeFlushDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE, getBaseFlags(mapFlags, caseFlags))
2947 	, m_fullUpload									((caseFlags&FLAG_PARTIAL) == 0)
2948 	, m_flushInParts								((caseFlags&FLAG_FLUSH_IN_PARTS) != 0)
2949 	, m_flushPartial								((caseFlags&FLAG_FLUSH_PARTIAL) != 0)
2950 	, m_useUnusedUnspecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
2951 	, m_useUnusedSpecifiedBuffer					((caseFlags&FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
2952 	, m_mapFlags									(mapFlags)
2953 	, m_unmapFailures								(0)
2954 {
2955 	DE_ASSERT(!(m_flushPartial && m_flushInParts));
2956 	DE_ASSERT(!(m_flushPartial && !m_fullUpload));
2957 }
2958 
~MapBufferRangeFlushCase(void)2959 MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void)
2960 {
2961 }
2962 
init(void)2963 void MapBufferRangeFlushCase::init (void)
2964 {
2965 	// Describe what the test tries to do
2966 	m_testCtx.getLog()
2967 		<< tcu::TestLog::Message
2968 		<< "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
2969 		<< ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
2970 		<< ((m_flushInParts) ?
2971 			("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
2972 			(m_flushPartial) ?
2973 				("Half of the buffer range is flushed.") :
2974 				("The whole mapped range is flushed in one flush call.")) << "\n"
2975 		<< ((m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : (""))
2976 		<< ((m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : (""))
2977 		<< ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ? ("The buffer has previously been used in a drawing operation.\n") : (""))
2978 		<< "Map bits:\n"
2979 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
2980 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
2981 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
2982 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
2983 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
2984 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
2985 		<< tcu::TestLog::EndMessage;
2986 
2987 	BasicUploadCase<MapBufferRangeFlushDuration>::init();
2988 }
2989 
getBaseCaseType(int caseFlags)2990 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags)
2991 {
2992 	if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
2993 		return CASE_USED_BUFFER;
2994 	else
2995 		return CASE_NEW_BUFFER;
2996 }
2997 
getBaseFlags(deUint32 mapFlags,int caseFlags)2998 int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
2999 {
3000 	int flags = FLAG_DONT_LOG_BUFFER_INFO;
3001 
3002 	// If buffer contains unspecified data when it is sourced (i.e drawn)
3003 	// results are undefined, and system errors may occur. Signal parent
3004 	// class to take this into account
3005 	if (caseFlags & FLAG_PARTIAL)
3006 	{
3007 		if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0			||
3008 			(caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0	||
3009 			(caseFlags & FLAG_FLUSH_PARTIAL) != 0)
3010 		{
3011 			flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
3012 		}
3013 	}
3014 
3015 	return flags;
3016 }
3017 
testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3018 void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3019 {
3020 	const int unmapFailureThreshold = 4;
3021 
3022 	for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
3023 	{
3024 		try
3025 		{
3026 			attemptBufferMap(result, bufferSize);
3027 			return;
3028 		}
3029 		catch (UnmapFailureError&)
3030 		{
3031 		}
3032 	}
3033 
3034 	throw tcu::TestError("Unmapping failures exceeded limit");
3035 }
3036 
attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> & result,int bufferSize)3037 void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
3038 {
3039 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
3040 	const int				mappedSize	= (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
3041 
3042 	if (m_fullUpload && !m_flushPartial)
3043 		result.writtenSize = bufferSize;
3044 	else
3045 		result.writtenSize = bufferSize / 2;
3046 
3047 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3048 
3049 	// Create unused buffer
3050 
3051 	if (m_useUnusedUnspecifiedBuffer)
3052 	{
3053 		deUint64 startTime;
3054 		deUint64 endTime;
3055 
3056 		// Don't specify contents
3057 		startTime = deGetMicroseconds();
3058 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
3059 		endTime = deGetMicroseconds();
3060 
3061 		result.duration.allocDuration = endTime - startTime;
3062 	}
3063 	else if (m_useUnusedSpecifiedBuffer)
3064 	{
3065 		deUint64 startTime;
3066 		deUint64 endTime;
3067 
3068 		// Specify buffer contents
3069 		startTime = deGetMicroseconds();
3070 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3071 		endTime = deGetMicroseconds();
3072 
3073 		result.duration.allocDuration = endTime - startTime;
3074 	}
3075 	else
3076 	{
3077 		// No alloc, no time
3078 		result.duration.allocDuration = 0;
3079 	}
3080 
3081 	// upload
3082 	{
3083 		void* mapPtr;
3084 
3085 		// Map
3086 		{
3087 			deUint64 startTime;
3088 			deUint64 endTime;
3089 
3090 			startTime = deGetMicroseconds();
3091 			if (m_fullUpload)
3092 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
3093 			else
3094 			{
3095 				// upload to buffer center
3096 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
3097 			}
3098 			endTime = deGetMicroseconds();
3099 
3100 			if (!mapPtr)
3101 				throw tcu::Exception("MapBufferRange returned NULL");
3102 
3103 			result.duration.mapDuration = endTime - startTime;
3104 		}
3105 
3106 		// Write
3107 		{
3108 			if (!m_flushPartial)
3109 				result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
3110 			else
3111 				result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
3112 		}
3113 
3114 		// Flush
3115 		{
3116 			deUint64	startTime;
3117 			deUint64	endTime;
3118 
3119 			startTime = deGetMicroseconds();
3120 
3121 			if (m_flushPartial)
3122 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2);
3123 			else if (!m_flushInParts)
3124 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
3125 			else
3126 			{
3127 				const int p1 = 0;
3128 				const int p2 = mappedSize / 3;
3129 				const int p3 = mappedSize / 2;
3130 				const int p4 = mappedSize * 2 / 4;
3131 				const int p5 = mappedSize;
3132 
3133 				// flush in mixed order
3134 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2,	p3-p2);
3135 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1,	p2-p1);
3136 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4,	p5-p4);
3137 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3,	p4-p3);
3138 			}
3139 
3140 			endTime = deGetMicroseconds();
3141 
3142 			result.duration.flushDuration = endTime - startTime;
3143 		}
3144 
3145 		// Unmap
3146 		{
3147 			deUint64		startTime;
3148 			deUint64		endTime;
3149 			glw::GLboolean	unmapSuccessful;
3150 
3151 			startTime = deGetMicroseconds();
3152 			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
3153 			endTime = deGetMicroseconds();
3154 
3155 			// if unmapping fails, just try again later
3156 			if (!unmapSuccessful)
3157 				throw UnmapFailureError();
3158 
3159 			result.duration.unmapDuration = endTime - startTime;
3160 		}
3161 
3162 		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration;
3163 		result.duration.fitResponseDuration = result.duration.totalDuration;
3164 	}
3165 }
3166 
3167 template <typename SampleType>
3168 class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
3169 {
3170 public:
3171 						ModifyAfterBasicCase	(Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest);
3172 						~ModifyAfterBasicCase	(void);
3173 
3174 	void				init					(void);
3175 	void				deinit					(void);
3176 
3177 protected:
3178 	void				drawBufferRange			(int begin, int end);
3179 
3180 private:
3181 	enum
3182 	{
3183 		NUM_SAMPLES = 20,
3184 	};
3185 
3186 
3187 	bool				runSample				(int iteration, UploadSampleResult<SampleType>& sample);
3188 	bool				prepareAndRunTest		(int iteration, UploadSampleResult<SampleType>& result, int bufferSize);
3189 	void				logAndSetTestResult		(const std::vector<UploadSampleResult<SampleType> >& results);
3190 
3191 	virtual void		testWithBufferSize		(UploadSampleResult<SampleType>& result, int bufferSize) = 0;
3192 
3193 	int					m_unmappingErrors;
3194 
3195 protected:
3196 	const bool			m_bufferUnspecifiedAfterTest;
3197 	const deUint32		m_bufferUsage;
3198 	std::vector<deUint8> m_zeroData;
3199 
3200 	using BasicBufferCase<SampleType>::m_testCtx;
3201 	using BasicBufferCase<SampleType>::m_context;
3202 
3203 	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
3204 	using BasicBufferCase<SampleType>::m_dummyProgram;
3205 	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
3206 	using BasicBufferCase<SampleType>::m_bufferID;
3207 	using BasicBufferCase<SampleType>::m_numSamples;
3208 	using BasicBufferCase<SampleType>::m_bufferSizeMin;
3209 	using BasicBufferCase<SampleType>::m_bufferSizeMax;
3210 	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
3211 };
3212 
3213 template <typename SampleType>
ModifyAfterBasicCase(Context & context,const char * name,const char * description,int bufferSizeMin,int bufferSizeMax,deUint32 usage,bool bufferUnspecifiedAfterTest)3214 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest)
3215 	: BasicBufferCase<SampleType>	(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
3216 	, m_unmappingErrors				(0)
3217 	, m_bufferUnspecifiedAfterTest	(bufferUnspecifiedAfterTest)
3218 	, m_bufferUsage					(usage)
3219 	, m_zeroData					()
3220 {
3221 }
3222 
3223 template <typename SampleType>
~ModifyAfterBasicCase(void)3224 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void)
3225 {
3226 	BasicBufferCase<SampleType>::deinit();
3227 }
3228 
3229 template <typename SampleType>
init(void)3230 void ModifyAfterBasicCase<SampleType>::init (void)
3231 {
3232 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3233 
3234 	// init parent
3235 
3236 	BasicBufferCase<SampleType>::init();
3237 
3238 	// upload source
3239 	m_zeroData.resize(m_bufferSizeMax, 0x00);
3240 
3241 	// log basic info
3242 
3243 	m_testCtx.getLog()
3244 		<< tcu::TestLog::Message
3245 		<< "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
3246 		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
3247 		<< tcu::TestLog::EndMessage;
3248 
3249 	// log which transfer rate is the test result and buffer info
3250 
3251 	m_testCtx.getLog()
3252 		<< tcu::TestLog::Message
3253 		<< "Test result is the median transfer rate of the test samples.\n"
3254 		<< "Buffer usage = " << glu::getUsageName(m_bufferUsage)
3255 		<< tcu::TestLog::EndMessage;
3256 
3257 	// Set state for drawing so that we don't have to change these during the iteration
3258 	{
3259 		gl.useProgram(m_dummyProgram->getProgram());
3260 		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
3261 		gl.enableVertexAttribArray(m_dummyProgramPosLoc);
3262 	}
3263 }
3264 
3265 template <typename SampleType>
deinit(void)3266 void ModifyAfterBasicCase<SampleType>::deinit (void)
3267 {
3268 	m_zeroData.clear();
3269 
3270 	BasicBufferCase<SampleType>::deinit();
3271 }
3272 
3273 template <typename SampleType>
drawBufferRange(int begin,int end)3274 void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end)
3275 {
3276 	DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
3277 	DE_ASSERT(end % (int)sizeof(float[4]) == 0);
3278 
3279 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3280 
3281 	// use given range
3282 	gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
3283 	gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
3284 }
3285 
3286 template <typename SampleType>
runSample(int iteration,UploadSampleResult<SampleType> & sample)3287 bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
3288 {
3289 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
3290 	const int				bufferSize			= sample.bufferSize;
3291 	bool					testOk;
3292 
3293 	testOk = prepareAndRunTest(iteration, sample, bufferSize);
3294 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");
3295 
3296 	if (!testOk)
3297 	{
3298 		const int unmapFailureThreshold = 4;
3299 
3300 		// only unmapping error can cause iteration failure
3301 		if (++m_unmappingErrors >= unmapFailureThreshold)
3302 			throw tcu::TestError("Too many unmapping errors, cannot continue.");
3303 
3304 		// just try again
3305 		return false;
3306 	}
3307 
3308 	return true;
3309 }
3310 
3311 template <typename SampleType>
prepareAndRunTest(int iteration,UploadSampleResult<SampleType> & result,int bufferSize)3312 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize)
3313 {
3314 	DE_UNREF(iteration);
3315 
3316 	DE_ASSERT(!m_bufferID);
3317 	DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4
3318 
3319 	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
3320 	bool						testRunOk		= true;
3321 	bool						unmappingFailed	= false;
3322 
3323 	// Upload initial buffer to the GPU...
3324 	gl.genBuffers(1, &m_bufferID);
3325 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
3326 	gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3327 
3328 	// ...use it...
3329 	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
3330 	drawBufferRange(0, bufferSize);
3331 
3332 	// ..and make sure it is uploaded
3333 	BasicBufferCase<SampleType>::waitGLResults();
3334 
3335 	// warmup CPU before the test to make sure the power management governor
3336 	// keeps us in the "high performance" mode
3337 	{
3338 		deYield();
3339 		tcu::warmupCPU();
3340 		deYield();
3341 	}
3342 
3343 	// test
3344 	try
3345 	{
3346 		// buffer is uploaded to the GPU. Draw from it.
3347 		drawBufferRange(0, bufferSize);
3348 
3349 		// and test upload
3350 		testWithBufferSize(result, bufferSize);
3351 	}
3352 	catch (UnmapFailureError&)
3353 	{
3354 		testRunOk = false;
3355 		unmappingFailed = true;
3356 	}
3357 
3358 	// clean up: make sure buffer is not in upload queue and delete it
3359 
3360 	// sourcing unspecified data causes undefined results, possibly program termination
3361 	if (m_bufferUnspecifiedAfterTest || unmappingFailed)
3362 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
3363 
3364 	drawBufferRange(0, bufferSize);
3365 	BasicBufferCase<SampleType>::waitGLResults();
3366 
3367 	gl.deleteBuffers(1, &m_bufferID);
3368 	m_bufferID = 0;
3369 
3370 	return testRunOk;
3371 }
3372 
3373 template <typename SampleType>
logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> & results)3374 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
3375 {
3376 	const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);
3377 
3378 	// Return median transfer rate of the samples
3379 
3380 	if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
3381 	{
3382 		// sample times are 1) invalid or 2) timer resolution too low
3383 		// report speed 0 bytes / s since real value cannot be determined
3384 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
3385 	}
3386 	else
3387 	{
3388 		// report transfer rate in MB / s
3389 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
3390 	}
3391 }
3392 
3393 class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3394 {
3395 public:
3396 
3397 	enum CaseFlags
3398 	{
3399 		FLAG_RESPECIFY_SIZE		= 0x1,
3400 		FLAG_UPLOAD_REPEATED	= 0x2,
3401 	};
3402 
3403 					ModifyAfterWithBufferDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3404 					~ModifyAfterWithBufferDataCase	(void);
3405 
3406 	void			init							(void);
3407 	void			deinit							(void);
3408 private:
3409 	void			testWithBufferSize				(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3410 
3411 	enum
3412 	{
3413 		NUM_REPEATS = 2
3414 	};
3415 
3416 	const bool		m_respecifySize;
3417 	const bool		m_repeatedUpload;
3418 	const float		m_sizeDifferenceFactor;
3419 };
3420 
ModifyAfterWithBufferDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags)3421 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3422 	: ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3423 	, m_respecifySize								((flags & FLAG_RESPECIFY_SIZE) != 0)
3424 	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3425 	, m_sizeDifferenceFactor						(1.3f)
3426 {
3427 	DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
3428 }
3429 
~ModifyAfterWithBufferDataCase(void)3430 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void)
3431 {
3432 	deinit();
3433 }
3434 
init(void)3435 void ModifyAfterWithBufferDataCase::init (void)
3436 {
3437 	// Log the purpose of the test
3438 
3439 	if (m_repeatedUpload)
3440 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3441 	else
3442 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3443 
3444 	m_testCtx.getLog()
3445 		<< tcu::TestLog::Message
3446 		<< ((m_respecifySize) ?
3447 			("Buffer size is increased and contents are modified with BufferData().\n") :
3448 			("Buffer contents are modified with BufferData().\n"))
3449 		<< tcu::TestLog::EndMessage;
3450 
3451 	// init parent
3452 	ModifyAfterBasicCase<SingleOperationDuration>::init();
3453 
3454 	// make sure our zeroBuffer is large enough
3455 	if (m_respecifySize)
3456 	{
3457 		const int largerBufferSize = deAlign32((int)(m_bufferSizeMax * m_sizeDifferenceFactor), 4*4);
3458 		m_zeroData.resize(largerBufferSize, 0x00);
3459 	}
3460 }
3461 
deinit(void)3462 void ModifyAfterWithBufferDataCase::deinit (void)
3463 {
3464 	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3465 }
3466 
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3467 void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3468 {
3469 	// always draw the same amount to make compares between cases sensible
3470 	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3471 	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3472 
3473 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3474 	const int					largerBufferSize	= deAlign32((int)(bufferSize * m_sizeDifferenceFactor), 4*4);
3475 	const int					newBufferSize		= (m_respecifySize) ? (largerBufferSize) : (bufferSize);
3476 	deUint64					startTime;
3477 	deUint64					endTime;
3478 
3479 	// repeat upload-draw
3480 	if (m_repeatedUpload)
3481 	{
3482 		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3483 		{
3484 			gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3485 			drawBufferRange(drawStart, drawEnd);
3486 		}
3487 	}
3488 
3489 	// test upload
3490 	startTime = deGetMicroseconds();
3491 	gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
3492 	endTime = deGetMicroseconds();
3493 
3494 	result.duration.totalDuration = endTime - startTime;
3495 	result.duration.fitResponseDuration = result.duration.totalDuration;
3496 	result.writtenSize = newBufferSize;
3497 }
3498 
3499 class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
3500 {
3501 public:
3502 
3503 	enum CaseFlags
3504 	{
3505 		FLAG_PARTIAL			= 0x1,
3506 		FLAG_UPLOAD_REPEATED	= 0x2,
3507 	};
3508 
3509 					ModifyAfterWithBufferSubDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
3510 					~ModifyAfterWithBufferSubDataCase	(void);
3511 
3512 	void			init								(void);
3513 	void			deinit								(void);
3514 private:
3515 	void			testWithBufferSize					(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
3516 
3517 	enum
3518 	{
3519 		NUM_REPEATS = 2
3520 	};
3521 
3522 	const bool		m_partialUpload;
3523 	const bool		m_repeatedUpload;
3524 };
3525 
ModifyAfterWithBufferSubDataCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags)3526 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
3527 	: ModifyAfterBasicCase<SingleOperationDuration>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
3528 	, m_partialUpload								((flags & FLAG_PARTIAL) != 0)
3529 	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
3530 {
3531 }
3532 
~ModifyAfterWithBufferSubDataCase(void)3533 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void)
3534 {
3535 	deinit();
3536 }
3537 
init(void)3538 void ModifyAfterWithBufferSubDataCase::init (void)
3539 {
3540 	// Log the purpose of the test
3541 
3542 	if (m_repeatedUpload)
3543 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
3544 	else
3545 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;
3546 
3547 	m_testCtx.getLog()
3548 		<< tcu::TestLog::Message
3549 		<< ((m_partialUpload) ?
3550 			("Half of the buffer contents are modified.\n") :
3551 			("Buffer contents are fully respecified.\n"))
3552 		<< tcu::TestLog::EndMessage;
3553 
3554 	ModifyAfterBasicCase<SingleOperationDuration>::init();
3555 }
3556 
deinit(void)3557 void ModifyAfterWithBufferSubDataCase::deinit (void)
3558 {
3559 	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
3560 }
3561 
testWithBufferSize(UploadSampleResult<SingleOperationDuration> & result,int bufferSize)3562 void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
3563 {
3564 	// always draw the same amount to make compares between cases sensible
3565 	const int					drawStart			= deAlign32(bufferSize / 4, 4*4);
3566 	const int					drawEnd				= deAlign32(bufferSize * 3 / 4, 4*4);
3567 
3568 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3569 	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3570 	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3571 	deUint64					startTime;
3572 	deUint64					endTime;
3573 
3574 	// make upload-draw stream
3575 	if (m_repeatedUpload)
3576 	{
3577 		for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
3578 		{
3579 			gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3580 			drawBufferRange(drawStart, drawEnd);
3581 		}
3582 	}
3583 
3584 	// test upload
3585 	startTime = deGetMicroseconds();
3586 	gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
3587 	endTime = deGetMicroseconds();
3588 
3589 	result.duration.totalDuration = endTime - startTime;
3590 	result.duration.fitResponseDuration = result.duration.totalDuration;
3591 	result.writtenSize = subdataSize;
3592 }
3593 
3594 class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
3595 {
3596 public:
3597 
3598 	enum CaseFlags
3599 	{
3600 		FLAG_PARTIAL = 0x1,
3601 	};
3602 
3603 					ModifyAfterWithMapBufferRangeCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3604 					~ModifyAfterWithMapBufferRangeCase	(void);
3605 
3606 	void			init								(void);
3607 	void			deinit								(void);
3608 private:
3609 	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
3610 	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize);
3611 
3612 	const bool		m_partialUpload;
3613 	const deUint32	m_mapFlags;
3614 };
3615 
ModifyAfterWithMapBufferRangeCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags,deUint32 glMapFlags)3616 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3617 	: ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3618 	, m_partialUpload										((flags & FLAG_PARTIAL) != 0)
3619 	, m_mapFlags											(glMapFlags)
3620 {
3621 }
3622 
~ModifyAfterWithMapBufferRangeCase(void)3623 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void)
3624 {
3625 	deinit();
3626 }
3627 
init(void)3628 void ModifyAfterWithMapBufferRangeCase::init (void)
3629 {
3630 	// Log the purpose of the test
3631 
3632 	m_testCtx.getLog()
3633 		<< tcu::TestLog::Message
3634 		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3635 		<< ((m_partialUpload) ?
3636 			("Half of the buffer is mapped.\n") :
3637 			("Whole buffer is mapped.\n"))
3638 		<< "Map bits:\n"
3639 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3640 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3641 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3642 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3643 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3644 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3645 		<< tcu::TestLog::EndMessage;
3646 
3647 	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
3648 }
3649 
deinit(void)3650 void ModifyAfterWithMapBufferRangeCase::deinit (void)
3651 {
3652 	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
3653 }
3654 
isBufferUnspecifiedAfterUpload(int flags,deUint32 mapFlags)3655 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3656 {
3657 	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3658 		return true;
3659 
3660 	return false;
3661 }
3662 
testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> & result,int bufferSize)3663 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize)
3664 {
3665 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3666 	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3667 	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3668 	void*						mapPtr;
3669 
3670 	// map
3671 	{
3672 		deUint64 startTime;
3673 		deUint64 endTime;
3674 
3675 		startTime = deGetMicroseconds();
3676 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3677 		endTime = deGetMicroseconds();
3678 
3679 		if (!mapPtr)
3680 			throw tcu::TestError("mapBufferRange returned null");
3681 
3682 		result.duration.mapDuration = endTime - startTime;
3683 	}
3684 
3685 	// write
3686 	{
3687 		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3688 	}
3689 
3690 	// unmap
3691 	{
3692 		deUint64		startTime;
3693 		deUint64		endTime;
3694 		glw::GLboolean	unmapSucceeded;
3695 
3696 		startTime = deGetMicroseconds();
3697 		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3698 		endTime = deGetMicroseconds();
3699 
3700 		if (unmapSucceeded != GL_TRUE)
3701 			throw UnmapFailureError();
3702 
3703 		result.duration.unmapDuration = endTime - startTime;
3704 	}
3705 
3706 	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
3707 	result.duration.fitResponseDuration = result.duration.totalDuration;
3708 	result.writtenSize = subdataSize;
3709 }
3710 
3711 class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
3712 {
3713 public:
3714 
3715 	enum CaseFlags
3716 	{
3717 		FLAG_PARTIAL = 0x1,
3718 	};
3719 
3720 					ModifyAfterWithMapBufferFlushCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
3721 					~ModifyAfterWithMapBufferFlushCase	(void);
3722 
3723 	void			init								(void);
3724 	void			deinit								(void);
3725 private:
3726 	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
3727 	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize);
3728 
3729 	const bool		m_partialUpload;
3730 	const deUint32	m_mapFlags;
3731 };
3732 
ModifyAfterWithMapBufferFlushCase(Context & context,const char * name,const char * desc,int bufferSizeMin,int bufferSizeMax,deUint32 usage,int flags,deUint32 glMapFlags)3733 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
3734 	: ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
3735 	, m_partialUpload											((flags & FLAG_PARTIAL) != 0)
3736 	, m_mapFlags												(glMapFlags)
3737 {
3738 }
3739 
~ModifyAfterWithMapBufferFlushCase(void)3740 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void)
3741 {
3742 	deinit();
3743 }
3744 
init(void)3745 void ModifyAfterWithMapBufferFlushCase::init (void)
3746 {
3747 	// Log the purpose of the test
3748 
3749 	m_testCtx.getLog()
3750 		<< tcu::TestLog::Message
3751 		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
3752 		<< ((m_partialUpload) ?
3753 			("Half of the buffer is mapped.\n") :
3754 			("Whole buffer is mapped.\n"))
3755 		<< "Map bits:\n"
3756 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
3757 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
3758 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
3759 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
3760 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
3761 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
3762 		<< tcu::TestLog::EndMessage;
3763 
3764 	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
3765 }
3766 
deinit(void)3767 void ModifyAfterWithMapBufferFlushCase::deinit (void)
3768 {
3769 	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
3770 }
3771 
isBufferUnspecifiedAfterUpload(int flags,deUint32 mapFlags)3772 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
3773 {
3774 	if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
3775 		return true;
3776 
3777 	return false;
3778 }
3779 
testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> & result,int bufferSize)3780 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize)
3781 {
3782 	const glw::Functions&		gl					= m_context.getRenderContext().getFunctions();
3783 	const int					subdataOffset		= deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4*4);
3784 	const int					subdataSize			= deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4*4);
3785 	void*						mapPtr;
3786 
3787 	// map
3788 	{
3789 		deUint64 startTime;
3790 		deUint64 endTime;
3791 
3792 		startTime = deGetMicroseconds();
3793 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
3794 		endTime = deGetMicroseconds();
3795 
3796 		if (!mapPtr)
3797 			throw tcu::TestError("mapBufferRange returned null");
3798 
3799 		result.duration.mapDuration = endTime - startTime;
3800 	}
3801 
3802 	// write
3803 	{
3804 		result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
3805 	}
3806 
3807 	// flush
3808 	{
3809 		deUint64 startTime;
3810 		deUint64 endTime;
3811 
3812 		startTime = deGetMicroseconds();
3813 		gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
3814 		endTime = deGetMicroseconds();
3815 
3816 		result.duration.flushDuration = endTime - startTime;
3817 	}
3818 
3819 	// unmap
3820 	{
3821 		deUint64		startTime;
3822 		deUint64		endTime;
3823 		glw::GLboolean	unmapSucceeded;
3824 
3825 		startTime = deGetMicroseconds();
3826 		unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
3827 		endTime = deGetMicroseconds();
3828 
3829 		if (unmapSucceeded != GL_TRUE)
3830 			throw UnmapFailureError();
3831 
3832 		result.duration.unmapDuration = endTime - startTime;
3833 	}
3834 
3835 	result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration;
3836 	result.duration.fitResponseDuration = result.duration.totalDuration;
3837 	result.writtenSize = subdataSize;
3838 }
3839 
// Draw call variant used by a render case.
enum DrawMethod
{
	DRAWMETHOD_DRAW_ARRAYS		= 0,
	DRAWMETHOD_DRAW_ELEMENTS	= 1,

	DRAWMETHOD_LAST				= 2		// number of enumerators above; keep last
};
3847 
// Buffer kind (vertex or index) a case operates on.
// NOTE(review): usage is outside this section - confirm against the cases that take it.
enum TargetBuffer
{
	TARGETBUFFER_VERTEX		= 0,
	TARGETBUFFER_INDEX		= 1,

	TARGETBUFFER_LAST		= 2		// number of enumerators above; keep last
};
3855 
// State of the buffer (new or existing) a case operates on.
// NOTE(review): usage is outside this section - confirm against the cases that take it.
enum BufferState
{
	BUFFERSTATE_NEW			= 0,
	BUFFERSTATE_EXISTING	= 1,

	BUFFERSTATE_LAST		= 2		// number of enumerators above; keep last
};
3863 
// Buffer upload call variant a case exercises.
// NOTE(review): usage is outside this section - confirm against the cases that take it.
enum UploadMethod
{
	UPLOADMETHOD_BUFFER_DATA		= 0,
	UPLOADMETHOD_BUFFER_SUB_DATA	= 1,
	UPLOADMETHOD_MAP_BUFFER_RANGE	= 2,

	UPLOADMETHOD_LAST				= 3		// number of enumerators above; keep last
};
3872 
// Kind of unrelated buffer (none or vertex) involved in a case.
// NOTE(review): usage is outside this section - confirm against the cases that take it.
enum UnrelatedBufferType
{
	UNRELATEDBUFFERTYPE_NONE	= 0,
	UNRELATEDBUFFERTYPE_VERTEX	= 1,

	UNRELATEDBUFFERTYPE_LAST	= 2		// number of enumerators above; keep last
};
3880 
// Extent of the buffer (full or partial) that a case uploads.
// NOTE(review): usage is outside this section - confirm against the cases that take it.
enum UploadRange
{
	UPLOADRANGE_FULL		= 0,
	UPLOADRANGE_PARTIAL		= 1,

	UPLOADRANGE_LAST		= 2		// number of enumerators above; keep last
};
3888 
// Dimensions of a layered grid scene, counted in cells. Every cell
// contributes six vertices (two triangles) to the generated geometry.
struct LayeredGridSpec
{
	int gridWidth;		// number of cells along X
	int gridHeight;		// number of cells along Y
	int gridLayers;		// number of cells stacked at every (x, y) position
};
3895 
getLayeredGridNumVertices(const LayeredGridSpec & scene)3896 static int getLayeredGridNumVertices (const LayeredGridSpec& scene)
3897 {
3898 	return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
3899 }
3900 
generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> & vertexData,const LayeredGridSpec & scene)3901 static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene)
3902 {
3903 	// interleave color & vertex data
3904 	const tcu::Vec4 green	(0.0f, 1.0f, 0.0f, 0.7f);
3905 	const tcu::Vec4 yellow	(1.0f, 1.0f, 0.0f, 0.8f);
3906 
3907 	vertexData.resize(getLayeredGridNumVertices(scene) * 2);
3908 
3909 	for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
3910 	for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
3911 	for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
3912 	{
3913 		const tcu::Vec4	color		= (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
3914 		const float		cellLeft	= (float(cellX  ) / scene.gridWidth  - 0.5f) * 2.0f;
3915 		const float		cellRight	= (float(cellX+1) / scene.gridWidth  - 0.5f) * 2.0f;
3916 		const float		cellTop		= (float(cellY+1) / scene.gridHeight - 0.5f) * 2.0f;
3917 		const float		cellBottom	= (float(cellY  ) / scene.gridHeight - 0.5f) * 2.0f;
3918 
3919 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  0] = color;
3920 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  1] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3921 
3922 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  2] = color;
3923 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  3] = tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);
3924 
3925 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  4] = color;
3926 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  5] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3927 
3928 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  6] = color;
3929 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  7] = tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);
3930 
3931 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  8] = color;
3932 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 +  9] = tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);
3933 
3934 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] = color;
3935 		vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] = tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
3936 	}
3937 }
3938 
generateLayeredGridIndexData(std::vector<deUint32> & indexData,const LayeredGridSpec & scene)3939 static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene)
3940 {
3941 	indexData.resize(getLayeredGridNumVertices(scene) * 2);
3942 
3943 	for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
3944 		indexData[ndx] = ndx;
3945 }
3946 
3947 class RenderPerformanceTestBase : public TestCase
3948 {
3949 public:
3950 							RenderPerformanceTestBase	(Context& context, const char* name, const char* description);
3951 							~RenderPerformanceTestBase	(void);
3952 
3953 protected:
3954 	void					init						(void);
3955 	void					deinit						(void);
3956 
3957 	void					waitGLResults				(void) const;
3958 	void					setupVertexAttribs			(void) const;
3959 
3960 	enum
3961 	{
3962 		RENDER_AREA_SIZE = 128
3963 	};
3964 
3965 private:
3966 	glu::ShaderProgram*		m_renderProgram;
3967 	int						m_colorLoc;
3968 	int						m_positionLoc;
3969 };
3970 
RenderPerformanceTestBase(Context & context,const char * name,const char * description)3971 RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description)
3972 	: TestCase			(context, tcu::NODETYPE_PERFORMANCE, name, description)
3973 	, m_renderProgram	(DE_NULL)
3974 	, m_colorLoc		(0)
3975 	, m_positionLoc		(0)
3976 {
3977 }
3978 
~RenderPerformanceTestBase(void)3979 RenderPerformanceTestBase::~RenderPerformanceTestBase (void)
3980 {
3981 	deinit();
3982 }
3983 
init(void)3984 void RenderPerformanceTestBase::init (void)
3985 {
3986 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3987 
3988 	m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader));
3989 	if (!m_renderProgram->isOk())
3990 	{
3991 		m_testCtx.getLog() << *m_renderProgram;
3992 		throw tcu::TestError("could not build program");
3993 	}
3994 
3995 	m_colorLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
3996 	m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");
3997 
3998 	if (m_colorLoc == -1)
3999 		throw tcu::TestError("Location of attribute a_color was -1");
4000 	if (m_positionLoc == -1)
4001 		throw tcu::TestError("Location of attribute a_position was -1");
4002 }
4003 
deinit(void)4004 void RenderPerformanceTestBase::deinit (void)
4005 {
4006 	delete m_renderProgram;
4007 	m_renderProgram = DE_NULL;
4008 }
4009 
setupVertexAttribs(void) const4010 void RenderPerformanceTestBase::setupVertexAttribs (void) const
4011 {
4012 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4013 
4014 	// buffers are bound
4015 
4016 	gl.enableVertexAttribArray(m_colorLoc);
4017 	gl.enableVertexAttribArray(m_positionLoc);
4018 
4019 	gl.vertexAttribPointer(m_colorLoc,    4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 0);
4020 	gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), (const tcu::Vec4*)DE_NULL + 1);
4021 
4022 	gl.useProgram(m_renderProgram->getProgram());
4023 
4024 	GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
4025 }
4026 
waitGLResults(void) const4027 void RenderPerformanceTestBase::waitGLResults (void) const
4028 {
4029 	tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4030 	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
4031 }
4032 
4033 template <typename SampleType>
4034 class RenderCase : public RenderPerformanceTestBase
4035 {
4036 public:
4037 									RenderCase						(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4038 									~RenderCase						(void);
4039 
4040 protected:
4041 	void							init							(void);
4042 	void							deinit							(void);
4043 
4044 private:
4045 	IterateResult					iterate							(void);
4046 
4047 protected:
4048 	struct SampleResult
4049 	{
4050 		LayeredGridSpec					scene;
4051 		RenderSampleResult<SampleType>	result;
4052 	};
4053 
4054 	int								getMinWorkloadSize				(void) const;
4055 	int								getMaxWorkloadSize				(void) const;
4056 	int								getMinWorkloadDataSize			(void) const;
4057 	int								getMaxWorkloadDataSize			(void) const;
4058 	int								getVertexDataSize				(void) const;
4059 	int								getNumSamples					(void) const;
4060 	void							uploadScene						(const LayeredGridSpec& scene);
4061 
4062 	virtual void					runSample						(SampleResult& sample) = 0;
4063 	virtual void					logAndSetTestResult				(const std::vector<SampleResult>& results);
4064 
4065 	void							mapResultsToRenderRateFormat	(std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const;
4066 
4067 	const DrawMethod				m_drawMethod;
4068 
4069 private:
4070 	glw::GLuint						m_attributeBufferID;
4071 	glw::GLuint						m_indexBufferID;
4072 	int								m_iterationNdx;
4073 	std::vector<int>				m_iterationOrder;
4074 	std::vector<SampleResult>		m_results;
4075 	int								m_numUnmapFailures;
4076 };
4077 
4078 template <typename SampleType>
RenderCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4079 RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4080 	: RenderPerformanceTestBase	(context, name, description)
4081 	, m_drawMethod				(drawMethod)
4082 	, m_attributeBufferID		(0)
4083 	, m_indexBufferID			(0)
4084 	, m_iterationNdx			(0)
4085 	, m_numUnmapFailures		(0)
4086 {
4087 	DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
4088 }
4089 
4090 template <typename SampleType>
~RenderCase(void)4091 RenderCase<SampleType>::~RenderCase (void)
4092 {
4093 	deinit();
4094 }
4095 
4096 template <typename SampleType>
init(void)4097 void RenderCase<SampleType>::init (void)
4098 {
4099 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4100 
4101 	RenderPerformanceTestBase::init();
4102 
4103 	// requirements
4104 
4105 	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
4106 		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
4107 		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
4108 
4109 	// gl state
4110 
4111 	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4112 
4113 	// enable bleding to prevent grid layers from being discarded
4114 	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
4115 	gl.blendEquation(GL_FUNC_ADD);
4116 	gl.enable(GL_BLEND);
4117 
4118 	// generate iterations
4119 
4120 	{
4121 		const int gridSizes[] = { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80,  86,  92,  98,  104, 110, 116, 122, 128 };
4122 
4123 		for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
4124 		{
4125 			m_results.push_back(SampleResult());
4126 
4127 			m_results.back().scene.gridHeight = gridSizes[gridNdx];
4128 			m_results.back().scene.gridWidth = gridSizes[gridNdx];
4129 			m_results.back().scene.gridLayers = 5;
4130 
4131 			m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);
4132 
4133 			// test cases set these, initialize to dummy values
4134 			m_results.back().result.renderDataSize = -1;
4135 			m_results.back().result.uploadedDataSize = -1;
4136 			m_results.back().result.unrelatedDataSize = -1;
4137 		}
4138 	}
4139 
4140 	// randomize iteration order
4141 	{
4142 		m_iterationOrder.resize(m_results.size());
4143 		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
4144 	}
4145 }
4146 
4147 template <typename SampleType>
deinit(void)4148 void RenderCase<SampleType>::deinit (void)
4149 {
4150 	RenderPerformanceTestBase::deinit();
4151 
4152 	if (m_attributeBufferID)
4153 	{
4154 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
4155 		m_attributeBufferID = 0;
4156 	}
4157 
4158 	if (m_indexBufferID)
4159 	{
4160 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
4161 		m_indexBufferID = 0;
4162 	}
4163 }
4164 
4165 template <typename SampleType>
iterate(void)4166 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void)
4167 {
4168 	const int		unmapFailureThreshold	= 3;
4169 	const int		currentIteration		= m_iterationNdx;
4170 	const int		currentConfigNdx		= m_iterationOrder[currentIteration];
4171 	SampleResult&	currentSample			= m_results[currentConfigNdx];
4172 
4173 	try
4174 	{
4175 		runSample(currentSample);
4176 		++m_iterationNdx;
4177 	}
4178 	catch (const UnmapFailureError& ex)
4179 	{
4180 		DE_UNREF(ex);
4181 		++m_numUnmapFailures;
4182 	}
4183 
4184 	if (m_numUnmapFailures > unmapFailureThreshold)
4185 		throw tcu::TestError("Got too many unmap errors");
4186 
4187 	if (m_iterationNdx < (int)m_iterationOrder.size())
4188 		return CONTINUE;
4189 
4190 	logAndSetTestResult(m_results);
4191 	return STOP;
4192 }
4193 
4194 template <typename SampleType>
getMinWorkloadSize(void) const4195 int RenderCase<SampleType>::getMinWorkloadSize (void) const
4196 {
4197 	int result = getLayeredGridNumVertices(m_results[0].scene);
4198 
4199 	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4200 	{
4201 		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4202 		result = de::min(result, workloadSize);
4203 	}
4204 
4205 	return result;
4206 }
4207 
4208 template <typename SampleType>
getMaxWorkloadSize(void) const4209 int RenderCase<SampleType>::getMaxWorkloadSize (void) const
4210 {
4211 	int result = getLayeredGridNumVertices(m_results[0].scene);
4212 
4213 	for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
4214 	{
4215 		const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
4216 		result = de::max(result, workloadSize);
4217 	}
4218 
4219 	return result;
4220 }
4221 
4222 template <typename SampleType>
getMinWorkloadDataSize(void) const4223 int RenderCase<SampleType>::getMinWorkloadDataSize (void) const
4224 {
4225 	return getMinWorkloadSize() * getVertexDataSize();
4226 }
4227 
4228 template <typename SampleType>
getMaxWorkloadDataSize(void) const4229 int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const
4230 {
4231 	return getMaxWorkloadSize() * getVertexDataSize();
4232 }
4233 
4234 template <typename SampleType>
getVertexDataSize(void) const4235 int RenderCase<SampleType>::getVertexDataSize (void) const
4236 {
4237 	const int numVectors	= 2;
4238 	const int vec4Size		= 4 * sizeof(float);
4239 
4240 	return numVectors * vec4Size;
4241 }
4242 
4243 template <typename SampleType>
getNumSamples(void) const4244 int RenderCase<SampleType>::getNumSamples (void) const
4245 {
4246 	return (int)m_results.size();
4247 }
4248 
4249 template <typename SampleType>
uploadScene(const LayeredGridSpec & scene)4250 void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene)
4251 {
4252 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
4253 
4254 	// vertex buffer
4255 	{
4256 		std::vector<tcu::Vec4> vertexData;
4257 
4258 		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
4259 
4260 		if (m_attributeBufferID == 0)
4261 			gl.genBuffers(1, &m_attributeBufferID);
4262 		gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
4263 		gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4264 	}
4265 
4266 	// index buffer
4267 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4268 	{
4269 		std::vector<deUint32> indexData;
4270 
4271 		generateLayeredGridIndexData(indexData, scene);
4272 
4273 		if (m_indexBufferID == 0)
4274 			gl.genBuffers(1, &m_indexBufferID);
4275 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
4276 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4277 	}
4278 
4279 	GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
4280 }
4281 
4282 template <typename SampleType>
logAndSetTestResult(const std::vector<SampleResult> & results)4283 void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results)
4284 {
4285 	std::vector<RenderSampleResult<SampleType> > mappedResults;
4286 
4287 	mapResultsToRenderRateFormat(mappedResults, results);
4288 
4289 	{
4290 		const RenderSampleAnalyzeResult	analysis	= analyzeSampleResults(m_testCtx.getLog(), mappedResults);
4291 		const float						rate		= analysis.renderRateAtRange;
4292 
4293 		if (rate == std::numeric_limits<float>::infinity())
4294 		{
4295 			// sample times are 1) invalid or 2) timer resolution too low
4296 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
4297 		}
4298 		else
4299 		{
4300 			// report transfer rate in millions of MiB/s
4301 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
4302 		}
4303 	}
4304 }
4305 
4306 template <typename SampleType>
mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> & dst,const std::vector<SampleResult> & src) const4307 void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const
4308 {
4309 	dst.resize(src.size());
4310 
4311 	for (int ndx = 0; ndx < (int)src.size(); ++ndx)
4312 		dst[ndx] = src[ndx].result;
4313 }
4314 
4315 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
4316 {
4317 public:
4318 			ReferenceRenderTimeCase		(Context& context, const char* name, const char* description, DrawMethod drawMethod);
4319 
4320 private:
4321 	void	init						(void);
4322 	void	runSample					(SampleResult& sample);
4323 };
4324 
ReferenceRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod)4325 ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
4326 	: RenderCase<RenderReadDuration>	(context, name, description, drawMethod)
4327 {
4328 }
4329 
init(void)4330 void ReferenceRenderTimeCase::init (void)
4331 {
4332 	const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4333 
4334 	// init parent
4335 	RenderCase<RenderReadDuration>::init();
4336 
4337 	// log
4338 	m_testCtx.getLog()
4339 		<< tcu::TestLog::Message
4340 		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4341 		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4342 		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4343 		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4344 		<< "Workload sizes are in the range ["
4345 			<< getMinWorkloadSize() << ",  "
4346 			<< getMaxWorkloadSize() << "] vertices (["
4347 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4348 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4349 		<< "Test result is the approximated total processing rate in MiB / s.\n"
4350 		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4351 		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4352 		<< tcu::TestLog::EndMessage;
4353 }
4354 
runSample(SampleResult & sample)4355 void ReferenceRenderTimeCase::runSample (SampleResult& sample)
4356 {
4357 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4358 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4359 	const int				numVertices		= getLayeredGridNumVertices(sample.scene);
4360 	const glu::Buffer		arrayBuffer		(m_context.getRenderContext());
4361 	const glu::Buffer		indexBuffer		(m_context.getRenderContext());
4362 	const glu::Buffer		unrelatedBuffer	(m_context.getRenderContext());
4363 	std::vector<tcu::Vec4>	vertexData;
4364 	std::vector<deUint32>	indexData;
4365 	deUint64				startTime;
4366 	deUint64				endTime;
4367 
4368 	// generate and upload buffers
4369 
4370 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4371 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4372 	gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4373 
4374 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4375 	{
4376 		generateLayeredGridIndexData(indexData, sample.scene);
4377 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4378 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4379 	}
4380 
4381 	setupVertexAttribs();
4382 
4383 	// make sure data is uploaded
4384 
4385 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4386 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4387 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4388 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4389 	else
4390 		DE_ASSERT(false);
4391 	waitGLResults();
4392 
4393 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4394 	gl.clear(GL_COLOR_BUFFER_BIT);
4395 	waitGLResults();
4396 
4397 	tcu::warmupCPU();
4398 
4399 	// Measure both draw and associated readpixels
4400 	{
4401 		startTime = deGetMicroseconds();
4402 
4403 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4404 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4405 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4406 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4407 		else
4408 			DE_ASSERT(false);
4409 
4410 		endTime = deGetMicroseconds();
4411 
4412 		sample.result.duration.renderDuration = endTime - startTime;
4413 	}
4414 
4415 	{
4416 		startTime = deGetMicroseconds();
4417 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4418 		endTime = deGetMicroseconds();
4419 
4420 		sample.result.duration.readDuration = endTime - startTime;
4421 	}
4422 
4423 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4424 	sample.result.uploadedDataSize = 0;
4425 	sample.result.unrelatedDataSize = 0;
4426 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4427 	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4428 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4429 }
4430 
4431 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
4432 {
4433 public:
4434 									UnrelatedUploadRenderTimeCase	(Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod);
4435 
4436 private:
4437 	void							init							(void);
4438 	void							runSample						(SampleResult& sample);
4439 
4440 	const UploadMethod				m_unrelatedUploadMethod;
4441 };
4442 
UnrelatedUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,UploadMethod unrelatedUploadMethod)4443 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod)
4444 	: RenderCase<UnrelatedUploadRenderReadDuration>	(context, name, description, drawMethod)
4445 	, m_unrelatedUploadMethod						(unrelatedUploadMethod)
4446 {
4447 	DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
4448 }
4449 
init(void)4450 void UnrelatedUploadRenderTimeCase::init (void)
4451 {
4452 	const char* const	targetFunctionName	= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4453 	tcu::MessageBuilder	message				(&m_testCtx.getLog());
4454 
4455 	// init parent
4456 	RenderCase<UnrelatedUploadRenderReadDuration>::init();
4457 
4458 	// log
4459 
4460 	message
4461 		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4462 		<< "Uploading an unrelated buffer just before issuing the rendering command with "
4463 			<< ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")		:
4464 				(m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")		:
4465 				(m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange")	:
4466 				((const char*)DE_NULL))
4467 			<< ".\n"
4468 		<< getNumSamples() << " test samples. Sample order is randomized.\n"
4469 		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4470 		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4471 		<< "Workload sizes are in the range ["
4472 			<< getMinWorkloadSize() << ",  "
4473 			<< getMaxWorkloadSize() << "] vertices (["
4474 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
4475 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
4476 		<< "Unrelated upload sizes are in the range ["
4477 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
4478 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
4479 		<< "Test result is the approximated total processing rate in MiB / s.\n"
4480 		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4481 		<< "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
4482 		<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n"
4483 		<< tcu::TestLog::EndMessage;
4484 }
4485 
runSample(SampleResult & sample)4486 void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample)
4487 {
4488 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4489 	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4490 	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4491 	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4492 	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4493 	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4494 	int						unrelatedUploadSize	= -1;
4495 	int						renderUploadSize;
4496 	std::vector<tcu::Vec4>	vertexData;
4497 	std::vector<deUint32>	indexData;
4498 	deUint64				startTime;
4499 	deUint64				endTime;
4500 
4501 	// generate and upload buffers
4502 
4503 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4504 	renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4505 
4506 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4507 	gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);
4508 
4509 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4510 	{
4511 		generateLayeredGridIndexData(indexData, sample.scene);
4512 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4513 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4514 	}
4515 
4516 	setupVertexAttribs();
4517 
4518 	// make sure data is uploaded
4519 
4520 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4521 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4522 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4523 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4524 	else
4525 		DE_ASSERT(false);
4526 	waitGLResults();
4527 
4528 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4529 	gl.clear(GL_COLOR_BUFFER_BIT);
4530 	waitGLResults();
4531 
4532 	tcu::warmupCPU();
4533 
4534 	// Unrelated upload
4535 	if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
4536 	{
4537 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4538 
4539 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4540 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
4541 	}
4542 	else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4543 	{
4544 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4545 
4546 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4547 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4548 		gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
4549 	}
4550 	else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4551 	{
4552 		void*			mapPtr;
4553 		glw::GLboolean	unmapSuccessful;
4554 
4555 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
4556 
4557 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
4558 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
4559 
4560 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4561 		if (!mapPtr)
4562 			throw tcu::Exception("MapBufferRange returned NULL");
4563 
4564 		deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);
4565 
4566 		// if unmapping fails, just try again later
4567 		unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
4568 		if (!unmapSuccessful)
4569 			throw UnmapFailureError();
4570 	}
4571 	else
4572 		DE_ASSERT(false);
4573 
4574 	DE_ASSERT(unrelatedUploadSize != -1);
4575 
4576 	// Measure both draw and associated readpixels
4577 	{
4578 		startTime = deGetMicroseconds();
4579 
4580 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
4581 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4582 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4583 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4584 		else
4585 			DE_ASSERT(false);
4586 
4587 		endTime = deGetMicroseconds();
4588 
4589 		sample.result.duration.renderDuration = endTime - startTime;
4590 	}
4591 
4592 	{
4593 		startTime = deGetMicroseconds();
4594 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4595 		endTime = deGetMicroseconds();
4596 
4597 		sample.result.duration.readDuration = endTime - startTime;
4598 	}
4599 
4600 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
4601 	sample.result.uploadedDataSize = renderUploadSize;
4602 	sample.result.unrelatedDataSize = unrelatedUploadSize;
4603 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4604 	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
4605 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
4606 }
4607 
4608 class ReferenceReadPixelsTimeCase : public TestCase
4609 {
4610 public:
4611 					ReferenceReadPixelsTimeCase		(Context& context, const char* name, const char* description);
4612 
4613 private:
4614 	void			init							(void);
4615 	IterateResult	iterate							(void);
4616 	void			logAndSetTestResult				(void);
4617 
4618 	enum
4619 	{
4620 		RENDER_AREA_SIZE = 128
4621 	};
4622 
4623 	const int			m_numSamples;
4624 	int					m_sampleNdx;
4625 	std::vector<int>	m_samples;
4626 };
4627 
ReferenceReadPixelsTimeCase(Context & context,const char * name,const char * description)4628 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description)
4629 	: TestCase		(context, tcu::NODETYPE_PERFORMANCE, name, description)
4630 	, m_numSamples	(20)
4631 	, m_sampleNdx	(0)
4632 	, m_samples		(m_numSamples)
4633 {
4634 }
4635 
init(void)4636 void ReferenceReadPixelsTimeCase::init (void)
4637 {
4638 	m_testCtx.getLog()
4639 		<< tcu::TestLog::Message
4640 		<< "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n"
4641 		<< "Test result is the median of the samples in microseconds.\n"
4642 		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
4643 		<< tcu::TestLog::EndMessage;
4644 }
4645 
iterate(void)4646 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void)
4647 {
4648 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
4649 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4650 	deUint64				startTime;
4651 	deUint64				endTime;
4652 
4653 	deYield();
4654 	tcu::warmupCPU();
4655 	deYield();
4656 
4657 	// "Render" something and wait for it
4658 	gl.clearColor(0.0f, 1.0f, m_sampleNdx / float(m_numSamples), 1.0f);
4659 	gl.clear(GL_COLOR_BUFFER_BIT);
4660 
4661 	// wait for results
4662 	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4663 
4664 	// measure time used in readPixels
4665 	startTime = deGetMicroseconds();
4666 	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
4667 	endTime = deGetMicroseconds();
4668 
4669 	m_samples[m_sampleNdx] = (int)(endTime - startTime);
4670 
4671 	if (++m_sampleNdx < m_numSamples)
4672 		return CONTINUE;
4673 
4674 	logAndSetTestResult();
4675 	return STOP;
4676 }
4677 
logAndSetTestResult(void)4678 void ReferenceReadPixelsTimeCase::logAndSetTestResult (void)
4679 {
4680 	// Log sample list
4681 	{
4682 		m_testCtx.getLog()
4683 			<< tcu::TestLog::SampleList("Samples", "Samples")
4684 			<< tcu::TestLog::SampleInfo
4685 			<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
4686 			<< tcu::TestLog::EndSampleInfo;
4687 
4688 		for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
4689 			m_testCtx.getLog()
4690 				<< tcu::TestLog::Sample
4691 				<< m_samples[sampleNdx]
4692 				<< tcu::TestLog::EndSample;
4693 
4694 		m_testCtx.getLog() << tcu::TestLog::EndSampleList;
4695 	}
4696 
4697 	// Log median
4698 	{
4699 		float median;
4700 		float limit60Low;
4701 		float limit60Up;
4702 
4703 		std::sort(m_samples.begin(), m_samples.end());
4704 		median		= linearSample(m_samples, 0.5f);
4705 		limit60Low	= linearSample(m_samples, 0.2f);
4706 		limit60Up	= linearSample(m_samples, 0.8f);
4707 
4708 		m_testCtx.getLog()
4709 			<< tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
4710 			<< tcu::TestLog::Message
4711 			<< "60 % of samples within range:\n"
4712 			<< tcu::TestLog::EndMessage
4713 			<< tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
4714 			<< tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);
4715 
4716 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
4717 	}
4718 }
4719 
4720 template <typename SampleType>
4721 class GenericUploadRenderTimeCase : public RenderCase<SampleType>
4722 {
4723 public:
4724 	typedef typename RenderCase<SampleType>::SampleResult SampleResult;
4725 
4726 							GenericUploadRenderTimeCase	(Context&				context,
4727 														 const char*			name,
4728 														 const char*			description,
4729 														 DrawMethod				method,
4730 														 TargetBuffer			targetBuffer,
4731 														 UploadMethod			uploadMethod,
4732 														 BufferState			bufferState,
4733 														 UploadRange			uploadRange,
4734 														 UnrelatedBufferType	unrelatedBufferType);
4735 
4736 private:
4737 	void						init					(void);
4738 	void						runSample				(SampleResult& sample);
4739 
4740 	using RenderCase<SampleType>::RENDER_AREA_SIZE;
4741 
4742 	const TargetBuffer			m_targetBuffer;
4743 	const BufferState			m_bufferState;
4744 	const UploadMethod			m_uploadMethod;
4745 	const UnrelatedBufferType	m_unrelatedBufferType;
4746 	const UploadRange			m_uploadRange;
4747 
4748 	using RenderCase<SampleType>::m_context;
4749 	using RenderCase<SampleType>::m_testCtx;
4750 	using RenderCase<SampleType>::m_drawMethod;
4751 };
4752 
4753 template <typename SampleType>
GenericUploadRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState,UploadRange uploadRange,UnrelatedBufferType unrelatedBufferType)4754 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context&				context,
4755 																	  const char*			name,
4756 																	  const char*			description,
4757 																	  DrawMethod			method,
4758 																	  TargetBuffer			targetBuffer,
4759 																	  UploadMethod			uploadMethod,
4760 																	  BufferState			bufferState,
4761 																	  UploadRange			uploadRange,
4762 																	  UnrelatedBufferType	unrelatedBufferType)
4763 	: RenderCase<SampleType>	(context, name, description, method)
4764 	, m_targetBuffer			(targetBuffer)
4765 	, m_bufferState				(bufferState)
4766 	, m_uploadMethod			(uploadMethod)
4767 	, m_unrelatedBufferType		(unrelatedBufferType)
4768 	, m_uploadRange				(uploadRange)
4769 {
4770 	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
4771 	DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
4772 	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
4773 	DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
4774 	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
4775 }
4776 
4777 template <typename SampleType>
init(void)4778 void GenericUploadRenderTimeCase<SampleType>::init (void)
4779 {
4780 	// init parent
4781 	RenderCase<SampleType>::init();
4782 
4783 	// log
4784 	{
4785 		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
4786 		const int			perVertexSize			= (m_targetBuffer == TARGETBUFFER_INDEX) ? (sizeof(deUint32)) : (sizeof(tcu::Vec4[2]));
4787 		const int			fullMinUploadSize		= RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
4788 		const int			fullMaxUploadSize		= RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
4789 		const int			minUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4));
4790 		const int			maxUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4));
4791 		const int			minUnrelatedUploadSize	= RenderCase<SampleType>::getMinWorkloadSize() * sizeof(tcu::Vec4[2]);
4792 		const int			maxUnrelatedUploadSize	= RenderCase<SampleType>::getMaxWorkloadSize() * sizeof(tcu::Vec4[2]);
4793 
4794 		m_testCtx.getLog()
4795 			<< tcu::TestLog::Message
4796 			<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
4797 			<< "The "
4798 				<< ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib"))
4799 				<< " buffer "
4800 				<< ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents "))
4801 				<< "sourced by the rendering command "
4802 				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded ") :
4803 					(m_uploadRange == UPLOADRANGE_FULL)		? ("are specified ") :
4804 					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("are updated (partial upload) ") :
4805 					((const char*)DE_NULL))
4806 				<< "just before issuing the rendering command.\n"
4807 			<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n"))
4808 			<< "Buffer "
4809 				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded") :
4810 					(m_uploadRange == UPLOADRANGE_FULL)		? ("contents are specified") :
4811 					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("contents are partially updated") :
4812 					((const char*)DE_NULL))
4813 				<< " with "
4814 				<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"))
4815 				<< " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
4816 			<< ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : (""))
4817 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : (""))
4818 			<< RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
4819 			<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
4820 			<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
4821 			<< "Workload sizes are in the range ["
4822 				<< RenderCase<SampleType>::getMinWorkloadSize() << ",  "
4823 				<< RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
4824 				<< "(["
4825 				<< getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
4826 				<< getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
4827 			<< "Upload sizes are in the range ["
4828 				<< getHumanReadableByteSize(minUploadSize) << ","
4829 				<< getHumanReadableByteSize(maxUploadSize) << "].\n"
4830 			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
4831 				("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
4832 				(""))
4833 			<< "Test result is the approximated processing rate in MiB / s.\n"
4834 			<< "Note that while upload time is measured, the time used is not included in the results.\n"
4835 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : (""))
4836 			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
4837 			<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n"
4838 			<< tcu::TestLog::EndMessage;
4839 	}
4840 }
4841 
4842 template <typename SampleType>
runSample(SampleResult & sample)4843 void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample)
4844 {
4845 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
4846 	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
4847 	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
4848 	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
4849 	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
4850 	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
4851 	deUint64				startTime;
4852 	deUint64				endTime;
4853 	std::vector<tcu::Vec4>	vertexData;
4854 	std::vector<deUint32>	indexData;
4855 
4856 	// create data
4857 
4858 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
4859 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
4860 		generateLayeredGridIndexData(indexData, sample.scene);
4861 
4862 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4863 	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
4864 	RenderCase<SampleType>::setupVertexAttribs();
4865 
4866 	// target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu
4867 
4868 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
4869 	{
4870 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW);
4871 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4872 	}
4873 	else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
4874 	{
4875 		// do not touch the vertex buffer
4876 	}
4877 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
4878 	{
4879 		// hint that the target buffer will be modified soon
4880 		const glw::GLenum vertexDataUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4881 		const glw::GLenum indexDataUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
4882 
4883 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage);
4884 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage);
4885 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4886 	}
4887 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
4888 	{
4889 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
4890 		{
4891 			// make the index buffer present on the gpu
4892 			// use another vertex buffer to keep original buffer in unused state
4893 			const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
4894 
4895 			gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
4896 			RenderCase<SampleType>::setupVertexAttribs();
4897 
4898 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4899 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
4900 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
4901 
4902 			// restore original state
4903 			gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
4904 			RenderCase<SampleType>::setupVertexAttribs();
4905 		}
4906 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
4907 		{
4908 			// make the vertex buffer present on the gpu
4909 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
4910 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
4911 		}
4912 		else
4913 			DE_ASSERT(false);
4914 	}
4915 	else
4916 		DE_ASSERT(false);
4917 
4918 	RenderCase<SampleType>::waitGLResults();
4919 	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
4920 
4921 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
4922 	gl.clear(GL_COLOR_BUFFER_BIT);
4923 	RenderCase<SampleType>::waitGLResults();
4924 
4925 	tcu::warmupCPU();
4926 
4927 	// upload
4928 
4929 	{
4930 		glw::GLenum		target;
4931 		glw::GLsizeiptr	size;
4932 		glw::GLintptr	offset = 0;
4933 		const void*		source;
4934 
4935 		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
4936 		{
4937 			target	= GL_ARRAY_BUFFER;
4938 			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
4939 			source	= &vertexData[0];
4940 		}
4941 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
4942 		{
4943 			target	= GL_ELEMENT_ARRAY_BUFFER;
4944 			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
4945 			source	= &indexData[0];
4946 		}
4947 		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4948 		{
4949 			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4950 
4951 			target	= GL_ARRAY_BUFFER;
4952 			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
4953 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
4954 			source	= (const deUint8*)&vertexData[0] + offset;
4955 		}
4956 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
4957 		{
4958 			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);
4959 
4960 			// upload to 25% - 75% range
4961 			target	= GL_ELEMENT_ARRAY_BUFFER;
4962 			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
4963 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
4964 			source	= (const deUint8*)&indexData[0] + offset;
4965 		}
4966 		else
4967 		{
4968 			DE_ASSERT(false);
4969 			return;
4970 		}
4971 
4972 		startTime = deGetMicroseconds();
4973 
4974 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
4975 			gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
4976 		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
4977 		{
4978 			// create buffer storage
4979 			if (m_bufferState == BUFFERSTATE_NEW)
4980 				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4981 			gl.bufferSubData(target, offset, size, source);
4982 		}
4983 		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
4984 		{
4985 			void*			mapPtr;
4986 			glw::GLboolean	unmapSuccessful;
4987 
4988 			// create buffer storage
4989 			if (m_bufferState == BUFFERSTATE_NEW)
4990 				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
4991 
4992 			mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
4993 			if (!mapPtr)
4994 				throw tcu::Exception("MapBufferRange returned NULL");
4995 
4996 			deMemcpy(mapPtr, source, (int)size);
4997 
4998 			// if unmapping fails, just try again later
4999 			unmapSuccessful = gl.unmapBuffer(target);
5000 			if (!unmapSuccessful)
5001 				throw UnmapFailureError();
5002 		}
5003 		else
5004 			DE_ASSERT(false);
5005 
5006 		endTime = deGetMicroseconds();
5007 
5008 		sample.result.uploadedDataSize = (int)size;
5009 		sample.result.duration.uploadDuration = endTime - startTime;
5010 	}
5011 
5012 	// unrelated
5013 	if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
5014 	{
5015 		const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));
5016 
5017 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
5018 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
5019 		// Attibute pointers are not modified, no need restore state
5020 
5021 		sample.result.unrelatedDataSize = unrelatedUploadSize;
5022 	}
5023 
5024 	// draw
5025 	{
5026 		startTime = deGetMicroseconds();
5027 
5028 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5029 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5030 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5031 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5032 		else
5033 			DE_ASSERT(false);
5034 
5035 		endTime = deGetMicroseconds();
5036 
5037 		sample.result.duration.renderDuration = endTime - startTime;
5038 	}
5039 
5040 	// read
5041 	{
5042 		startTime = deGetMicroseconds();
5043 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5044 		endTime = deGetMicroseconds();
5045 
5046 		sample.result.duration.readDuration = endTime - startTime;
5047 	}
5048 
5049 	// set results
5050 
5051 	sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;
5052 
5053 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
5054 	sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration;
5055 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
5056 }
5057 
5058 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
5059 {
5060 public:
5061 	enum MapFlags
5062 	{
5063 		MAPFLAG_NONE = 0,
5064 		MAPFLAG_INVALIDATE_BUFFER,
5065 		MAPFLAG_INVALIDATE_RANGE,
5066 
5067 		MAPFLAG_LAST
5068 	};
5069 	enum UploadBufferTarget
5070 	{
5071 		UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
5072 		UPLOADBUFFERTARGET_SAME_BUFFER,
5073 
5074 		UPLOADBUFFERTARGET_LAST
5075 	};
5076 								BufferInUseRenderTimeCase	(Context&			context,
5077 															 const char*		name,
5078 															 const char*		description,
5079 															 DrawMethod			method,
5080 															 MapFlags			mapFlags,
5081 															 TargetBuffer		targetBuffer,
5082 															 UploadMethod		uploadMethod,
5083 															 UploadRange		uploadRange,
5084 															 UploadBufferTarget	uploadTarget);
5085 
5086 private:
5087 	void						init						(void);
5088 	void						runSample					(SampleResult& sample);
5089 
5090 	const TargetBuffer			m_targetBuffer;
5091 	const UploadMethod			m_uploadMethod;
5092 	const UploadRange			m_uploadRange;
5093 	const MapFlags				m_mapFlags;
5094 	const UploadBufferTarget	m_uploadBufferTarget;
5095 };
5096 
BufferInUseRenderTimeCase(Context & context,const char * name,const char * description,DrawMethod method,MapFlags mapFlags,TargetBuffer targetBuffer,UploadMethod uploadMethod,UploadRange uploadRange,UploadBufferTarget uploadTarget)5097 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context&				context,
5098 													  const char*			name,
5099 													  const char*			description,
5100 													  DrawMethod			method,
5101 													  MapFlags				mapFlags,
5102 													  TargetBuffer			targetBuffer,
5103 													  UploadMethod			uploadMethod,
5104 													  UploadRange			uploadRange,
5105 													  UploadBufferTarget	uploadTarget)
5106 	: RenderCase<RenderUploadRenderReadDuration>	(context, name, description, method)
5107 	, m_targetBuffer								(targetBuffer)
5108 	, m_uploadMethod								(uploadMethod)
5109 	, m_uploadRange									(uploadRange)
5110 	, m_mapFlags									(mapFlags)
5111 	, m_uploadBufferTarget							(uploadTarget)
5112 {
5113 	DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
5114 	DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
5115 	DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
5116 	DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
5117 	DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
5118 }
5119 
init(void)5120 void BufferInUseRenderTimeCase::init (void)
5121 {
5122 	RenderCase<RenderUploadRenderReadDuration>::init();
5123 
5124 	// log
5125 	{
5126 		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
5127 		const char* const	uploadFunctionName		= (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange");
5128 		const bool			isReferenceCase			= (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
5129 		tcu::MessageBuilder	message					(&m_testCtx.getLog());
5130 
5131 		message	<< "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
5132 				<< targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n";
5133 
5134 		if (isReferenceCase)
5135 			message << "Rendering:\n"
5136 					<< "    before test: create and use buffers B and C\n"
5137 					<< "    first draw: render using buffer B\n"
5138 					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer C contents\n")	:
5139 						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer C contents\n")	:
5140 						((const char*)DE_NULL))
5141 					<< "    second draw: render using buffer C\n"
5142 					<< "    read: readPixels\n";
5143 		else
5144 			message << "Rendering:\n"
5145 					<< "    before test: create and use buffer B\n"
5146 					<< "    first draw: render using buffer B\n"
5147 					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer B contents\n")	:
5148 						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer B contents\n")	:
5149 						((const char*)DE_NULL))
5150 					<< "    second draw: render using buffer B\n"
5151 					<< "    read: readPixels\n";
5152 
5153 		message	<< "Uploading using " << uploadFunctionName
5154 					<< ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT")	:
5155 						(m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT")	:
5156 						(m_mapFlags == MAPFLAG_NONE)				? ("")														:
5157 						((const char*)DE_NULL))
5158 					<< "\n"
5159 				<< getNumSamples() << " test samples. Sample order is randomized.\n"
5160 				<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
5161 				<< "Workload sizes are in the range ["
5162 					<< getMinWorkloadSize() << ",  "
5163 					<< getMaxWorkloadSize() << "] vertices "
5164 					<< "(["
5165 					<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
5166 					<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
5167 				<< "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n";
5168 
5169 		if (isReferenceCase)
5170 			message	<< "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results.";
5171 		else
5172 			message	<< "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n";
5173 
5174 		message << tcu::TestLog::EndMessage;
5175 	}
5176 }
5177 
runSample(SampleResult & sample)5178 void BufferInUseRenderTimeCase::runSample (SampleResult& sample)
5179 {
5180 	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5181 	const glu::Buffer		arrayBuffer				(m_context.getRenderContext());
5182 	const glu::Buffer		indexBuffer				(m_context.getRenderContext());
5183 	const glu::Buffer		alternativeUploadBuffer	(m_context.getRenderContext());
5184 	const int				numVertices				= getLayeredGridNumVertices(sample.scene);
5185 	tcu::Surface			resultSurface			(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5186 	deUint64				startTime;
5187 	deUint64				endTime;
5188 	std::vector<tcu::Vec4>	vertexData;
5189 	std::vector<deUint32>	indexData;
5190 
5191 	// create data
5192 
5193 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
5194 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5195 		generateLayeredGridIndexData(indexData, sample.scene);
5196 
5197 	// make buffers used
5198 
5199 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5200 	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5201 	setupVertexAttribs();
5202 
5203 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5204 	{
5205 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5206 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5207 	}
5208 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5209 	{
5210 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5211 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5212 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5213 	}
5214 	else
5215 		DE_ASSERT(false);
5216 
5217 	// another pair of buffers for reference case
5218 	if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5219 	{
5220 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5221 		{
5222 			gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
5223 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
5224 
5225 			setupVertexAttribs();
5226 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5227 		}
5228 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5229 		{
5230 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
5231 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
5232 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5233 		}
5234 		else
5235 			DE_ASSERT(false);
5236 
5237 		// restore state
5238 		gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
5239 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
5240 		setupVertexAttribs();
5241 	}
5242 
5243 	waitGLResults();
5244 	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");
5245 
5246 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
5247 	gl.clear(GL_COLOR_BUFFER_BIT);
5248 	waitGLResults();
5249 
5250 	tcu::warmupCPU();
5251 
5252 	// first draw
5253 	{
5254 		startTime = deGetMicroseconds();
5255 
5256 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5257 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5258 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5259 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5260 		else
5261 			DE_ASSERT(false);
5262 
5263 		endTime = deGetMicroseconds();
5264 
5265 		sample.result.duration.firstRenderDuration = endTime - startTime;
5266 	}
5267 
5268 	// upload
5269 	{
5270 		glw::GLenum		target;
5271 		glw::GLsizeiptr	size;
5272 		glw::GLintptr	offset = 0;
5273 		const void*		source;
5274 
5275 		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
5276 		{
5277 			target	= GL_ARRAY_BUFFER;
5278 			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
5279 			source	= &vertexData[0];
5280 		}
5281 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
5282 		{
5283 			target	= GL_ELEMENT_ARRAY_BUFFER;
5284 			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
5285 			source	= &indexData[0];
5286 		}
5287 		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5288 		{
5289 			target	= GL_ARRAY_BUFFER;
5290 			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
5291 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
5292 			source	= (const deUint8*)&vertexData[0] + offset;
5293 		}
5294 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
5295 		{
5296 			// upload to 25% - 75% range
5297 			target	= GL_ELEMENT_ARRAY_BUFFER;
5298 			size	= (glw::GLsizeiptr)deAlign32((glw::GLsizeiptr)((int)(indexData.size() * sizeof(deUint32))) / 2, 4);
5299 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
5300 			source	= (const deUint8*)&indexData[0] + offset;
5301 		}
5302 		else
5303 		{
5304 			DE_ASSERT(false);
5305 			return;
5306 		}
5307 
5308 		// reference case? don't modify the buffer in use
5309 		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
5310 			gl.bindBuffer(target, *alternativeUploadBuffer);
5311 
5312 		startTime = deGetMicroseconds();
5313 
5314 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5315 			gl.bufferData(target, size, source, GL_STREAM_DRAW);
5316 		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5317 			gl.bufferSubData(target, offset, size, source);
5318 		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5319 		{
5320 			const int		mapFlags	= (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)	:
5321 										  (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)	:
5322 										  (-1);
5323 			void*			mapPtr;
5324 			glw::GLboolean	unmapSuccessful;
5325 
5326 			mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
5327 			if (!mapPtr)
5328 				throw tcu::Exception("MapBufferRange returned NULL");
5329 
5330 			deMemcpy(mapPtr, source, (int)size);
5331 
5332 			// if unmapping fails, just try again later
5333 			unmapSuccessful = gl.unmapBuffer(target);
5334 			if (!unmapSuccessful)
5335 				throw UnmapFailureError();
5336 		}
5337 		else
5338 			DE_ASSERT(false);
5339 
5340 		endTime = deGetMicroseconds();
5341 
5342 		sample.result.uploadedDataSize = (int)size;
5343 		sample.result.duration.uploadDuration = endTime - startTime;
5344 	}
5345 
5346 	// second draw
5347 	{
5348 		// Source vertex data from alternative buffer in refernce case
5349 		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
5350 			setupVertexAttribs();
5351 
5352 		startTime = deGetMicroseconds();
5353 
5354 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5355 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
5356 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5357 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
5358 		else
5359 			DE_ASSERT(false);
5360 
5361 		endTime = deGetMicroseconds();
5362 
5363 		sample.result.duration.secondRenderDuration = endTime - startTime;
5364 	}
5365 
5366 	// read
5367 	{
5368 		startTime = deGetMicroseconds();
5369 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5370 		endTime = deGetMicroseconds();
5371 
5372 		sample.result.duration.readDuration = endTime - startTime;
5373 	}
5374 
5375 	// set results
5376 
5377 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
5378 
5379 	sample.result.duration.renderReadDuration	= sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
5380 	sample.result.duration.totalDuration		= sample.result.duration.firstRenderDuration +
5381 												  sample.result.duration.uploadDuration +
5382 												  sample.result.duration.secondRenderDuration +
5383 												  sample.result.duration.readDuration;
5384 	sample.result.duration.fitResponseDuration	= sample.result.duration.renderReadDuration;
5385 }
5386 
5387 class UploadWaitDrawCase : public RenderPerformanceTestBase
5388 {
5389 public:
5390 	struct Sample
5391 	{
5392 		int			numFrames;
5393 		deUint64	uploadCallEndTime;
5394 	};
5395 	struct Result
5396 	{
5397 		deUint64	uploadDuration;
5398 		deUint64	renderDuration;
5399 		deUint64	readDuration;
5400 		deUint64	renderReadDuration;
5401 
5402 		deUint64	timeBeforeUse;
5403 	};
5404 
5405 							UploadWaitDrawCase				(Context&		context,
5406 															 const char*	name,
5407 															 const char*	description,
5408 															 DrawMethod		drawMethod,
5409 															 TargetBuffer	targetBuffer,
5410 															 UploadMethod	uploadMethod,
5411 															 BufferState	bufferState);
5412 							~UploadWaitDrawCase				(void);
5413 
5414 private:
5415 	void					init							(void);
5416 	void					deinit							(void);
5417 	IterateResult			iterate							(void);
5418 
5419 	void					uploadBuffer					(Sample& sample, Result& result);
5420 	void					drawFromBuffer					(Sample& sample, Result& result);
5421 	void					reuseAndDeleteBuffer			(void);
5422 	void					logAndSetTestResult				(void);
5423 	void					logSamples						(void);
5424 	void					drawMisc						(void);
5425 	int						findStabilizationSample			(deUint64 (Result::*target), const char* description);
5426 	bool					checkSampleTemporalStability	(deUint64 (Result::*target), const char* description);
5427 
5428 	const DrawMethod		m_drawMethod;
5429 	const TargetBuffer		m_targetBuffer;
5430 	const UploadMethod		m_uploadMethod;
5431 	const BufferState		m_bufferState;
5432 
5433 	const int				m_numSamplesPerSwap;
5434 	const int				m_numMaxSwaps;
5435 
5436 	int						m_frameNdx;
5437 	int						m_sampleNdx;
5438 	int						m_numVertices;
5439 
5440 	std::vector<tcu::Vec4>	m_vertexData;
5441 	std::vector<deUint32>	m_indexData;
5442 	std::vector<Sample>		m_samples;
5443 	std::vector<Result>		m_results;
5444 	std::vector<int>		m_iterationOrder;
5445 
5446 	deUint32				m_vertexBuffer;
5447 	deUint32				m_indexBuffer;
5448 	deUint32				m_miscBuffer;
5449 	int						m_numMiscVertices;
5450 };
5451 
UploadWaitDrawCase(Context & context,const char * name,const char * description,DrawMethod drawMethod,TargetBuffer targetBuffer,UploadMethod uploadMethod,BufferState bufferState)5452 UploadWaitDrawCase::UploadWaitDrawCase (Context&		context,
5453 										const char*		name,
5454 										const char*		description,
5455 										DrawMethod		drawMethod,
5456 										TargetBuffer	targetBuffer,
5457 										UploadMethod	uploadMethod,
5458 										BufferState		bufferState)
5459 	: RenderPerformanceTestBase	(context, name, description)
5460 	, m_drawMethod				(drawMethod)
5461 	, m_targetBuffer			(targetBuffer)
5462 	, m_uploadMethod			(uploadMethod)
5463 	, m_bufferState				(bufferState)
5464 	, m_numSamplesPerSwap		(10)
5465 	, m_numMaxSwaps				(4)
5466 	, m_frameNdx				(0)
5467 	, m_sampleNdx				(0)
5468 	, m_numVertices				(-1)
5469 	, m_vertexBuffer			(0)
5470 	, m_indexBuffer				(0)
5471 	, m_miscBuffer				(0)
5472 	, m_numMiscVertices			(-1)
5473 {
5474 }
5475 
~UploadWaitDrawCase(void)5476 UploadWaitDrawCase::~UploadWaitDrawCase (void)
5477 {
5478 	deinit();
5479 }
5480 
init(void)5481 void UploadWaitDrawCase::init (void)
5482 {
5483 	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
5484 	const int				vertexAttribSize		= (int)sizeof(tcu::Vec4) * 2; // color4, position4
5485 	const int				vertexIndexSize			= (int)sizeof(deUint32);
5486 	const int				vertexUploadDataSize	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);
5487 
5488 	RenderPerformanceTestBase::init();
5489 
5490 	// requirements
5491 
5492 	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
5493 		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
5494 		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");
5495 
5496 	// gl state
5497 
5498 	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5499 
5500 	// enable bleding to prevent grid layers from being discarded
5501 
5502 	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
5503 	gl.blendEquation(GL_FUNC_ADD);
5504 	gl.enable(GL_BLEND);
5505 
5506 	// scene
5507 
5508 	{
5509 		LayeredGridSpec scene;
5510 
5511 		// create ~8MB workload with similar characteristics as in the other test
5512 		// => makes comparison to other results more straightforward
5513 		scene.gridWidth = 93;
5514 		scene.gridHeight = 93;
5515 		scene.gridLayers = 5;
5516 
5517 		generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
5518 		generateLayeredGridIndexData(m_indexData, scene);
5519 		m_numVertices = getLayeredGridNumVertices(scene);
5520 	}
5521 
5522 	// buffers
5523 
5524 	if (m_bufferState == BUFFERSTATE_NEW)
5525 	{
5526 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5527 		{
5528 			// reads from two buffers, prepare the static buffer
5529 
5530 			if (m_targetBuffer == TARGETBUFFER_VERTEX)
5531 			{
5532 				// index buffer is static, use another vertex buffer to keep original buffer in unused state
5533 				const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());
5534 
5535 				gl.genBuffers(1, &m_indexBuffer);
5536 				gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
5537 				gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5538 				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5539 				gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW);
5540 
5541 				setupVertexAttribs();
5542 				gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5543 			}
5544 			else if (m_targetBuffer == TARGETBUFFER_INDEX)
5545 			{
5546 				// vertex buffer is static
5547 				gl.genBuffers(1, &m_vertexBuffer);
5548 				gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5549 				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
5550 
5551 				setupVertexAttribs();
5552 				gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5553 			}
5554 			else
5555 				DE_ASSERT(false);
5556 		}
5557 	}
5558 	else if (m_bufferState == BUFFERSTATE_EXISTING)
5559 	{
5560 		const glw::GLenum vertexUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5561 		const glw::GLenum indexUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
5562 
5563 		gl.genBuffers(1, &m_vertexBuffer);
5564 		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5565 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage);
5566 
5567 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5568 		{
5569 			gl.genBuffers(1, &m_indexBuffer);
5570 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5571 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage);
5572 		}
5573 
5574 		setupVertexAttribs();
5575 
5576 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5577 			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5578 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5579 			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5580 		else
5581 			DE_ASSERT(false);
5582 	}
5583 	else
5584 		DE_ASSERT(false);
5585 
5586 	// misc draw buffer
5587 	{
5588 		std::vector<tcu::Vec4>	vertexData;
5589 		LayeredGridSpec			scene;
5590 
5591 		// create ~1.5MB workload with similar characteristics
5592 		scene.gridWidth = 40;
5593 		scene.gridHeight = 40;
5594 		scene.gridLayers = 5;
5595 
5596 		generateLayeredGridVertexAttribData4C4V(vertexData, scene);
5597 
5598 		gl.genBuffers(1, &m_miscBuffer);
5599 		gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
5600 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW);
5601 
5602 		m_numMiscVertices = getLayeredGridNumVertices(scene);
5603 	}
5604 
5605 	// iterations
5606 	{
5607 		m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5608 		m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
5609 
5610 		for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
5611 		for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
5612 		{
5613 			const int index = numSwaps*m_numSamplesPerSwap + sampleNdx;
5614 
5615 			m_samples[index].numFrames = numSwaps;
5616 		}
5617 
5618 		m_iterationOrder.resize(m_samples.size());
5619 		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
5620 	}
5621 
5622 	// log
5623 	m_testCtx.getLog()
5624 		<< tcu::TestLog::Message
5625 		<< "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
5626 		<< "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n"
5627 		<< "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n"
5628 		<< "Uploading using "
5629 			<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")																							:
5630 				(m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")																							:
5631 				(m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT")	:
5632 				((const char*)DE_NULL))
5633 			<< "\n"
5634 		<< "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
5635 		<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
5636 		<< "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
5637 		<< "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
5638 		<< tcu::TestLog::EndMessage;
5639 }
5640 
deinit(void)5641 void UploadWaitDrawCase::deinit (void)
5642 {
5643 	RenderPerformanceTestBase::deinit();
5644 
5645 	if (m_vertexBuffer)
5646 	{
5647 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
5648 		m_vertexBuffer = 0;
5649 	}
5650 	if (m_indexBuffer)
5651 	{
5652 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
5653 		m_indexBuffer = 0;
5654 	}
5655 	if (m_miscBuffer)
5656 	{
5657 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
5658 		m_miscBuffer = 0;
5659 	}
5660 }
5661 
iterate(void)5662 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void)
5663 {
5664 	const glw::Functions&	gl								= m_context.getRenderContext().getFunctions();
5665 	const int				betweenIterationDummyFrameCount = 5; // draw misc between test samples
5666 	const int				frameNdx						= m_frameNdx++;
5667 	const int				currentSampleNdx				= m_iterationOrder[m_sampleNdx];
5668 
5669 	// Simulate work for about 8ms
5670 	busyWait(8000);
5671 
5672 	// Dummy rendering during dummy frames
5673 	if (frameNdx != m_samples[currentSampleNdx].numFrames)
5674 	{
5675 		// draw similar from another buffer
5676 		drawMisc();
5677 	}
5678 
5679 	if (frameNdx == 0)
5680 	{
5681 		// upload and start the clock
5682 		uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5683 	}
5684 
5685 	if (frameNdx == m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
5686 	{
5687 		// draw using the uploaded buffer
5688 		drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
5689 
5690 		// re-use buffer for something else to make sure test iteration do not affect each other
5691 		if (m_bufferState == BUFFERSTATE_NEW)
5692 			reuseAndDeleteBuffer();
5693 	}
5694 	else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationDummyFrameCount)
5695 	{
5696 		// next sample
5697 		++m_sampleNdx;
5698 		m_frameNdx = 0;
5699 	}
5700 
5701 	GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");
5702 
5703 	if (m_sampleNdx < (int)m_samples.size())
5704 		return CONTINUE;
5705 
5706 	logAndSetTestResult();
5707 	return STOP;
5708 }
5709 
uploadBuffer(Sample & sample,Result & result)5710 void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result)
5711 {
5712 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
5713 	deUint64				startTime;
5714 	deUint64				endTime;
5715 	glw::GLenum				target;
5716 	glw::GLsizeiptr			size;
5717 	const void*				source;
5718 
5719 	// data source
5720 
5721 	if (m_targetBuffer == TARGETBUFFER_VERTEX)
5722 	{
5723 		DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5724 
5725 		target	= GL_ARRAY_BUFFER;
5726 		size	= (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
5727 		source	= &m_vertexData[0];
5728 	}
5729 	else if (m_targetBuffer == TARGETBUFFER_INDEX)
5730 	{
5731 		DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));
5732 
5733 		target	= GL_ELEMENT_ARRAY_BUFFER;
5734 		size	= (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32));
5735 		source	= &m_indexData[0];
5736 	}
5737 	else
5738 	{
5739 		DE_ASSERT(false);
5740 		return;
5741 	}
5742 
5743 	// gen buffer
5744 
5745 	if (m_bufferState == BUFFERSTATE_NEW)
5746 	{
5747 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5748 		{
5749 			gl.genBuffers(1, &m_vertexBuffer);
5750 			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5751 		}
5752 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5753 		{
5754 			gl.genBuffers(1, &m_indexBuffer);
5755 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5756 		}
5757 		else
5758 			DE_ASSERT(false);
5759 
5760 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA ||
5761 			m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5762 		{
5763 			gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
5764 		}
5765 	}
5766 	else if (m_bufferState == BUFFERSTATE_EXISTING)
5767 	{
5768 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
5769 			gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5770 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
5771 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5772 		else
5773 			DE_ASSERT(false);
5774 	}
5775 	else
5776 		DE_ASSERT(false);
5777 
5778 	// upload
5779 
5780 	startTime = deGetMicroseconds();
5781 
5782 	if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
5783 		gl.bufferData(target, size, source, GL_STATIC_DRAW);
5784 	else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
5785 		gl.bufferSubData(target, 0, size, source);
5786 	else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
5787 	{
5788 		void*			mapPtr;
5789 		glw::GLboolean	unmapSuccessful;
5790 
5791 		mapPtr = gl.mapBufferRange(target, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
5792 		if (!mapPtr)
5793 			throw tcu::Exception("MapBufferRange returned NULL");
5794 
5795 		deMemcpy(mapPtr, source, (int)size);
5796 
5797 		// if unmapping fails, just try again later
5798 		unmapSuccessful = gl.unmapBuffer(target);
5799 		if (!unmapSuccessful)
5800 			throw UnmapFailureError();
5801 	}
5802 	else
5803 		DE_ASSERT(false);
5804 
5805 	endTime = deGetMicroseconds();
5806 
5807 	sample.uploadCallEndTime = endTime;
5808 	result.uploadDuration = endTime - startTime;
5809 }
5810 
drawFromBuffer(Sample & sample,Result & result)5811 void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result)
5812 {
5813 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
5814 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
5815 	deUint64				startTime;
5816 	deUint64				endTime;
5817 
5818 	DE_ASSERT(m_vertexBuffer != 0);
5819 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5820 		DE_ASSERT(m_indexBuffer == 0);
5821 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5822 		DE_ASSERT(m_indexBuffer != 0);
5823 	else
5824 		DE_ASSERT(false);
5825 
5826 	// draw
5827 	{
5828 		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
5829 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5830 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
5831 
5832 		setupVertexAttribs();
5833 
5834 		// microseconds passed since return from upload call
5835 		result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;
5836 
5837 		startTime = deGetMicroseconds();
5838 
5839 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
5840 			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
5841 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
5842 			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
5843 		else
5844 			DE_ASSERT(false);
5845 
5846 		endTime = deGetMicroseconds();
5847 
5848 		result.renderDuration = endTime - startTime;
5849 	}
5850 
5851 	// read
5852 	{
5853 		startTime = deGetMicroseconds();
5854 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
5855 		endTime = deGetMicroseconds();
5856 
5857 		result.readDuration = endTime - startTime;
5858 	}
5859 
5860 	result.renderReadDuration = result.renderDuration + result.readDuration;
5861 }
5862 
reuseAndDeleteBuffer(void)5863 void UploadWaitDrawCase::reuseAndDeleteBuffer (void)
5864 {
5865 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
5866 
5867 	if (m_targetBuffer == TARGETBUFFER_INDEX)
5868 	{
5869 		// respecify and delete index buffer
5870 		static const deUint32 indices[3] = {1, 3, 8};
5871 
5872 		DE_ASSERT(m_indexBuffer != 0);
5873 
5874 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
5875 		gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
5876 		gl.deleteBuffers(1, &m_indexBuffer);
5877 		m_indexBuffer = 0;
5878 	}
5879 	else if (m_targetBuffer == TARGETBUFFER_VERTEX)
5880 	{
5881 		// respecify and delete vertex buffer
5882 		static const tcu::Vec4 coloredTriangle[6] =
5883 		{
5884 			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f),
5885 			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f,  0.4f, 0.0f, 1.0f),
5886 			tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f),
5887 		};
5888 
5889 		DE_ASSERT(m_vertexBuffer != 0);
5890 
5891 		gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
5892 		gl.drawArrays(GL_TRIANGLES, 0, 3);
5893 		gl.deleteBuffers(1, &m_vertexBuffer);
5894 		m_vertexBuffer = 0;
5895 	}
5896 
5897 	waitGLResults();
5898 }
5899 
logAndSetTestResult(void)5900 void UploadWaitDrawCase::logAndSetTestResult (void)
5901 {
5902 	int		uploadStabilization;
5903 	int		renderReadStabilization;
5904 	int		renderStabilization;
5905 	int		readStabilization;
5906 	bool	temporallyStable;
5907 
5908 	{
5909 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
5910 		logSamples();
5911 	}
5912 
5913 	{
5914 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");
5915 
5916 		// log stabilization points
5917 		renderReadStabilization	= findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
5918 		uploadStabilization		= findStabilizationSample(&Result::uploadDuration, "Upload time");
5919 		renderStabilization		= findStabilizationSample(&Result::renderDuration, "Draw call time");
5920 		readStabilization		= findStabilizationSample(&Result::readDuration, "ReadPixels time");
5921 
5922 		temporallyStable		= true;
5923 		temporallyStable		&= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
5924 		temporallyStable		&= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
5925 		temporallyStable		&= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
5926 		temporallyStable		&= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
5927 	}
5928 
5929 	{
5930 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");
5931 
5932 		// Check result sanily
5933 		if (uploadStabilization != 0)
5934 			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage;
5935 		if (!temporallyStable)
5936 			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage;
5937 
5938 		// render & read
5939 		if (renderReadStabilization == -1)
5940 			m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5941 		else
5942 			m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization);
5943 
5944 		// draw call
5945 		if (renderStabilization == -1)
5946 			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage;
5947 		else
5948 			m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization);
5949 
5950 		// readpixels
5951 		if (readStabilization == -1)
5952 			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
5953 		else
5954 			m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization);
5955 
5956 		// Report renderReadStabilization
5957 		if (renderReadStabilization != -1)
5958 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
5959 		else
5960 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
5961 	}
5962 }
5963 
logSamples(void)5964 void UploadWaitDrawCase::logSamples (void)
5965 {
5966 	// Inverse m_iterationOrder
5967 
5968 	std::vector<int> runOrder(m_iterationOrder.size());
5969 	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
5970 		runOrder[m_iterationOrder[ndx]] = ndx;
5971 
5972 	// Log samples
5973 
5974 	m_testCtx.getLog()
5975 		<< tcu::TestLog::SampleList("Samples", "Samples")
5976 		<< tcu::TestLog::SampleInfo
5977 		<< tcu::TestLog::ValueInfo("NumSwaps",		"SwapBuffers before use",			"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5978 		<< tcu::TestLog::ValueInfo("Delay",			"Time before use",					"us",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
5979 		<< tcu::TestLog::ValueInfo("RunOrder",		"Sample run order",					"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
5980 		<< tcu::TestLog::ValueInfo("DrawReadTime",	"Draw call and ReadPixels time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5981 		<< tcu::TestLog::ValueInfo("TotalTime",		"Total time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5982 		<< tcu::TestLog::ValueInfo("Upload time",	"Upload time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5983 		<< tcu::TestLog::ValueInfo("DrawCallTime",	"Draw call time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5984 		<< tcu::TestLog::ValueInfo("ReadTime",		"ReadPixels time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
5985 		<< tcu::TestLog::EndSampleInfo;
5986 
5987 	for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
5988 		m_testCtx.getLog()
5989 			<< tcu::TestLog::Sample
5990 			<< m_samples[sampleNdx].numFrames
5991 			<< (int)m_results[sampleNdx].timeBeforeUse
5992 			<< runOrder[sampleNdx]
5993 			<< (int)m_results[sampleNdx].renderReadDuration
5994 			<< (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
5995 			<< (int)m_results[sampleNdx].uploadDuration
5996 			<< (int)m_results[sampleNdx].renderDuration
5997 			<< (int)m_results[sampleNdx].readDuration
5998 			<< tcu::TestLog::EndSample;
5999 
6000 	m_testCtx.getLog() << tcu::TestLog::EndSampleList;
6001 }
6002 
drawMisc(void)6003 void UploadWaitDrawCase::drawMisc (void)
6004 {
6005 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
6006 
6007 	gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
6008 	setupVertexAttribs();
6009 	gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
6010 }
6011 
/*--------------------------------------------------------------------*//*!
 * \brief Outcome of comparing two sets of observations.
 *//*--------------------------------------------------------------------*/
struct DistributionCompareResult
{
	bool	equal;				//!< True when the two sets were accepted as having the same distribution.
	float	standardDeviations;	//!< Test statistic of the comparison, expressed in standard deviations.
};
6017 
6018 template <typename Comparer>
sumOfRanks(const std::vector<deUint64> & testSamples,const std::vector<deUint64> & allSamples,const Comparer & comparer)6019 static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer)
6020 {
6021 	float sum = 0;
6022 
6023 	for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
6024 	{
6025 		const deUint64	testSample		= testSamples[sampleNdx];
6026 		const int		lowerIndex		= (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6027 		const int		upperIndex		= (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
6028 		const int		lowerRank		= lowerIndex + 1;	// convert zero-indexed to rank
6029 		const int		upperRank		= upperIndex;		// convert zero-indexed to rank, upperIndex is last equal + 1
6030 		const float		rankMidpoint	= (lowerRank + upperRank) / 2.0f;
6031 
6032 		sum += rankMidpoint;
6033 	}
6034 
6035 	return sum;
6036 }
6037 
6038 template <typename Comparer>
distributionCompare(const std::vector<deUint64> & orderedObservationsA,const std::vector<deUint64> & orderedObservationsB,const Comparer & comparer)6039 static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer)
6040 {
6041 	// Mann�Whitney U test
6042 
6043 	const int				n1			= (int)orderedObservationsA.size();
6044 	const int				n2			= (int)orderedObservationsB.size();
6045 	std::vector<deUint64>	allSamples	(n1 + n2);
6046 
6047 	std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
6048 	std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
6049 	std::sort(allSamples.begin(), allSamples.end());
6050 
6051 	{
6052 		const float					R1		= sumOfRanks(orderedObservationsA, allSamples, comparer);
6053 
6054 		const float					U1		= n1*n2 + n1*(n1 + 1)/2 - R1;
6055 		const float					U2		= (n1 * n2) - U1;
6056 		const float					U		= de::min(U1, U2);
6057 
6058 		// \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
6059 
6060 		const float					mU		= n1*n2 / 2.0f;
6061 		const float					sigmaU	= deFloatSqrt((n1*n2*(n1+n2+1)) / 12.0f);
6062 		const float					z		= (U - mU) / sigmaU;
6063 
6064 		DistributionCompareResult	result;
6065 
6066 		result.equal				= (de::abs(z) <= 1.96f); // accept within 95% confidence interval
6067 		result.standardDeviations	= z;
6068 
6069 		return result;
6070 	}
6071 }
6072 
6073 template <typename T>
6074 struct ThresholdComparer
6075 {
6076 	float	relativeThreshold;
6077 	T		absoluteThreshold;
6078 
operator ()deqp::gles3::Performance::__anonde104e5c0111::ThresholdComparer6079 	bool operator() (const T& a, const T& b) const
6080 	{
6081 		const float diff = de::abs((float)a - (float)b);
6082 
6083 		// thresholds
6084 		if (diff <= (float)absoluteThreshold)
6085 			return false;
6086 		if (diff <= a*relativeThreshold ||
6087 			diff <= b*relativeThreshold)
6088 			return false;
6089 
6090 		// cmp
6091 		return a < b;
6092 	}
6093 };
6094 
findStabilizationSample(deUint64 (UploadWaitDrawCase::Result::* target),const char * description)6095 int UploadWaitDrawCase::findStabilizationSample (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6096 {
6097 	std::vector<std::vector<deUint64> >	sampleObservations(m_numMaxSwaps+1);
6098 	ThresholdComparer<deUint64>			comparer;
6099 
6100 	comparer.relativeThreshold = 0.15f;	// 15%
6101 	comparer.absoluteThreshold = 100;	// (us), assumed sampling precision
6102 
6103 	// get observations and order them
6104 
6105 	for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
6106 	{
6107 		int insertNdx = 0;
6108 
6109 		sampleObservations[swapNdx].resize(m_numSamplesPerSwap);
6110 
6111 		for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
6112 			if (m_samples[ndx].numFrames == swapNdx)
6113 				sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;
6114 
6115 		DE_ASSERT(insertNdx == m_numSamplesPerSwap);
6116 
6117 		std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
6118 	}
6119 
6120 	// find stabilization point
6121 
6122 	for (int sampleNdx = m_numMaxSwaps-1; sampleNdx != -1; --sampleNdx )
6123 	{
6124 		// Distribution is equal to all following distributions
6125 		for (int cmpTargetDistribution = sampleNdx+1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
6126 		{
6127 			// Stable section ends here?
6128 			const DistributionCompareResult result = distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
6129 			if (!result.equal)
6130 			{
6131 				// Last two samples are not equal? Samples never stabilized
6132 				if (sampleNdx == m_numMaxSwaps-1)
6133 				{
6134 					m_testCtx.getLog()
6135 						<< tcu::TestLog::Message
6136 						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6137 						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6138 						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6139 						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6140 						<< tcu::TestLog::EndMessage;
6141 					return -1;
6142 				}
6143 				else
6144 				{
6145 					m_testCtx.getLog()
6146 						<< tcu::TestLog::Message
6147 						<< description << ": Samples with swap count " << sampleNdx << " and " << cmpTargetDistribution << " do not seem to have the same distribution:\n"
6148 						<< "\tSamples with swap count " << sampleNdx << " are not part of the tail of stable results.\n"
6149 						<< "\tDifference in standard deviations: " << result.standardDeviations << "\n"
6150 						<< "\tSwap count " << sampleNdx << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
6151 						<< "\tSwap count " << cmpTargetDistribution << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f) << "\n"
6152 						<< tcu::TestLog::EndMessage;
6153 
6154 					return sampleNdx+1;
6155 				}
6156 			}
6157 		}
6158 	}
6159 
6160 	m_testCtx.getLog()
6161 		<< tcu::TestLog::Message
6162 		<< description << ": All samples seem to have the same distribution"
6163 		<< tcu::TestLog::EndMessage;
6164 
6165 	// all distributions equal
6166 	return 0;
6167 }
6168 
checkSampleTemporalStability(deUint64 (UploadWaitDrawCase::Result::* target),const char * description)6169 bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 (UploadWaitDrawCase::Result::*target), const char* description)
6170 {
6171 	// Try to find correlation with sample order and sample times
6172 
6173 	const int						numDataPoints	= (int)m_iterationOrder.size();
6174 	std::vector<tcu::Vec2>			dataPoints		(m_iterationOrder.size());
6175 	LineParametersWithConfidence	lineFit;
6176 
6177 	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
6178 	{
6179 		dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
6180 		dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
6181 	}
6182 
6183 	lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);
6184 
6185 	// Difference of more than 25% of the offset along the whole sample range
6186 	if (de::abs(lineFit.coefficient) * numDataPoints > de::abs(lineFit.offset) * 0.25f)
6187 	{
6188 		m_testCtx.getLog()
6189 			<< tcu::TestLog::Message
6190 			<< description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n"
6191 			<< "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
6192 			<< tcu::TestLog::EndMessage;
6193 
6194 		return false;
6195 	}
6196 	else
6197 		return true;
6198 }
6199 
6200 } // anonymous
6201 
BufferDataUploadTests(Context & context)6202 BufferDataUploadTests::BufferDataUploadTests (Context& context)
6203 	: TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
6204 {
6205 }
6206 
~BufferDataUploadTests(void)6207 BufferDataUploadTests::~BufferDataUploadTests (void)
6208 {
6209 }
6210 
init(void)6211 void BufferDataUploadTests::init (void)
6212 {
6213 	static const struct BufferUsage
6214 	{
6215 		const char* name;
6216 		deUint32	usage;
6217 		bool		primaryUsage;
6218 	} bufferUsages[] =
6219 	{
6220 		{ "stream_draw",	GL_STREAM_DRAW,		true	},
6221 		{ "stream_read",	GL_STREAM_READ,		false	},
6222 		{ "stream_copy",	GL_STREAM_COPY,		false	},
6223 		{ "static_draw",	GL_STATIC_DRAW,		true	},
6224 		{ "static_read",	GL_STATIC_READ,		false	},
6225 		{ "static_copy",	GL_STATIC_COPY,		false	},
6226 		{ "dynamic_draw",	GL_DYNAMIC_DRAW,	true	},
6227 		{ "dynamic_read",	GL_DYNAMIC_READ,	false	},
6228 		{ "dynamic_copy",	GL_DYNAMIC_COPY,	false	},
6229 	};
6230 
6231 	tcu::TestCaseGroup* const referenceGroup			= new tcu::TestCaseGroup(m_testCtx, "reference",			"Reference functions");
6232 	tcu::TestCaseGroup* const functionCallGroup			= new tcu::TestCaseGroup(m_testCtx, "function_call",		"Function call timing");
6233 	tcu::TestCaseGroup* const modifyAfterUseGroup		= new tcu::TestCaseGroup(m_testCtx, "modify_after_use",		"Function call time after buffer has been used");
6234 	tcu::TestCaseGroup* const renderAfterUploadGroup	= new tcu::TestCaseGroup(m_testCtx, "render_after_upload",	"Function call time of draw commands after buffer has been modified");
6235 
6236 	addChild(referenceGroup);
6237 	addChild(functionCallGroup);
6238 	addChild(modifyAfterUseGroup);
6239 	addChild(renderAfterUploadGroup);
6240 
6241 	// .reference
6242 	{
6243 		static const struct BufferSizeRange
6244 		{
6245 			const char* name;
6246 			int			minBufferSize;
6247 			int			maxBufferSize;
6248 			int			numSamples;
6249 			bool		largeBuffersCase;
6250 		} sizeRanges[] =
6251 		{
6252 			{ "small_buffers", 0,		1 << 18,	64,		false	}, // !< 0kB - 256kB
6253 			{ "large_buffers", 1 << 18,	1 << 24,	32,		true	}, // !< 256kB - 16MB
6254 		};
6255 
6256 		for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
6257 		{
6258 			referenceGroup->addChild(new ReferenceMemcpyCase(m_context,
6259 															 std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
6260 															 "Test memcpy performance",
6261 															 sizeRanges[bufferSizeRangeNdx].minBufferSize,
6262 															 sizeRanges[bufferSizeRangeNdx].maxBufferSize,
6263 															 sizeRanges[bufferSizeRangeNdx].numSamples,
6264 															 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
6265 		}
6266 	}
6267 
6268 	// .function_call
6269 	{
6270 		const int minBufferSize		= 0;		// !< 0kiB
6271 		const int maxBufferSize		= 1 << 24;	// !< 16MiB
6272 		const int numDataSamples	= 25;
6273 		const int numMapSamples		= 25;
6274 
6275 		tcu::TestCaseGroup* const bufferDataMethodGroup		= new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
6276 		tcu::TestCaseGroup* const bufferSubDataMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
6277 		tcu::TestCaseGroup* const mapBufferRangeMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");
6278 
6279 		functionCallGroup->addChild(bufferDataMethodGroup);
6280 		functionCallGroup->addChild(bufferSubDataMethodGroup);
6281 		functionCallGroup->addChild(mapBufferRangeMethodGroup);
6282 
6283 		// .buffer_data
6284 		{
6285 			static const struct TargetCase
6286 			{
6287 				tcu::TestCaseGroup*				group;
6288 				BufferDataUploadCase::CaseType	caseType;
6289 				bool							allUsages;
6290 			} targetCases[] =
6291 			{
6292 				{ new tcu::TestCaseGroup(m_testCtx, "new_buffer",				"Target new buffer"),							BufferDataUploadCase::CASE_NEW_BUFFER,			true	},
6293 				{ new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer",		"Target new unspecified buffer"),				BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER,	true	},
6294 				{ new tcu::TestCaseGroup(m_testCtx, "specified_buffer",			"Target new specified buffer"),					BufferDataUploadCase::CASE_SPECIFIED_BUFFER,	true	},
6295 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer",				"Target buffer that was used in draw"),			BufferDataUploadCase::CASE_USED_BUFFER,			true	},
6296 				{ new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer",		"Target larger buffer that was used in draw"),	BufferDataUploadCase::CASE_USED_LARGER_BUFFER,	false	},
6297 			};
6298 
6299 			for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
6300 			{
6301 				bufferDataMethodGroup->addChild(targetCases[targetNdx].group);
6302 
6303 				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6304 					if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
6305 						targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context,
6306 																						std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6307 																						std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6308 																						minBufferSize,
6309 																						maxBufferSize,
6310 																						numDataSamples,
6311 																						bufferUsages[usageNdx].usage,
6312 																						targetCases[targetNdx].caseType));
6313 			}
6314 		}
6315 
6316 		// .buffer_sub_data
6317 		{
6318 			static const struct FlagCase
6319 			{
6320 				tcu::TestCaseGroup*					group;
6321 				BufferSubDataUploadCase::CaseType	parentCase;
6322 				bool								allUsages;
6323 				int									flags;
6324 			} flagCases[] =
6325 			{
6326 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload",					    ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD															},
6327 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",    "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD    | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6328 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload",                   ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD														},
6329 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
6330 			};
6331 
6332 			for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
6333 			{
6334 				bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);
6335 
6336 				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6337 					if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
6338 							flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context,
6339 																						   std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
6340 																						   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6341 																						   minBufferSize,
6342 																						   maxBufferSize,
6343 																						   numDataSamples,
6344 																						   bufferUsages[usageNdx].usage,
6345 																						   flagCases[flagNdx].parentCase,
6346 																						   flagCases[flagNdx].flags));
6347 			}
6348 		}
6349 
6350 		// .map_buffer_range
6351 		{
6352 			static const struct FlagCase
6353 			{
6354 				const char*	name;
6355 				bool		usefulForUnusedBuffers;
6356 				bool		allUsages;
6357 				int			glFlags;
6358 				int			caseFlags;
6359 			} flagCases[] =
6360 			{
6361 				{ "flag_write_full",										true,	true,	GL_MAP_WRITE_BIT,																0																				},
6362 				{ "flag_write_partial",										true,	true,	GL_MAP_WRITE_BIT,																MapBufferRangeCase::FLAG_PARTIAL												},
6363 				{ "flag_read_write_full",									true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												0																				},
6364 				{ "flag_read_write_partial",								true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												MapBufferRangeCase::FLAG_PARTIAL												},
6365 				{ "flag_invalidate_range_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									0																				},
6366 				{ "flag_invalidate_range_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6367 				{ "flag_invalidate_buffer_full",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								0																				},
6368 				{ "flag_invalidate_buffer_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								MapBufferRangeCase::FLAG_PARTIAL												},
6369 				{ "flag_write_full_manual_invalidate_buffer",				false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_MANUAL_INVALIDATION									},
6370 				{ "flag_write_partial_manual_invalidate_buffer",			false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION	},
6371 				{ "flag_unsynchronized_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									0																				},
6372 				{ "flag_unsynchronized_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
6373 				{ "flag_unsynchronized_and_invalidate_buffer_full",			true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	0																				},
6374 				{ "flag_unsynchronized_and_invalidate_buffer_partial",		true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	MapBufferRangeCase::FLAG_PARTIAL												},
6375 			};
6376 			static const struct FlushCases	// Row type for the explicit-flush mapBufferRange groups built by the flushCases loop below.
6377 			{
6378 				const char*	name;	// Name of the TestCaseGroup created for the row.
6379 				int			glFlags;	// GL_MAP_* bits forwarded to the MapBufferRangeFlushCase constructor.
6380 				int			caseFlags;	// MapBufferRangeFlushCase::FLAG_* bits; OR'ed with groups[].flags before being passed on.
6381 			} flushCases[] =
6382 			{
6383 				{ "flag_flush_explicit_map_full",					GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	0												},
6384 				{ "flag_flush_explicit_map_partial",				GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_PARTIAL			},
6385 				{ "flag_flush_explicit_map_full_flush_in_parts",	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS	},
6386 				{ "flag_flush_explicit_map_full_flush_partial",		GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL		},
6387 			};
6388 			const struct MapTestGroup	// Deliberately not static: every row allocates a TestCaseGroup bound to m_testCtx, so the table must be rebuilt on each call instead of reusing pointers from an earlier one.
6389 			{
6390 				int					flags;	// FLAG_USE_UNUSED_* bits OR'ed into the caseFlags of every case in the group (0 for the used-buffer group).
6391 				bool				unusedBufferCase;	// When true, flagCases rows with usefulForUnusedBuffers == false are skipped for this group.
6392 				tcu::TestCaseGroup* group;	// Freshly allocated node handed to mapBufferRangeMethodGroup->addChild() by the loop below (presumably transferring ownership - confirm against tcu::TestNode).
6393 			} groups[] =
6394 			{
6395 				{ MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,	true,	new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"),				},
6396 				{ MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,		true,	new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),					},
6397 				{ 0,														false,	new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")	},
6398 			};
6399 
6400 			// The same groups[].flags bits are OR'ed into the case flags of both MapBufferRangeCase and MapBufferRangeFlushCase; verify that the two enums use identical values.
6401 			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
6402 			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);
6403 
6404 			for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
6405 			{
6406 				tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group;
6407 
6408 				mapBufferRangeMethodGroup->addChild(bufferTypeGroup);
6409 
6410 				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
6411 				{
6412 					if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
6413 						continue;
6414 
6415 					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
6416 					bufferTypeGroup->addChild(bufferUsageGroup);
6417 
6418 					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6419 						if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
6420 							bufferUsageGroup->addChild(new MapBufferRangeCase(m_context,
6421 																			  bufferUsages[usageNdx].name,
6422 																			  std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6423 																			  minBufferSize,
6424 																			  maxBufferSize,
6425 																			  numMapSamples,
6426 																			  bufferUsages[usageNdx].usage,
6427 																			  flagCases[caseNdx].glFlags,
6428 																			  flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
6429 				}
6430 
6431 				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
6432 				{
6433 					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
6434 					bufferTypeGroup->addChild(bufferUsageGroup);
6435 
6436 					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
6437 						if (bufferUsages[usageNdx].primaryUsage)
6438 							bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context,
6439 																				   bufferUsages[usageNdx].name,
6440 																				   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
6441 																				   minBufferSize,
6442 																				   maxBufferSize,
6443 																				   numMapSamples,
6444 																				   bufferUsages[usageNdx].usage,
6445 																				   flushCases[caseNdx].glFlags,
6446 																				   flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
6447 				}
6448 			}
6449 		}
6450 	}
6451 
6452 	// .modify_after_use
6453 	{
6454 		const int minBufferSize	= 0;		// !< 0kiB
6455 		const int maxBufferSize	= 1 << 24;	// !< 16MiB
6456 
6457 		static const struct Usage	// One row per buffer usage hint; the loop below builds one modify_after_use sub-group per row.
6458 		{
6459 			const char* name;	// Name of the per-usage TestCaseGroup.
6460 			const char* description;	// Description of the per-usage TestCaseGroup.
6461 			deUint32	usage;	// GL usage enum passed to every ModifyAfter* case added to the group.
6462 		} usages[] =
6463 		{
6464 			{ "static_draw",	"Test with GL_STATIC_DRAW",		GL_STATIC_DRAW	},
6465 			{ "dynamic_draw",	"Test with GL_DYNAMIC_DRAW",	GL_DYNAMIC_DRAW	},
6466 			{ "stream_draw",	"Test with GL_STREAM_DRAW",		GL_STREAM_DRAW },
6467 
6468 		};
6469 
6470 		for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
6471 		{
6472 			tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
6473 			modifyAfterUseGroup->addChild(usageGroup);
6474 
6475 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data",							"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6476 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_different_size",			"Respecify buffer contents and size after use",			minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
6477 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_repeated",					"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));
6478 
6479 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
6480 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial",				"Respecify buffer contents partially use",				minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6481 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full_repeated",		"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
6482 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial_repeated",		"Respecify buffer contents partially upload and use",	minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
6483 
6484 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT));
6485 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_partial",				"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT));
6486 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_full",				"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6487 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_partial",			"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
6488 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6489 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
6490 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6491 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
6492 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6493 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
6494 
6495 			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6496 			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
6497 		}
6498 	}
6499 
6500 	// .render_after_upload
6501 	{
6502 		// .reference
6503 		{
6504 			tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
6505 			renderAfterUploadGroup->addChild(renderReferenceGroup);
6506 
6507 			// .draw
6508 			{
6509 				tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
6510 				renderReferenceGroup->addChild(drawGroup);
6511 
6512 				// Time consumed by readPixels
6513 				drawGroup->addChild(new ReferenceReadPixelsTimeCase	(m_context, "read_pixels",		"Measure time consumed by readPixels() function call"));
6514 
6515 				// Time consumed by rendering
6516 				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_arrays",		"Measure time consumed by drawArrays() function call",		DRAWMETHOD_DRAW_ARRAYS));
6517 				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_elements",	"Measure time consumed by drawElements() function call",	DRAWMETHOD_DRAW_ELEMENTS));
6518 			}
6519 
6520 			// .draw_upload_draw
6521 			{
6522 				static const struct	// Draw method / target buffer combinations for the reference draw_upload_draw cases.
6523 				{
6524 					const char*		name;	// Case name prefix; the loop below appends "_with_" and the upload method name.
6525 					const char*		description;	// Passed unchanged as the case description.
6526 					DrawMethod		drawMethod;	// Forwarded to the BufferInUseRenderTimeCase constructor.
6527 					TargetBuffer	targetBuffer;	// Forwarded to the BufferInUseRenderTimeCase constructor.
6528 					bool			partial;	// Selects UPLOADRANGE_PARTIAL instead of UPLOADRANGE_FULL; such rows are skipped for methods with supportsPartialUpload == false.
6529 				} uploadTargets[] =
6530 				{
6531 					{
6532 						"draw_arrays_upload_vertices",
6533 						"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6534 						DRAWMETHOD_DRAW_ARRAYS,
6535 						TARGETBUFFER_VERTEX,
6536 						false
6537 					},
6538 					{
6539 						"draw_arrays_upload_vertices_partial",
6540 						"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6541 						DRAWMETHOD_DRAW_ARRAYS,
6542 						TARGETBUFFER_VERTEX,
6543 						true
6544 					},
6545 					{
6546 						"draw_elements_upload_vertices",
6547 						"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6548 						DRAWMETHOD_DRAW_ELEMENTS,
6549 						TARGETBUFFER_VERTEX,
6550 						false
6551 					},
6552 					{
6553 						"draw_elements_upload_indices",
6554 						"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6555 						DRAWMETHOD_DRAW_ELEMENTS,
6556 						TARGETBUFFER_INDEX,
6557 						false
6558 					},
6559 					{
6560 						"draw_elements_upload_indices_partial",
6561 						"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6562 						DRAWMETHOD_DRAW_ELEMENTS,
6563 						TARGETBUFFER_INDEX,
6564 						true
6565 					},
6566 				};
6567 				static const struct	// Upload methods combined with every uploadTargets row by the loop below.
6568 				{
6569 					const char*							name;	// Case name suffix, appended after "_with_".
6570 					const char*							description;	// Not read by the loop below.
6571 					UploadMethod						uploadMethod;	// Forwarded to the BufferInUseRenderTimeCase constructor.
6572 					BufferInUseRenderTimeCase::MapFlags	mapFlags;	// Forwarded to the BufferInUseRenderTimeCase constructor; MAPFLAG_NONE for the non-mapping methods.
6573 					bool								supportsPartialUpload;	// When false, uploadTargets rows with partial == true are skipped for this method.
6574 				} uploadMethods[] =
6575 				{
6576 					{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6577 					{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6578 					{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6579 					{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6580 				};
6581 
6582 				tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
6583 				renderReferenceGroup->addChild(drawUploadDrawGroup);
6584 
6585 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6586 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6587 				{
6588 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6589 
6590 					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6591 						continue;
6592 
6593 					drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6594 																				name.c_str(),
6595 																				uploadTargets[uploadTargetNdx].description,
6596 																				uploadTargets[uploadTargetNdx].drawMethod,
6597 																				uploadMethods[uploadMethodNdx].mapFlags,
6598 																				uploadTargets[uploadTargetNdx].targetBuffer,
6599 																				uploadMethods[uploadMethodNdx].uploadMethod,
6600 																				(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6601 																				BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
6602 				}
6603 			}
6604 		}
6605 
6606 		// .upload_unrelated_and_draw
6607 		{
6608 			static const struct	// Draw methods exercised by the upload_unrelated_and_draw cases.
6609 			{
6610 				const char*		name;	// Case name prefix, followed by "_upload_unrelated_with_" and the upload method name.
6611 				const char*		description;	// GL function name spliced into the generated case description.
6612 				DrawMethod		drawMethod;	// Forwarded to the UnrelatedUploadRenderTimeCase constructor.
6613 			} drawMethods[] =
6614 			{
6615 				{ "draw_arrays",	"drawArrays",	DRAWMETHOD_DRAW_ARRAYS		},
6616 				{ "draw_elements",	"drawElements",	DRAWMETHOD_DRAW_ELEMENTS	},
6617 			};
6618 
6619 			static const struct	// Upload methods combined with every drawMethods row by the loop below.
6620 			{
6621 				const char*		name;	// Case name suffix, appended after "_upload_unrelated_with_".
6622 				UploadMethod	uploadMethod;	// Forwarded to the UnrelatedUploadRenderTimeCase constructor.
6623 			} uploadMethods[] =
6624 			{
6625 				{ "buffer_data",		UPLOADMETHOD_BUFFER_DATA		},
6626 				{ "buffer_sub_data",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6627 				{ "map_buffer_range",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6628 			};
6629 
6630 			tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
6631 			renderAfterUploadGroup->addChild(uploadUnrelatedGroup);
6632 
6633 			for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
6634 			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6635 			{
6636 				const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
6637 				const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload";
6638 
6639 				// Time consumed by rendering command after an unrelated upload
6640 
6641 				uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod));
6642 			}
6643 		}
6644 
6645 		// .upload_and_draw
6646 		{
6647 			static const struct	// One row per upload_and_draw sub-group created by the loop below.
6648 			{
6649 				const char*			name;	// Name of the sub-group.
6650 				const char*			description;	// Description of the sub-group.
6651 				BufferState			bufferState;	// Forwarded to the GenericUploadRenderTimeCase constructor.
6652 				UnrelatedBufferType	unrelatedBuffer;	// Forwarded to the case; UNRELATEDBUFFERTYPE_NONE selects the sample type without unrelated upload size.
6653 				bool				supportsPartialUpload;	// When false, upload targets with partial == true are skipped for this sub-group.
6654 			} bufferConfigs[] =
6655 			{
6656 				{ "used_buffer",						"Upload to an used buffer",											BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_NONE,	true	},
6657 				{ "new_buffer",							"Upload to a new buffer",											BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_NONE,	false	},
6658 				{ "used_buffer_and_unrelated_upload",	"Upload to an used buffer and an unrelated buffer and then draw",	BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_VERTEX,	true	},
6659 				{ "new_buffer_and_unrelated_upload",	"Upload to a new buffer and an unrelated buffer and then draw",		BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_VERTEX,	false	},
6660 			};
6661 
6662 			tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
6663 			renderAfterUploadGroup->addChild(uploadAndDrawGroup);
6664 
6665 			// .used_buffer
6666 			// .new_buffer
6667 			// .used_buffer_and_unrelated_upload
6668 			// .new_buffer_and_unrelated_upload
6669 			for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
6670 			{
6671 				static const struct	// Draw method / target buffer combinations for the upload_and_draw cases.
6672 				{
6673 					const char*		name;	// Case name prefix; the loop below appends "_with_" and the upload method name.
6674 					const char*		description;	// Passed unchanged as the case description.
6675 					DrawMethod		drawMethod;	// Forwarded to the GenericUploadRenderTimeCase constructor.
6676 					TargetBuffer	targetBuffer;	// Forwarded to the GenericUploadRenderTimeCase constructor.
6677 					bool			partial;	// Selects UPLOADRANGE_PARTIAL instead of UPLOADRANGE_FULL; skipped when the method or the buffer config lacks partial upload support.
6678 				} uploadTargets[] =
6679 				{
6680 					{
6681 						"draw_arrays_upload_vertices",
6682 						"Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
6683 						DRAWMETHOD_DRAW_ARRAYS,
6684 						TARGETBUFFER_VERTEX,
6685 						false
6686 					},
6687 					{
6688 						"draw_arrays_upload_vertices_partial",
6689 						"Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls",
6690 						DRAWMETHOD_DRAW_ARRAYS,
6691 						TARGETBUFFER_VERTEX,
6692 						true
6693 					},
6694 					{
6695 						"draw_elements_upload_vertices",
6696 						"Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
6697 						DRAWMETHOD_DRAW_ELEMENTS,
6698 						TARGETBUFFER_VERTEX,
6699 						false
6700 					},
6701 					{
6702 						"draw_elements_upload_indices",
6703 						"Measure time consumed by index upload, drawElements, and readPixels function calls",
6704 						DRAWMETHOD_DRAW_ELEMENTS,
6705 						TARGETBUFFER_INDEX,
6706 						false
6707 					},
6708 					{
6709 						"draw_elements_upload_indices_partial",
6710 						"Measure time consumed by partial index upload, drawElements, and readPixels function calls",
6711 						DRAWMETHOD_DRAW_ELEMENTS,
6712 						TARGETBUFFER_INDEX,
6713 						true
6714 					},
6715 				};
6716 				static const struct	// Upload methods combined with every uploadTargets row by the loop below.
6717 				{
6718 					const char*		name;	// Case name suffix, appended after "_with_".
6719 					const char*		description;	// Not read by the loop below.
6720 					UploadMethod	uploadMethod;	// Forwarded to the GenericUploadRenderTimeCase constructor.
6721 					bool			supportsPartialUpload;	// When false, uploadTargets rows with partial == true are skipped for this method.
6722 				} uploadMethods[] =
6723 				{
6724 					{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA,		false	},
6725 					{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	true	},
6726 					{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	true	},
6727 				};
6728 
6729 				tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description);
6730 				uploadAndDrawGroup->addChild(group);
6731 
6732 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6733 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6734 				{
6735 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6736 
6737 					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6738 						continue;
6739 					if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
6740 						continue;
6741 
6742 					// Don't log unrelated buffer information to samples if there is no such buffer
6743 
6744 					if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
6745 					{
6746 						typedef UploadRenderReadDuration				SampleType;
6747 						typedef GenericUploadRenderTimeCase<SampleType>	TestType;
6748 
6749 						group->addChild(new TestType(m_context,
6750 													 name.c_str(),
6751 													 uploadTargets[uploadTargetNdx].description,
6752 													 uploadTargets[uploadTargetNdx].drawMethod,
6753 													 uploadTargets[uploadTargetNdx].targetBuffer,
6754 													 uploadMethods[uploadMethodNdx].uploadMethod,
6755 													 bufferConfigs[stateNdx].bufferState,
6756 													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6757 													 bufferConfigs[stateNdx].unrelatedBuffer));
6758 					}
6759 					else
6760 					{
6761 						typedef UploadRenderReadDurationWithUnrelatedUploadSize	SampleType;
6762 						typedef GenericUploadRenderTimeCase<SampleType>			TestType;
6763 
6764 						group->addChild(new TestType(m_context,
6765 													 name.c_str(),
6766 													 uploadTargets[uploadTargetNdx].description,
6767 													 uploadTargets[uploadTargetNdx].drawMethod,
6768 													 uploadTargets[uploadTargetNdx].targetBuffer,
6769 													 uploadMethods[uploadMethodNdx].uploadMethod,
6770 													 bufferConfigs[stateNdx].bufferState,
6771 													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6772 													 bufferConfigs[stateNdx].unrelatedBuffer));
6773 					}
6774 				}
6775 			}
6776 		}
6777 
6778 		// .draw_modify_draw
6779 		{
6780 			static const struct	// Draw method / target buffer combinations for the draw_modify_draw cases.
6781 			{
6782 				const char*		name;	// Case name prefix; the loop below appends "_with_" and the upload method name.
6783 				const char*		description;	// Passed unchanged as the case description.
6784 				DrawMethod		drawMethod;	// Forwarded to the BufferInUseRenderTimeCase constructor.
6785 				TargetBuffer	targetBuffer;	// Forwarded to the BufferInUseRenderTimeCase constructor.
6786 				bool			partial;	// Selects UPLOADRANGE_PARTIAL instead of UPLOADRANGE_FULL; such rows are skipped for methods with supportsPartialUpload == false.
6787 			} uploadTargets[] =
6788 			{
6789 				{
6790 					"draw_arrays_upload_vertices",
6791 					"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
6792 					DRAWMETHOD_DRAW_ARRAYS,
6793 					TARGETBUFFER_VERTEX,
6794 					false
6795 				},
6796 				{
6797 					"draw_arrays_upload_vertices_partial",
6798 					"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
6799 					DRAWMETHOD_DRAW_ARRAYS,
6800 					TARGETBUFFER_VERTEX,
6801 					true
6802 				},
6803 				{
6804 					"draw_elements_upload_vertices",
6805 					"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
6806 					DRAWMETHOD_DRAW_ELEMENTS,
6807 					TARGETBUFFER_VERTEX,
6808 					false
6809 				},
6810 				{
6811 					"draw_elements_upload_indices",
6812 					"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
6813 					DRAWMETHOD_DRAW_ELEMENTS,
6814 					TARGETBUFFER_INDEX,
6815 					false
6816 				},
6817 				{
6818 					"draw_elements_upload_indices_partial",
6819 					"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
6820 					DRAWMETHOD_DRAW_ELEMENTS,
6821 					TARGETBUFFER_INDEX,
6822 					true
6823 				},
6824 			};
6825 			static const struct	// Upload methods combined with every uploadTargets row by the loop below.
6826 			{
6827 				const char*							name;	// Case name suffix, appended after "_with_".
6828 				const char*							description;	// Not read by the loop below.
6829 				UploadMethod						uploadMethod;	// Forwarded to the BufferInUseRenderTimeCase constructor.
6830 				BufferInUseRenderTimeCase::MapFlags	mapFlags;	// Forwarded to the BufferInUseRenderTimeCase constructor; MAPFLAG_NONE for the non-mapping methods.
6831 				bool								supportsPartialUpload;	// When false, uploadTargets rows with partial == true are skipped for this method.
6832 			} uploadMethods[] =
6833 			{
6834 				{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
6835 				{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
6836 				{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
6837 				{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
6838 			};
6839 
6840 			tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use");
6841 			renderAfterUploadGroup->addChild(drawModifyDrawGroup);
6842 
6843 			for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6844 			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6845 			{
6846 				const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6847 
6848 				if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
6849 					continue;
6850 
6851 				drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
6852 																			name.c_str(),
6853 																			uploadTargets[uploadTargetNdx].description,
6854 																			uploadTargets[uploadTargetNdx].drawMethod,
6855 																			uploadMethods[uploadMethodNdx].mapFlags,
6856 																			uploadTargets[uploadTargetNdx].targetBuffer,
6857 																			uploadMethods[uploadMethodNdx].uploadMethod,
6858 																			(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
6859 																			BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
6860 			}
6861 		}
6862 
6863 		// .upload_wait_draw
6864 		{
6865 			static const struct	// One row per upload_wait_draw sub-group created by the loop below.
6866 			{
6867 				const char*	name;	// Name of the sub-group.
6868 				const char*	description;	// Description of the sub-group.
6869 				BufferState	bufferState;	// Forwarded to the UploadWaitDrawCase constructor.
6870 			} bufferStates[] =
6871 			{
6872 				{ "new_buffer",		"Uploading to just generated name",	BUFFERSTATE_NEW			},
6873 				{ "used_buffer",	"Uploading to a used buffer",		BUFFERSTATE_EXISTING	},
6874 			};
6875 			static const struct	// Draw method / target buffer combinations for the upload_wait_draw cases.
6876 			{
6877 				const char*		name;	// Case name prefix; the loop below appends "_with_" and the upload method name.
6878 				const char*		description;	// Passed unchanged as the case description.
6879 				DrawMethod		drawMethod;	// Forwarded to the UploadWaitDrawCase constructor.
6880 				TargetBuffer	targetBuffer;	// Forwarded to the UploadWaitDrawCase constructor.
6881 			} uploadTargets[] =
6882 			{
6883 				{ "draw_arrays_vertices",	"Upload vertex data, draw with drawArrays",		DRAWMETHOD_DRAW_ARRAYS,		TARGETBUFFER_VERTEX	},
6884 				{ "draw_elements_vertices",	"Upload vertex data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_VERTEX	},
6885 				{ "draw_elements_indices",	"Upload index data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_INDEX	},
6886 			};
6887 			static const struct	// Upload methods combined with every uploadTargets row by the loop below.
6888 			{
6889 				const char*		name;	// Case name suffix, appended after "_with_".
6890 				const char*		description;	// Not read by the loop below.
6891 				UploadMethod	uploadMethod;	// Forwarded to the UploadWaitDrawCase constructor.
6892 			} uploadMethods[] =
6893 			{
6894 				{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA		},
6895 				{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA	},
6896 				{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
6897 			};
6898 
6899 			tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
6900 			renderAfterUploadGroup->addChild(uploadSwapDrawGroup);
6901 
6902 			for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
6903 			{
6904 				tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
6905 				uploadSwapDrawGroup->addChild(bufferGroup);
6906 
6907 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
6908 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
6909 				{
6910 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;
6911 
6912 					bufferGroup->addChild(new UploadWaitDrawCase(m_context,
6913 																 name.c_str(),
6914 																 uploadTargets[uploadTargetNdx].description,
6915 																 uploadTargets[uploadTargetNdx].drawMethod,
6916 																 uploadTargets[uploadTargetNdx].targetBuffer,
6917 																 uploadMethods[uploadMethodNdx].uploadMethod,
6918 																 bufferStates[bufferStateNdx].bufferState));
6919 				}
6920 			}
6921 		}
6922 	}
6923 }
6924 
6925 } // Performance
6926 } // gles3
6927 } // deqp
6928