1 //
2 // Copyright 2020 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // TracePerf:
7 // Performance test for ANGLE replaying traces.
8 //
9
10 #include <gtest/gtest.h>
11 #include "common/PackedEnums.h"
12 #include "common/system_utils.h"
13 #include "tests/perf_tests/ANGLEPerfTest.h"
14 #include "tests/perf_tests/ANGLEPerfTestArgs.h"
15 #include "tests/perf_tests/DrawCallPerfParams.h"
16 #include "util/egl_loader_autogen.h"
17 #include "util/frame_capture_test_utils.h"
18 #include "util/png_utils.h"
19 #include "util/test_utils.h"
20
21 #include "restricted_traces/restricted_traces_autogen.h"
22
23 #include <cassert>
24 #include <functional>
25 #include <sstream>
26
// When --minimize-gpu-work is specified, we want to reduce GPU work to a minimum and bring the CPU
// overhead to the surface, so that we can see how much CPU overhead each driver has for each app
// trace. On some drivers the bufferSubData/texSubImage calls end up dominating the frame time when
// the actual GPU work is minimized. Even reducing the texSubImage calls to only update a 1x1 area
// is not enough: the driver may be implementing copy on write by cloning the entire texture to
// another memory storage for each texSubImage call. While this information is also important for
// performance, it should be evaluated separately, either in a real app usage scenario or with
// standalone tests. To measure CPU overhead without data copies dominating the trace, this flag
// turns the texSubImage and bufferSubData calls into no-ops when --minimize-gpu-work is specified.
// Feel free to disable it when you have other needs, or turn it into a run-time option if desired.
38 #define NOOP_SUBDATA_SUBIMAGE_FOR_MINIMIZE_GPU_WORK
39
40 using namespace angle;
41 using namespace egl_platform;
42
43 namespace
44 {
45 struct TracePerfParams final : public RenderTestParams
46 {
47 // Common default options
TracePerfParams__anon75ef38220111::TracePerfParams48 TracePerfParams()
49 {
50 // Display the frame after every drawBenchmark invocation
51 iterationsPerStep = 1;
52 }
53
story__anon75ef38220111::TracePerfParams54 std::string story() const override
55 {
56 std::stringstream strstr;
57 strstr << RenderTestParams::story() << "_" << GetTraceInfo(testID).name;
58 return strstr.str();
59 }
60
61 RestrictedTraceID testID;
62 };
63
operator <<(std::ostream & os,const TracePerfParams & params)64 std::ostream &operator<<(std::ostream &os, const TracePerfParams ¶ms)
65 {
66 os << params.backendAndStory().substr(1);
67 return os;
68 }
69
70 class TracePerfTest : public ANGLERenderTest
71 {
72 public:
73 TracePerfTest(const TracePerfParams ¶ms);
74
75 void initializeBenchmark() override;
76 void destroyBenchmark() override;
77 void drawBenchmark() override;
78
79 void onReplayFramebufferChange(GLenum target, GLuint framebuffer);
80 void onReplayInvalidateFramebuffer(GLenum target,
81 GLsizei numAttachments,
82 const GLenum *attachments);
83 void onReplayInvalidateSubFramebuffer(GLenum target,
84 GLsizei numAttachments,
85 const GLenum *attachments,
86 GLint x,
87 GLint y,
88 GLsizei width,
89 GLsizei height);
90 void onReplayDrawBuffers(GLsizei n, const GLenum *bufs);
91 void onReplayReadBuffer(GLenum src);
92 void onReplayDiscardFramebufferEXT(GLenum target,
93 GLsizei numAttachments,
94 const GLenum *attachments);
95
96 bool isDefaultFramebuffer(GLenum target) const;
97
98 double getHostTimeFromGLTime(GLint64 glTime);
99
getStepAlignment() const100 int getStepAlignment() const override
101 {
102 // Align step counts to the number of frames in a trace.
103 const TraceInfo &traceInfo = GetTraceInfo(mParams.testID);
104 return static_cast<int>(traceInfo.endFrame - traceInfo.startFrame + 1);
105 }
106
TestBody()107 void TestBody() override { run(); }
108
109 private:
110 struct QueryInfo
111 {
112 GLuint beginTimestampQuery;
113 GLuint endTimestampQuery;
114 GLuint framebuffer;
115 };
116
117 struct TimeSample
118 {
119 GLint64 glTime;
120 double hostTime;
121 };
122
123 void sampleTime();
124 void saveScreenshot(const std::string &screenshotName) override;
125 void swap();
126
127 const TracePerfParams mParams;
128
129 uint32_t mStartFrame;
130 uint32_t mEndFrame;
131
132 // For tracking RenderPass/FBO change timing.
133 QueryInfo mCurrentQuery = {};
134 std::vector<QueryInfo> mRunningQueries;
135 std::vector<TimeSample> mTimeline;
136
137 std::string mStartingDirectory;
138 bool mUseTimestampQueries = false;
139 static constexpr int mMaxOffscreenBufferCount = 2;
140 std::array<GLuint, mMaxOffscreenBufferCount> mOffscreenFramebuffers = {0, 0};
141 std::array<GLuint, mMaxOffscreenBufferCount> mOffscreenTextures = {0, 0};
142 GLuint mOffscreenDepthStencil = 0;
143 int mWindowWidth = 0;
144 int mWindowHeight = 0;
145 GLuint mDrawFramebufferBinding = 0;
146 GLuint mReadFramebufferBinding = 0;
147 uint32_t mCurrentFrame = 0;
148 uint32_t mOffscreenFrameCount = 0;
149 uint32_t mTotalFrameCount = 0;
150 bool mScreenshotSaved = false;
151 std::unique_ptr<TraceLibrary> mTraceLibrary;
152 };
153
// Test instance that the GL interposer procs in this file forward to. The procs dereference it
// without a null check. NOTE(review): presumably it is set for the duration of a running test;
// confirm where it is assigned.
TracePerfTest *gCurrentTracePerfTest = nullptr;
155
// Don't forget to include KHRONOS_APIENTRY in override methods. Necessary on Win/x86.
BindFramebufferProc(GLenum target,GLuint framebuffer)157 void KHRONOS_APIENTRY BindFramebufferProc(GLenum target, GLuint framebuffer)
158 {
159 gCurrentTracePerfTest->onReplayFramebufferChange(target, framebuffer);
160 }
161
InvalidateFramebufferProc(GLenum target,GLsizei numAttachments,const GLenum * attachments)162 void KHRONOS_APIENTRY InvalidateFramebufferProc(GLenum target,
163 GLsizei numAttachments,
164 const GLenum *attachments)
165 {
166 gCurrentTracePerfTest->onReplayInvalidateFramebuffer(target, numAttachments, attachments);
167 }
168
InvalidateSubFramebufferProc(GLenum target,GLsizei numAttachments,const GLenum * attachments,GLint x,GLint y,GLsizei width,GLsizei height)169 void KHRONOS_APIENTRY InvalidateSubFramebufferProc(GLenum target,
170 GLsizei numAttachments,
171 const GLenum *attachments,
172 GLint x,
173 GLint y,
174 GLsizei width,
175 GLsizei height)
176 {
177 gCurrentTracePerfTest->onReplayInvalidateSubFramebuffer(target, numAttachments, attachments, x,
178 y, width, height);
179 }
180
DrawBuffersProc(GLsizei n,const GLenum * bufs)181 void KHRONOS_APIENTRY DrawBuffersProc(GLsizei n, const GLenum *bufs)
182 {
183 gCurrentTracePerfTest->onReplayDrawBuffers(n, bufs);
184 }
185
ReadBufferProc(GLenum src)186 void KHRONOS_APIENTRY ReadBufferProc(GLenum src)
187 {
188 gCurrentTracePerfTest->onReplayReadBuffer(src);
189 }
190
DiscardFramebufferEXTProc(GLenum target,GLsizei numAttachments,const GLenum * attachments)191 void KHRONOS_APIENTRY DiscardFramebufferEXTProc(GLenum target,
192 GLsizei numAttachments,
193 const GLenum *attachments)
194 {
195 gCurrentTracePerfTest->onReplayDiscardFramebufferEXT(target, numAttachments, attachments);
196 }
197
ViewportMinimizedProc(GLint x,GLint y,GLsizei width,GLsizei height)198 void KHRONOS_APIENTRY ViewportMinimizedProc(GLint x, GLint y, GLsizei width, GLsizei height)
199 {
200 glViewport(x, y, 1, 1);
201 }
202
ScissorMinimizedProc(GLint x,GLint y,GLsizei width,GLsizei height)203 void KHRONOS_APIENTRY ScissorMinimizedProc(GLint x, GLint y, GLsizei width, GLsizei height)
204 {
205 glScissor(x, y, 1, 1);
206 }
207
208 // Interpose the calls that generate actual GPU work
DrawElementsMinimizedProc(GLenum mode,GLsizei count,GLenum type,const void * indices)209 void KHRONOS_APIENTRY DrawElementsMinimizedProc(GLenum mode,
210 GLsizei count,
211 GLenum type,
212 const void *indices)
213 {
214 glDrawElements(GL_POINTS, 1, type, indices);
215 }
216
DrawElementsIndirectMinimizedProc(GLenum mode,GLenum type,const void * indirect)217 void KHRONOS_APIENTRY DrawElementsIndirectMinimizedProc(GLenum mode,
218 GLenum type,
219 const void *indirect)
220 {
221 glDrawElementsInstancedBaseVertex(GL_POINTS, 1, type, 0, 1, 0);
222 }
223
DrawElementsInstancedMinimizedProc(GLenum mode,GLsizei count,GLenum type,const void * indices,GLsizei instancecount)224 void KHRONOS_APIENTRY DrawElementsInstancedMinimizedProc(GLenum mode,
225 GLsizei count,
226 GLenum type,
227 const void *indices,
228 GLsizei instancecount)
229 {
230 glDrawElementsInstanced(GL_POINTS, 1, type, indices, 1);
231 }
232
DrawElementsBaseVertexMinimizedProc(GLenum mode,GLsizei count,GLenum type,const void * indices,GLint basevertex)233 void KHRONOS_APIENTRY DrawElementsBaseVertexMinimizedProc(GLenum mode,
234 GLsizei count,
235 GLenum type,
236 const void *indices,
237 GLint basevertex)
238 {
239 glDrawElementsBaseVertex(GL_POINTS, 1, type, indices, basevertex);
240 }
241
DrawElementsInstancedBaseVertexMinimizedProc(GLenum mode,GLsizei count,GLenum type,const void * indices,GLsizei instancecount,GLint basevertex)242 void KHRONOS_APIENTRY DrawElementsInstancedBaseVertexMinimizedProc(GLenum mode,
243 GLsizei count,
244 GLenum type,
245 const void *indices,
246 GLsizei instancecount,
247 GLint basevertex)
248 {
249 glDrawElementsInstancedBaseVertex(GL_POINTS, 1, type, indices, 1, basevertex);
250 }
251
DrawRangeElementsMinimizedProc(GLenum mode,GLuint start,GLuint end,GLsizei count,GLenum type,const void * indices)252 void KHRONOS_APIENTRY DrawRangeElementsMinimizedProc(GLenum mode,
253 GLuint start,
254 GLuint end,
255 GLsizei count,
256 GLenum type,
257 const void *indices)
258 {
259 glDrawRangeElements(GL_POINTS, start, end, 1, type, indices);
260 }
261
DrawArraysMinimizedProc(GLenum mode,GLint first,GLsizei count)262 void KHRONOS_APIENTRY DrawArraysMinimizedProc(GLenum mode, GLint first, GLsizei count)
263 {
264 glDrawArrays(GL_POINTS, first, 1);
265 }
266
DrawArraysInstancedMinimizedProc(GLenum mode,GLint first,GLsizei count,GLsizei instancecount)267 void KHRONOS_APIENTRY DrawArraysInstancedMinimizedProc(GLenum mode,
268 GLint first,
269 GLsizei count,
270 GLsizei instancecount)
271 {
272 glDrawArraysInstanced(GL_POINTS, first, 1, 1);
273 }
274
DrawArraysIndirectMinimizedProc(GLenum mode,const void * indirect)275 void KHRONOS_APIENTRY DrawArraysIndirectMinimizedProc(GLenum mode, const void *indirect)
276 {
277 glDrawArraysInstanced(GL_POINTS, 0, 1, 1);
278 }
279
DispatchComputeMinimizedProc(GLuint num_groups_x,GLuint num_groups_y,GLuint num_groups_z)280 void KHRONOS_APIENTRY DispatchComputeMinimizedProc(GLuint num_groups_x,
281 GLuint num_groups_y,
282 GLuint num_groups_z)
283 {
284 glDispatchCompute(1, 1, 1);
285 }
286
DispatchComputeIndirectMinimizedProc(GLintptr indirect)287 void KHRONOS_APIENTRY DispatchComputeIndirectMinimizedProc(GLintptr indirect)
288 {
289 glDispatchCompute(1, 1, 1);
290 }
291
292 // Interpose the calls that generate data copying work
BufferDataMinimizedProc(GLenum target,GLsizeiptr size,const void * data,GLenum usage)293 void KHRONOS_APIENTRY BufferDataMinimizedProc(GLenum target,
294 GLsizeiptr size,
295 const void *data,
296 GLenum usage)
297 {
298 glBufferData(target, size, nullptr, usage);
299 }
300
BufferSubDataMinimizedProc(GLenum target,GLintptr offset,GLsizeiptr size,const void * data)301 void KHRONOS_APIENTRY BufferSubDataMinimizedProc(GLenum target,
302 GLintptr offset,
303 GLsizeiptr size,
304 const void *data)
305 {
306 #if !defined(NOOP_SUBDATA_SUBIMAGE_FOR_MINIMIZE_GPU_WORK)
307 glBufferSubData(target, offset, 1, data);
308 #endif
309 }
310
MapBufferRangeMinimizedProc(GLenum target,GLintptr offset,GLsizeiptr length,GLbitfield access)311 void *KHRONOS_APIENTRY MapBufferRangeMinimizedProc(GLenum target,
312 GLintptr offset,
313 GLsizeiptr length,
314 GLbitfield access)
315 {
316 access |= GL_MAP_UNSYNCHRONIZED_BIT;
317 return glMapBufferRange(target, offset, length, access);
318 }
319
TexImage2DMinimizedProc(GLenum target,GLint level,GLint internalformat,GLsizei width,GLsizei height,GLint border,GLenum format,GLenum type,const void * pixels)320 void KHRONOS_APIENTRY TexImage2DMinimizedProc(GLenum target,
321 GLint level,
322 GLint internalformat,
323 GLsizei width,
324 GLsizei height,
325 GLint border,
326 GLenum format,
327 GLenum type,
328 const void *pixels)
329 {
330 GLint unpackBuffer = 0;
331 glGetIntegerv(GL_PIXEL_UNPACK_BUFFER_BINDING, &unpackBuffer);
332 if (unpackBuffer)
333 {
334 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
335 }
336 glTexImage2D(target, level, internalformat, width, height, border, format, type, nullptr);
337 if (unpackBuffer)
338 {
339 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, unpackBuffer);
340 }
341 }
342
TexSubImage2DMinimizedProc(GLenum target,GLint level,GLint xoffset,GLint yoffset,GLsizei width,GLsizei height,GLenum format,GLenum type,const void * pixels)343 void KHRONOS_APIENTRY TexSubImage2DMinimizedProc(GLenum target,
344 GLint level,
345 GLint xoffset,
346 GLint yoffset,
347 GLsizei width,
348 GLsizei height,
349 GLenum format,
350 GLenum type,
351 const void *pixels)
352 {
353 #if !defined(NOOP_SUBDATA_SUBIMAGE_FOR_MINIMIZE_GPU_WORK)
354 glTexSubImage2D(target, level, xoffset, yoffset, 1, 1, format, type, pixels);
355 #endif
356 }
357
TexImage3DMinimizedProc(GLenum target,GLint level,GLint internalformat,GLsizei width,GLsizei height,GLsizei depth,GLint border,GLenum format,GLenum type,const void * pixels)358 void KHRONOS_APIENTRY TexImage3DMinimizedProc(GLenum target,
359 GLint level,
360 GLint internalformat,
361 GLsizei width,
362 GLsizei height,
363 GLsizei depth,
364 GLint border,
365 GLenum format,
366 GLenum type,
367 const void *pixels)
368 {
369 GLint unpackBuffer = 0;
370 glGetIntegerv(GL_PIXEL_UNPACK_BUFFER_BINDING, &unpackBuffer);
371 if (unpackBuffer)
372 {
373 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
374 }
375 glTexImage3D(target, level, internalformat, width, height, depth, border, format, type,
376 nullptr);
377 if (unpackBuffer)
378 {
379 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, unpackBuffer);
380 }
381 }
382
TexSubImage3DMinimizedProc(GLenum target,GLint level,GLint xoffset,GLint yoffset,GLint zoffset,GLsizei width,GLsizei height,GLsizei depth,GLenum format,GLenum type,const void * pixels)383 void KHRONOS_APIENTRY TexSubImage3DMinimizedProc(GLenum target,
384 GLint level,
385 GLint xoffset,
386 GLint yoffset,
387 GLint zoffset,
388 GLsizei width,
389 GLsizei height,
390 GLsizei depth,
391 GLenum format,
392 GLenum type,
393 const void *pixels)
394 {
395 #if !defined(NOOP_SUBDATA_SUBIMAGE_FOR_MINIMIZE_GPU_WORK)
396 glTexSubImage3D(target, level, xoffset, yoffset, zoffset, 1, 1, 1, format, type, pixels);
397 #endif
398 }
399
GenerateMipmapMinimizedProc(GLenum target)400 void KHRONOS_APIENTRY GenerateMipmapMinimizedProc(GLenum target)
401 {
402 // Noop it for now. There is a risk that this will leave an incomplete mipmap chain and cause
403 // other issues. If this turns out to be a real issue with app traces, we can turn this into a
404 // glTexImage2D call for each generated level.
405 }
406
BlitFramebufferMinimizedProc(GLint srcX0,GLint srcY0,GLint srcX1,GLint srcY1,GLint dstX0,GLint dstY0,GLint dstX1,GLint dstY1,GLbitfield mask,GLenum filter)407 void KHRONOS_APIENTRY BlitFramebufferMinimizedProc(GLint srcX0,
408 GLint srcY0,
409 GLint srcX1,
410 GLint srcY1,
411 GLint dstX0,
412 GLint dstY0,
413 GLint dstX1,
414 GLint dstY1,
415 GLbitfield mask,
416 GLenum filter)
417 {
418 glBlitFramebuffer(srcX0, srcY0, srcX0 + 1, srcY0 + 1, dstX0, dstY0, dstX0 + 1, dstY0 + 1, mask,
419 filter);
420 }
421
ReadPixelsMinimizedProc(GLint x,GLint y,GLsizei width,GLsizei height,GLenum format,GLenum type,void * pixels)422 void KHRONOS_APIENTRY ReadPixelsMinimizedProc(GLint x,
423 GLint y,
424 GLsizei width,
425 GLsizei height,
426 GLenum format,
427 GLenum type,
428 void *pixels)
429 {
430 glReadPixels(x, y, 1, 1, format, type, pixels);
431 }
432
BeginTransformFeedbackMinimizedProc(GLenum primitiveMode)433 void KHRONOS_APIENTRY BeginTransformFeedbackMinimizedProc(GLenum primitiveMode)
434 {
435 glBeginTransformFeedback(GL_POINTS);
436 }
437
TraceLoadProc(const char * procName)438 angle::GenericProc KHRONOS_APIENTRY TraceLoadProc(const char *procName)
439 {
440 if (strcmp(procName, "glBindFramebuffer") == 0)
441 {
442 return reinterpret_cast<angle::GenericProc>(BindFramebufferProc);
443 }
444 if (strcmp(procName, "glInvalidateFramebuffer") == 0)
445 {
446 return reinterpret_cast<angle::GenericProc>(InvalidateFramebufferProc);
447 }
448 if (strcmp(procName, "glInvalidateSubFramebuffer") == 0)
449 {
450 return reinterpret_cast<angle::GenericProc>(InvalidateSubFramebufferProc);
451 }
452 if (strcmp(procName, "glDrawBuffers") == 0)
453 {
454 return reinterpret_cast<angle::GenericProc>(DrawBuffersProc);
455 }
456 if (strcmp(procName, "glReadBuffer") == 0)
457 {
458 return reinterpret_cast<angle::GenericProc>(ReadBufferProc);
459 }
460 if (strcmp(procName, "glDiscardFramebufferEXT") == 0)
461 {
462 return reinterpret_cast<angle::GenericProc>(DiscardFramebufferEXTProc);
463 }
464
465 if (gMinimizeGPUWork)
466 {
467 if (strcmp(procName, "glViewport") == 0)
468 {
469 return reinterpret_cast<angle::GenericProc>(ViewportMinimizedProc);
470 }
471
472 if (strcmp(procName, "glScissor") == 0)
473 {
474 return reinterpret_cast<angle::GenericProc>(ScissorMinimizedProc);
475 }
476
477 // Interpose the calls that generate actual GPU work
478 if (strcmp(procName, "glDrawElements") == 0)
479 {
480 return reinterpret_cast<angle::GenericProc>(DrawElementsMinimizedProc);
481 }
482 if (strcmp(procName, "glDrawElementsIndirect") == 0)
483 {
484 return reinterpret_cast<angle::GenericProc>(DrawElementsIndirectMinimizedProc);
485 }
486 if (strcmp(procName, "glDrawElementsInstanced") == 0 ||
487 strcmp(procName, "glDrawElementsInstancedEXT") == 0)
488 {
489 return reinterpret_cast<angle::GenericProc>(DrawElementsInstancedMinimizedProc);
490 }
491 if (strcmp(procName, "glDrawElementsBaseVertex") == 0 ||
492 strcmp(procName, "glDrawElementsBaseVertexEXT") == 0 ||
493 strcmp(procName, "glDrawElementsBaseVertexOES") == 0)
494 {
495 return reinterpret_cast<angle::GenericProc>(DrawElementsBaseVertexMinimizedProc);
496 }
497 if (strcmp(procName, "glDrawElementsInstancedBaseVertex") == 0 ||
498 strcmp(procName, "glDrawElementsInstancedBaseVertexEXT") == 0 ||
499 strcmp(procName, "glDrawElementsInstancedBaseVertexOES") == 0)
500 {
501 return reinterpret_cast<angle::GenericProc>(
502 DrawElementsInstancedBaseVertexMinimizedProc);
503 }
504 if (strcmp(procName, "glDrawRangeElements") == 0)
505 {
506 return reinterpret_cast<angle::GenericProc>(DrawRangeElementsMinimizedProc);
507 }
508 if (strcmp(procName, "glDrawArrays") == 0)
509 {
510 return reinterpret_cast<angle::GenericProc>(DrawArraysMinimizedProc);
511 }
512 if (strcmp(procName, "glDrawArraysInstanced") == 0 ||
513 strcmp(procName, "glDrawArraysInstancedEXT") == 0)
514 {
515 return reinterpret_cast<angle::GenericProc>(DrawArraysInstancedMinimizedProc);
516 }
517 if (strcmp(procName, "glDrawArraysIndirect") == 0)
518 {
519 return reinterpret_cast<angle::GenericProc>(DrawArraysIndirectMinimizedProc);
520 }
521 if (strcmp(procName, "glDispatchCompute") == 0)
522 {
523 return reinterpret_cast<angle::GenericProc>(DispatchComputeMinimizedProc);
524 }
525 if (strcmp(procName, "glDispatchComputeIndirect") == 0)
526 {
527 return reinterpret_cast<angle::GenericProc>(DispatchComputeIndirectMinimizedProc);
528 }
529
530 // Interpose the calls that generate data copying work
531 if (strcmp(procName, "glBufferData") == 0)
532 {
533 return reinterpret_cast<angle::GenericProc>(BufferDataMinimizedProc);
534 }
535 if (strcmp(procName, "glBufferSubData") == 0)
536 {
537 return reinterpret_cast<angle::GenericProc>(BufferSubDataMinimizedProc);
538 }
539 if (strcmp(procName, "glMapBufferRange") == 0 ||
540 strcmp(procName, "glMapBufferRangeEXT") == 0)
541 {
542 return reinterpret_cast<angle::GenericProc>(MapBufferRangeMinimizedProc);
543 }
544 if (strcmp(procName, "glTexImage2D") == 0)
545 {
546 return reinterpret_cast<angle::GenericProc>(TexImage2DMinimizedProc);
547 }
548 if (strcmp(procName, "glTexImage3D") == 0)
549 {
550 return reinterpret_cast<angle::GenericProc>(TexImage3DMinimizedProc);
551 }
552 if (strcmp(procName, "glTexSubImage2D") == 0)
553 {
554 return reinterpret_cast<angle::GenericProc>(TexSubImage2DMinimizedProc);
555 }
556 if (strcmp(procName, "glTexSubImage3D") == 0)
557 {
558 return reinterpret_cast<angle::GenericProc>(TexSubImage3DMinimizedProc);
559 }
560 if (strcmp(procName, "glGenerateMipmap") == 0 ||
561 strcmp(procName, "glGenerateMipmapOES") == 0)
562 {
563 return reinterpret_cast<angle::GenericProc>(GenerateMipmapMinimizedProc);
564 }
565 if (strcmp(procName, "glBlitFramebuffer") == 0)
566 {
567 return reinterpret_cast<angle::GenericProc>(BlitFramebufferMinimizedProc);
568 }
569 if (strcmp(procName, "glReadPixels") == 0)
570 {
571 return reinterpret_cast<angle::GenericProc>(ReadPixelsMinimizedProc);
572 }
573 if (strcmp(procName, "glBeginTransformFeedback") == 0)
574 {
575 return reinterpret_cast<angle::GenericProc>(BeginTransformFeedbackMinimizedProc);
576 }
577 }
578
579 return gCurrentTracePerfTest->getGLWindow()->getProcAddress(procName);
580 }
581
TracePerfTest(const TracePerfParams & params)582 TracePerfTest::TracePerfTest(const TracePerfParams ¶ms)
583 : ANGLERenderTest("TracePerf", params, "ms"), mParams(params), mStartFrame(0), mEndFrame(0)
584 {
585 // TODO: http://anglebug.com/4533 This fails after the upgrade to the 26.20.100.7870 driver.
586 if (IsWindows() && IsIntel() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE &&
587 mParams.testID == RestrictedTraceID::manhattan_10)
588 {
589 mSkipTest = true;
590 }
591
592 // TODO: http://anglebug.com/4731 Fails on older Intel drivers. Passes in newer.
593 if (IsWindows() && IsIntel() && mParams.driver != GLESDriverType::AngleEGL &&
594 mParams.testID == RestrictedTraceID::angry_birds_2_1500)
595 {
596 mSkipTest = true;
597 }
598
599 if (mParams.surfaceType != SurfaceType::Window && !gEnableAllTraceTests)
600 {
601 printf("Test skipped. Use --enable-all-trace-tests to run.\n");
602 mSkipTest = true;
603 }
604
605 if (mParams.eglParameters.deviceType != EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE &&
606 !gEnableAllTraceTests)
607 {
608 printf("Test skipped. Use --enable-all-trace-tests to run.\n");
609 mSkipTest = true;
610 }
611
612 if (mParams.testID == RestrictedTraceID::cod_mobile)
613 {
614 // TODO: http://anglebug.com/4967 Vulkan: GL_EXT_color_buffer_float not supported on Pixel 2
615 // The COD:Mobile trace uses a framebuffer attachment with:
616 // format = GL_RGB
617 // type = GL_UNSIGNED_INT_10F_11F_11F_REV
618 // That combination is only renderable if GL_EXT_color_buffer_float is supported.
619 // It happens to not be supported on Pixel 2's Vulkan driver.
620 addExtensionPrerequisite("GL_EXT_color_buffer_float");
621
622 // TODO: http://anglebug.com/4731 This extension is missing on older Intel drivers.
623 addExtensionPrerequisite("GL_OES_EGL_image_external");
624 }
625
626 if (mParams.testID == RestrictedTraceID::brawl_stars)
627 {
628 addExtensionPrerequisite("GL_EXT_shadow_samplers");
629 }
630
631 if (mParams.testID == RestrictedTraceID::free_fire)
632 {
633 addExtensionPrerequisite("GL_OES_EGL_image_external");
634 }
635
636 if (mParams.testID == RestrictedTraceID::marvel_contest_of_champions)
637 {
638 addExtensionPrerequisite("GL_EXT_color_buffer_half_float");
639 }
640
641 if (mParams.testID == RestrictedTraceID::world_of_tanks_blitz)
642 {
643 addExtensionPrerequisite("GL_EXT_disjoint_timer_query");
644 }
645
646 if (mParams.testID == RestrictedTraceID::dragon_ball_legends)
647 {
648 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
649 }
650
651 if (mParams.testID == RestrictedTraceID::lego_legacy)
652 {
653 addExtensionPrerequisite("GL_EXT_shadow_samplers");
654 }
655
656 if (mParams.testID == RestrictedTraceID::world_war_doh)
657 {
658 // Linux+Nvidia doesn't support GL_KHR_texture_compression_astc_ldr (possibly others also)
659 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
660 }
661
662 if (mParams.testID == RestrictedTraceID::saint_seiya_awakening)
663 {
664 addExtensionPrerequisite("GL_EXT_shadow_samplers");
665
666 // TODO(https://anglebug.com/5517) Linux+Intel generates "Framebuffer is incomplete" errors.
667 if (IsLinux() && IsIntel() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
668 {
669 mSkipTest = true;
670 }
671 }
672
673 if (mParams.testID == RestrictedTraceID::magic_tiles_3)
674 {
675 // Linux+Nvidia doesn't support GL_KHR_texture_compression_astc_ldr (possibly others also)
676 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
677 }
678
679 if (mParams.testID == RestrictedTraceID::real_gangster_crime)
680 {
681 // Linux+Nvidia doesn't support GL_KHR_texture_compression_astc_ldr (possibly others also)
682 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
683
684 // Intel doesn't support external images.
685 addExtensionPrerequisite("GL_OES_EGL_image_external");
686
687 // Failing on Linux Intel and AMD due to invalid enum. http://anglebug.com/5822
688 if (IsLinux() && (IsIntel() || IsAMD()) && mParams.driver != GLESDriverType::AngleEGL)
689 {
690 mSkipTest = true;
691 }
692 }
693
694 if (mParams.testID == RestrictedTraceID::asphalt_8)
695 {
696 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
697 }
698
699 if (mParams.testID == RestrictedTraceID::hearthstone)
700 {
701 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
702 }
703
704 if (mParams.testID == RestrictedTraceID::efootball_pes_2021)
705 {
706 // TODO(https://anglebug.com/5517) Linux+Intel and Pixel 2 generate "Framebuffer is
707 // incomplete" errors with the Vulkan backend.
708 if (mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE &&
709 ((IsLinux() && IsIntel()) || IsPixel2()))
710 {
711 mSkipTest = true;
712 }
713 }
714
715 if (mParams.testID == RestrictedTraceID::manhattan_31)
716 {
717 // TODO: http://anglebug.com/5591 Trace crashes on Pixel 2 in vulkan driver
718 if (IsPixel2() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
719 {
720 mSkipTest = true;
721 }
722 }
723
724 if (mParams.testID == RestrictedTraceID::idle_heroes)
725 {
726 // TODO: http://anglebug.com/5591 Trace crashes on Pixel 2
727 if (IsPixel2())
728 {
729 mSkipTest = true;
730 }
731 }
732
733 if (mParams.testID == RestrictedTraceID::shadow_fight_2)
734 {
735 addExtensionPrerequisite("GL_OES_EGL_image_external");
736 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
737 }
738
739 if (mParams.testID == RestrictedTraceID::rise_of_kingdoms)
740 {
741 addExtensionPrerequisite("GL_OES_EGL_image_external");
742 }
743
744 if (mParams.testID == RestrictedTraceID::happy_color)
745 {
746 if (IsWindows() && IsAMD() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
747 {
748 mSkipTest = true;
749 }
750 }
751
752 if (mParams.testID == RestrictedTraceID::bus_simulator_indonesia)
753 {
754 // TODO(https://anglebug.com/5629) Linux+(Intel|AMD) native GLES generates
755 // GL_INVALID_OPERATION
756 if (IsLinux() && (IsIntel() || IsAMD()) &&
757 mParams.getRenderer() != EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
758 {
759 mSkipTest = true;
760 }
761 }
762
763 if (mParams.testID == RestrictedTraceID::messenger_lite)
764 {
765 // TODO: https://anglebug.com/5663 Incorrect pixels on Nvidia Windows for first frame
766 if (IsWindows() && IsNVIDIA() &&
767 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE &&
768 mParams.getDeviceType() != EGL_PLATFORM_ANGLE_DEVICE_TYPE_SWIFTSHADER_ANGLE)
769 {
770 mSkipTest = true;
771 }
772 }
773
774 if (mParams.testID == RestrictedTraceID::among_us)
775 {
776 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
777 }
778
779 if (mParams.testID == RestrictedTraceID::car_parking_multiplayer)
780 {
781 // TODO: https://anglebug.com/5613 Nvidia native driver spews undefined behavior warnings
782 if (IsNVIDIA() && mParams.getRenderer() != EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
783 {
784 mSkipTest = true;
785 }
786 // TODO: https://anglebug.com/5724 Device lost on Win Intel
787 if (IsWindows() && IsIntel() &&
788 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
789 {
790 mSkipTest = true;
791 }
792 }
793
794 if (mParams.testID == RestrictedTraceID::fifa_mobile)
795 {
796 // TODO: http://anglebug.com/5875 Intel Windows Vulkan flakily renders entirely black
797 if (IsWindows() && IsIntel() &&
798 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
799 {
800 mSkipTest = true;
801 }
802 }
803
804 if (mParams.testID == RestrictedTraceID::rope_hero_vice_town)
805 {
806 // TODO: http://anglebug.com/5716 Trace crashes on Pixel 2 in vulkan driver
807 if (IsPixel2() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
808 {
809 mSkipTest = true;
810 }
811 }
812
813 if (mParams.testID == RestrictedTraceID::extreme_car_driving_simulator)
814 {
815 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
816 }
817
818 if (mParams.testID == RestrictedTraceID::plants_vs_zombies_2)
819 {
820 // TODO: http://crbug.com/1187752 Corrupted image
821 if (IsWindows() && IsAMD() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
822 {
823 mSkipTest = true;
824 }
825 }
826
827 if (mParams.testID == RestrictedTraceID::junes_journey)
828 {
829 addExtensionPrerequisite("GL_OES_EGL_image_external");
830 }
831
832 if (mParams.testID == RestrictedTraceID::ragnarok_m_eternal_love)
833 {
834 addExtensionPrerequisite("GL_OES_EGL_image_external");
835 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
836
837 // TODO: http://anglebug.com/5772 Pixel 2 errors with "Framebuffer is incomplete" on Vulkan
838 if (IsPixel2() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
839 {
840 mSkipTest = true;
841 }
842 }
843
844 if (mParams.testID == RestrictedTraceID::real_cricket_20)
845 {
846 // TODO: http://anglebug.com/5777 ARM doesn't have enough VS storage blocks
847 if (IsAndroid() && IsARM())
848 {
849 mSkipTest = true;
850 }
851 }
852
853 if (mParams.testID == RestrictedTraceID::league_of_legends_wild_rift)
854 {
855 addExtensionPrerequisite("GL_OES_EGL_image_external");
856 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
857
858 // TODO: http://anglebug.com/5815 Trace is crashing on Intel Linux
859 if (IsLinux() && IsIntel() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
860 {
861 mSkipTest = true;
862 }
863 }
864
865 if (mParams.testID == RestrictedTraceID::aztec_ruins)
866 {
867 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
868
869 // TODO: http://anglebug.com/5553 Pixel 2 errors with "Framebuffer is incomplete" on Vulkan
870 if (IsPixel2() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
871 {
872 mSkipTest = true;
873 }
874 }
875
876 if (mParams.testID == RestrictedTraceID::dragon_raja)
877 {
878 addExtensionPrerequisite("GL_OES_EGL_image_external");
879
880 // TODO: http://anglebug.com/5807 Intel Linux and Pixel 2 error with "Framebuffer is
881 // incomplete" on Vulkan
882 if (((IsLinux() && IsIntel()) || IsPixel2()) &&
883 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
884 {
885 mSkipTest = true;
886 }
887 }
888
889 // Adreno gives a driver error with empty/small draw calls. http://anglebug.com/5823
890 if (mParams.testID == RestrictedTraceID::hill_climb_racing)
891 {
892 if (IsAndroid() && (IsPixel2() || IsPixel4()) &&
893 mParams.driver == GLESDriverType::SystemEGL)
894 {
895 mSkipTest = true;
896 }
897 }
898
899 if (mParams.testID == RestrictedTraceID::avakin_life)
900 {
901 addExtensionPrerequisite("GL_OES_EGL_image_external");
902 }
903
904 if (mParams.testID == RestrictedTraceID::professional_baseball_spirits)
905 {
906 // TODO(https://anglebug.com/5827) Linux+Mesa/RADV Vulkan generates
907 // GL_INVALID_FRAMEBUFFER_OPERATION.
908 // Mesa versions below 20.3.5 produce the same issue on Linux+Mesa/Intel Vulkan
909 if (IsLinux() && (IsAMD() || IsIntel()) &&
910 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE &&
911 mParams.eglParameters.deviceType != EGL_PLATFORM_ANGLE_DEVICE_TYPE_SWIFTSHADER_ANGLE)
912 {
913 mSkipTest = true;
914 }
915 }
916
917 if (mParams.testID == RestrictedTraceID::call_break_offline_card_game)
918 {
919 // TODO: http://anglebug.com/5837 Intel Linux Vulkan errors with "Framebuffer is incomplete"
920 if ((IsLinux() && IsIntel()) &&
921 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
922 {
923 mSkipTest = true;
924 }
925 }
926
927 if (mParams.testID == RestrictedTraceID::slingshot_test1 ||
928 mParams.testID == RestrictedTraceID::slingshot_test2)
929 {
930 // TODO: http://anglebug.com/5877 Trace crashes on Pixel 2 in vulkan driver
931 if (IsPixel2() && mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
932 {
933 mSkipTest = true;
934 }
935 }
936
937 if (mParams.testID == RestrictedTraceID::ludo_king)
938 {
939 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
940 }
941
942 // TODO: http://anglebug.com/5943 GL_INVALID_ENUM on Windows/Intel.
943 if (mParams.testID == RestrictedTraceID::summoners_war)
944 {
945 if (IsWindows() && IsIntel() && mParams.driver != GLESDriverType::AngleEGL)
946 {
947 mSkipTest = true;
948 }
949 }
950
951 if (mParams.testID == RestrictedTraceID::pokemon_go)
952 {
953 addExtensionPrerequisite("GL_EXT_texture_cube_map_array");
954 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
955
956 // TODO: http://anglebug.com/5989 Intel Linux crashing on teardown
957 // TODO: http://anglebug.com/5994 Intel Windows timing out periodically
958 if ((IsLinux() || IsWindows()) && IsIntel() &&
959 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
960 {
961 mSkipTest = true;
962 }
963 }
964
965 if (mParams.testID == RestrictedTraceID::cookie_run_kingdom)
966 {
967 addExtensionPrerequisite("GL_EXT_texture_cube_map_array");
968 addExtensionPrerequisite("GL_OES_EGL_image_external");
969
970 // TODO: http://anglebug.com/6017 ARM doesn't have enough VS storage blocks
971 if (IsAndroid() && IsARM())
972 {
973 mSkipTest = true;
974 }
975 }
976
977 if (mParams.testID == RestrictedTraceID::genshin_impact)
978 {
979 addExtensionPrerequisite("GL_KHR_texture_compression_astc_ldr");
980
981 // TODO: http://anglebug.com/6023 Crashes on Pixel 2 in vulkan driver
982 // TODO: http://anglebug.com/6029 Crashes on Linux Intel Vulkan
983 if (((IsLinux() && IsIntel()) || IsPixel2()) &&
984 mParams.getRenderer() == EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE)
985 {
986 mSkipTest = true;
987 }
988 }
989
990 // We already swap in TracePerfTest::drawBenchmark, no need to swap again in the harness.
991 disableTestHarnessSwap();
992
993 gCurrentTracePerfTest = this;
994 }
995
initializeBenchmark()996 void TracePerfTest::initializeBenchmark()
997 {
998 const TraceInfo &traceInfo = GetTraceInfo(mParams.testID);
999
1000 mStartingDirectory = angle::GetCWD().value();
1001
1002 std::stringstream traceNameStr;
1003 traceNameStr << "angle_restricted_trace_" << traceInfo.name;
1004 std::string traceName = traceNameStr.str();
1005 mTraceLibrary.reset(new TraceLibrary(traceName.c_str()));
1006
1007 // To load the trace data path correctly we set the CWD to the executable dir.
1008 if (!IsAndroid())
1009 {
1010 std::string exeDir = angle::GetExecutableDirectory();
1011 angle::SetCWD(exeDir.c_str());
1012 }
1013
1014 trace_angle::LoadGLES(TraceLoadProc);
1015
1016 if (!mTraceLibrary->valid())
1017 {
1018 ERR() << "Could not load trace library.";
1019 mSkipTest = true;
1020 return;
1021 }
1022
1023 mStartFrame = traceInfo.startFrame;
1024 mEndFrame = traceInfo.endFrame;
1025 mTraceLibrary->setBinaryDataDecompressCallback(DecompressBinaryData);
1026
1027 std::string relativeTestDataDir = std::string("src/tests/restricted_traces/") + traceInfo.name;
1028
1029 constexpr size_t kMaxDataDirLen = 1000;
1030 char testDataDir[kMaxDataDirLen];
1031 if (!angle::FindTestDataPath(relativeTestDataDir.c_str(), testDataDir, kMaxDataDirLen))
1032 {
1033 ERR() << "Could not find test data folder.";
1034 mSkipTest = true;
1035 return;
1036 }
1037
1038 mTraceLibrary->setBinaryDataDir(testDataDir);
1039
1040 if (gMinimizeGPUWork)
1041 {
1042 // Shrink the offscreen window to 1x1.
1043 mWindowWidth = 1;
1044 mWindowHeight = 1;
1045 }
1046 else
1047 {
1048 mWindowWidth = mTestParams.windowWidth;
1049 mWindowHeight = mTestParams.windowHeight;
1050 }
1051 mCurrentFrame = mStartFrame;
1052
1053 if (IsAndroid())
1054 {
1055 // On Android, set the orientation used by the app, based on width/height
1056 getWindow()->setOrientation(mTestParams.windowWidth, mTestParams.windowHeight);
1057 }
1058
1059 // If we're rendering offscreen we set up a default backbuffer.
1060 if (mParams.surfaceType == SurfaceType::Offscreen)
1061 {
1062 if (!IsAndroid())
1063 {
1064 mWindowWidth *= 4;
1065 mWindowHeight *= 4;
1066 }
1067
1068 glGenRenderbuffers(1, &mOffscreenDepthStencil);
1069 glBindRenderbuffer(GL_RENDERBUFFER, mOffscreenDepthStencil);
1070 glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, mWindowWidth, mWindowHeight);
1071 glBindRenderbuffer(GL_RENDERBUFFER, 0);
1072
1073 glGenFramebuffers(mMaxOffscreenBufferCount, mOffscreenFramebuffers.data());
1074 glGenTextures(mMaxOffscreenBufferCount, mOffscreenTextures.data());
1075 for (int i = 0; i < mMaxOffscreenBufferCount; i++)
1076 {
1077 glBindFramebuffer(GL_FRAMEBUFFER, mOffscreenFramebuffers[i]);
1078
1079 // Hard-code RGBA8/D24S8. This should be specified in the trace info.
1080 glBindTexture(GL_TEXTURE_2D, mOffscreenTextures[i]);
1081 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, mWindowWidth, mWindowHeight, 0, GL_RGBA,
1082 GL_UNSIGNED_BYTE, nullptr);
1083
1084 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
1085 mOffscreenTextures[i], 0);
1086 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER,
1087 mOffscreenDepthStencil);
1088 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER,
1089 mOffscreenDepthStencil);
1090 glBindTexture(GL_TEXTURE_2D, 0);
1091 }
1092 }
1093
1094 // Potentially slow. Can load a lot of resources.
1095 mTraceLibrary->setupReplay();
1096
1097 glFinish();
1098
1099 ASSERT_TRUE(mEndFrame > mStartFrame);
1100
1101 getWindow()->ignoreSizeEvents();
1102 getWindow()->setVisible(true);
1103
1104 // If we're re-tracing, trigger capture start after setup. This ensures the Setup function gets
1105 // recaptured into another Setup function and not merged with the first frame.
1106 if (angle::gRetraceMode)
1107 {
1108 angle::SetEnvironmentVar("ANGLE_CAPTURE_TRIGGER", "0");
1109 getGLWindow()->swap();
1110 }
1111 }
1112
1113 #undef TRACE_TEST_CASE
1114
destroyBenchmark()1115 void TracePerfTest::destroyBenchmark()
1116 {
1117 if (mParams.surfaceType == SurfaceType::Offscreen)
1118 {
1119 glDeleteTextures(mMaxOffscreenBufferCount, mOffscreenTextures.data());
1120 mOffscreenTextures.fill(0);
1121
1122 glDeleteRenderbuffers(1, &mOffscreenDepthStencil);
1123 mOffscreenDepthStencil = 0;
1124
1125 glDeleteFramebuffers(mMaxOffscreenBufferCount, mOffscreenFramebuffers.data());
1126 mOffscreenFramebuffers.fill(0);
1127 }
1128
1129 mTraceLibrary->finishReplay();
1130 mTraceLibrary.reset(nullptr);
1131
1132 // In order for the next test to load, restore the working directory
1133 angle::SetCWD(mStartingDirectory.c_str());
1134 }
1135
sampleTime()1136 void TracePerfTest::sampleTime()
1137 {
1138 if (mUseTimestampQueries)
1139 {
1140 GLint64 glTime;
1141 // glGetInteger64vEXT is exported by newer versions of the timer query extensions.
1142 // Unfortunately only the core EP is exposed by some desktop drivers (e.g. NVIDIA).
1143 if (glGetInteger64vEXT)
1144 {
1145 glGetInteger64vEXT(GL_TIMESTAMP_EXT, &glTime);
1146 }
1147 else
1148 {
1149 glGetInteger64v(GL_TIMESTAMP_EXT, &glTime);
1150 }
1151 mTimeline.push_back({glTime, angle::GetHostTimeSeconds()});
1152 }
1153 }
1154
drawBenchmark()1155 void TracePerfTest::drawBenchmark()
1156 {
1157 constexpr uint32_t kFramesPerX = 6;
1158 constexpr uint32_t kFramesPerY = 4;
1159 constexpr uint32_t kFramesPerXY = kFramesPerY * kFramesPerX;
1160
1161 const uint32_t kOffscreenOffsetX =
1162 static_cast<uint32_t>(static_cast<double>(mTestParams.windowWidth) / 3.0f);
1163 const uint32_t kOffscreenOffsetY =
1164 static_cast<uint32_t>(static_cast<double>(mTestParams.windowHeight) / 3.0f);
1165 const uint32_t kOffscreenWidth = kOffscreenOffsetX;
1166 const uint32_t kOffscreenHeight = kOffscreenOffsetY;
1167
1168 const uint32_t kOffscreenFrameWidth = static_cast<uint32_t>(
1169 static_cast<double>(kOffscreenWidth / static_cast<double>(kFramesPerX)));
1170 const uint32_t kOffscreenFrameHeight = static_cast<uint32_t>(
1171 static_cast<double>(kOffscreenHeight / static_cast<double>(kFramesPerY)));
1172
1173 // Add a time sample from GL and the host.
1174 if (mCurrentFrame == mStartFrame)
1175 {
1176 sampleTime();
1177 }
1178
1179 if (mParams.surfaceType == SurfaceType::Offscreen)
1180 {
1181 // Some driver (ARM and ANGLE) try to nop or defer the glFlush if it is called within the
1182 // renderpass to avoid breaking renderpass (performance reason). For app traces that does
1183 // not use any FBO, when we run in the offscreen mode, there is no frame boundary and
1184 // glFlush call we issued at end of frame will get skipped. To overcome this (and also
1185 // matches what onscreen double buffering behavior as well), we use two offscreen FBOs and
1186 // ping pong between them for each frame.
1187 glBindFramebuffer(GL_FRAMEBUFFER,
1188 mOffscreenFramebuffers[mTotalFrameCount % mMaxOffscreenBufferCount]);
1189 }
1190
1191 char frameName[32];
1192 sprintf(frameName, "Frame %u", mCurrentFrame);
1193 beginInternalTraceEvent(frameName);
1194
1195 startGpuTimer();
1196 mTraceLibrary->replayFrame(mCurrentFrame);
1197 stopGpuTimer();
1198
1199 if (mParams.surfaceType == SurfaceType::Offscreen)
1200 {
1201 if (gMinimizeGPUWork)
1202 {
1203 // To keep GPU work minimum, we skip the blit.
1204 glFlush();
1205 mOffscreenFrameCount++;
1206 }
1207 else
1208 {
1209 GLint currentDrawFBO, currentReadFBO;
1210 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, ¤tDrawFBO);
1211 glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, ¤tReadFBO);
1212
1213 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
1214 glBindFramebuffer(
1215 GL_READ_FRAMEBUFFER,
1216 mOffscreenFramebuffers[mOffscreenFrameCount % mMaxOffscreenBufferCount]);
1217
1218 uint32_t frameX = (mOffscreenFrameCount % kFramesPerXY) % kFramesPerX;
1219 uint32_t frameY = (mOffscreenFrameCount % kFramesPerXY) / kFramesPerX;
1220 uint32_t windowX = kOffscreenOffsetX + frameX * kOffscreenFrameWidth;
1221 uint32_t windowY = kOffscreenOffsetY + frameY * kOffscreenFrameHeight;
1222
1223 if (gVerboseLogging)
1224 {
1225 printf("Frame %d: x %d y %d (screen x %d, screen y %d)\n", mOffscreenFrameCount,
1226 frameX, frameY, windowX, windowY);
1227 }
1228
1229 GLboolean scissorTest = GL_FALSE;
1230 glGetBooleanv(GL_SCISSOR_TEST, &scissorTest);
1231
1232 if (scissorTest)
1233 {
1234 glDisable(GL_SCISSOR_TEST);
1235 }
1236
1237 glBlitFramebuffer(0, 0, mWindowWidth, mWindowHeight, windowX, windowY,
1238 windowX + kOffscreenFrameWidth, windowY + kOffscreenFrameHeight,
1239 GL_COLOR_BUFFER_BIT, GL_NEAREST);
1240
1241 if (frameX == kFramesPerX - 1 && frameY == kFramesPerY - 1)
1242 {
1243 swap();
1244 glBindFramebuffer(GL_FRAMEBUFFER, 0);
1245 glClear(GL_COLOR_BUFFER_BIT);
1246 mOffscreenFrameCount = 0;
1247 }
1248 else
1249 {
1250 glFlush();
1251 mOffscreenFrameCount++;
1252 }
1253
1254 if (scissorTest)
1255 {
1256 glEnable(GL_SCISSOR_TEST);
1257 }
1258 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, currentDrawFBO);
1259 glBindFramebuffer(GL_READ_FRAMEBUFFER, currentReadFBO);
1260 }
1261
1262 mTotalFrameCount++;
1263 }
1264 else
1265 {
1266 swap();
1267 }
1268
1269 endInternalTraceEvent(frameName);
1270
1271 if (mCurrentFrame == mEndFrame)
1272 {
1273 mTraceLibrary->resetReplay();
1274 mCurrentFrame = mStartFrame;
1275 }
1276 else
1277 {
1278 mCurrentFrame++;
1279 }
1280
1281 // Process any running queries once per iteration.
1282 for (size_t queryIndex = 0; queryIndex < mRunningQueries.size();)
1283 {
1284 const QueryInfo &query = mRunningQueries[queryIndex];
1285
1286 GLuint endResultAvailable = 0;
1287 glGetQueryObjectuivEXT(query.endTimestampQuery, GL_QUERY_RESULT_AVAILABLE,
1288 &endResultAvailable);
1289
1290 if (endResultAvailable == GL_TRUE)
1291 {
1292 char fboName[32];
1293 sprintf(fboName, "FBO %u", query.framebuffer);
1294
1295 GLint64 beginTimestamp = 0;
1296 glGetQueryObjecti64vEXT(query.beginTimestampQuery, GL_QUERY_RESULT, &beginTimestamp);
1297 glDeleteQueriesEXT(1, &query.beginTimestampQuery);
1298 double beginHostTime = getHostTimeFromGLTime(beginTimestamp);
1299 beginGLTraceEvent(fboName, beginHostTime);
1300
1301 GLint64 endTimestamp = 0;
1302 glGetQueryObjecti64vEXT(query.endTimestampQuery, GL_QUERY_RESULT, &endTimestamp);
1303 glDeleteQueriesEXT(1, &query.endTimestampQuery);
1304 double endHostTime = getHostTimeFromGLTime(endTimestamp);
1305 endGLTraceEvent(fboName, endHostTime);
1306
1307 mRunningQueries.erase(mRunningQueries.begin() + queryIndex);
1308 }
1309 else
1310 {
1311 queryIndex++;
1312 }
1313 }
1314 }
1315
1316 // Converts a GL timestamp into a host-side CPU time aligned with "GetHostTimeSeconds".
1317 // This check is necessary to line up sampled trace events in a consistent timeline.
1318 // Uses a linear interpolation from a series of samples. We do a blocking call to sample
1319 // both host and GL time once per swap. We then find the two closest GL timestamps and
1320 // interpolate the host times between them to compute our result. If we are past the last
1321 // GL timestamp we sample a new data point pair.
getHostTimeFromGLTime(GLint64 glTime)1322 double TracePerfTest::getHostTimeFromGLTime(GLint64 glTime)
1323 {
1324 // Find two samples to do a lerp.
1325 size_t firstSampleIndex = mTimeline.size() - 1;
1326 while (firstSampleIndex > 0)
1327 {
1328 if (mTimeline[firstSampleIndex].glTime < glTime)
1329 {
1330 break;
1331 }
1332 firstSampleIndex--;
1333 }
1334
1335 // Add an extra sample if we're missing an ending sample.
1336 if (firstSampleIndex == mTimeline.size() - 1)
1337 {
1338 sampleTime();
1339 }
1340
1341 const TimeSample &start = mTimeline[firstSampleIndex];
1342 const TimeSample &end = mTimeline[firstSampleIndex + 1];
1343
1344 // Note: we have observed in some odd cases later timestamps producing values that are
1345 // smaller than preceding timestamps. This bears further investigation.
1346
1347 // Compute the scaling factor for the lerp.
1348 double glDelta = static_cast<double>(glTime - start.glTime);
1349 double glRange = static_cast<double>(end.glTime - start.glTime);
1350 double t = glDelta / glRange;
1351
1352 // Lerp(t1, t2, t)
1353 double hostRange = end.hostTime - start.hostTime;
1354 return mTimeline[firstSampleIndex].hostTime + hostRange * t;
1355 }
1356
1357 // Triggered when the replay calls glBindFramebuffer.
onReplayFramebufferChange(GLenum target,GLuint framebuffer)1358 void TracePerfTest::onReplayFramebufferChange(GLenum target, GLuint framebuffer)
1359 {
1360 if (framebuffer == 0 && mParams.surfaceType == SurfaceType::Offscreen)
1361 {
1362 glBindFramebuffer(target,
1363 mOffscreenFramebuffers[mTotalFrameCount % mMaxOffscreenBufferCount]);
1364 }
1365 else
1366 {
1367 glBindFramebuffer(target, framebuffer);
1368 }
1369
1370 switch (target)
1371 {
1372 case GL_FRAMEBUFFER:
1373 mDrawFramebufferBinding = framebuffer;
1374 mReadFramebufferBinding = framebuffer;
1375 break;
1376 case GL_DRAW_FRAMEBUFFER:
1377 mDrawFramebufferBinding = framebuffer;
1378 break;
1379 case GL_READ_FRAMEBUFFER:
1380 mReadFramebufferBinding = framebuffer;
1381 return;
1382
1383 default:
1384 UNREACHABLE();
1385 break;
1386 }
1387
1388 if (!mUseTimestampQueries)
1389 return;
1390
1391 // We have at most one active timestamp query at a time. This code will end the current
1392 // query and immediately start a new one.
1393 if (mCurrentQuery.beginTimestampQuery != 0)
1394 {
1395 glGenQueriesEXT(1, &mCurrentQuery.endTimestampQuery);
1396 glQueryCounterEXT(mCurrentQuery.endTimestampQuery, GL_TIMESTAMP_EXT);
1397 mRunningQueries.push_back(mCurrentQuery);
1398 mCurrentQuery = {};
1399 }
1400
1401 ASSERT(mCurrentQuery.beginTimestampQuery == 0);
1402
1403 glGenQueriesEXT(1, &mCurrentQuery.beginTimestampQuery);
1404 glQueryCounterEXT(mCurrentQuery.beginTimestampQuery, GL_TIMESTAMP_EXT);
1405 mCurrentQuery.framebuffer = framebuffer;
1406 }
1407
isDefaultFramebuffer(GLenum target) const1408 bool TracePerfTest::isDefaultFramebuffer(GLenum target) const
1409 {
1410 switch (target)
1411 {
1412 case GL_FRAMEBUFFER:
1413 case GL_DRAW_FRAMEBUFFER:
1414 return (mDrawFramebufferBinding == 0);
1415
1416 case GL_READ_FRAMEBUFFER:
1417 return (mReadFramebufferBinding == 0);
1418
1419 default:
1420 UNREACHABLE();
1421 return false;
1422 }
1423 }
1424
ConvertDefaultFramebufferEnum(GLenum value)1425 GLenum ConvertDefaultFramebufferEnum(GLenum value)
1426 {
1427 switch (value)
1428 {
1429 case GL_NONE:
1430 return GL_NONE;
1431 case GL_BACK:
1432 case GL_COLOR:
1433 return GL_COLOR_ATTACHMENT0;
1434 case GL_DEPTH:
1435 return GL_DEPTH_ATTACHMENT;
1436 case GL_STENCIL:
1437 return GL_STENCIL_ATTACHMENT;
1438 case GL_DEPTH_STENCIL:
1439 return GL_DEPTH_STENCIL_ATTACHMENT;
1440 default:
1441 UNREACHABLE();
1442 return GL_NONE;
1443 }
1444 }
1445
ConvertDefaultFramebufferEnums(GLsizei numAttachments,const GLenum * attachments)1446 std::vector<GLenum> ConvertDefaultFramebufferEnums(GLsizei numAttachments,
1447 const GLenum *attachments)
1448 {
1449 std::vector<GLenum> translatedAttachments;
1450 for (GLsizei attachmentIndex = 0; attachmentIndex < numAttachments; ++attachmentIndex)
1451 {
1452 GLenum converted = ConvertDefaultFramebufferEnum(attachments[attachmentIndex]);
1453 translatedAttachments.push_back(converted);
1454 }
1455 return translatedAttachments;
1456 }
1457
1458 // Needs special handling to treat the 0 framebuffer in offscreen mode.
onReplayInvalidateFramebuffer(GLenum target,GLsizei numAttachments,const GLenum * attachments)1459 void TracePerfTest::onReplayInvalidateFramebuffer(GLenum target,
1460 GLsizei numAttachments,
1461 const GLenum *attachments)
1462 {
1463 if (mParams.surfaceType != SurfaceType::Offscreen || !isDefaultFramebuffer(target))
1464 {
1465 glInvalidateFramebuffer(target, numAttachments, attachments);
1466 }
1467 else
1468 {
1469 std::vector<GLenum> translatedAttachments =
1470 ConvertDefaultFramebufferEnums(numAttachments, attachments);
1471 glInvalidateFramebuffer(target, numAttachments, translatedAttachments.data());
1472 }
1473 }
1474
onReplayInvalidateSubFramebuffer(GLenum target,GLsizei numAttachments,const GLenum * attachments,GLint x,GLint y,GLsizei width,GLsizei height)1475 void TracePerfTest::onReplayInvalidateSubFramebuffer(GLenum target,
1476 GLsizei numAttachments,
1477 const GLenum *attachments,
1478 GLint x,
1479 GLint y,
1480 GLsizei width,
1481 GLsizei height)
1482 {
1483 if (mParams.surfaceType != SurfaceType::Offscreen || !isDefaultFramebuffer(target))
1484 {
1485 glInvalidateSubFramebuffer(target, numAttachments, attachments, x, y, width, height);
1486 }
1487 else
1488 {
1489 std::vector<GLenum> translatedAttachments =
1490 ConvertDefaultFramebufferEnums(numAttachments, attachments);
1491 glInvalidateSubFramebuffer(target, numAttachments, translatedAttachments.data(), x, y,
1492 width, height);
1493 }
1494 }
1495
onReplayDrawBuffers(GLsizei n,const GLenum * bufs)1496 void TracePerfTest::onReplayDrawBuffers(GLsizei n, const GLenum *bufs)
1497 {
1498 if (mParams.surfaceType != SurfaceType::Offscreen || !isDefaultFramebuffer(GL_DRAW_FRAMEBUFFER))
1499 {
1500 glDrawBuffers(n, bufs);
1501 }
1502 else
1503 {
1504 std::vector<GLenum> translatedBufs = ConvertDefaultFramebufferEnums(n, bufs);
1505 glDrawBuffers(n, translatedBufs.data());
1506 }
1507 }
1508
onReplayReadBuffer(GLenum src)1509 void TracePerfTest::onReplayReadBuffer(GLenum src)
1510 {
1511 if (mParams.surfaceType != SurfaceType::Offscreen || !isDefaultFramebuffer(GL_READ_FRAMEBUFFER))
1512 {
1513 glReadBuffer(src);
1514 }
1515 else
1516 {
1517 GLenum translated = ConvertDefaultFramebufferEnum(src);
1518 glReadBuffer(translated);
1519 }
1520 }
1521
onReplayDiscardFramebufferEXT(GLenum target,GLsizei numAttachments,const GLenum * attachments)1522 void TracePerfTest::onReplayDiscardFramebufferEXT(GLenum target,
1523 GLsizei numAttachments,
1524 const GLenum *attachments)
1525 {
1526 if (mParams.surfaceType != SurfaceType::Offscreen || !isDefaultFramebuffer(target))
1527 {
1528 glDiscardFramebufferEXT(target, numAttachments, attachments);
1529 }
1530 else
1531 {
1532 std::vector<GLenum> translatedAttachments =
1533 ConvertDefaultFramebufferEnums(numAttachments, attachments);
1534 glDiscardFramebufferEXT(target, numAttachments, translatedAttachments.data());
1535 }
1536 }
1537
swap()1538 void TracePerfTest::swap()
1539 {
1540 // Capture a screenshot if enabled.
1541 if (gScreenShotDir != nullptr && !mScreenshotSaved &&
1542 static_cast<uint32_t>(gScreenShotFrame) == mCurrentFrame)
1543 {
1544 std::stringstream screenshotNameStr;
1545 screenshotNameStr << gScreenShotDir << GetPathSeparator() << "angle" << mBackend << "_"
1546 << mStory;
1547
1548 // Add a marker to the name for any screenshot that isn't start frame
1549 if (mStartFrame != static_cast<uint32_t>(gScreenShotFrame))
1550 {
1551 screenshotNameStr << "_frame" << gScreenShotFrame;
1552 }
1553
1554 screenshotNameStr << ".png";
1555
1556 std::string screenshotName = screenshotNameStr.str();
1557 saveScreenshot(screenshotName);
1558 mScreenshotSaved = true;
1559 }
1560
1561 getGLWindow()->swap();
1562 }
1563
saveScreenshot(const std::string & screenshotName)1564 void TracePerfTest::saveScreenshot(const std::string &screenshotName)
1565 {
1566 // The frame is already rendered and is waiting in the default framebuffer.
1567
1568 // RGBA 4-byte data.
1569 uint32_t pixelCount = mTestParams.windowWidth * mTestParams.windowHeight;
1570 std::vector<uint8_t> pixelData(pixelCount * 4);
1571
1572 // Only unbind the framebuffer on context versions where it's available.
1573 const TraceInfo &traceInfo = GetTraceInfo(mParams.testID);
1574 if (traceInfo.contextClientMajorVersion > 1)
1575 {
1576 glBindFramebuffer(GL_FRAMEBUFFER, 0);
1577 }
1578
1579 glReadPixels(0, 0, mTestParams.windowWidth, mTestParams.windowHeight, GL_RGBA, GL_UNSIGNED_BYTE,
1580 pixelData.data());
1581
1582 // Convert to RGB and flip y.
1583 std::vector<uint8_t> rgbData(pixelCount * 3);
1584 for (EGLint y = 0; y < mTestParams.windowHeight; ++y)
1585 {
1586 for (EGLint x = 0; x < mTestParams.windowWidth; ++x)
1587 {
1588 EGLint srcPixel = x + y * mTestParams.windowWidth;
1589 EGLint dstPixel = x + (mTestParams.windowHeight - y - 1) * mTestParams.windowWidth;
1590 memcpy(&rgbData[dstPixel * 3], &pixelData[srcPixel * 4], 3);
1591 }
1592 }
1593
1594 if (!angle::SavePNGRGB(screenshotName.c_str(), "ANGLE Screenshot", mTestParams.windowWidth,
1595 mTestParams.windowHeight, rgbData))
1596 {
1597 FAIL() << "Error saving screenshot: " << screenshotName;
1598 }
1599 else
1600 {
1601 printf("Saved screenshot: '%s'\n", screenshotName.c_str());
1602 }
1603 }
1604
// Returns a copy of |in| bound to trace |id|, with the context version and window size taken
// from that trace's TraceInfo.
TracePerfParams CombineTestID(const TracePerfParams &in, RestrictedTraceID id)
{
    TracePerfParams combined = in;
    combined.testID          = id;

    const TraceInfo &info = GetTraceInfo(id);

    // Context version recorded for the trace.
    combined.majorVersion = info.contextClientMajorVersion;
    combined.minorVersion = info.contextClientMinorVersion;

    // Draw surface size recorded for the trace.
    combined.windowWidth  = info.drawSurfaceWidth;
    combined.windowHeight = info.drawSurfaceHeight;

    return combined;
}
1617
// Returns a copy of |in| configured for |surfaceType|. Offscreen runs outside Android get a
// window a quarter of the size in each dimension, and GPU time tracking is switched on only for
// the vsync'd window surface.
TracePerfParams CombineWithSurfaceType(const TracePerfParams &in, SurfaceType surfaceType)
{
    TracePerfParams result = in;
    result.surfaceType     = surfaceType;

    const bool shrinkWindow = !IsAndroid() && surfaceType == SurfaceType::Offscreen;
    if (shrinkWindow)
    {
        result.windowWidth  = result.windowWidth / 4;
        result.windowHeight = result.windowHeight / 4;
    }

    // We track GPU time only in frame-rate-limited cases.
    result.trackGpuTime = (surfaceType == SurfaceType::WindowWithVSync);

    return result;
}
1634
1635 } // anonymous namespace
1636
1637 using namespace params;
1638 using P = TracePerfParams;
1639 using PV = std::vector<P>;
1640
RegisterTraceTests()1641 void RegisterTraceTests()
1642 {
1643 std::vector<SurfaceType> surfaceTypes = {SurfaceType::Window};
1644 if (gEnableAllTraceTests)
1645 {
1646 surfaceTypes.push_back(SurfaceType::Offscreen);
1647 surfaceTypes.push_back(SurfaceType::WindowWithVSync);
1648 }
1649
1650 std::vector<ModifierFunc<P>> renderers = {Vulkan<P>, Native<P>};
1651 if (gEnableAllTraceTests)
1652 {
1653 if (!IsAndroid())
1654 {
1655 renderers.push_back(VulkanMockICD<P>);
1656 }
1657 renderers.push_back(VulkanSwiftShader<P>);
1658 }
1659
1660 PV testsWithID = CombineWithValues({P()}, AllEnums<RestrictedTraceID>(), CombineTestID);
1661 PV testsWithSurfaceType = CombineWithValues(testsWithID, surfaceTypes, CombineWithSurfaceType);
1662 PV testsWithRenderer = CombineWithFuncs(testsWithSurfaceType, renderers);
1663 PV filteredTests = FilterTestParams(testsWithRenderer);
1664
1665 for (const TracePerfParams ¶ms : filteredTests)
1666 {
1667 auto factory = [params]() { return new TracePerfTest(params); };
1668 std::string paramName = testing::PrintToString(params);
1669 std::stringstream testNameStr;
1670 testNameStr << "Run/" << paramName;
1671 std::string testName = testNameStr.str();
1672 testing::RegisterTest("TracePerfTest", testName.c_str(), nullptr, paramName.c_str(),
1673 __FILE__, __LINE__, factory);
1674 }
1675 }
1676