1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL (ES) Module
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Shader performance measurer; handles calibration and measurement
22 *//*--------------------------------------------------------------------*/
23
24 #include "glsShaderPerformanceMeasurer.hpp"
25 #include "gluDefs.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuRenderTarget.hpp"
28 #include "deStringUtil.hpp"
29 #include "deMath.h"
30 #include "deClock.h"
31
32 #include "glwFunctions.hpp"
33 #include "glwEnums.hpp"
34
35 #include <algorithm>
36
37 using tcu::Vec4;
38 using std::string;
39 using std::vector;
40 using tcu::TestLog;
41 using namespace glw; // GL types
42
43 namespace deqp
44 {
45 namespace gls
46 {
47
//! Interpolate a value over a triangle.
//! v0 is the value at (x, y) = (0, 0), v2 the value at (1, 0) and v1 the value at (0, 1).
static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
{
	const float alongX = (v2 - v0) * x;
	const float alongY = (v1 - v0) * y;

	return v0 + alongX + alongY;
}
52
triQuadInterpolate(float x,float y,const tcu::Vec4 & quad)53 static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
54 {
55 // \note Top left fill rule.
56 if (x + y < 1.0f)
57 return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
58 else
59 return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
60 }
61
//! Number of vertices in a grid of gridSizeX x gridSizeY quads.
static inline int getNumVertices (int gridSizeX, int gridSizeY)
{
	const int numColumns	= gridSizeX + 1;
	const int numRows		= gridSizeY + 1;

	return numColumns * numRows;
}
66
//! Number of indices needed to draw a grid of gridSizeX x gridSizeY quads
//! (six indices per quad).
static inline int getNumIndices (int gridSizeX, int gridSizeY)
{
	const int numIndicesPerQuad	= 6;
	const int numQuads			= gridSizeX*gridSizeY;

	return numQuads*numIndicesPerQuad;
}
71
getVtxIndex(int x,int y,int gridSizeX)72 static inline deUint16 getVtxIndex (int x, int y, int gridSizeX)
73 {
74 return (deUint16)(y*(gridSizeX+1) + x);
75 }
76
generateVertices(std::vector<float> & dst,int gridSizeX,int gridSizeY,const AttribSpec & spec)77 static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const AttribSpec& spec)
78 {
79 const int numComponents = 4;
80
81 DE_ASSERT((gridSizeX + 1)*(gridSizeY + 1) <= (1<<16)); // Must fit into 16-bit indices.
82 DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
83 dst.resize((gridSizeX + 1) * (gridSizeY + 1) * 4);
84
85 for (int y = 0; y <= gridSizeY; y++)
86 {
87 for (int x = 0; x <= gridSizeX; x++)
88 {
89 float xf = (float)x / (float)gridSizeX;
90 float yf = (float)y / (float)gridSizeY;
91
92 for (int compNdx = 0; compNdx < numComponents; compNdx++)
93 dst[getVtxIndex(x, y, gridSizeX)*numComponents + compNdx] = triQuadInterpolate(xf, yf, tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]));
94 }
95 }
96 }
97
generateIndices(std::vector<deUint16> & dst,int gridSizeX,int gridSizeY)98 static void generateIndices (std::vector<deUint16>& dst, int gridSizeX, int gridSizeY)
99 {
100 const int numIndicesPerQuad = 6;
101 int numIndices = gridSizeX * gridSizeY * numIndicesPerQuad;
102 dst.resize(numIndices);
103
104 for (int y = 0; y < gridSizeY; y++)
105 {
106 for (int x = 0; x < gridSizeX; x++)
107 {
108 int quadNdx = y*gridSizeX + x;
109
110 dst[quadNdx*numIndicesPerQuad + 0] = getVtxIndex(x+0, y+0, gridSizeX);
111 dst[quadNdx*numIndicesPerQuad + 1] = getVtxIndex(x+1, y+0, gridSizeX);
112 dst[quadNdx*numIndicesPerQuad + 2] = getVtxIndex(x+0, y+1, gridSizeX);
113
114 dst[quadNdx*numIndicesPerQuad + 3] = getVtxIndex(x+0, y+1, gridSizeX);
115 dst[quadNdx*numIndicesPerQuad + 4] = getVtxIndex(x+1, y+0, gridSizeX);
116 dst[quadNdx*numIndicesPerQuad + 5] = getVtxIndex(x+1, y+1, gridSizeX);
117 }
118 }
119 }
120
ShaderPerformanceMeasurer(const glu::RenderContext & renderCtx,PerfCaseType measureType)121 ShaderPerformanceMeasurer::ShaderPerformanceMeasurer (const glu::RenderContext& renderCtx, PerfCaseType measureType)
122 : m_renderCtx (renderCtx)
123 , m_gridSizeX (measureType == CASETYPE_FRAGMENT ? 1 : 255)
124 , m_gridSizeY (measureType == CASETYPE_FRAGMENT ? 1 : 255)
125 , m_viewportWidth (measureType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
126 , m_viewportHeight (measureType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
127 , m_state(STATE_UNINITIALIZED)
128 , m_isFirstIteration (false)
129 , m_prevRenderStartTime (0)
130 , m_result (-1.0f, -1.0f)
131 , m_indexBuffer (0)
132 , m_vao (0)
133 {
134 }
135
logParameters(TestLog & log) const136 void ShaderPerformanceMeasurer::logParameters (TestLog& log) const
137 {
138 log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage
139 << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
140 }
141
init(deUint32 program,const vector<AttribSpec> & attributes,int calibratorInitialNumCalls)142 void ShaderPerformanceMeasurer::init (deUint32 program, const vector<AttribSpec>& attributes, int calibratorInitialNumCalls)
143 {
144 DE_ASSERT(m_state == STATE_UNINITIALIZED);
145
146 const glw::Functions& gl = m_renderCtx.getFunctions();
147 const bool useVAO = glu::isContextTypeGLCore(m_renderCtx.getType());
148
149 if (useVAO)
150 {
151 DE_ASSERT(!m_vao);
152 gl.genVertexArrays(1, &m_vao);
153 gl.bindVertexArray(m_vao);
154 GLU_EXPECT_NO_ERROR(gl.getError(), "Create VAO");
155 }
156
157 // Validate that we have sane grid and viewport setup.
158
159 DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
160
161 {
162 bool widthTooSmall = m_renderCtx.getRenderTarget().getWidth() < m_viewportWidth;
163 bool heightTooSmall = m_renderCtx.getRenderTarget().getHeight() < m_viewportHeight;
164
165 if (widthTooSmall || heightTooSmall)
166 throw tcu::NotSupportedError("Render target too small (" +
167 (widthTooSmall ? "width must be at least " + de::toString(m_viewportWidth) : "") +
168 (heightTooSmall ? string(widthTooSmall ? ", " : "") + "height must be at least " + de::toString(m_viewportHeight) : "") +
169 ")");
170 }
171
172 TCU_CHECK_INTERNAL(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) &&
173 de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));
174
175 // Insert a_position to attributes.
176 m_attributes = attributes;
177 m_attributes.push_back(AttribSpec("a_position",
178 Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
179 Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
180 Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
181 Vec4( 1.0f, 1.0f, 0.0f, 1.0f)));
182
183 // Generate indices.
184 {
185 std::vector<deUint16> indices;
186 generateIndices(indices, m_gridSizeX, m_gridSizeY);
187
188 gl.genBuffers(1, &m_indexBuffer);
189 gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
190 gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (GLsizeiptr)(indices.size()*sizeof(deUint16)), &indices[0], GL_STATIC_DRAW);
191
192 GLU_EXPECT_NO_ERROR(gl.getError(), "Upload index data");
193 }
194
195 // Generate vertices.
196 m_attribBuffers.resize(m_attributes.size(), 0);
197 gl.genBuffers((GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
198
199 for (int attribNdx = 0; attribNdx < (int)m_attributes.size(); attribNdx++)
200 {
201 std::vector<float> vertices;
202 generateVertices(vertices, m_gridSizeX, m_gridSizeY, m_attributes[attribNdx]);
203
204 gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
205 gl.bufferData(GL_ARRAY_BUFFER, (GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
206 }
207
208 GLU_EXPECT_NO_ERROR(gl.getError(), "Upload vertex data");
209
210 // Setup attribute bindings.
211 for (int attribNdx = 0; attribNdx < (int)m_attributes.size(); attribNdx++)
212 {
213 int location = gl.getAttribLocation(program, m_attributes[attribNdx].name.c_str());
214
215 if (location >= 0)
216 {
217 gl.enableVertexAttribArray(location);
218 gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
219 gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
220 }
221
222 GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex attribute state");
223 }
224
225 gl.useProgram(program);
226 GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram()");
227
228 m_state = STATE_MEASURING;
229 m_isFirstIteration = true;
230
231 m_calibrator.clear(CalibratorParameters(calibratorInitialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */,
232 1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
233 }
234
deinit(void)235 void ShaderPerformanceMeasurer::deinit (void)
236 {
237 const glw::Functions& gl = m_renderCtx.getFunctions();
238
239 if (m_indexBuffer)
240 {
241 gl.deleteBuffers(1, &m_indexBuffer);
242 m_indexBuffer = 0;
243 }
244
245 if (m_vao)
246 {
247 gl.deleteVertexArrays(1, &m_vao);
248 m_vao = 0;
249 }
250
251 if (!m_attribBuffers.empty())
252 {
253 gl.deleteBuffers((GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
254 m_attribBuffers.clear();
255 }
256
257 m_state = STATE_UNINITIALIZED;
258 }
259
render(int numDrawCalls)260 void ShaderPerformanceMeasurer::render (int numDrawCalls)
261 {
262 const glw::Functions& gl = m_renderCtx.getFunctions();
263 GLsizei numIndices = (GLsizei)getNumIndices(m_gridSizeX, m_gridSizeY);
264
265 gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
266
267 for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
268 gl.drawElements(GL_TRIANGLES, numIndices, GL_UNSIGNED_SHORT, DE_NULL);
269 }
270
iterate(void)271 void ShaderPerformanceMeasurer::iterate (void)
272 {
273 DE_ASSERT(m_state == STATE_MEASURING);
274
275 deUint64 renderStartTime = deGetMicroseconds();
276 render(m_calibrator.getCallCount()); // Always render. This gives more stable performance behavior.
277
278 TheilSenCalibrator::State calibratorState = m_calibrator.getState();
279
280 if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
281 {
282 m_calibrator.recomputeParameters();
283
284 m_isFirstIteration = true;
285 m_prevRenderStartTime = renderStartTime;
286 }
287 else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
288 {
289 if (!m_isFirstIteration)
290 m_calibrator.recordIteration(renderStartTime - m_prevRenderStartTime);
291
292 m_isFirstIteration = false;
293 m_prevRenderStartTime = renderStartTime;
294 }
295 else
296 {
297 DE_ASSERT(calibratorState == TheilSenCalibrator::STATE_FINISHED);
298
299 GLU_EXPECT_NO_ERROR(m_renderCtx.getFunctions().getError(), "End of rendering");
300
301 const MeasureState& measureState = m_calibrator.getMeasureState();
302
303 // Compute result.
304 deUint64 totalTime = measureState.getTotalTime();
305 int numFrames = (int)measureState.frameTimes.size();
306 deInt64 numQuadGrids = measureState.numDrawCalls * numFrames;
307 deInt64 numPixels = (deInt64)m_viewportWidth * (deInt64)m_viewportHeight * numQuadGrids;
308 deInt64 numVertices = (deInt64)getNumVertices(m_gridSizeX, m_gridSizeY) * numQuadGrids;
309 double mfragPerSecond = (double)numPixels / (double)totalTime;
310 double mvertPerSecond = (double)numVertices / (double)totalTime;
311
312 m_result = Result((float)mvertPerSecond, (float)mfragPerSecond);
313 m_state = STATE_FINISHED;
314 }
315 }
316
logMeasurementInfo(TestLog & log) const317 void ShaderPerformanceMeasurer::logMeasurementInfo (TestLog& log) const
318 {
319 DE_ASSERT(m_state == STATE_FINISHED);
320
321 const MeasureState& measureState(m_calibrator.getMeasureState());
322
323 // Compute totals.
324 deUint64 totalTime = measureState.getTotalTime();
325 int numFrames = (int)measureState.frameTimes.size();
326 deInt64 numQuadGrids = measureState.numDrawCalls * numFrames;
327 deInt64 numPixels = (deInt64)m_viewportWidth * (deInt64)m_viewportHeight * numQuadGrids;
328 deInt64 numVertices = (deInt64)getNumVertices(m_gridSizeX, m_gridSizeY) * numQuadGrids;
329 double mfragPerSecond = (double)numPixels / (double)totalTime;
330 double mvertPerSecond = (double)numVertices / (double)totalTime;
331 double framesPerSecond = (double)numFrames / ((double)totalTime / 1000000.0);
332
333 logCalibrationInfo(log, m_calibrator);
334
335 log << TestLog::Float("FramesPerSecond", "Frames per second in measurement", "Frames/s", QP_KEY_TAG_PERFORMANCE, (float)framesPerSecond)
336 << TestLog::Float("FragmentsPerVertices", "Vertex-fragment ratio", "Fragments/Vertices", QP_KEY_TAG_NONE, (float)numPixels / (float)numVertices)
337 << TestLog::Float("FragmentPerf", "Fragment performance", "MPix/s", QP_KEY_TAG_PERFORMANCE, (float)mfragPerSecond)
338 << TestLog::Float("VertexPerf", "Vertex performance", "MVert/s", QP_KEY_TAG_PERFORMANCE, (float)mvertPerSecond);
339 }
340
setGridSize(int gridW,int gridH)341 void ShaderPerformanceMeasurer::setGridSize (int gridW, int gridH)
342 {
343 DE_ASSERT(m_state == STATE_UNINITIALIZED);
344 DE_ASSERT(de::inBounds(gridW, 1, 256) && de::inBounds(gridH, 1, 256));
345 m_gridSizeX = gridW;
346 m_gridSizeY = gridH;
347 }
348
setViewportSize(int width,int height)349 void ShaderPerformanceMeasurer::setViewportSize (int width, int height)
350 {
351 DE_ASSERT(m_state == STATE_UNINITIALIZED);
352 DE_ASSERT(de::inRange(width, 1, m_renderCtx.getRenderTarget().getWidth()) &&
353 de::inRange(height, 1, m_renderCtx.getRenderTarget().getHeight()));
354 m_viewportWidth = width;
355 m_viewportHeight = height;
356 }
357
358 } // gls
359 } // deqp
360