1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "GpuTimer.h"
9 #include "GrContextFactory.h"
10 #include "SkCanvas.h"
11 #include "SkCommonFlagsPathRenderer.h"
12 #include "SkOSFile.h"
13 #include "SkOSPath.h"
14 #include "SkPerlinNoiseShader.h"
15 #include "SkPicture.h"
16 #include "SkPictureRecorder.h"
17 #include "SkStream.h"
18 #include "SkSurface.h"
19 #include "SkSurfaceProps.h"
20 #include "picture_utils.h"
21 #include "sk_tool_utils.h"
22 #include "flags/SkCommandLineFlags.h"
23 #include "flags/SkCommonFlagsConfig.h"
24 #include <stdlib.h>
25 #include <algorithm>
26 #include <array>
27 #include <chrono>
28 #include <cmath>
29 #include <vector>
30
31 /**
32 * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
33 * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
34 * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
35 *
36 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
37 * render target and syncs the GPU after each draw.
38 *
39 * Currently, only GPU configs are supported.
40 */
41
42 DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
43 DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
44 DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
45 DEFINE_bool(fps, false, "use fps instead of ms");
46 DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run");
47 DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
48 DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
49 DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
50 DEFINE_pathrenderer_flag;
51
// Column titles printed by main() (unless --suppressHeader is set) ahead of the result row.
static const char* header =
" accum median max min stddev samples sample_ms clock metric config bench";

// printf-style format of the single result row emitted by print_result(); its conversions line
// up, in order, with the columns named in 'header'.
static const char* resultFormat =
"%8.4g %8.4g %8.4g %8.4g %6.3g%% %7li %9i %-5s %-6s %-9s %s";
57
58 struct Sample {
59 using duration = std::chrono::nanoseconds;
60
SampleSample61 Sample() : fFrames(0), fDuration(0) {}
secondsSample62 double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample63 double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample64 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample65 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
66
67 int fFrames;
68 duration fDuration;
69 };
70
71 class GpuSync {
72 public:
73 GpuSync(const sk_gpu_test::FenceSync* fenceSync);
74 ~GpuSync();
75
76 void syncToPreviousFrame();
77
78 private:
79 void updateFence();
80
81 const sk_gpu_test::FenceSync* const fFenceSync;
82 sk_gpu_test::PlatformFence fFence;
83 };
84
// Process exit codes handed to exitf().
// NOTE(review): the numeric values look like the BSD sysexits.h conventions -- confirm before
// relying on that.
enum class ExitErr {
    kOk          = 0,
    kUsage       = 64,  // Bad command-line arguments.
    kData        = 65,  // Input file could not be parsed.
    kUnavailable = 69,  // A required GPU/platform capability or resource is missing.
    kSoftware    = 70,  // Internal error; exitf() reports these as "INTERNAL ERROR".
    kIO          = 74   // Reading or writing a file failed.
};
93
94 static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
95 static sk_sp<SkPicture> create_warmup_skp();
96 static bool mkdir_p(const SkString& name);
97 static SkString join(const SkCommandLineFlags::StringArray&);
98 static void exitf(ExitErr, const char* format, ...);
99
run_benchmark(const sk_gpu_test::FenceSync * fenceSync,SkCanvas * canvas,const SkPicture * skp,std::vector<Sample> * samples)100 static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
101 const SkPicture* skp, std::vector<Sample>* samples) {
102 using clock = std::chrono::high_resolution_clock;
103 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
104 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
105
106 draw_skp_and_flush(canvas, skp);
107 GpuSync gpuSync(fenceSync);
108
109 draw_skp_and_flush(canvas, skp);
110 gpuSync.syncToPreviousFrame();
111
112 clock::time_point now = clock::now();
113 const clock::time_point endTime = now + benchDuration;
114
115 do {
116 clock::time_point sampleStart = now;
117 samples->emplace_back();
118 Sample& sample = samples->back();
119
120 do {
121 draw_skp_and_flush(canvas, skp);
122 gpuSync.syncToPreviousFrame();
123
124 now = clock::now();
125 sample.fDuration = now - sampleStart;
126 ++sample.fFrames;
127 } while (sample.fDuration < sampleDuration);
128 } while (now < endTime || 0 == samples->size() % 2);
129 }
130
run_gpu_time_benchmark(sk_gpu_test::GpuTimer * gpuTimer,const sk_gpu_test::FenceSync * fenceSync,SkCanvas * canvas,const SkPicture * skp,std::vector<Sample> * samples)131 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
132 const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
133 const SkPicture* skp, std::vector<Sample>* samples) {
134 using sk_gpu_test::PlatformTimerQuery;
135 using clock = std::chrono::steady_clock;
136 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
137 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
138
139 if (!gpuTimer->disjointSupport()) {
140 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
141 "results may be unreliable\n");
142 }
143
144 draw_skp_and_flush(canvas, skp);
145 GpuSync gpuSync(fenceSync);
146
147 gpuTimer->queueStart();
148 draw_skp_and_flush(canvas, skp);
149 PlatformTimerQuery previousTime = gpuTimer->queueStop();
150 gpuSync.syncToPreviousFrame();
151
152 clock::time_point now = clock::now();
153 const clock::time_point endTime = now + benchDuration;
154
155 do {
156 const clock::time_point sampleEndTime = now + sampleDuration;
157 samples->emplace_back();
158 Sample& sample = samples->back();
159
160 do {
161 gpuTimer->queueStart();
162 draw_skp_and_flush(canvas, skp);
163 PlatformTimerQuery time = gpuTimer->queueStop();
164 gpuSync.syncToPreviousFrame();
165
166 switch (gpuTimer->checkQueryStatus(previousTime)) {
167 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
168 case QueryStatus::kInvalid:
169 exitf(ExitErr::kUnavailable, "GPU timer failed");
170 case QueryStatus::kPending:
171 exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
172 case QueryStatus::kDisjoint:
173 if (FLAGS_verbosity >= 4) {
174 fprintf(stderr, "discarding timer query due to disjoint operations.\n");
175 }
176 break;
177 case QueryStatus::kAccurate:
178 sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
179 ++sample.fFrames;
180 break;
181 }
182 gpuTimer->deleteQuery(previousTime);
183 previousTime = time;
184 now = clock::now();
185 } while (now < sampleEndTime || 0 == sample.fFrames);
186 } while (now < endTime || 0 == samples->size() % 2);
187
188 gpuTimer->deleteQuery(previousTime);
189 }
190
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)191 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) {
192 if (0 == (samples.size() % 2)) {
193 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
194 }
195
196 Sample accum = Sample();
197 std::vector<double> values;
198 values.reserve(samples.size());
199 for (const Sample& sample : samples) {
200 accum.fFrames += sample.fFrames;
201 accum.fDuration += sample.fDuration;
202 values.push_back(sample.value());
203 }
204 std::sort(values.begin(), values.end());
205
206 const double accumValue = accum.value();
207 double variance = 0;
208 for (double value : values) {
209 const double delta = value - accumValue;
210 variance += delta * delta;
211 }
212 variance /= values.size();
213 // Technically, this is the relative standard deviation.
214 const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
215
216 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
217 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
218 config, bench);
219 printf("\n");
220 fflush(stdout);
221 }
222
main(int argc,char ** argv)223 int main(int argc, char** argv) {
224 SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
225 "You usually don't want to use this program directly.");
226 SkCommandLineFlags::Parse(argc, argv);
227
228 if (!FLAGS_suppressHeader) {
229 printf("%s\n", header);
230 }
231 if (FLAGS_duration <= 0) {
232 exit(0); // This can be used to print the header and quit.
233 }
234
235 // Parse the config.
236 const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
237 SkCommandLineConfigArray configs;
238 ParseConfigs(FLAGS_config, &configs);
239 if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
240 exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
241 join(FLAGS_config).c_str());
242 }
243
244 // Parse the skp.
245 if (FLAGS_skp.count() != 1) {
246 exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'",
247 join(FLAGS_skp).c_str());
248 }
249 sk_sp<SkPicture> skp;
250 SkString skpname;
251 if (0 == strcmp(FLAGS_skp[0], "warmup")) {
252 skp = create_warmup_skp();
253 skpname = "warmup";
254 } else {
255 const char* skpfile = FLAGS_skp[0];
256 std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile));
257 if (!skpstream) {
258 exitf(ExitErr::kIO, "failed to open skp file %s", skpfile);
259 }
260 skp = SkPicture::MakeFromStream(skpstream.get());
261 if (!skp) {
262 exitf(ExitErr::kData, "failed to parse skp file %s", skpfile);
263 }
264 skpname = SkOSPath::Basename(skpfile);
265 }
266 int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048),
267 height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048);
268 if (FLAGS_verbosity >= 3 &&
269 (width != skp->cullRect().width() || height != skp->cullRect().height())) {
270 fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
271 skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
272 SkScalarCeilToInt(skp->cullRect().height()), width, height);
273 }
274
275 // Create a context.
276 GrContextOptions ctxOptions;
277 ctxOptions.fGpuPathRenderers = CollectGpuPathRenderersFromFlags();
278 sk_gpu_test::GrContextFactory factory(ctxOptions);
279 sk_gpu_test::ContextInfo ctxInfo =
280 factory.getContextInfo(config->getContextType(), config->getContextOverrides());
281 GrContext* ctx = ctxInfo.grContext();
282 if (!ctx) {
283 exitf(ExitErr::kUnavailable, "failed to create context for config %s",
284 config->getTag().c_str());
285 }
286 if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) {
287 exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
288 width, height, ctx->caps()->maxRenderTargetSize());
289 }
290 if (ctx->caps()->maxSampleCount() < config->getSamples()) {
291 exitf(ExitErr::kUnavailable, "sample count %i not supported by platform (max: %i)",
292 config->getSamples(), ctx->caps()->maxSampleCount());
293 }
294 sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
295 if (!testCtx) {
296 exitf(ExitErr::kSoftware, "testContext is null");
297 }
298 if (!testCtx->fenceSyncSupport()) {
299 exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
300 }
301
302 // Create a render target.
303 SkImageInfo info = SkImageInfo::Make(width, height, config->getColorType(),
304 kPremul_SkAlphaType, sk_ref_sp(config->getColorSpace()));
305 uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0;
306 SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
307 sk_sp<SkSurface> surface =
308 SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
309 if (!surface) {
310 exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
311 width, height, config->getTag().c_str());
312 }
313
314 // Run the benchmark.
315 std::vector<Sample> samples;
316 if (FLAGS_sampleMs > 0) {
317 // +1 because we might take one more sample in order to have an odd number.
318 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
319 } else {
320 samples.reserve(2 * FLAGS_duration);
321 }
322 SkCanvas* canvas = surface->getCanvas();
323 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
324 if (!FLAGS_gpuClock) {
325 run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
326 } else {
327 if (!testCtx->gpuTimingSupport()) {
328 exitf(ExitErr::kUnavailable, "GPU does not support timing");
329 }
330 run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
331 &samples);
332 }
333 print_result(samples, config->getTag().c_str(), skpname.c_str());
334
335 // Save a proof (if one was requested).
336 if (!FLAGS_png.isEmpty()) {
337 SkBitmap bmp;
338 bmp.setInfo(info);
339 if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) {
340 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
341 }
342 const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
343 &basename = SkOSPath::Basename(FLAGS_png[0]);
344 if (!mkdir_p(dirname)) {
345 exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str());
346 }
347 if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) {
348 exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
349 }
350 }
351
352 exit(0);
353 }
354
draw_skp_and_flush(SkCanvas * canvas,const SkPicture * skp)355 static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) {
356 canvas->drawPicture(skp);
357 canvas->flush();
358 }
359
create_warmup_skp()360 static sk_sp<SkPicture> create_warmup_skp() {
361 static constexpr SkRect bounds{0, 0, 500, 500};
362 SkPictureRecorder recorder;
363 SkCanvas* recording = recorder.beginRecording(bounds);
364
365 recording->clear(SK_ColorWHITE);
366
367 SkPaint stroke;
368 stroke.setStyle(SkPaint::kStroke_Style);
369 stroke.setStrokeWidth(2);
370
371 // Use a big path to (theoretically) warmup the CPU.
372 SkPath bigPath;
373 sk_tool_utils::make_big_path(bigPath);
374 recording->drawPath(bigPath, stroke);
375
376 // Use a perlin shader to warmup the GPU.
377 SkPaint perlin;
378 perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
379 recording->drawRect(bounds, perlin);
380
381 return recorder.finishRecordingAsPicture();
382 }
383
mkdir_p(const SkString & dirname)384 bool mkdir_p(const SkString& dirname) {
385 if (dirname.isEmpty()) {
386 return true;
387 }
388 return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
389 }
390
join(const SkCommandLineFlags::StringArray & stringArray)391 static SkString join(const SkCommandLineFlags::StringArray& stringArray) {
392 SkString joined;
393 for (int i = 0; i < stringArray.count(); ++i) {
394 joined.appendf(i ? " %s" : "%s", stringArray[i]);
395 }
396 return joined;
397 }
398
exitf(ExitErr err,const char * format,...)399 static void exitf(ExitErr err, const char* format, ...) {
400 fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
401 va_list args;
402 va_start(args, format);
403 vfprintf(stderr, format, args);
404 va_end(args);
405 fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
406 exit((int)err);
407 }
408
GpuSync(const sk_gpu_test::FenceSync * fenceSync)409 GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
410 : fFenceSync(fenceSync) {
411 this->updateFence();
412 }
413
~GpuSync()414 GpuSync::~GpuSync() {
415 fFenceSync->deleteFence(fFence);
416 }
417
syncToPreviousFrame()418 void GpuSync::syncToPreviousFrame() {
419 if (sk_gpu_test::kInvalidFence == fFence) {
420 exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
421 }
422 if (!fFenceSync->waitFence(fFence)) {
423 exitf(ExitErr::kUnavailable, "failed to wait for fence");
424 }
425 fFenceSync->deleteFence(fFence);
426 this->updateFence();
427 }
428
updateFence()429 void GpuSync::updateFence() {
430 fFence = fFenceSync->insertFence();
431 if (sk_gpu_test::kInvalidFence == fFence) {
432 exitf(ExitErr::kUnavailable, "failed to insert fence");
433 }
434 }
435