• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "GpuTimer.h"
9 #include "GrContextFactory.h"
10 #include "SkCanvas.h"
11 #include "SkCommonFlagsPathRenderer.h"
12 #include "SkOSFile.h"
13 #include "SkOSPath.h"
14 #include "SkPerlinNoiseShader.h"
15 #include "SkPicture.h"
16 #include "SkPictureRecorder.h"
17 #include "SkStream.h"
18 #include "SkSurface.h"
19 #include "SkSurfaceProps.h"
20 #include "picture_utils.h"
21 #include "sk_tool_utils.h"
22 #include "flags/SkCommandLineFlags.h"
23 #include "flags/SkCommonFlagsConfig.h"
24 #include <stdlib.h>
25 #include <algorithm>
26 #include <array>
27 #include <chrono>
28 #include <cmath>
29 #include <vector>
30 
31 /**
32  * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
33  * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
34  * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
35  *
36  * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
37  * render target and syncs the GPU after each draw.
38  *
39  * Currently, only GPU configs are supported.
40  */
41 
// Command-line flags. Each one is read elsewhere in this file through its FLAGS_<name> variable.

// Wall-clock budget for the whole benchmark, in ms. main() exits right after the (optional)
// header row when this is <= 0.
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
// Lower bound, in ms, on the time covered by a single sample; also echoed in the result row.
DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
// When set, main() runs run_gpu_time_benchmark() instead of run_benchmark().
DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
// Selects what Sample::value() and Sample::metric() report: frames/second vs. ms/frame.
DEFINE_bool(fps, false, "use fps instead of ms");
// main() requires exactly one value here.
DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run");
// Only the first value is used by main().
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
// In this file: >= 3 enables the cropping notice, >= 4 enables disjoint-query notices.
DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
// NOTE(review): presumably declares the flag consumed by CollectGpuPathRenderersFromFlags() in
// main() -- confirm against SkCommonFlagsPathRenderer.h.
DEFINE_pathrenderer_flag;
51 
// Column titles printed by main() (unless --suppressHeader is set). The column widths line up
// with the field widths in resultFormat below.
static const char* header =
"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";

// printf format for the single result row emitted by print_result(). Arguments, in order:
// accum, median, max, min (double), stddev percentage (double), sample count (%li), sample_ms
// (int), clock name, metric name, config tag, bench name (strings).
static const char* resultFormat =
"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7li  %9i  %-5s  %-6s  %-9s %s";
57 
58 struct Sample {
59     using duration = std::chrono::nanoseconds;
60 
SampleSample61     Sample() : fFrames(0), fDuration(0) {}
secondsSample62     double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample63     double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample64     double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample65     static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
66 
67     int        fFrames;
68     duration   fDuration;
69 };
70 
71 class GpuSync {
72 public:
73     GpuSync(const sk_gpu_test::FenceSync* fenceSync);
74     ~GpuSync();
75 
76     void syncToPreviousFrame();
77 
78 private:
79     void updateFence();
80 
81     const sk_gpu_test::FenceSync* const   fFenceSync;
82     sk_gpu_test::PlatformFence            fFence;
83 };
84 
/**
 * Process exit codes passed to exitf(). The non-zero values appear to follow the BSD
 * <sysexits.h> numbering (EX_USAGE, EX_DATAERR, EX_UNAVAILABLE, EX_SOFTWARE, EX_IOERR).
 */
enum class ExitErr {
    kOk           = 0,   // success
    kUsage        = 64,  // bad command-line arguments
    kData         = 65,  // input data could not be parsed
    kUnavailable  = 69,  // a required GPU/platform feature is missing or failed
    kSoftware     = 70,  // internal error; exitf() prints "INTERNAL ERROR" for this one
    kIO           = 74   // file system failure
};
93 
94 static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
95 static sk_sp<SkPicture> create_warmup_skp();
96 static bool mkdir_p(const SkString& name);
97 static SkString join(const SkCommandLineFlags::StringArray&);
98 static void exitf(ExitErr, const char* format, ...);
99 
run_benchmark(const sk_gpu_test::FenceSync * fenceSync,SkCanvas * canvas,const SkPicture * skp,std::vector<Sample> * samples)100 static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
101                           const SkPicture* skp, std::vector<Sample>* samples) {
102     using clock = std::chrono::high_resolution_clock;
103     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
104     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
105 
106     draw_skp_and_flush(canvas, skp);
107     GpuSync gpuSync(fenceSync);
108 
109     draw_skp_and_flush(canvas, skp);
110     gpuSync.syncToPreviousFrame();
111 
112     clock::time_point now = clock::now();
113     const clock::time_point endTime = now + benchDuration;
114 
115     do {
116         clock::time_point sampleStart = now;
117         samples->emplace_back();
118         Sample& sample = samples->back();
119 
120         do {
121             draw_skp_and_flush(canvas, skp);
122             gpuSync.syncToPreviousFrame();
123 
124             now = clock::now();
125             sample.fDuration = now - sampleStart;
126             ++sample.fFrames;
127         } while (sample.fDuration < sampleDuration);
128     } while (now < endTime || 0 == samples->size() % 2);
129 }
130 
run_gpu_time_benchmark(sk_gpu_test::GpuTimer * gpuTimer,const sk_gpu_test::FenceSync * fenceSync,SkCanvas * canvas,const SkPicture * skp,std::vector<Sample> * samples)131 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
132                                    const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
133                                    const SkPicture* skp, std::vector<Sample>* samples) {
134     using sk_gpu_test::PlatformTimerQuery;
135     using clock = std::chrono::steady_clock;
136     const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
137     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
138 
139     if (!gpuTimer->disjointSupport()) {
140         fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
141                         "results may be unreliable\n");
142     }
143 
144     draw_skp_and_flush(canvas, skp);
145     GpuSync gpuSync(fenceSync);
146 
147     gpuTimer->queueStart();
148     draw_skp_and_flush(canvas, skp);
149     PlatformTimerQuery previousTime = gpuTimer->queueStop();
150     gpuSync.syncToPreviousFrame();
151 
152     clock::time_point now = clock::now();
153     const clock::time_point endTime = now + benchDuration;
154 
155     do {
156         const clock::time_point sampleEndTime = now + sampleDuration;
157         samples->emplace_back();
158         Sample& sample = samples->back();
159 
160         do {
161             gpuTimer->queueStart();
162             draw_skp_and_flush(canvas, skp);
163             PlatformTimerQuery time = gpuTimer->queueStop();
164             gpuSync.syncToPreviousFrame();
165 
166             switch (gpuTimer->checkQueryStatus(previousTime)) {
167                 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
168                 case QueryStatus::kInvalid:
169                     exitf(ExitErr::kUnavailable, "GPU timer failed");
170                 case QueryStatus::kPending:
171                     exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
172                 case QueryStatus::kDisjoint:
173                     if (FLAGS_verbosity >= 4) {
174                         fprintf(stderr, "discarding timer query due to disjoint operations.\n");
175                     }
176                     break;
177                 case QueryStatus::kAccurate:
178                     sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
179                     ++sample.fFrames;
180                     break;
181             }
182             gpuTimer->deleteQuery(previousTime);
183             previousTime = time;
184             now = clock::now();
185         } while (now < sampleEndTime || 0 == sample.fFrames);
186     } while (now < endTime || 0 == samples->size() % 2);
187 
188     gpuTimer->deleteQuery(previousTime);
189 }
190 
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)191 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
192     if (0 == (samples.size() % 2)) {
193         exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
194     }
195 
196     Sample accum = Sample();
197     std::vector<double> values;
198     values.reserve(samples.size());
199     for (const Sample& sample : samples) {
200         accum.fFrames += sample.fFrames;
201         accum.fDuration += sample.fDuration;
202         values.push_back(sample.value());
203     }
204     std::sort(values.begin(), values.end());
205 
206     const double accumValue = accum.value();
207     double variance = 0;
208     for (double value : values) {
209         const double delta = value - accumValue;
210         variance += delta * delta;
211     }
212     variance /= values.size();
213     // Technically, this is the relative standard deviation.
214     const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
215 
216     printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
217            stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
218            config, bench);
219     printf("\n");
220     fflush(stdout);
221 }
222 
main(int argc,char ** argv)223 int main(int argc, char** argv) {
224     SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
225                                  "You usually don't want to use this program directly.");
226     SkCommandLineFlags::Parse(argc, argv);
227 
228     if (!FLAGS_suppressHeader) {
229         printf("%s\n", header);
230     }
231     if (FLAGS_duration <= 0) {
232         exit(0); // This can be used to print the header and quit.
233     }
234 
235     // Parse the config.
236     const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
237     SkCommandLineConfigArray configs;
238     ParseConfigs(FLAGS_config, &configs);
239     if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
240         exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
241                                join(FLAGS_config).c_str());
242     }
243 
244     // Parse the skp.
245     if (FLAGS_skp.count() != 1) {
246         exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'",
247                                join(FLAGS_skp).c_str());
248     }
249     sk_sp<SkPicture> skp;
250     SkString skpname;
251     if (0 == strcmp(FLAGS_skp[0], "warmup")) {
252         skp = create_warmup_skp();
253         skpname = "warmup";
254     } else {
255         const char* skpfile = FLAGS_skp[0];
256         std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile));
257         if (!skpstream) {
258             exitf(ExitErr::kIO, "failed to open skp file %s", skpfile);
259         }
260         skp = SkPicture::MakeFromStream(skpstream.get());
261         if (!skp) {
262             exitf(ExitErr::kData, "failed to parse skp file %s", skpfile);
263         }
264         skpname = SkOSPath::Basename(skpfile);
265     }
266     int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048),
267         height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048);
268     if (FLAGS_verbosity >= 3 &&
269         (width != skp->cullRect().width() || height != skp->cullRect().height())) {
270         fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
271                         skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
272                         SkScalarCeilToInt(skp->cullRect().height()), width, height);
273     }
274 
275     // Create a context.
276     GrContextOptions ctxOptions;
277     ctxOptions.fGpuPathRenderers = CollectGpuPathRenderersFromFlags();
278     sk_gpu_test::GrContextFactory factory(ctxOptions);
279     sk_gpu_test::ContextInfo ctxInfo =
280         factory.getContextInfo(config->getContextType(), config->getContextOverrides());
281     GrContext* ctx = ctxInfo.grContext();
282     if (!ctx) {
283         exitf(ExitErr::kUnavailable, "failed to create context for config %s",
284                                      config->getTag().c_str());
285     }
286     if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) {
287         exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
288                                      width, height, ctx->caps()->maxRenderTargetSize());
289     }
290     if (ctx->caps()->maxSampleCount() < config->getSamples()) {
291         exitf(ExitErr::kUnavailable, "sample count %i not supported by platform (max: %i)",
292                                      config->getSamples(), ctx->caps()->maxSampleCount());
293     }
294     sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
295     if (!testCtx) {
296         exitf(ExitErr::kSoftware, "testContext is null");
297     }
298     if (!testCtx->fenceSyncSupport()) {
299         exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
300     }
301 
302     // Create a render target.
303     SkImageInfo info = SkImageInfo::Make(width, height, config->getColorType(),
304                                          kPremul_SkAlphaType, sk_ref_sp(config->getColorSpace()));
305     uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0;
306     SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
307     sk_sp<SkSurface> surface =
308         SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
309     if (!surface) {
310         exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
311                                      width, height, config->getTag().c_str());
312     }
313 
314     // Run the benchmark.
315     std::vector<Sample> samples;
316     if (FLAGS_sampleMs > 0) {
317         // +1 because we might take one more sample in order to have an odd number.
318         samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
319     } else {
320         samples.reserve(2 * FLAGS_duration);
321     }
322     SkCanvas* canvas = surface->getCanvas();
323     canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
324     if (!FLAGS_gpuClock) {
325         run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
326     } else {
327         if (!testCtx->gpuTimingSupport()) {
328             exitf(ExitErr::kUnavailable, "GPU does not support timing");
329         }
330         run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
331                                &samples);
332     }
333     print_result(samples, config->getTag().c_str(), skpname.c_str());
334 
335     // Save a proof (if one was requested).
336     if (!FLAGS_png.isEmpty()) {
337         SkBitmap bmp;
338         bmp.setInfo(info);
339         if (!surface->getCanvas()->readPixels(&bmp, 0, 0)) {
340             exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
341         }
342         const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
343                        &basename = SkOSPath::Basename(FLAGS_png[0]);
344         if (!mkdir_p(dirname)) {
345             exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str());
346         }
347         if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) {
348             exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
349         }
350     }
351 
352     exit(0);
353 }
354 
draw_skp_and_flush(SkCanvas * canvas,const SkPicture * skp)355 static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) {
356     canvas->drawPicture(skp);
357     canvas->flush();
358 }
359 
create_warmup_skp()360 static sk_sp<SkPicture> create_warmup_skp() {
361     static constexpr SkRect bounds{0, 0, 500, 500};
362     SkPictureRecorder recorder;
363     SkCanvas* recording = recorder.beginRecording(bounds);
364 
365     recording->clear(SK_ColorWHITE);
366 
367     SkPaint stroke;
368     stroke.setStyle(SkPaint::kStroke_Style);
369     stroke.setStrokeWidth(2);
370 
371     // Use a big path to (theoretically) warmup the CPU.
372     SkPath bigPath;
373     sk_tool_utils::make_big_path(bigPath);
374     recording->drawPath(bigPath, stroke);
375 
376     // Use a perlin shader to warmup the GPU.
377     SkPaint perlin;
378     perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
379     recording->drawRect(bounds, perlin);
380 
381     return recorder.finishRecordingAsPicture();
382 }
383 
mkdir_p(const SkString & dirname)384 bool mkdir_p(const SkString& dirname) {
385     if (dirname.isEmpty()) {
386         return true;
387     }
388     return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
389 }
390 
join(const SkCommandLineFlags::StringArray & stringArray)391 static SkString join(const SkCommandLineFlags::StringArray& stringArray) {
392     SkString joined;
393     for (int i = 0; i < stringArray.count(); ++i) {
394         joined.appendf(i ? " %s" : "%s", stringArray[i]);
395     }
396     return joined;
397 }
398 
exitf(ExitErr err,const char * format,...)399 static void exitf(ExitErr err, const char* format, ...) {
400     fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
401     va_list args;
402     va_start(args, format);
403     vfprintf(stderr, format, args);
404     va_end(args);
405     fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
406     exit((int)err);
407 }
408 
GpuSync(const sk_gpu_test::FenceSync * fenceSync)409 GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
410     : fFenceSync(fenceSync) {
411     this->updateFence();
412 }
413 
~GpuSync()414 GpuSync::~GpuSync() {
415     fFenceSync->deleteFence(fFence);
416 }
417 
syncToPreviousFrame()418 void GpuSync::syncToPreviousFrame() {
419     if (sk_gpu_test::kInvalidFence == fFence) {
420         exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
421     }
422     if (!fFenceSync->waitFence(fFence)) {
423         exitf(ExitErr::kUnavailable, "failed to wait for fence");
424     }
425     fFenceSync->deleteFence(fFence);
426     this->updateFence();
427 }
428 
updateFence()429 void GpuSync::updateFence() {
430     fFence = fFenceSync->insertFence();
431     if (sk_gpu_test::kInvalidFence == fFence) {
432         exitf(ExitErr::kUnavailable, "failed to insert fence");
433     }
434 }
435