• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "GpuTimer.h"
9 #include "GrContextFactory.h"
10 #include "SkGr.h"
11 
12 #include "SkCanvas.h"
13 #include "SkCommonFlagsPathRenderer.h"
14 #include "SkOSFile.h"
15 #include "SkOSPath.h"
16 #include "SkPerlinNoiseShader.h"
17 #include "SkPicture.h"
18 #include "SkPictureRecorder.h"
19 #include "SkStream.h"
20 #include "SkSurface.h"
21 #include "SkSurfaceProps.h"
22 #include "picture_utils.h"
23 #include "sk_tool_utils.h"
24 #include "flags/SkCommandLineFlags.h"
25 #include "flags/SkCommonFlagsConfig.h"
26 #include <stdlib.h>
27 #include <algorithm>
28 #include <array>
29 #include <chrono>
30 #include <cmath>
31 #include <vector>
32 
33 /**
34  * This is a minimalist program whose sole purpose is to open an skp file, benchmark it on a single
35  * config, and exit. It is intended to be used through skpbench.py rather than invoked directly.
36  * Limiting the entire process to a single config/skp pair helps to keep the results repeatable.
37  *
38  * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
39  * render target and syncs the GPU after each draw.
40  *
41  * Currently, only GPU configs are supported.
42  */
43 
// Command-line flags for this tool. The rest of this file reads them as FLAGS_<name>.
// A --duration of zero or less makes main() exit right after the (optional) header is printed.
DEFINE_int32(duration, 5000, "number of milliseconds to run the benchmark");
DEFINE_int32(sampleMs, 50, "minimum duration of a sample");
DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
DEFINE_bool(fps, false, "use fps instead of ms");
DEFINE_string(skp, "", "path to a single .skp file, or 'warmup' for a builtin warmup run");
DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
DEFINE_int32(verbosity, 4, "level of verbosity (0=none to 5=debug)");
DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
DEFINE_pathrenderer_flag;
53 
// Column titles for the results row; main() prints this unless FLAGS_suppressHeader is set.
static const char* header =
"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";

// printf format for one results row, consumed by print_result(). The columns line up with the
// titles in 'header'. NOTE(review): the "samples" column is %li, so its argument must be a long.
static const char* resultFormat =
"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7li  %9i  %-5s  %-6s  %-9s %s";
59 
60 struct Sample {
61     using duration = std::chrono::nanoseconds;
62 
SampleSample63     Sample() : fFrames(0), fDuration(0) {}
secondsSample64     double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample65     double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample66     double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample67     static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
68 
69     int        fFrames;
70     duration   fDuration;
71 };
72 
73 class GpuSync {
74 public:
75     GpuSync(const sk_gpu_test::FenceSync* fenceSync);
76     ~GpuSync();
77 
78     void syncToPreviousFrame();
79 
80 private:
81     void updateFence();
82 
83     const sk_gpu_test::FenceSync* const   fFenceSync;
84     sk_gpu_test::PlatformFence            fFence;
85 };
86 
/**
 * Process exit codes used by exitf(). Listed in numeric order.
 * NOTE(review): the non-zero values look like the BSD <sysexits.h> codes (EX_USAGE, EX_DATAERR,
 * EX_UNAVAILABLE, EX_SOFTWARE, EX_IOERR) -- confirm before relying on that correspondence.
 */
enum class ExitErr {
    kOk           = 0,   // Success.
    kUsage        = 64,  // Bad command-line usage.
    kData         = 65,  // Input data could not be parsed.
    kUnavailable  = 69,  // A required platform/GPU feature or resource is unavailable.
    kSoftware     = 70,  // Internal error; exitf() reports these as "this should never happen".
    kIO           = 74   // File-system error.
};
95 
96 static void draw_skp_and_flush(SkCanvas*, const SkPicture*);
97 static sk_sp<SkPicture> create_warmup_skp();
98 static bool mkdir_p(const SkString& name);
99 static SkString join(const SkCommandLineFlags::StringArray&);
100 static void exitf(ExitErr, const char* format, ...);
101 
run_benchmark(const sk_gpu_test::FenceSync * fenceSync,SkCanvas * canvas,const SkPicture * skp,std::vector<Sample> * samples)102 static void run_benchmark(const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
103                           const SkPicture* skp, std::vector<Sample>* samples) {
104     using clock = std::chrono::high_resolution_clock;
105     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
106     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
107 
108     draw_skp_and_flush(canvas, skp);
109     GpuSync gpuSync(fenceSync);
110 
111     draw_skp_and_flush(canvas, skp);
112     gpuSync.syncToPreviousFrame();
113 
114     clock::time_point now = clock::now();
115     const clock::time_point endTime = now + benchDuration;
116 
117     do {
118         clock::time_point sampleStart = now;
119         samples->emplace_back();
120         Sample& sample = samples->back();
121 
122         do {
123             draw_skp_and_flush(canvas, skp);
124             gpuSync.syncToPreviousFrame();
125 
126             now = clock::now();
127             sample.fDuration = now - sampleStart;
128             ++sample.fFrames;
129         } while (sample.fDuration < sampleDuration);
130     } while (now < endTime || 0 == samples->size() % 2);
131 }
132 
run_gpu_time_benchmark(sk_gpu_test::GpuTimer * gpuTimer,const sk_gpu_test::FenceSync * fenceSync,SkCanvas * canvas,const SkPicture * skp,std::vector<Sample> * samples)133 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
134                                    const sk_gpu_test::FenceSync* fenceSync, SkCanvas* canvas,
135                                    const SkPicture* skp, std::vector<Sample>* samples) {
136     using sk_gpu_test::PlatformTimerQuery;
137     using clock = std::chrono::steady_clock;
138     const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
139     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
140 
141     if (!gpuTimer->disjointSupport()) {
142         fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
143                         "results may be unreliable\n");
144     }
145 
146     draw_skp_and_flush(canvas, skp);
147     GpuSync gpuSync(fenceSync);
148 
149     gpuTimer->queueStart();
150     draw_skp_and_flush(canvas, skp);
151     PlatformTimerQuery previousTime = gpuTimer->queueStop();
152     gpuSync.syncToPreviousFrame();
153 
154     clock::time_point now = clock::now();
155     const clock::time_point endTime = now + benchDuration;
156 
157     do {
158         const clock::time_point sampleEndTime = now + sampleDuration;
159         samples->emplace_back();
160         Sample& sample = samples->back();
161 
162         do {
163             gpuTimer->queueStart();
164             draw_skp_and_flush(canvas, skp);
165             PlatformTimerQuery time = gpuTimer->queueStop();
166             gpuSync.syncToPreviousFrame();
167 
168             switch (gpuTimer->checkQueryStatus(previousTime)) {
169                 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
170                 case QueryStatus::kInvalid:
171                     exitf(ExitErr::kUnavailable, "GPU timer failed");
172                 case QueryStatus::kPending:
173                     exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
174                 case QueryStatus::kDisjoint:
175                     if (FLAGS_verbosity >= 4) {
176                         fprintf(stderr, "discarding timer query due to disjoint operations.\n");
177                     }
178                     break;
179                 case QueryStatus::kAccurate:
180                     sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
181                     ++sample.fFrames;
182                     break;
183             }
184             gpuTimer->deleteQuery(previousTime);
185             previousTime = time;
186             now = clock::now();
187         } while (now < sampleEndTime || 0 == sample.fFrames);
188     } while (now < endTime || 0 == samples->size() % 2);
189 
190     gpuTimer->deleteQuery(previousTime);
191 }
192 
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)193 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
194     if (0 == (samples.size() % 2)) {
195         exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
196     }
197 
198     Sample accum = Sample();
199     std::vector<double> values;
200     values.reserve(samples.size());
201     for (const Sample& sample : samples) {
202         accum.fFrames += sample.fFrames;
203         accum.fDuration += sample.fDuration;
204         values.push_back(sample.value());
205     }
206     std::sort(values.begin(), values.end());
207 
208     const double accumValue = accum.value();
209     double variance = 0;
210     for (double value : values) {
211         const double delta = value - accumValue;
212         variance += delta * delta;
213     }
214     variance /= values.size();
215     // Technically, this is the relative standard deviation.
216     const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
217 
218     printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
219            stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
220            config, bench);
221     printf("\n");
222     fflush(stdout);
223 }
224 
main(int argc,char ** argv)225 int main(int argc, char** argv) {
226     SkCommandLineFlags::SetUsage("Use skpbench.py instead. "
227                                  "You usually don't want to use this program directly.");
228     SkCommandLineFlags::Parse(argc, argv);
229 
230     if (!FLAGS_suppressHeader) {
231         printf("%s\n", header);
232     }
233     if (FLAGS_duration <= 0) {
234         exit(0); // This can be used to print the header and quit.
235     }
236 
237     // Parse the config.
238     const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
239     SkCommandLineConfigArray configs;
240     ParseConfigs(FLAGS_config, &configs);
241     if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
242         exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
243                                join(FLAGS_config).c_str());
244     }
245 
246     // Parse the skp.
247     if (FLAGS_skp.count() != 1) {
248         exitf(ExitErr::kUsage, "invalid skp '%s': must specify a single skp file, or 'warmup'",
249                                join(FLAGS_skp).c_str());
250     }
251     sk_sp<SkPicture> skp;
252     SkString skpname;
253     if (0 == strcmp(FLAGS_skp[0], "warmup")) {
254         skp = create_warmup_skp();
255         skpname = "warmup";
256     } else {
257         const char* skpfile = FLAGS_skp[0];
258         std::unique_ptr<SkStream> skpstream(SkStream::MakeFromFile(skpfile));
259         if (!skpstream) {
260             exitf(ExitErr::kIO, "failed to open skp file %s", skpfile);
261         }
262         skp = SkPicture::MakeFromStream(skpstream.get());
263         if (!skp) {
264             exitf(ExitErr::kData, "failed to parse skp file %s", skpfile);
265         }
266         skpname = SkOSPath::Basename(skpfile);
267     }
268     int width = SkTMin(SkScalarCeilToInt(skp->cullRect().width()), 2048),
269         height = SkTMin(SkScalarCeilToInt(skp->cullRect().height()), 2048);
270     if (FLAGS_verbosity >= 3 &&
271         (width != skp->cullRect().width() || height != skp->cullRect().height())) {
272         fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
273                         skpname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
274                         SkScalarCeilToInt(skp->cullRect().height()), width, height);
275     }
276 
277     // Create a context.
278     GrContextOptions ctxOptions;
279     ctxOptions.fGpuPathRenderers = CollectGpuPathRenderersFromFlags();
280     sk_gpu_test::GrContextFactory factory(ctxOptions);
281     sk_gpu_test::ContextInfo ctxInfo =
282         factory.getContextInfo(config->getContextType(), config->getContextOverrides());
283     GrContext* ctx = ctxInfo.grContext();
284     if (!ctx) {
285         exitf(ExitErr::kUnavailable, "failed to create context for config %s",
286                                      config->getTag().c_str());
287     }
288     if (ctx->caps()->maxRenderTargetSize() < SkTMax(width, height)) {
289         exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
290                                      width, height, ctx->caps()->maxRenderTargetSize());
291     }
292     GrPixelConfig grPixConfig = SkImageInfo2GrPixelConfig(config->getColorType(),
293                                                           config->getColorSpace(),
294                                                           *ctx->caps());
295     int supportedSampleCount = ctx->caps()->getSampleCount(config->getSamples(), grPixConfig);
296     if (supportedSampleCount != config->getSamples()) {
297         exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
298                                      config->getSamples());
299     }
300     sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
301     if (!testCtx) {
302         exitf(ExitErr::kSoftware, "testContext is null");
303     }
304     if (!testCtx->fenceSyncSupport()) {
305         exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
306     }
307 
308     // Create a render target.
309     SkImageInfo info =
310             SkImageInfo::Make(width, height, config->getColorType(), config->getAlphaType(),
311                               sk_ref_sp(config->getColorSpace()));
312     uint32_t flags = config->getUseDIText() ? SkSurfaceProps::kUseDeviceIndependentFonts_Flag : 0;
313     SkSurfaceProps props(flags, SkSurfaceProps::kLegacyFontHost_InitType);
314     sk_sp<SkSurface> surface =
315         SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
316     if (!surface) {
317         exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
318                                      width, height, config->getTag().c_str());
319     }
320 
321     // Run the benchmark.
322     std::vector<Sample> samples;
323     if (FLAGS_sampleMs > 0) {
324         // +1 because we might take one more sample in order to have an odd number.
325         samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
326     } else {
327         samples.reserve(2 * FLAGS_duration);
328     }
329     SkCanvas* canvas = surface->getCanvas();
330     canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
331     if (!FLAGS_gpuClock) {
332         run_benchmark(testCtx->fenceSync(), canvas, skp.get(), &samples);
333     } else {
334         if (!testCtx->gpuTimingSupport()) {
335             exitf(ExitErr::kUnavailable, "GPU does not support timing");
336         }
337         run_gpu_time_benchmark(testCtx->gpuTimer(), testCtx->fenceSync(), canvas, skp.get(),
338                                &samples);
339     }
340     print_result(samples, config->getTag().c_str(), skpname.c_str());
341 
342     // Save a proof (if one was requested).
343     if (!FLAGS_png.isEmpty()) {
344         SkBitmap bmp;
345         bmp.allocPixels(info);
346         if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
347             exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
348         }
349         const SkString &dirname = SkOSPath::Dirname(FLAGS_png[0]),
350                        &basename = SkOSPath::Basename(FLAGS_png[0]);
351         if (!mkdir_p(dirname)) {
352             exitf(ExitErr::kIO, "failed to create directory \"%s\" for png", dirname.c_str());
353         }
354         if (!sk_tools::write_bitmap_to_disk(bmp, dirname, nullptr, basename)) {
355             exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
356         }
357     }
358 
359     exit(0);
360 }
361 
// Draws one frame: plays 'skp' into 'canvas' and then flushes the canvas. Both benchmark loops
// call this once per frame before syncing with the GPU.
static void draw_skp_and_flush(SkCanvas* canvas, const SkPicture* skp) {
    canvas->drawPicture(skp);
    canvas->flush();
}
366 
create_warmup_skp()367 static sk_sp<SkPicture> create_warmup_skp() {
368     static constexpr SkRect bounds{0, 0, 500, 500};
369     SkPictureRecorder recorder;
370     SkCanvas* recording = recorder.beginRecording(bounds);
371 
372     recording->clear(SK_ColorWHITE);
373 
374     SkPaint stroke;
375     stroke.setStyle(SkPaint::kStroke_Style);
376     stroke.setStrokeWidth(2);
377 
378     // Use a big path to (theoretically) warmup the CPU.
379     SkPath bigPath;
380     sk_tool_utils::make_big_path(bigPath);
381     recording->drawPath(bigPath, stroke);
382 
383     // Use a perlin shader to warmup the GPU.
384     SkPaint perlin;
385     perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
386     recording->drawRect(bounds, perlin);
387 
388     return recorder.finishRecordingAsPicture();
389 }
390 
mkdir_p(const SkString & dirname)391 bool mkdir_p(const SkString& dirname) {
392     if (dirname.isEmpty()) {
393         return true;
394     }
395     return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
396 }
397 
join(const SkCommandLineFlags::StringArray & stringArray)398 static SkString join(const SkCommandLineFlags::StringArray& stringArray) {
399     SkString joined;
400     for (int i = 0; i < stringArray.count(); ++i) {
401         joined.appendf(i ? " %s" : "%s", stringArray[i]);
402     }
403     return joined;
404 }
405 
exitf(ExitErr err,const char * format,...)406 static void exitf(ExitErr err, const char* format, ...) {
407     fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
408     va_list args;
409     va_start(args, format);
410     vfprintf(stderr, format, args);
411     va_end(args);
412     fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
413     exit((int)err);
414 }
415 
// Stores the fence interface (not owned) and immediately inserts the first fence, so that the
// first call to syncToPreviousFrame() has something to wait on.
GpuSync::GpuSync(const sk_gpu_test::FenceSync* fenceSync)
    : fFenceSync(fenceSync) {
    this->updateFence();
}
420 
// Releases the fence that is still held (the one most recently inserted by updateFence()).
GpuSync::~GpuSync() {
    fFenceSync->deleteFence(fFence);
}
424 
syncToPreviousFrame()425 void GpuSync::syncToPreviousFrame() {
426     if (sk_gpu_test::kInvalidFence == fFence) {
427         exitf(ExitErr::kSoftware, "attempted to sync with invalid fence");
428     }
429     if (!fFenceSync->waitFence(fFence)) {
430         exitf(ExitErr::kUnavailable, "failed to wait for fence");
431     }
432     fFenceSync->deleteFence(fFence);
433     this->updateFence();
434 }
435 
updateFence()436 void GpuSync::updateFence() {
437     fFence = fFenceSync->insertFence();
438     if (sk_gpu_test::kInvalidFence == fFence) {
439         exitf(ExitErr::kUnavailable, "failed to insert fence");
440     }
441 }
442