• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "bench/BigPath.h"
9 #include "include/core/SkCanvas.h"
10 #include "include/core/SkDeferredDisplayList.h"
11 #include "include/core/SkGraphics.h"
12 #include "include/core/SkPicture.h"
13 #include "include/core/SkPictureRecorder.h"
14 #include "include/core/SkStream.h"
15 #include "include/core/SkSurface.h"
16 #include "include/core/SkSurfaceProps.h"
17 #include "include/effects/SkPerlinNoiseShader.h"
18 #include "include/gpu/GrDirectContext.h"
19 #include "src/core/SkOSFile.h"
20 #include "src/core/SkTaskGroup.h"
21 #include "src/gpu/GrCaps.h"
22 #include "src/gpu/GrDirectContextPriv.h"
23 #include "src/gpu/SkGr.h"
24 #include "src/utils/SkMultiPictureDocument.h"
25 #include "src/utils/SkOSPath.h"
26 #include "tools/DDLPromiseImageHelper.h"
27 #include "tools/DDLTileHelper.h"
28 #include "tools/SkSharingProc.h"
29 #include "tools/ToolUtils.h"
30 #include "tools/flags/CommandLineFlags.h"
31 #include "tools/flags/CommonFlags.h"
32 #include "tools/flags/CommonFlagsConfig.h"
33 #include "tools/gpu/FlushFinishTracker.h"
34 #include "tools/gpu/GpuTimer.h"
35 #include "tools/gpu/GrContextFactory.h"
36 
37 #if defined(SK_ENABLE_SVG)
38 #include "modules/svg/include/SkSVGDOM.h"
39 #include "src/xml/SkDOM.h"
40 #endif
41 
42 #include <stdlib.h>
43 #include <algorithm>
44 #include <array>
45 #include <chrono>
46 #include <cinttypes>
47 #include <cmath>
48 #include <vector>
49 
/**
 * This is a minimalist program whose sole purpose is to open a .skp or .svg file, benchmark it on a
 * single config, and exit. It is intended to be used through skpbench.py rather than invoked
 * directly. Limiting the entire process to a single config/skp pair helps to keep the results
 * repeatable.
 *
 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
 * render target and syncs the GPU after each draw.
 *
 * Well, maybe a little fanciness: MSKPs can be loaded and played. The animation is played as many
 * times as necessary to reach the target sample duration, and FPS is reported.
 *
 * Currently, only GPU configs are supported.
 */
64 
// ---- DDL (deferred display list) options; only consulted when --ddl is set. ----
static DEFINE_bool(ddl, false, "record the skp into DDLs before rendering");
static DEFINE_int(ddlNumRecordingThreads, 0, "number of DDL recording threads (0=num_cores)");
static DEFINE_int(ddlTilingWidthHeight, 0, "number of tiles along one edge when in DDL mode");

// Single-threaded DDL comparison modes. ddl_sample() asserts that comparableSKP is not set
// whenever comparableDDL is.
static DEFINE_bool(comparableDDL, false, "render in a way that is comparable to 'comparableSKP'");
static DEFINE_bool(comparableSKP, false, "report in a way that is comparable to 'comparableDDL'");

// ---- General benchmark options. ----
// A duration <= 0 makes main() print the header (unless suppressed) and exit immediately.
static DEFINE_int(duration, 5000, "number of milliseconds to run the benchmark");
static DEFINE_int(sampleMs, 50, "minimum duration of a sample");
static DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
static DEFINE_bool(fps, false, "use fps instead of ms");
static DEFINE_string(src, "",
                     "path to a single .skp or .svg file, or 'warmup' for a builtin warmup run");
static DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
static DEFINE_int(verbosity, 4, "level of verbosity (0=none to 5=debug)");
static DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
static DEFINE_double(scale, 1, "Scale the size of the canvas and the zoom level by this factor.");
static DEFINE_bool(dumpSamples, false, "print the individual samples to stdout");

// Column titles for the result row. Printed by main() unless --suppressHeader is given.
static const char header[] =
"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";

// printf format for one result row. print_result() supplies the arguments in the same order as
// the columns named in `header`.
static const char resultFormat[] =
"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7zu  %9i  %-5s  %-6s  %-9s %s";

// Number of draw+flush passes issued before timing starts; used by run_benchmark() and
// run_gpu_time_benchmark().
static constexpr int kNumFlushesToPrimeCache = 3;
91 
92 struct Sample {
93     using duration = std::chrono::nanoseconds;
94 
SampleSample95     Sample() : fFrames(0), fDuration(0) {}
secondsSample96     double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample97     double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample98     double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample99     static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
100 
101     int        fFrames;
102     duration   fDuration;
103 };
104 
105 class GpuSync {
106 public:
GpuSync()107     GpuSync() {}
~GpuSync()108     ~GpuSync() {}
109 
110     void waitIfNeeded();
111 
112     sk_gpu_test::FlushFinishTracker* newFlushTracker(GrDirectContext* context);
113 
114 private:
115     enum { kMaxFrameLag = 3 };
116     sk_sp<sk_gpu_test::FlushFinishTracker> fFinishTrackers[kMaxFrameLag - 1];
117     int fCurrentFlushIdx = 0;
118 };
119 
// Process exit codes passed to exitf(). The numeric values look like the BSD sysexits.h codes
// (TODO confirm that is the intent); they are listed here in ascending order.
enum class ExitErr {
    kOk           = 0,   // success
    kUsage        = 64,  // bad command line (config / src arguments)
    kData         = 65,  // input file could not be parsed
    kUnavailable  = 69,  // platform / GPU lacks a required capability
    kSoftware     = 70,  // internal error; exitf() prefixes these with "INTERNAL ERROR"
    kIO           = 74   // file or directory could not be opened / created / written
};
128 
// Forward declarations of helpers defined after main().

// Waits on gpuSync if needed, then flushes and submits the context with a finish tracker.
static void flush_with_sync(GrDirectContext*, GpuSync&);
// Draws the picture into the surface's canvas, then calls flush_with_sync().
static void draw_skp_and_flush_with_sync(GrDirectContext*, SkSurface*, const SkPicture*, GpuSync&);
// Records the built-in 500x500 picture used for --src warmup.
static sk_sp<SkPicture> create_warmup_skp();
// Renders an SVG stream into a picture; exits the process on failure.
static sk_sp<SkPicture> create_skp_from_svg(SkStream*, const char* filename);
// Creates the directory and every missing parent; returns false on failure.
static bool mkdir_p(const SkString& name);
// Concatenates the strings of a flag array, separated by single spaces.
static SkString         join(const CommandLineFlags::StringArray&);
// Prints a formatted error message to stderr and exits with the given code.
static void exitf(ExitErr, const char* format, ...);
136 
137 // An interface used by both static SKPs and animated SKPs
138 class SkpProducer {
139 public:
~SkpProducer()140     virtual ~SkpProducer() {}
141     // Draw an SkPicture to the provided surface, flush the surface, and sync the GPU.
142     // You may use the static draw_skp_and_flush_with_sync declared above.
143     // returned int tells how many draw/flush/sync were done.
144     virtual int drawAndFlushAndSync(GrDirectContext*, SkSurface* surface, GpuSync& gpuSync) = 0;
145 };
146 
147 class StaticSkp : public SkpProducer {
148 public:
StaticSkp(sk_sp<SkPicture> skp)149     StaticSkp(sk_sp<SkPicture> skp) : fSkp(skp) {}
150 
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)151     int drawAndFlushAndSync(GrDirectContext* context,
152                             SkSurface* surface,
153                             GpuSync& gpuSync) override {
154         draw_skp_and_flush_with_sync(context, surface, fSkp.get(), gpuSync);
155         return 1;
156     }
157 
158 private:
159     sk_sp<SkPicture> fSkp;
160 };
161 
162 // A class for playing/benchmarking a multi frame SKP file.
163 // the recorded frames are looped over repeatedly.
164 // This type of benchmark may have a much higher std dev in frame times.
165 class MultiFrameSkp : public SkpProducer {
166 public:
MultiFrameSkp(const std::vector<SkDocumentPage> & frames)167     MultiFrameSkp(const std::vector<SkDocumentPage>& frames) : fFrames(frames){}
168 
MakeFromFile(const SkString & path)169     static std::unique_ptr<MultiFrameSkp> MakeFromFile(const SkString& path) {
170         // Load the multi frame skp at the given filename.
171         std::unique_ptr<SkStreamAsset> stream = SkStream::MakeFromFile(path.c_str());
172         if (!stream) { return nullptr; }
173 
174         // Attempt to deserialize with an image sharing serial proc.
175         auto deserialContext = std::make_unique<SkSharingDeserialContext>();
176         SkDeserialProcs procs;
177         procs.fImageProc = SkSharingDeserialContext::deserializeImage;
178         procs.fImageCtx = deserialContext.get();
179 
180         // The outer format of multi-frame skps is the multi-picture document, which is a
181         // skp file containing subpictures separated by annotations.
182         int page_count = SkMultiPictureDocumentReadPageCount(stream.get());
183         if (!page_count) {
184             return nullptr;
185         }
186         std::vector<SkDocumentPage> frames(page_count); // can't call reserve, why?
187         if (!SkMultiPictureDocumentRead(stream.get(), frames.data(), page_count, &procs)) {
188             return nullptr;
189         }
190 
191         return std::make_unique<MultiFrameSkp>(frames);
192     }
193 
194     // Draw the whole animation once.
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)195     int drawAndFlushAndSync(GrDirectContext* context,
196                             SkSurface* surface,
197                             GpuSync& gpuSync) override {
198         for (int i=0; i<this->count(); i++){
199             draw_skp_and_flush_with_sync(context, surface, this->frame(i).get(), gpuSync);
200         }
201         return this->count();
202     }
203     // Return the requested frame.
frame(int n) const204     sk_sp<SkPicture> frame(int n) const { return fFrames[n].fPicture; }
205     // Return the number of frames in the recording.
count() const206     int count() const { return fFrames.size(); }
207 private:
208     std::vector<SkDocumentPage> fFrames;
209 };
210 
ddl_sample(GrDirectContext * dContext,DDLTileHelper * tiles,GpuSync & gpuSync,Sample * sample,SkTaskGroup * recordingTaskGroup,SkTaskGroup * gpuTaskGroup,std::chrono::high_resolution_clock::time_point * startStopTime,SkPicture * picture)211 static void ddl_sample(GrDirectContext* dContext, DDLTileHelper* tiles, GpuSync& gpuSync,
212                        Sample* sample, SkTaskGroup* recordingTaskGroup, SkTaskGroup* gpuTaskGroup,
213                        std::chrono::high_resolution_clock::time_point* startStopTime,
214                        SkPicture* picture) {
215     using clock = std::chrono::high_resolution_clock;
216 
217     clock::time_point start = *startStopTime;
218 
219     if (FLAGS_comparableDDL) {
220         SkASSERT(!FLAGS_comparableSKP);
221 
222         // In this mode we simply alternate between creating a DDL and drawing it - all on one
223         // thread. The interleaving is so that we don't starve the GPU.
224         // One unfortunate side effect of this is that we can't delete the DDLs until after
225         // the GPU work is flushed.
226         tiles->interleaveDDLCreationAndDraw(dContext, picture);
227     } else if (FLAGS_comparableSKP) {
228         // In this mode simply draw the re-inflated per-tile SKPs directly to the GPU w/o going
229         // through a DDL.
230         tiles->drawAllTilesDirectly(dContext, picture);
231     } else {
232         tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext, picture);
233         recordingTaskGroup->wait();
234     }
235 
236     if (gpuTaskGroup) {
237         gpuTaskGroup->add([&]{
238             flush_with_sync(dContext, gpuSync);
239         });
240         gpuTaskGroup->wait();
241     } else {
242         flush_with_sync(dContext, gpuSync);
243     }
244 
245     *startStopTime = clock::now();
246 
247     if (sample) {
248         sample->fDuration += *startStopTime - start;
249         sample->fFrames++;
250     }
251 }
252 
// Benchmarks `inputPicture` through the DDL (deferred display list) path and appends the timed
// samples to `samples`.
//
// Outline: convert the picture with DDLPromiseImageHelper, upload its images, set up the tile
// helper and backend textures, optionally move the GPU context onto a dedicated GPU thread, run
// one untimed frame, then collect samples until --duration has elapsed and the sample count is
// odd. Afterwards the context is moved back to the calling thread and GPU resources are released.
// Exits the process (kUnavailable) if the picture cannot be converted.
static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectContext *dContext,
                              sk_sp<SkSurface> dstSurface, SkPicture* inputPicture,
                              std::vector<Sample>* samples) {
    using clock = std::chrono::high_resolution_clock;
    const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);

    SkSurfaceCharacterization dstCharacterization;
    SkAssertResult(dstSurface->characterize(&dstCharacterization));

    SkIRect viewport = dstSurface->imageInfo().bounds();

    SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADataTypes(*dContext);
    DDLPromiseImageHelper promiseImageHelper(supportedYUVADataTypes);
    sk_sp<SkPicture> newSKP = promiseImageHelper.recreateSKP(dContext, inputPicture);
    if (!newSKP) {
        exitf(ExitErr::kUnavailable, "DDL: conversion of skp failed");
    }

    promiseImageHelper.uploadAllToGPU(nullptr, dContext);

    // The same flag value is used for both tile-count arguments.
    DDLTileHelper tiles(dContext, dstCharacterization, viewport,
                        FLAGS_ddlTilingWidthHeight, FLAGS_ddlTilingWidthHeight,
                        /* addRandomPaddingToDst */ false);

    tiles.createBackendTextures(nullptr, dContext);

    // In comparable modes, there is no GPU thread. The following pointers are all null.
    // Otherwise, we transfer testContext onto the GPU thread until after the bench.
    std::unique_ptr<SkExecutor> gpuThread;
    std::unique_ptr<SkTaskGroup> gpuTaskGroup;
    std::unique_ptr<SkExecutor> recordingThreadPool;
    std::unique_ptr<SkTaskGroup> recordingTaskGroup;
    if (!FLAGS_comparableDDL && !FLAGS_comparableSKP) {
        gpuThread = SkExecutor::MakeFIFOThreadPool(1, false);
        gpuTaskGroup = std::make_unique<SkTaskGroup>(*gpuThread);
        recordingThreadPool = SkExecutor::MakeFIFOThreadPool(FLAGS_ddlNumRecordingThreads, false);
        recordingTaskGroup = std::make_unique<SkTaskGroup>(*recordingThreadPool);
        // Release the context here first, then make it current from a task on the GPU pool.
        testContext->makeNotCurrent();
        gpuTaskGroup->add([=]{ testContext->makeCurrent(); });
    }

    clock::time_point startStopTime = clock::now();

    // One untimed frame: passing a null Sample means nothing is recorded for it.
    GpuSync gpuSync;
    ddl_sample(dContext, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
               gpuTaskGroup.get(), &startStopTime, newSKP.get());

    clock::duration cumulativeDuration = std::chrono::milliseconds(0);

    // Outer loop: one iteration per sample. Keeps going until the accumulated sample time reaches
    // the bench duration AND the number of samples is odd (print_result requires an odd count).
    do {
        samples->emplace_back();
        Sample& sample = samples->back();

        // Inner loop: add frames to this sample until it spans at least sampleDuration.
        do {
            tiles.resetAllTiles();
            ddl_sample(dContext, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
                       gpuTaskGroup.get(), &startStopTime, newSKP.get());
        } while (sample.fDuration < sampleDuration);

        cumulativeDuration += sample.fDuration;
    } while (cumulativeDuration < benchDuration || 0 == samples->size() % 2);

    // Move the context back to this thread now that we're done benching.
    if (gpuTaskGroup) {
        gpuTaskGroup->add([=]{
            testContext->makeNotCurrent();
        });
        gpuTaskGroup->wait();
        testContext->makeCurrent();
    }

    if (!FLAGS_png.isEmpty()) {
        // The user wants to see the final result
        dstSurface->draw(tiles.composeDDL());
        dstSurface->flushAndSubmit();
    }

    tiles.resetAllTiles();

    // Make sure the gpu has finished all its work before we exit this function and delete the
    // fence.
    dContext->flush();
    dContext->submit(true);

    promiseImageHelper.deleteAllFromGPU(nullptr, dContext);

    tiles.deleteBackendTextures(nullptr, dContext);

}
343 
run_benchmark(GrDirectContext * context,SkSurface * surface,SkpProducer * skpp,std::vector<Sample> * samples)344 static void run_benchmark(GrDirectContext* context, SkSurface* surface, SkpProducer* skpp,
345                           std::vector<Sample>* samples) {
346     using clock = std::chrono::high_resolution_clock;
347     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
348     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
349 
350     GpuSync gpuSync;
351     int i = 0;
352     do {
353         i += skpp->drawAndFlushAndSync(context, surface, gpuSync);
354     } while(i < kNumFlushesToPrimeCache);
355 
356     clock::time_point now = clock::now();
357     const clock::time_point endTime = now + benchDuration;
358 
359     do {
360         clock::time_point sampleStart = now;
361         samples->emplace_back();
362         Sample& sample = samples->back();
363 
364         do {
365             sample.fFrames += skpp->drawAndFlushAndSync(context, surface, gpuSync);
366             now = clock::now();
367             sample.fDuration = now - sampleStart;
368         } while (sample.fDuration < sampleDuration);
369     } while (now < endTime || 0 == samples->size() % 2);
370 
371     // Make sure the gpu has finished all its work before we exit this function and delete the
372     // fence.
373     surface->flush();
374     context->submit(true);
375 }
376 
// GPU-clock benchmark. Draws `skp` repeatedly, wrapping each draw in a GPU timer query, and
// appends one Sample per --sampleMs window to `samples`.
//
// Sample durations are accumulated from the GPU timer's elapsed times, while loop termination is
// decided on the CPU steady clock. Queries are consumed one frame late: after issuing the draw
// for the current frame, the query belonging to the *previous* frame is checked and read.
// Exits the process (kUnavailable) if a query is invalid or still pending.
static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, GrDirectContext* context,
                                   SkSurface* surface, const SkPicture* skp,
                                   std::vector<Sample>* samples) {
    using sk_gpu_test::PlatformTimerQuery;
    using clock = std::chrono::steady_clock;
    const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
    const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);

    if (!gpuTimer->disjointSupport()) {
        fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
                        "results may be unreliable\n");
    }

    // First priming draw, issued without a timer query.
    GpuSync gpuSync;
    draw_skp_and_flush_with_sync(context, surface, skp, gpuSync);

    // Remaining priming draws (the loop starts at 1 because one draw was already issued above).
    // Only the query from the last iteration is kept in previousTime.
    // NOTE(review): earlier iterations overwrite previousTime without calling deleteQuery() on
    // the old handle -- confirm whether that leaks a query object.
    PlatformTimerQuery previousTime = 0;
    for (int i = 1; i < kNumFlushesToPrimeCache; ++i) {
        gpuTimer->queueStart();
        draw_skp_and_flush_with_sync(context, surface, skp, gpuSync);
        previousTime = gpuTimer->queueStop();
    }

    clock::time_point now = clock::now();
    const clock::time_point endTime = now + benchDuration;

    // Outer loop: one iteration per sample; runs until the bench duration has elapsed and the
    // sample count is odd (print_result requires an odd count).
    do {
        const clock::time_point sampleEndTime = now + sampleDuration;
        samples->emplace_back();
        Sample& sample = samples->back();

        // Inner loop: runs until the sample window has elapsed and the sample holds at least one
        // accurately-timed frame.
        do {
            gpuTimer->queueStart();
            draw_skp_and_flush_with_sync(context, surface, skp, gpuSync);
            PlatformTimerQuery time = gpuTimer->queueStop();

            // Inspect the previous frame's query, not the one just queued.
            switch (gpuTimer->checkQueryStatus(previousTime)) {
                using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
                case QueryStatus::kInvalid:
                    exitf(ExitErr::kUnavailable, "GPU timer failed");
                    break;
                case QueryStatus::kPending:
                    exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
                    break;
                case QueryStatus::kDisjoint:
                    // The frame is dropped: neither its time nor its frame count is recorded.
                    if (FLAGS_verbosity >= 4) {
                        fprintf(stderr, "discarding timer query due to disjoint operations.\n");
                    }
                    break;
                case QueryStatus::kAccurate:
                    sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
                    ++sample.fFrames;
                    break;
            }
            // Retire the consumed query and make the current one the next to be checked.
            gpuTimer->deleteQuery(previousTime);
            previousTime = time;
            now = clock::now();
        } while (now < sampleEndTime || 0 == sample.fFrames);
    } while (now < endTime || 0 == samples->size() % 2);

    // The query for the final frame is never read; just release it.
    gpuTimer->deleteQuery(previousTime);

    // Make sure the gpu has finished all its work before we exit this function and delete the
    // fence.
    surface->flush();
    context->submit(true);
}
444 
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)445 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
446     if (0 == (samples.size() % 2)) {
447         exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
448     }
449 
450     if (FLAGS_dumpSamples) {
451         printf("Samples: ");
452         for (const Sample& sample : samples) {
453             printf("%" PRId64 " ", static_cast<int64_t>(sample.fDuration.count()));
454         }
455         printf("%s\n", bench);
456     }
457 
458     Sample accum = Sample();
459     std::vector<double> values;
460     values.reserve(samples.size());
461     for (const Sample& sample : samples) {
462         accum.fFrames += sample.fFrames;
463         accum.fDuration += sample.fDuration;
464         values.push_back(sample.value());
465     }
466     std::sort(values.begin(), values.end());
467 
468     const double accumValue = accum.value();
469     double variance = 0;
470     for (double value : values) {
471         const double delta = value - accumValue;
472         variance += delta * delta;
473     }
474     variance /= values.size();
475     // Technically, this is the relative standard deviation.
476     const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
477 
478     printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
479            stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
480            config, bench);
481     printf("\n");
482     fflush(stdout);
483 }
484 
main(int argc,char ** argv)485 int main(int argc, char** argv) {
486     CommandLineFlags::SetUsage(
487             "Use skpbench.py instead. "
488             "You usually don't want to use this program directly.");
489     CommandLineFlags::Parse(argc, argv);
490 
491     if (!FLAGS_suppressHeader) {
492         printf("%s\n", header);
493     }
494     if (FLAGS_duration <= 0) {
495         exit(0); // This can be used to print the header and quit.
496     }
497 
498     // Parse the config.
499     const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
500     SkCommandLineConfigArray configs;
501     ParseConfigs(FLAGS_config, &configs);
502     if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
503         exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
504                                join(FLAGS_config).c_str());
505     }
506 
507     // Parse the skp.
508     if (FLAGS_src.count() != 1) {
509         exitf(ExitErr::kUsage,
510               "invalid input '%s': must specify a single .skp or .svg file, or 'warmup'",
511               join(FLAGS_src).c_str());
512     }
513 
514     SkGraphics::Init();
515 
516     sk_sp<SkPicture> skp;
517     std::unique_ptr<MultiFrameSkp> mskp; // populated if the file is multi frame.
518     SkString srcname;
519     if (0 == strcmp(FLAGS_src[0], "warmup")) {
520         skp = create_warmup_skp();
521         srcname = "warmup";
522     } else {
523         SkString srcfile(FLAGS_src[0]);
524         std::unique_ptr<SkStream> srcstream(SkStream::MakeFromFile(srcfile.c_str()));
525         if (!srcstream) {
526             exitf(ExitErr::kIO, "failed to open file %s", srcfile.c_str());
527         }
528         if (srcfile.endsWith(".svg")) {
529             skp = create_skp_from_svg(srcstream.get(), srcfile.c_str());
530         } else if (srcfile.endsWith(".mskp")) {
531             mskp = MultiFrameSkp::MakeFromFile(srcfile);
532             // populate skp with it's first frame, for width height determination.
533             skp = mskp->frame(0);
534         } else {
535             skp = SkPicture::MakeFromStream(srcstream.get());
536         }
537         if (!skp) {
538             exitf(ExitErr::kData, "failed to parse file %s", srcfile.c_str());
539         }
540         srcname = SkOSPath::Basename(srcfile.c_str());
541     }
542     int width = std::min(SkScalarCeilToInt(skp->cullRect().width()), 2048),
543         height = std::min(SkScalarCeilToInt(skp->cullRect().height()), 2048);
544     if (FLAGS_verbosity >= 3 &&
545         (width != skp->cullRect().width() || height != skp->cullRect().height())) {
546         fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
547                         srcname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
548                         SkScalarCeilToInt(skp->cullRect().height()), width, height);
549     }
550     if (FLAGS_scale != 1) {
551         width *= FLAGS_scale;
552         height *= FLAGS_scale;
553         if (FLAGS_verbosity >= 3) {
554             fprintf(stderr, "Scale factor of %.2f: scaling to %ix%i.\n",
555                     FLAGS_scale, width, height);
556         }
557     }
558 
559     if (config->getSurfType() != SkCommandLineConfigGpu::SurfType::kDefault) {
560         exitf(ExitErr::kUnavailable, "This tool only supports the default surface type. (%s)",
561               config->getTag().c_str());
562     }
563 
564     // Create a context.
565     GrContextOptions ctxOptions;
566     CommonFlags::SetCtxOptions(&ctxOptions);
567     sk_gpu_test::GrContextFactory factory(ctxOptions);
568     sk_gpu_test::ContextInfo ctxInfo =
569         factory.getContextInfo(config->getContextType(), config->getContextOverrides());
570     auto ctx = ctxInfo.directContext();
571     if (!ctx) {
572         exitf(ExitErr::kUnavailable, "failed to create context for config %s",
573                                      config->getTag().c_str());
574     }
575     if (ctx->maxRenderTargetSize() < std::max(width, height)) {
576         exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
577               width, height, ctx->maxRenderTargetSize());
578     }
579     GrBackendFormat format = ctx->defaultBackendFormat(config->getColorType(), GrRenderable::kYes);
580     if (!format.isValid()) {
581         exitf(ExitErr::kUnavailable, "failed to get GrBackendFormat from SkColorType: %d",
582                                      config->getColorType());
583     }
584     int supportedSampleCount = ctx->priv().caps()->getRenderTargetSampleCount(
585             config->getSamples(), format);
586     if (supportedSampleCount != config->getSamples()) {
587         exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
588                                      config->getSamples());
589     }
590     sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
591     if (!testCtx) {
592         exitf(ExitErr::kSoftware, "testContext is null");
593     }
594     if (!testCtx->fenceSyncSupport()) {
595         exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
596     }
597 
598     // Create a render target.
599     SkImageInfo info = SkImageInfo::Make(
600             width, height, config->getColorType(), config->getAlphaType(), config->refColorSpace());
601     SkSurfaceProps props(config->getSurfaceFlags(), kRGB_H_SkPixelGeometry);
602     sk_sp<SkSurface> surface =
603         SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
604     if (!surface) {
605         exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
606                                      width, height, config->getTag().c_str());
607     }
608 
609     // Run the benchmark.
610     std::vector<Sample> samples;
611     if (FLAGS_sampleMs > 0) {
612         // +1 because we might take one more sample in order to have an odd number.
613         samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
614     } else {
615         samples.reserve(2 * FLAGS_duration);
616     }
617     SkCanvas* canvas = surface->getCanvas();
618     canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
619     if (FLAGS_scale != 1) {
620         canvas->scale(FLAGS_scale, FLAGS_scale);
621     }
622     if (!FLAGS_gpuClock) {
623         if (FLAGS_ddl) {
624             run_ddl_benchmark(testCtx, ctx, surface, skp.get(), &samples);
625         } else if (!mskp) {
626             auto s = std::make_unique<StaticSkp>(skp);
627             run_benchmark(ctx, surface.get(), s.get(), &samples);
628         } else {
629             run_benchmark(ctx, surface.get(), mskp.get(), &samples);
630         }
631     } else {
632         if (FLAGS_ddl) {
633             exitf(ExitErr::kUnavailable, "DDL: GPU-only timing not supported");
634         }
635         if (!testCtx->gpuTimingSupport()) {
636             exitf(ExitErr::kUnavailable, "GPU does not support timing");
637         }
638         run_gpu_time_benchmark(testCtx->gpuTimer(), ctx, surface.get(), skp.get(), &samples);
639     }
640     print_result(samples, config->getTag().c_str(), srcname.c_str());
641 
642     // Save a proof (if one was requested).
643     if (!FLAGS_png.isEmpty()) {
644         SkBitmap bmp;
645         bmp.allocPixels(info);
646         if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
647             exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
648         }
649         if (!mkdir_p(SkOSPath::Dirname(FLAGS_png[0]))) {
650             exitf(ExitErr::kIO, "failed to create directory for png \"%s\"", FLAGS_png[0]);
651         }
652         if (!ToolUtils::EncodeImageToFile(FLAGS_png[0], bmp, SkEncodedImageFormat::kPNG, 100)) {
653             exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
654         }
655     }
656 
657     return(0);
658 }
659 
flush_with_sync(GrDirectContext * context,GpuSync & gpuSync)660 static void flush_with_sync(GrDirectContext* context, GpuSync& gpuSync) {
661     gpuSync.waitIfNeeded();
662 
663     GrFlushInfo flushInfo;
664     flushInfo.fFinishedProc = sk_gpu_test::FlushFinishTracker::FlushFinished;
665     flushInfo.fFinishedContext = gpuSync.newFlushTracker(context);
666 
667     context->flush(flushInfo);
668     context->submit();
669 }
670 
draw_skp_and_flush_with_sync(GrDirectContext * context,SkSurface * surface,const SkPicture * skp,GpuSync & gpuSync)671 static void draw_skp_and_flush_with_sync(GrDirectContext* context, SkSurface* surface,
672                                          const SkPicture* skp, GpuSync& gpuSync) {
673     auto canvas = surface->getCanvas();
674     canvas->drawPicture(skp);
675 
676     flush_with_sync(context, gpuSync);
677 }
678 
create_warmup_skp()679 static sk_sp<SkPicture> create_warmup_skp() {
680     static constexpr SkRect bounds{0, 0, 500, 500};
681     SkPictureRecorder recorder;
682     SkCanvas* recording = recorder.beginRecording(bounds);
683 
684     recording->clear(SK_ColorWHITE);
685 
686     SkPaint stroke;
687     stroke.setStyle(SkPaint::kStroke_Style);
688     stroke.setStrokeWidth(2);
689 
690     // Use a big path to (theoretically) warmup the CPU.
691     SkPath bigPath = BenchUtils::make_big_path();
692     recording->drawPath(bigPath, stroke);
693 
694     // Use a perlin shader to warmup the GPU.
695     SkPaint perlin;
696     perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
697     recording->drawRect(bounds, perlin);
698 
699     return recorder.finishRecordingAsPicture();
700 }
701 
create_skp_from_svg(SkStream * stream,const char * filename)702 static sk_sp<SkPicture> create_skp_from_svg(SkStream* stream, const char* filename) {
703 #if defined(SK_ENABLE_SVG)
704     sk_sp<SkSVGDOM> svg = SkSVGDOM::MakeFromStream(*stream);
705     if (!svg) {
706         exitf(ExitErr::kData, "failed to build svg dom from file %s", filename);
707     }
708 
709     static constexpr SkRect bounds{0, 0, 1200, 1200};
710     SkPictureRecorder recorder;
711     SkCanvas* recording = recorder.beginRecording(bounds);
712 
713     svg->setContainerSize(SkSize::Make(recording->getBaseLayerSize()));
714     svg->render(recording);
715 
716     return recorder.finishRecordingAsPicture();
717 #endif
718     exitf(ExitErr::kData, "SK_ENABLE_SVG is disabled; cannot open svg file %s", filename);
719     return nullptr;
720 }
721 
mkdir_p(const SkString & dirname)722 bool mkdir_p(const SkString& dirname) {
723     if (dirname.isEmpty() || dirname == SkString("/")) {
724         return true;
725     }
726     return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
727 }
728 
join(const CommandLineFlags::StringArray & stringArray)729 static SkString join(const CommandLineFlags::StringArray& stringArray) {
730     SkString joined;
731     for (int i = 0; i < stringArray.count(); ++i) {
732         joined.appendf(i ? " %s" : "%s", stringArray[i]);
733     }
734     return joined;
735 }
736 
exitf(ExitErr err,const char * format,...)737 static void exitf(ExitErr err, const char* format, ...) {
738     fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
739     va_list args;
740     va_start(args, format);
741     vfprintf(stderr, format, args);
742     va_end(args);
743     fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
744     exit((int)err);
745 }
746 
waitIfNeeded()747 void GpuSync::waitIfNeeded() {
748     if (fFinishTrackers[fCurrentFlushIdx]) {
749         fFinishTrackers[fCurrentFlushIdx]->waitTillFinished();
750     }
751 }
752 
newFlushTracker(GrDirectContext * context)753 sk_gpu_test::FlushFinishTracker* GpuSync::newFlushTracker(GrDirectContext* context) {
754     fFinishTrackers[fCurrentFlushIdx].reset(new sk_gpu_test::FlushFinishTracker(context));
755 
756     sk_gpu_test::FlushFinishTracker* tracker = fFinishTrackers[fCurrentFlushIdx].get();
757     // We add an additional ref to the current flush tracker here. This ref is owned by the finish
758     // callback on the flush call. The finish callback will unref the tracker when called.
759     tracker->ref();
760 
761     fCurrentFlushIdx = (fCurrentFlushIdx + 1) % SK_ARRAY_COUNT(fFinishTrackers);
762     return tracker;
763 }
764