• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "bench/BigPath.h"
9 #include "include/core/SkCanvas.h"
10 #include "include/core/SkGraphics.h"
11 #include "include/core/SkPicture.h"
12 #include "include/core/SkPictureRecorder.h"
13 #include "include/core/SkStream.h"
14 #include "include/core/SkSurface.h"
15 #include "include/core/SkSurfaceProps.h"
16 #include "include/docs/SkMultiPictureDocument.h"
17 #include "include/effects/SkPerlinNoiseShader.h"
18 #include "include/gpu/GrDirectContext.h"
19 #include "include/gpu/ganesh/SkSurfaceGanesh.h"
20 #include "include/private/chromium/GrDeferredDisplayList.h"
21 #include "src/core/SkOSFile.h"
22 #include "src/core/SkTaskGroup.h"
23 #include "src/gpu/ganesh/GrCaps.h"
24 #include "src/gpu/ganesh/GrDirectContextPriv.h"
25 #include "src/gpu/ganesh/SkGr.h"
26 #include "src/gpu/ganesh/image/GrImageUtils.h"
27 #include "src/utils/SkOSPath.h"
28 #include "tools/DDLPromiseImageHelper.h"
29 #include "tools/DDLTileHelper.h"
30 #include "tools/EncodeUtils.h"
31 #include "tools/SkSharingProc.h"
32 #include "tools/flags/CommandLineFlags.h"
33 #include "tools/flags/CommonFlags.h"
34 #include "tools/flags/CommonFlagsConfig.h"
35 #include "tools/fonts/FontToolUtils.h"
36 #include "tools/gpu/FlushFinishTracker.h"
37 #include "tools/gpu/GpuTimer.h"
38 #include "tools/gpu/GrContextFactory.h"
39 
40 #if defined(SK_ENABLE_SVG)
41 #include "modules/skshaper/utils/FactoryHelpers.h"
42 #include "modules/svg/include/SkSVGDOM.h"
43 #include "src/xml/SkDOM.h"
44 #endif
45 
46 #include <stdlib.h>
47 #include <algorithm>
48 #include <array>
49 #include <chrono>
50 #include <cinttypes>
51 #include <cmath>
52 #include <vector>
53 
54 /**
55  * This is a minimalist program whose sole purpose is to open a .skp or .svg file, benchmark it on a
56  * single config, and exit. It is intended to be used through skpbench.py rather than invoked
57  * directly. Limiting the entire process to a single config/skp pair helps to keep the results
58  * repeatable.
59  *
60  * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
61  * render target and syncs the GPU after each draw.
62  *
 * Well, maybe a little fanciness: MSKPs can be loaded and played. The animation is played as many
 * times as necessary to reach the target sample duration, and FPS is reported.
65  *
66  * Currently, only GPU configs are supported.
67  */
68 
69 static DEFINE_bool(ddl, false, "record the skp into DDLs before rendering");
70 static DEFINE_int(ddlNumRecordingThreads, 0, "number of DDL recording threads (0=num_cores)");
71 static DEFINE_int(ddlTilingWidthHeight, 0, "number of tiles along one edge when in DDL mode");
72 
73 static DEFINE_bool(comparableDDL, false, "render in a way that is comparable to 'comparableSKP'");
74 static DEFINE_bool(comparableSKP, false, "report in a way that is comparable to 'comparableDDL'");
75 
76 static DEFINE_int(duration, 5000, "number of milliseconds to run the benchmark");
77 static DEFINE_int(sampleMs, 50, "minimum duration of a sample");
78 static DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
79 static DEFINE_bool(fps, false, "use fps instead of ms");
80 static DEFINE_string(src, "",
81                      "path to a single .skp or .svg file, or 'warmup' for a builtin warmup run");
82 static DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
83 static DEFINE_int(verbosity, 4, "level of verbosity (0=none to 5=debug)");
84 static DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
85 static DEFINE_double(scale, 1, "Scale the size of the canvas and the zoom level by this factor.");
86 static DEFINE_bool(dumpSamples, false, "print the individual samples to stdout");
87 
// Column titles printed by main() ahead of the result row (unless --suppressHeader is set).
static constexpr char header[] =
"   accum    median       max       min   stddev  samples  sample_ms  clock  metric  config    bench";

// printf format of the single result row emitted by print_result(); the fields line up with the
// column titles in `header`.
static constexpr char resultFormat[] =
"%8.4g  %8.4g  %8.4g  %8.4g  %6.3g%%  %7zu  %9i  %-5s  %-6s  %-9s %s";

// Number of untimed draw/flush cycles performed before measurement begins.
static constexpr int kNumFlushesToPrimeCache = 3;
95 
96 struct Sample {
97     using duration = std::chrono::nanoseconds;
98 
SampleSample99     Sample() : fFrames(0), fDuration(0) {}
secondsSample100     double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample101     double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample102     double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample103     static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
104 
105     int        fFrames;
106     duration   fDuration;
107 };
108 
109 class GpuSync {
110 public:
GpuSync()111     GpuSync() {}
~GpuSync()112     ~GpuSync() {}
113 
114     void waitIfNeeded();
115 
116     sk_gpu_test::FlushFinishTracker* newFlushTracker(GrDirectContext* context);
117 
118 private:
119     enum { kMaxFrameLag = 3 };
120     sk_sp<sk_gpu_test::FlushFinishTracker> fFinishTrackers[kMaxFrameLag - 1];
121     int fCurrentFlushIdx = 0;
122 };
123 
// Process exit codes passed to exitf(). The non-zero values appear to mirror the BSD
// <sysexits.h> EX_* codes (EX_USAGE, EX_DATAERR, EX_UNAVAILABLE, EX_SOFTWARE, EX_IOERR).
enum class ExitErr {
    kOk           = 0,   // success
    kUsage        = 64,  // bad command line (config / src flags)
    kData         = 65,  // input file could not be parsed
    kUnavailable  = 69,  // a required platform/GPU capability is missing
    kSoftware     = 70,  // internal error; exitf() reports these as "INTERNAL ERROR"
    kIO           = 74   // file could not be opened or written
};
132 
133 static void flush_with_sync(GrDirectContext*, GpuSync&);
134 static void draw_skp_and_flush_with_sync(GrDirectContext*, SkSurface*, const SkPicture*, GpuSync&);
135 static sk_sp<SkPicture> create_warmup_skp();
136 static sk_sp<SkPicture> create_skp_from_svg(SkStream*, const char* filename);
137 static bool mkdir_p(const SkString& name);
138 static SkString         join(const CommandLineFlags::StringArray&);
139 static void exitf(ExitErr, const char* format, ...);
140 
141 // An interface used by both static SKPs and animated SKPs
142 class SkpProducer {
143 public:
~SkpProducer()144     virtual ~SkpProducer() {}
145     // Draw an SkPicture to the provided surface, flush the surface, and sync the GPU.
146     // You may use the static draw_skp_and_flush_with_sync declared above.
147     // returned int tells how many draw/flush/sync were done.
148     virtual int drawAndFlushAndSync(GrDirectContext*, SkSurface* surface, GpuSync& gpuSync) = 0;
149 };
150 
151 class StaticSkp : public SkpProducer {
152 public:
StaticSkp(sk_sp<SkPicture> skp)153     StaticSkp(sk_sp<SkPicture> skp) : fSkp(skp) {}
154 
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)155     int drawAndFlushAndSync(GrDirectContext* context,
156                             SkSurface* surface,
157                             GpuSync& gpuSync) override {
158         draw_skp_and_flush_with_sync(context, surface, fSkp.get(), gpuSync);
159         return 1;
160     }
161 
162 private:
163     sk_sp<SkPicture> fSkp;
164 };
165 
166 // A class for playing/benchmarking a multi frame SKP file.
167 // the recorded frames are looped over repeatedly.
168 // This type of benchmark may have a much higher std dev in frame times.
169 class MultiFrameSkp : public SkpProducer {
170 public:
MultiFrameSkp(const std::vector<SkDocumentPage> & frames)171     MultiFrameSkp(const std::vector<SkDocumentPage>& frames) : fFrames(frames){}
172 
MakeFromFile(const SkString & path)173     static std::unique_ptr<MultiFrameSkp> MakeFromFile(const SkString& path) {
174         // Load the multi frame skp at the given filename.
175         std::unique_ptr<SkStreamAsset> stream = SkStream::MakeFromFile(path.c_str());
176         if (!stream) { return nullptr; }
177 
178         // Attempt to deserialize with an image sharing serial proc.
179         auto deserialContext = std::make_unique<SkSharingDeserialContext>();
180         SkDeserialProcs procs;
181         procs.fImageProc = SkSharingDeserialContext::deserializeImage;
182         procs.fImageCtx = deserialContext.get();
183 
184         // The outer format of multi-frame skps is the multi-picture document, which is a
185         // skp file containing subpictures separated by annotations.
186         int page_count = SkMultiPictureDocument::ReadPageCount(stream.get());
187         if (!page_count) {
188             return nullptr;
189         }
190         std::vector<SkDocumentPage> frames(page_count); // can't call reserve, why?
191         if (!SkMultiPictureDocument::Read(stream.get(), frames.data(), page_count, &procs)) {
192             return nullptr;
193         }
194 
195         return std::make_unique<MultiFrameSkp>(frames);
196     }
197 
198     // Draw the whole animation once.
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)199     int drawAndFlushAndSync(GrDirectContext* context,
200                             SkSurface* surface,
201                             GpuSync& gpuSync) override {
202         for (int i=0; i<this->count(); i++){
203             draw_skp_and_flush_with_sync(context, surface, this->frame(i).get(), gpuSync);
204         }
205         return this->count();
206     }
207     // Return the requested frame.
frame(int n) const208     sk_sp<SkPicture> frame(int n) const { return fFrames[n].fPicture; }
209     // Return the number of frames in the recording.
count() const210     int count() const { return fFrames.size(); }
211 private:
212     std::vector<SkDocumentPage> fFrames;
213 };
214 
ddl_sample(GrDirectContext * dContext,DDLTileHelper * tiles,GpuSync & gpuSync,Sample * sample,SkTaskGroup * recordingTaskGroup,SkTaskGroup * gpuTaskGroup,std::chrono::high_resolution_clock::time_point * startStopTime,SkPicture * picture)215 static void ddl_sample(GrDirectContext* dContext, DDLTileHelper* tiles, GpuSync& gpuSync,
216                        Sample* sample, SkTaskGroup* recordingTaskGroup, SkTaskGroup* gpuTaskGroup,
217                        std::chrono::high_resolution_clock::time_point* startStopTime,
218                        SkPicture* picture) {
219     using clock = std::chrono::high_resolution_clock;
220 
221     clock::time_point start = *startStopTime;
222 
223     if (FLAGS_comparableDDL) {
224         SkASSERT(!FLAGS_comparableSKP);
225 
226         // In this mode we simply alternate between creating a DDL and drawing it - all on one
227         // thread. The interleaving is so that we don't starve the GPU.
228         // One unfortunate side effect of this is that we can't delete the DDLs until after
229         // the GPU work is flushed.
230         tiles->interleaveDDLCreationAndDraw(dContext, picture);
231     } else if (FLAGS_comparableSKP) {
232         // In this mode simply draw the re-inflated per-tile SKPs directly to the GPU w/o going
233         // through a DDL.
234         tiles->drawAllTilesDirectly(dContext, picture);
235     } else {
236         tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext, picture);
237         recordingTaskGroup->wait();
238     }
239 
240     if (gpuTaskGroup) {
241         gpuTaskGroup->add([&]{
242             flush_with_sync(dContext, gpuSync);
243         });
244         gpuTaskGroup->wait();
245     } else {
246         flush_with_sync(dContext, gpuSync);
247     }
248 
249     *startStopTime = clock::now();
250 
251     if (sample) {
252         sample->fDuration += *startStopTime - start;
253         sample->fFrames++;
254     }
255 }
256 
run_ddl_benchmark(sk_gpu_test::TestContext * testContext,GrDirectContext * dContext,sk_sp<SkSurface> dstSurface,SkPicture * inputPicture,std::vector<Sample> * samples)257 static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext,
258                               GrDirectContext* dContext,
259                               sk_sp<SkSurface> dstSurface,
260                               SkPicture* inputPicture,
261                               std::vector<Sample>* samples) {
262     using clock = std::chrono::high_resolution_clock;
263     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
264     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
265 
266     GrSurfaceCharacterization dstCharacterization;
267     SkAssertResult(dstSurface->characterize(&dstCharacterization));
268 
269     SkIRect viewport = dstSurface->imageInfo().bounds();
270 
271     auto supportedYUVADataTypes = skgpu::ganesh::SupportedTextureFormats(*dContext);
272     DDLPromiseImageHelper promiseImageHelper(supportedYUVADataTypes);
273     sk_sp<SkPicture> newSKP = promiseImageHelper.recreateSKP(dContext, inputPicture);
274     if (!newSKP) {
275         exitf(ExitErr::kUnavailable, "DDL: conversion of skp failed");
276     }
277 
278     promiseImageHelper.uploadAllToGPU(nullptr, dContext);
279 
280     DDLTileHelper tiles(dContext, dstCharacterization, viewport,
281                         FLAGS_ddlTilingWidthHeight, FLAGS_ddlTilingWidthHeight,
282                         /* addRandomPaddingToDst */ false);
283 
284     tiles.createBackendTextures(nullptr, dContext);
285 
286     // In comparable modes, there is no GPU thread. The following pointers are all null.
287     // Otherwise, we transfer testContext onto the GPU thread until after the bench.
288     std::unique_ptr<SkExecutor> gpuThread;
289     std::unique_ptr<SkTaskGroup> gpuTaskGroup;
290     std::unique_ptr<SkExecutor> recordingThreadPool;
291     std::unique_ptr<SkTaskGroup> recordingTaskGroup;
292     if (!FLAGS_comparableDDL && !FLAGS_comparableSKP) {
293         gpuThread = SkExecutor::MakeFIFOThreadPool(1, false);
294         gpuTaskGroup = std::make_unique<SkTaskGroup>(*gpuThread);
295         recordingThreadPool = SkExecutor::MakeFIFOThreadPool(FLAGS_ddlNumRecordingThreads, false);
296         recordingTaskGroup = std::make_unique<SkTaskGroup>(*recordingThreadPool);
297         testContext->makeNotCurrent();
298         gpuTaskGroup->add([=]{ testContext->makeCurrent(); });
299     }
300 
301     clock::time_point startStopTime = clock::now();
302 
303     GpuSync gpuSync;
304     ddl_sample(dContext, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
305                gpuTaskGroup.get(), &startStopTime, newSKP.get());
306 
307     clock::duration cumulativeDuration = std::chrono::milliseconds(0);
308 
309     do {
310         samples->emplace_back();
311         Sample& sample = samples->back();
312 
313         do {
314             tiles.resetAllTiles();
315             ddl_sample(dContext, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
316                        gpuTaskGroup.get(), &startStopTime, newSKP.get());
317         } while (sample.fDuration < sampleDuration);
318 
319         cumulativeDuration += sample.fDuration;
320     } while (cumulativeDuration < benchDuration || 0 == samples->size() % 2);
321 
322     // Move the context back to this thread now that we're done benching.
323     if (gpuTaskGroup) {
324         gpuTaskGroup->add([=]{
325             testContext->makeNotCurrent();
326         });
327         gpuTaskGroup->wait();
328         testContext->makeCurrent();
329     }
330 
331     if (!FLAGS_png.isEmpty()) {
332         // The user wants to see the final result
333         skgpu::ganesh::DrawDDL(dstSurface, tiles.composeDDL());
334         dContext->flushAndSubmit(dstSurface.get(), GrSyncCpu::kNo);
335     }
336 
337     tiles.resetAllTiles();
338 
339     // Make sure the gpu has finished all its work before we exit this function and delete the
340     // fence.
341     dContext->flush();
342     dContext->submit(GrSyncCpu::kYes);
343 
344     promiseImageHelper.deleteAllFromGPU(nullptr, dContext);
345 
346     tiles.deleteBackendTextures(nullptr, dContext);
347 }
348 
run_benchmark(GrDirectContext * context,sk_sp<SkSurface> surface,SkpProducer * skpp,std::vector<Sample> * samples)349 static void run_benchmark(GrDirectContext* context,
350                           sk_sp<SkSurface> surface,
351                           SkpProducer* skpp,
352                           std::vector<Sample>* samples) {
353     using clock = std::chrono::high_resolution_clock;
354     const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
355     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
356 
357     GpuSync gpuSync;
358     int i = 0;
359     do {
360         i += skpp->drawAndFlushAndSync(context, surface.get(), gpuSync);
361     } while(i < kNumFlushesToPrimeCache);
362 
363     clock::time_point now = clock::now();
364     const clock::time_point endTime = now + benchDuration;
365 
366     do {
367         clock::time_point sampleStart = now;
368         samples->emplace_back();
369         Sample& sample = samples->back();
370 
371         do {
372             sample.fFrames += skpp->drawAndFlushAndSync(context, surface.get(), gpuSync);
373             now = clock::now();
374             sample.fDuration = now - sampleStart;
375         } while (sample.fDuration < sampleDuration);
376     } while (now < endTime || 0 == samples->size() % 2);
377 
378     // Make sure the gpu has finished all its work before we exit this function and delete the
379     // fence.
380     context->flush(surface.get());
381     context->submit(GrSyncCpu::kYes);
382 }
383 
run_gpu_time_benchmark(sk_gpu_test::GpuTimer * gpuTimer,GrDirectContext * context,sk_sp<SkSurface> surface,const SkPicture * skp,std::vector<Sample> * samples)384 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer,
385                                    GrDirectContext* context,
386                                    sk_sp<SkSurface> surface,
387                                    const SkPicture* skp,
388                                    std::vector<Sample>* samples) {
389     using sk_gpu_test::PlatformTimerQuery;
390     using clock = std::chrono::steady_clock;
391     const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
392     const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
393 
394     if (!gpuTimer->disjointSupport()) {
395         fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
396                         "results may be unreliable\n");
397     }
398 
399     GpuSync gpuSync;
400     draw_skp_and_flush_with_sync(context, surface.get(), skp, gpuSync);
401 
402     PlatformTimerQuery previousTime = 0;
403     for (int i = 1; i < kNumFlushesToPrimeCache; ++i) {
404         gpuTimer->queueStart();
405         draw_skp_and_flush_with_sync(context, surface.get(), skp, gpuSync);
406         previousTime = gpuTimer->queueStop();
407     }
408 
409     clock::time_point now = clock::now();
410     const clock::time_point endTime = now + benchDuration;
411 
412     do {
413         const clock::time_point sampleEndTime = now + sampleDuration;
414         samples->emplace_back();
415         Sample& sample = samples->back();
416 
417         do {
418             gpuTimer->queueStart();
419             draw_skp_and_flush_with_sync(context, surface.get(), skp, gpuSync);
420             PlatformTimerQuery time = gpuTimer->queueStop();
421 
422             switch (gpuTimer->checkQueryStatus(previousTime)) {
423                 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
424                 case QueryStatus::kInvalid:
425                     exitf(ExitErr::kUnavailable, "GPU timer failed");
426                     break;
427                 case QueryStatus::kPending:
428                     exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
429                     break;
430                 case QueryStatus::kDisjoint:
431                     if (FLAGS_verbosity >= 4) {
432                         fprintf(stderr, "discarding timer query due to disjoint operations.\n");
433                     }
434                     break;
435                 case QueryStatus::kAccurate:
436                     sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
437                     ++sample.fFrames;
438                     break;
439             }
440             gpuTimer->deleteQuery(previousTime);
441             previousTime = time;
442             now = clock::now();
443         } while (now < sampleEndTime || 0 == sample.fFrames);
444     } while (now < endTime || 0 == samples->size() % 2);
445 
446     gpuTimer->deleteQuery(previousTime);
447 
448     // Make sure the gpu has finished all its work before we exit this function and delete the
449     // fence.
450     context->flush(surface.get());
451     context->submit(GrSyncCpu::kYes);
452 }
453 
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)454 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench)  {
455     if (0 == (samples.size() % 2)) {
456         exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
457     }
458 
459     if (FLAGS_dumpSamples) {
460         printf("Samples: ");
461         for (const Sample& sample : samples) {
462             printf("%" PRId64 " ", static_cast<int64_t>(sample.fDuration.count()));
463         }
464         printf("%s\n", bench);
465     }
466 
467     Sample accum = Sample();
468     std::vector<double> values;
469     values.reserve(samples.size());
470     for (const Sample& sample : samples) {
471         accum.fFrames += sample.fFrames;
472         accum.fDuration += sample.fDuration;
473         values.push_back(sample.value());
474     }
475     std::sort(values.begin(), values.end());
476 
477     const double accumValue = accum.value();
478     double variance = 0;
479     for (double value : values) {
480         const double delta = value - accumValue;
481         variance += delta * delta;
482     }
483     variance /= values.size();
484     // Technically, this is the relative standard deviation.
485     const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
486 
487     printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
488            stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
489            config, bench);
490     printf("\n");
491     fflush(stdout);
492 }
493 
main(int argc,char ** argv)494 int main(int argc, char** argv) {
495     CommandLineFlags::SetUsage(
496             "Use skpbench.py instead. "
497             "You usually don't want to use this program directly.");
498     CommandLineFlags::Parse(argc, argv);
499 
500     if (!FLAGS_suppressHeader) {
501         printf("%s\n", header);
502     }
503     if (FLAGS_duration <= 0) {
504         exit(0); // This can be used to print the header and quit.
505     }
506 
507     // Parse the config.
508     const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
509     SkCommandLineConfigArray configs;
510     ParseConfigs(FLAGS_config, &configs);
511     if (configs.size() != 1 || !(config = configs[0]->asConfigGpu())) {
512         exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
513                                join(FLAGS_config).c_str());
514     }
515 
516     // Parse the skp.
517     if (FLAGS_src.size() != 1) {
518         exitf(ExitErr::kUsage,
519               "invalid input '%s': must specify a single .skp or .svg file, or 'warmup'",
520               join(FLAGS_src).c_str());
521     }
522 
523     SkGraphics::Init();
524 
525     sk_sp<SkPicture> skp;
526     std::unique_ptr<MultiFrameSkp> mskp; // populated if the file is multi frame.
527     SkString srcname;
528     if (0 == strcmp(FLAGS_src[0], "warmup")) {
529         skp = create_warmup_skp();
530         srcname = "warmup";
531     } else {
532         SkString srcfile(FLAGS_src[0]);
533         std::unique_ptr<SkStream> srcstream(SkStream::MakeFromFile(srcfile.c_str()));
534         if (!srcstream) {
535             exitf(ExitErr::kIO, "failed to open file %s", srcfile.c_str());
536         }
537         if (srcfile.endsWith(".svg")) {
538             skp = create_skp_from_svg(srcstream.get(), srcfile.c_str());
539         } else if (srcfile.endsWith(".mskp")) {
540             mskp = MultiFrameSkp::MakeFromFile(srcfile);
541             // populate skp with it's first frame, for width height determination.
542             skp = mskp->frame(0);
543         } else {
544             skp = SkPicture::MakeFromStream(srcstream.get());
545         }
546         if (!skp) {
547             exitf(ExitErr::kData, "failed to parse file %s", srcfile.c_str());
548         }
549         srcname = SkOSPath::Basename(srcfile.c_str());
550     }
551     int width = std::min(SkScalarCeilToInt(skp->cullRect().width()), 2048),
552         height = std::min(SkScalarCeilToInt(skp->cullRect().height()), 2048);
553     if (FLAGS_verbosity >= 3 &&
554         (width != skp->cullRect().width() || height != skp->cullRect().height())) {
555         fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
556                         srcname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
557                         SkScalarCeilToInt(skp->cullRect().height()), width, height);
558     }
559     if (FLAGS_scale != 1) {
560         width *= FLAGS_scale;
561         height *= FLAGS_scale;
562         if (FLAGS_verbosity >= 3) {
563             fprintf(stderr, "Scale factor of %.2f: scaling to %ix%i.\n",
564                     FLAGS_scale, width, height);
565         }
566     }
567 
568     if (config->getSurfType() != SkCommandLineConfigGpu::SurfType::kDefault) {
569         exitf(ExitErr::kUnavailable, "This tool only supports the default surface type. (%s)",
570               config->getTag().c_str());
571     }
572 
573     // Create a context.
574     GrContextOptions ctxOptions;
575     CommonFlags::SetCtxOptions(&ctxOptions);
576     sk_gpu_test::GrContextFactory factory(ctxOptions);
577     sk_gpu_test::ContextInfo ctxInfo =
578         factory.getContextInfo(config->getContextType(), config->getContextOverrides());
579     auto ctx = ctxInfo.directContext();
580     if (!ctx) {
581         exitf(ExitErr::kUnavailable, "failed to create context for config %s",
582                                      config->getTag().c_str());
583     }
584     if (ctx->maxRenderTargetSize() < std::max(width, height)) {
585         exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
586               width, height, ctx->maxRenderTargetSize());
587     }
588     GrBackendFormat format = ctx->defaultBackendFormat(config->getColorType(), GrRenderable::kYes);
589     if (!format.isValid()) {
590         exitf(ExitErr::kUnavailable, "failed to get GrBackendFormat from SkColorType: %d",
591                                      config->getColorType());
592     }
593     int supportedSampleCount = ctx->priv().caps()->getRenderTargetSampleCount(
594             config->getSamples(), format);
595     if (supportedSampleCount != config->getSamples()) {
596         exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
597                                      config->getSamples());
598     }
599     sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
600     if (!testCtx) {
601         exitf(ExitErr::kSoftware, "testContext is null");
602     }
603     if (!testCtx->fenceSyncSupport()) {
604         exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
605     }
606 
607     // Create a render target.
608     SkImageInfo info = SkImageInfo::Make(
609             width, height, config->getColorType(), config->getAlphaType(), config->refColorSpace());
610     SkSurfaceProps props(config->getSurfaceFlags(), kRGB_H_SkPixelGeometry);
611     sk_sp<SkSurface> surface =
612             SkSurfaces::RenderTarget(ctx, skgpu::Budgeted::kNo, info, config->getSamples(), &props);
613     if (!surface) {
614         exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
615                                      width, height, config->getTag().c_str());
616     }
617 
618     // Run the benchmark.
619     std::vector<Sample> samples;
620     if (FLAGS_sampleMs > 0) {
621         // +1 because we might take one more sample in order to have an odd number.
622         samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
623     } else {
624         samples.reserve(2 * FLAGS_duration);
625     }
626     SkCanvas* canvas = surface->getCanvas();
627     canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
628     if (FLAGS_scale != 1) {
629         canvas->scale(FLAGS_scale, FLAGS_scale);
630     }
631     if (!FLAGS_gpuClock) {
632         if (FLAGS_ddl) {
633             run_ddl_benchmark(testCtx, ctx, surface, skp.get(), &samples);
634         } else if (!mskp) {
635             auto s = std::make_unique<StaticSkp>(skp);
636             run_benchmark(ctx, surface, s.get(), &samples);
637         } else {
638             run_benchmark(ctx, surface, mskp.get(), &samples);
639         }
640     } else {
641         if (FLAGS_ddl) {
642             exitf(ExitErr::kUnavailable, "DDL: GPU-only timing not supported");
643         }
644         if (!testCtx->gpuTimingSupport()) {
645             exitf(ExitErr::kUnavailable, "GPU does not support timing");
646         }
647         run_gpu_time_benchmark(testCtx->gpuTimer(), ctx, surface, skp.get(), &samples);
648     }
649     print_result(samples, config->getTag().c_str(), srcname.c_str());
650 
651     // Save a proof (if one was requested).
652     if (!FLAGS_png.isEmpty()) {
653         SkBitmap bmp;
654         bmp.allocPixels(info);
655         if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
656             exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
657         }
658         if (!mkdir_p(SkOSPath::Dirname(FLAGS_png[0]))) {
659             exitf(ExitErr::kIO, "failed to create directory for png \"%s\"", FLAGS_png[0]);
660         }
661         if (!ToolUtils::EncodeImageToPngFile(FLAGS_png[0], bmp)) {
662             exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
663         }
664     }
665 
666     return(0);
667 }
668 
flush_with_sync(GrDirectContext * context,GpuSync & gpuSync)669 static void flush_with_sync(GrDirectContext* context, GpuSync& gpuSync) {
670     gpuSync.waitIfNeeded();
671 
672     GrFlushInfo flushInfo;
673     flushInfo.fFinishedProc = sk_gpu_test::FlushFinishTracker::FlushFinished;
674     flushInfo.fFinishedContext = gpuSync.newFlushTracker(context);
675 
676     context->flush(flushInfo);
677     context->submit();
678 }
679 
draw_skp_and_flush_with_sync(GrDirectContext * context,SkSurface * surface,const SkPicture * skp,GpuSync & gpuSync)680 static void draw_skp_and_flush_with_sync(GrDirectContext* context, SkSurface* surface,
681                                          const SkPicture* skp, GpuSync& gpuSync) {
682     auto canvas = surface->getCanvas();
683     canvas->drawPicture(skp);
684 
685     flush_with_sync(context, gpuSync);
686 }
687 
create_warmup_skp()688 static sk_sp<SkPicture> create_warmup_skp() {
689     static constexpr SkRect bounds{0, 0, 500, 500};
690     SkPictureRecorder recorder;
691     SkCanvas* recording = recorder.beginRecording(bounds);
692 
693     recording->clear(SK_ColorWHITE);
694 
695     SkPaint stroke;
696     stroke.setStyle(SkPaint::kStroke_Style);
697     stroke.setStrokeWidth(2);
698 
699     // Use a big path to (theoretically) warmup the CPU.
700     SkPath bigPath = BenchUtils::make_big_path();
701     recording->drawPath(bigPath, stroke);
702 
703     // Use a perlin shader to warmup the GPU.
704     SkPaint perlin;
705     perlin.setShader(SkShaders::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
706     recording->drawRect(bounds, perlin);
707 
708     return recorder.finishRecordingAsPicture();
709 }
710 
create_skp_from_svg(SkStream * stream,const char * filename)711 static sk_sp<SkPicture> create_skp_from_svg(SkStream* stream, const char* filename) {
712 #if defined(SK_ENABLE_SVG)
713     sk_sp<SkSVGDOM> svg = SkSVGDOM::Builder()
714                                   .setFontManager(ToolUtils::TestFontMgr())
715                                   .setTextShapingFactory(SkShapers::BestAvailable())
716                                   .make(*stream);
717     if (!svg) {
718         exitf(ExitErr::kData, "failed to build svg dom from file %s", filename);
719     }
720 
721     static constexpr SkRect bounds{0, 0, 1200, 1200};
722     SkPictureRecorder recorder;
723     SkCanvas* recording = recorder.beginRecording(bounds);
724 
725     svg->setContainerSize(SkSize::Make(recording->getBaseLayerSize()));
726     svg->render(recording);
727 
728     return recorder.finishRecordingAsPicture();
729 #endif
730     exitf(ExitErr::kData, "SK_ENABLE_SVG is disabled; cannot open svg file %s", filename);
731     return nullptr;
732 }
733 
mkdir_p(const SkString & dirname)734 bool mkdir_p(const SkString& dirname) {
735     if (dirname.isEmpty() || dirname == SkString("/")) {
736         return true;
737     }
738     return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
739 }
740 
join(const CommandLineFlags::StringArray & stringArray)741 static SkString join(const CommandLineFlags::StringArray& stringArray) {
742     SkString joined;
743     for (int i = 0; i < stringArray.size(); ++i) {
744         joined.appendf(i ? " %s" : "%s", stringArray[i]);
745     }
746     return joined;
747 }
748 
749 static void exitf(ExitErr err, const char* format, ...) SK_PRINTF_LIKE(2, 3);
750 
exitf(ExitErr err,const char * format,...)751 static void exitf(ExitErr err, const char* format, ...) {
752     fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
753     va_list args;
754     va_start(args, format);
755     vfprintf(stderr, format, args);
756     va_end(args);
757     fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
758     exit((int)err);
759 }
760 
waitIfNeeded()761 void GpuSync::waitIfNeeded() {
762     if (fFinishTrackers[fCurrentFlushIdx]) {
763         fFinishTrackers[fCurrentFlushIdx]->waitTillFinished();
764     }
765 }
766 
newFlushTracker(GrDirectContext * context)767 sk_gpu_test::FlushFinishTracker* GpuSync::newFlushTracker(GrDirectContext* context) {
768     fFinishTrackers[fCurrentFlushIdx].reset(new sk_gpu_test::FlushFinishTracker(context));
769 
770     sk_gpu_test::FlushFinishTracker* tracker = fFinishTrackers[fCurrentFlushIdx].get();
771     // We add an additional ref to the current flush tracker here. This ref is owned by the finish
772     // callback on the flush call. The finish callback will unref the tracker when called.
773     tracker->ref();
774 
775     fCurrentFlushIdx = (fCurrentFlushIdx + 1) % std::size(fFinishTrackers);
776     return tracker;
777 }
778