• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "SkThreadedBMPDevice.h"
9 
10 #include "SkPath.h"
11 #include "SkTaskGroup.h"
12 #include "SkVertices.h"
13 
14 #include <mutex>
15 #include <vector>
16 
// Alignment, in bytes, applied via alignas() to the per-tile scheduling structs below so that
// entries for different tiles do not share a cache line (false sharing).
// NOTE(review): 64 is an assumed cache line size; confirm for the targeted hardware.
constexpr int MAX_CACHE_LINE = 64;
18 
19 // Some basic logics and data structures that are shared across the current experimental schedulers.
20 class TiledDrawSchedulerBase : public TiledDrawScheduler {
21 public:
TiledDrawSchedulerBase(int tiles,WorkFunc work)22     TiledDrawSchedulerBase(int tiles, WorkFunc work)
23             : fTileCnt(tiles), fIsFinishing(false), fDrawCnt(0), fWork(std::move(work)) {}
24 
signal()25     void signal() override {
26         fDrawCnt++;
27     }
finish()28     void finish() override {
29         fIsFinishing.store(true, std::memory_order_relaxed);
30     }
31 
32 protected:
33     const int                   fTileCnt;
34     std::atomic<bool>           fIsFinishing;
35     std::atomic<int>            fDrawCnt;
36     WorkFunc                    fWork;
37 };
38 
39 class TiledDrawSchedulerBySpinning : public TiledDrawSchedulerBase {
40 public:
TiledDrawSchedulerBySpinning(int tiles,WorkFunc work)41     TiledDrawSchedulerBySpinning(int tiles, WorkFunc work)
42             : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
43 
signal()44     void signal() final { this->TiledDrawSchedulerBase::signal(); }
finish()45     void finish() final { this->TiledDrawSchedulerBase::finish(); }
46 
next(int & tileIndex)47     bool next(int& tileIndex) final {
48         int& drawIndex = fScheduleData[tileIndex].fDrawIndex;
49         SkASSERT(drawIndex <= fDrawCnt);
50         while (true) {
51             bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
52             if (isFinishing && drawIndex >= fDrawCnt) {
53                 return false;
54             } else if (drawIndex < fDrawCnt) {
55                 fWork(tileIndex, drawIndex++);
56                 return true;
57             }
58         }
59     }
60 
61 private:
62     // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
63     struct alignas(MAX_CACHE_LINE) TileScheduleData {
TileScheduleDataTiledDrawSchedulerBySpinning::TileScheduleData64         TileScheduleData() : fDrawIndex(0) {}
65 
66         int fDrawIndex; // next draw index for this tile
67     };
68 
69     std::vector<TileScheduleData>  fScheduleData;
70 };
71 
72 class TiledDrawSchedulerFlexible : public TiledDrawSchedulerBase {
73 public:
TiledDrawSchedulerFlexible(int tiles,WorkFunc work)74     TiledDrawSchedulerFlexible(int tiles, WorkFunc work)
75             : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
76 
signal()77     void signal() final { this->TiledDrawSchedulerBase::signal(); }
finish()78     void finish() final { this->TiledDrawSchedulerBase::finish(); }
79 
next(int & tileIndex)80     bool next(int& tileIndex) final {
81         int failCnt = 0;
82         while (true) {
83             TileScheduleData& scheduleData = fScheduleData[tileIndex];
84             bool locked = scheduleData.fMutex.try_lock();
85             bool processed = false;
86 
87             if (locked) {
88                 if (scheduleData.fDrawIndex < fDrawCnt) {
89                     fWork(tileIndex, scheduleData.fDrawIndex++);
90                     processed = true;
91                 } else {
92                     failCnt += fIsFinishing.load(std::memory_order_relaxed);
93                 }
94                 scheduleData.fMutex.unlock();
95             }
96 
97             if (processed) {
98                 return true;
99             } else {
100                 if (failCnt >= fTileCnt) {
101                     return false;
102                 }
103                 tileIndex = (tileIndex + 1) % fTileCnt;
104             }
105         }
106     }
107 
108 private:
109     // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
110     struct alignas(MAX_CACHE_LINE) TileScheduleData {
TileScheduleDataTiledDrawSchedulerFlexible::TileScheduleData111         TileScheduleData() : fDrawIndex(0) {}
112 
113         int         fDrawIndex; // next draw index for this tile
114         std::mutex  fMutex;     // the mutex for the thread to acquire
115     };
116 
117     std::vector<TileScheduleData>  fScheduleData;
118 };
119 
120 class TiledDrawSchedulerBySemaphores : public TiledDrawSchedulerBase {
121 public:
TiledDrawSchedulerBySemaphores(int tiles,WorkFunc work)122     TiledDrawSchedulerBySemaphores(int tiles, WorkFunc work)
123             : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
124 
125 
signal()126     void signal() final {
127         this->TiledDrawSchedulerBase::signal();
128         signalRoot();
129     }
130 
finish()131     void finish() final {
132         this->TiledDrawSchedulerBase::finish();
133         signalRoot();
134     }
135 
next(int & tileIndex)136     bool next(int& tileIndex) final {
137         SkASSERT(tileIndex >= 0 && tileIndex < fTileCnt);
138         TileScheduleData& scheduleData = fScheduleData[tileIndex];
139         while (true) {
140             scheduleData.fSemaphore.wait();
141             int leftChild = (tileIndex + 1) * 2 - 1;
142             int rightChild = leftChild + 1;
143             if (leftChild < fTileCnt) {
144                 fScheduleData[leftChild].fSemaphore.signal();
145             }
146             if (rightChild < fTileCnt) {
147                 fScheduleData[rightChild].fSemaphore.signal();
148             }
149 
150             bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
151             if (isFinishing && scheduleData.fDrawIndex >= fDrawCnt) {
152                 return false;
153             } else {
154                 SkASSERT(scheduleData.fDrawIndex < fDrawCnt);
155                 fWork(tileIndex, scheduleData.fDrawIndex++);
156                 return true;
157             }
158         }
159     }
160 
161 private:
162     // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
163     struct alignas(MAX_CACHE_LINE) TileScheduleData {
TileScheduleDataTiledDrawSchedulerBySemaphores::TileScheduleData164         TileScheduleData() : fDrawIndex(0) {}
165 
166         int         fDrawIndex;
167         SkSemaphore fSemaphore;
168     };
169 
signalRoot()170     void signalRoot() {
171         SkASSERT(fTileCnt > 0);
172         fScheduleData[0].fSemaphore.signal();
173     }
174 
175     std::vector<TileScheduleData> fScheduleData;
176 };
177 
startThreads()178 void SkThreadedBMPDevice::startThreads() {
179     SkASSERT(fThreadFutures.count() == 0);
180     SkASSERT(fQueueSize == 0);
181 
182     TiledDrawScheduler::WorkFunc work = [this](int tileIndex, int drawIndex){
183         auto& element = fQueue[drawIndex];
184         if (SkIRect::Intersects(fTileBounds[tileIndex], element.fDrawBounds)) {
185             element.fDrawFn(fTileBounds[tileIndex]);
186         }
187     };
188 
189     // using Scheduler = TiledDrawSchedulerBySemaphores;
190     // using Scheduler = TiledDrawSchedulerBySpinning;
191     using Scheduler = TiledDrawSchedulerFlexible;
192     fScheduler.reset(new Scheduler(fTileCnt, work));
193     for(int i = 0; i < fThreadCnt; ++i) {
194         fThreadFutures.push_back(std::async(std::launch::async, [this, i]() {
195             int tileIndex = i;
196             while (fScheduler->next(tileIndex)) {}
197         }));
198     }
199 }
200 
finishThreads()201 void SkThreadedBMPDevice::finishThreads() {
202     fScheduler->finish();
203     for(auto& future : fThreadFutures) {
204         future.wait();
205     }
206     fThreadFutures.reset();
207     fQueueSize = 0;
208     fScheduler.reset(nullptr);
209 }
210 
SkThreadedBMPDevice(const SkBitmap & bitmap,int tiles,int threads)211 SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int tiles, int threads)
212         : INHERITED(bitmap)
213         , fTileCnt(tiles)
214         , fThreadCnt(threads <= 0 ? tiles : threads)
215 {
216     // Tiling using stripes for now; we'll explore better tiling in the future.
217     int h = (bitmap.height() + fTileCnt - 1) / SkTMax(fTileCnt, 1);
218     int w = bitmap.width();
219     int top = 0;
220     for(int tid = 0; tid < fTileCnt; ++tid, top += h) {
221         fTileBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h));
222     }
223     fQueueSize = 0;
224     startThreads();
225 }
226 
flush()227 void SkThreadedBMPDevice::flush() {
228     finishThreads();
229     startThreads();
230 }
231 
232 // Having this captured in lambda seems to be faster than saving this in DrawElement
233 struct SkThreadedBMPDevice::DrawState {
234     SkPixmap fDst;
235     SkMatrix fMatrix;
236     SkRasterClip fRC;
237 
DrawStateSkThreadedBMPDevice::DrawState238     explicit DrawState(SkThreadedBMPDevice* dev) {
239         // we need fDst to be set, and if we're actually drawing, to dirty the genID
240         if (!dev->accessPixels(&fDst)) {
241             // NoDrawDevice uses us (why?) so we have to catch this case w/ no pixels
242             fDst.reset(dev->imageInfo(), nullptr, 0);
243         }
244         fMatrix = dev->ctm();
245         fRC = dev->fRCStack.rc();
246     }
247 
getThreadDrawSkThreadedBMPDevice::DrawState248     SkDraw getThreadDraw(SkRasterClip& threadRC, const SkIRect& threadBounds) const {
249         SkDraw draw;
250         draw.fDst = fDst;
251         draw.fMatrix = &fMatrix;
252         threadRC = fRC;
253         threadRC.op(threadBounds, SkRegion::kIntersect_Op);
254         draw.fRC = &threadRC;
255         return draw;
256     }
257 };
258 
transformDrawBounds(const SkRect & drawBounds) const259 SkIRect SkThreadedBMPDevice::transformDrawBounds(const SkRect& drawBounds) const {
260     if (drawBounds.isLargest()) {
261         return SkIRect::MakeLargest();
262     }
263     SkRect transformedBounds;
264     this->ctm().mapRect(&transformedBounds, drawBounds);
265     return transformedBounds.roundOut();
266 }
267 
// Enqueues one draw for the worker threads and signals the scheduler.
//   drawBounds:     SkRect passed through transformDrawBounds() to get the bounds that tiles
//                   are tested against.
//   actualDrawCall: the SkDraw member call to run per tile, e.g. drawRect(r, paint).
// The lambda captures by copy ([=]); pointer arguments are therefore copied as pointers, so
// callers must make sure whatever they point to outlives the deferred draw.
// If the queue is already full it is flushed first: the capacity check used to be an SkASSERT
// only, which compiles away in release builds and let fQueue be written out of bounds.
// The do {...} while (false) is to enforce trailing semicolon as suggested by mtklein@
#define THREADED_DRAW(drawBounds, actualDrawCall)                                                  \
    do {                                                                                           \
        if (fQueueSize >= MAX_QUEUE_SIZE) {                                                        \
            this->flush();                                                                         \
        }                                                                                          \
        DrawState ds(this);                                                                        \
        SkASSERT(fQueueSize < MAX_QUEUE_SIZE);                                                     \
        fQueue[fQueueSize++] = {                                                                   \
            this->transformDrawBounds(drawBounds),                                                 \
            [=](const SkIRect& tileBounds) {                                                       \
                SkRasterClip tileRC;                                                               \
                SkDraw draw = ds.getThreadDraw(tileRC, tileBounds);                                \
                draw.actualDrawCall;                                                               \
            },                                                                                     \
        };                                                                                         \
        fScheduler->signal();                                                                      \
    } while (false)
283 
// Returns the paint's fast bounds for `r` when the paint can compute them, and the largest
// rect otherwise.
static inline SkRect get_fast_bounds(const SkRect& r, const SkPaint& p) {
    if (!p.canComputeFastBounds()) {
        return SkRect::MakeLargest();
    }
    SkRect storage;
    return p.computeFastBounds(r, &storage);
}
293 
drawPaint(const SkPaint & paint)294 void SkThreadedBMPDevice::drawPaint(const SkPaint& paint) {
295     THREADED_DRAW(SkRect::MakeLargest(), drawPaint(paint));
296 }
297 
drawPoints(SkCanvas::PointMode mode,size_t count,const SkPoint pts[],const SkPaint & paint)298 void SkThreadedBMPDevice::drawPoints(SkCanvas::PointMode mode, size_t count,
299         const SkPoint pts[], const SkPaint& paint) {
300     // TODO tighter drawBounds
301     SkRect drawBounds = SkRect::MakeLargest();
302     THREADED_DRAW(drawBounds, drawPoints(mode, count, pts, paint, nullptr));
303 }
304 
drawRect(const SkRect & r,const SkPaint & paint)305 void SkThreadedBMPDevice::drawRect(const SkRect& r, const SkPaint& paint) {
306     SkRect drawBounds = get_fast_bounds(r, paint);
307     THREADED_DRAW(drawBounds, drawRect(r, paint));
308 }
309 
drawRRect(const SkRRect & rrect,const SkPaint & paint)310 void SkThreadedBMPDevice::drawRRect(const SkRRect& rrect, const SkPaint& paint) {
311 #ifdef SK_IGNORE_BLURRED_RRECT_OPT
312     SkPath  path;
313 
314     path.addRRect(rrect);
315     // call the VIRTUAL version, so any subclasses who do handle drawPath aren't
316     // required to override drawRRect.
317     this->drawPath(path, paint, nullptr, false);
318 #else
319     SkRect drawBounds = get_fast_bounds(rrect.getBounds(), paint);
320     THREADED_DRAW(drawBounds, drawRRect(rrect, paint));
321 #endif
322 }
323 
drawPath(const SkPath & path,const SkPaint & paint,const SkMatrix * prePathMatrix,bool pathIsMutable)324 void SkThreadedBMPDevice::drawPath(const SkPath& path, const SkPaint& paint,
325         const SkMatrix* prePathMatrix, bool pathIsMutable) {
326     SkRect drawBounds = path.isInverseFillType() ? SkRect::MakeLargest()
327                                                  : get_fast_bounds(path.getBounds(), paint);
328     // For thread safety, make path imutable
329     THREADED_DRAW(drawBounds, drawPath(path, paint, prePathMatrix, false));
330 }
331 
drawBitmap(const SkBitmap & bitmap,SkScalar x,SkScalar y,const SkPaint & paint)332 void SkThreadedBMPDevice::drawBitmap(const SkBitmap& bitmap, SkScalar x, SkScalar y,
333         const SkPaint& paint) {
334     SkMatrix matrix = SkMatrix::MakeTrans(x, y);
335     LogDrawScaleFactor(SkMatrix::Concat(this->ctm(), matrix), paint.getFilterQuality());
336     SkRect drawBounds = SkRect::MakeWH(bitmap.width(), bitmap.height());
337     matrix.mapRect(&drawBounds);
338     THREADED_DRAW(drawBounds, drawBitmap(bitmap, matrix, nullptr, paint));
339 }
340 
drawSprite(const SkBitmap & bitmap,int x,int y,const SkPaint & paint)341 void SkThreadedBMPDevice::drawSprite(const SkBitmap& bitmap, int x, int y, const SkPaint& paint) {
342     SkRect drawBounds = SkRect::MakeXYWH(x, y, bitmap.width(), bitmap.height());
343     THREADED_DRAW(drawBounds, drawSprite(bitmap, x, y, paint));
344 }
345 
drawText(const void * text,size_t len,SkScalar x,SkScalar y,const SkPaint & paint)346 void SkThreadedBMPDevice::drawText(const void* text, size_t len, SkScalar x, SkScalar y,
347         const SkPaint& paint) {
348     SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
349     THREADED_DRAW(drawBounds, drawText((const char*)text, len, x, y, paint, &this->surfaceProps()));
350 }
351 
drawPosText(const void * text,size_t len,const SkScalar xpos[],int scalarsPerPos,const SkPoint & offset,const SkPaint & paint)352 void SkThreadedBMPDevice::drawPosText(const void* text, size_t len, const SkScalar xpos[],
353         int scalarsPerPos, const SkPoint& offset, const SkPaint& paint) {
354     SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
355     THREADED_DRAW(drawBounds, drawPosText((const char*)text, len, xpos, scalarsPerPos, offset,
356                                           paint, &surfaceProps()));
357 }
358 
drawVertices(const SkVertices * vertices,SkBlendMode bmode,const SkPaint & paint)359 void SkThreadedBMPDevice::drawVertices(const SkVertices* vertices, SkBlendMode bmode,
360         const SkPaint& paint) {
361     SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
362     THREADED_DRAW(drawBounds, drawVertices(vertices->mode(), vertices->vertexCount(),
363                                            vertices->positions(), vertices->texCoords(),
364                                            vertices->colors(), bmode, vertices->indices(),
365                                            vertices->indexCount(), paint));
366 }
367 
drawDevice(SkBaseDevice * device,int x,int y,const SkPaint & paint)368 void SkThreadedBMPDevice::drawDevice(SkBaseDevice* device, int x, int y, const SkPaint& paint) {
369     SkASSERT(!paint.getImageFilter());
370     SkRect drawBounds = SkRect::MakeXYWH(x, y, device->width(), device->height());
371     THREADED_DRAW(drawBounds,
372                   drawSprite(static_cast<SkBitmapDevice*>(device)->fBitmap, x, y, paint));
373 }
374