1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkThreadedBMPDevice.h"
9
10 #include "SkPath.h"
11 #include "SkTaskGroup.h"
12 #include "SkVertices.h"
13
14 #include <mutex>
15 #include <vector>
16
// Alignment, in bytes, applied to the per-tile scheduling records below so that records that
// belong to different tiles do not end up on the same cache line.
static constexpr int MAX_CACHE_LINE = 64;
18
19 // Some basic logics and data structures that are shared across the current experimental schedulers.
20 class TiledDrawSchedulerBase : public TiledDrawScheduler {
21 public:
TiledDrawSchedulerBase(int tiles,WorkFunc work)22 TiledDrawSchedulerBase(int tiles, WorkFunc work)
23 : fTileCnt(tiles), fIsFinishing(false), fDrawCnt(0), fWork(std::move(work)) {}
24
signal()25 void signal() override {
26 fDrawCnt++;
27 }
finish()28 void finish() override {
29 fIsFinishing.store(true, std::memory_order_relaxed);
30 }
31
32 protected:
33 const int fTileCnt;
34 std::atomic<bool> fIsFinishing;
35 std::atomic<int> fDrawCnt;
36 WorkFunc fWork;
37 };
38
39 class TiledDrawSchedulerBySpinning : public TiledDrawSchedulerBase {
40 public:
TiledDrawSchedulerBySpinning(int tiles,WorkFunc work)41 TiledDrawSchedulerBySpinning(int tiles, WorkFunc work)
42 : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
43
signal()44 void signal() final { this->TiledDrawSchedulerBase::signal(); }
finish()45 void finish() final { this->TiledDrawSchedulerBase::finish(); }
46
next(int & tileIndex)47 bool next(int& tileIndex) final {
48 int& drawIndex = fScheduleData[tileIndex].fDrawIndex;
49 SkASSERT(drawIndex <= fDrawCnt);
50 while (true) {
51 bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
52 if (isFinishing && drawIndex >= fDrawCnt) {
53 return false;
54 } else if (drawIndex < fDrawCnt) {
55 fWork(tileIndex, drawIndex++);
56 return true;
57 }
58 }
59 }
60
61 private:
62 // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
63 struct alignas(MAX_CACHE_LINE) TileScheduleData {
TileScheduleDataTiledDrawSchedulerBySpinning::TileScheduleData64 TileScheduleData() : fDrawIndex(0) {}
65
66 int fDrawIndex; // next draw index for this tile
67 };
68
69 std::vector<TileScheduleData> fScheduleData;
70 };
71
72 class TiledDrawSchedulerFlexible : public TiledDrawSchedulerBase {
73 public:
TiledDrawSchedulerFlexible(int tiles,WorkFunc work)74 TiledDrawSchedulerFlexible(int tiles, WorkFunc work)
75 : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
76
signal()77 void signal() final { this->TiledDrawSchedulerBase::signal(); }
finish()78 void finish() final { this->TiledDrawSchedulerBase::finish(); }
79
next(int & tileIndex)80 bool next(int& tileIndex) final {
81 int failCnt = 0;
82 while (true) {
83 TileScheduleData& scheduleData = fScheduleData[tileIndex];
84 bool locked = scheduleData.fMutex.try_lock();
85 bool processed = false;
86
87 if (locked) {
88 if (scheduleData.fDrawIndex < fDrawCnt) {
89 fWork(tileIndex, scheduleData.fDrawIndex++);
90 processed = true;
91 } else {
92 failCnt += fIsFinishing.load(std::memory_order_relaxed);
93 }
94 scheduleData.fMutex.unlock();
95 }
96
97 if (processed) {
98 return true;
99 } else {
100 if (failCnt >= fTileCnt) {
101 return false;
102 }
103 tileIndex = (tileIndex + 1) % fTileCnt;
104 }
105 }
106 }
107
108 private:
109 // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
110 struct alignas(MAX_CACHE_LINE) TileScheduleData {
TileScheduleDataTiledDrawSchedulerFlexible::TileScheduleData111 TileScheduleData() : fDrawIndex(0) {}
112
113 int fDrawIndex; // next draw index for this tile
114 std::mutex fMutex; // the mutex for the thread to acquire
115 };
116
117 std::vector<TileScheduleData> fScheduleData;
118 };
119
120 class TiledDrawSchedulerBySemaphores : public TiledDrawSchedulerBase {
121 public:
TiledDrawSchedulerBySemaphores(int tiles,WorkFunc work)122 TiledDrawSchedulerBySemaphores(int tiles, WorkFunc work)
123 : TiledDrawSchedulerBase(tiles, std::move(work)), fScheduleData(tiles) {}
124
125
signal()126 void signal() final {
127 this->TiledDrawSchedulerBase::signal();
128 signalRoot();
129 }
130
finish()131 void finish() final {
132 this->TiledDrawSchedulerBase::finish();
133 signalRoot();
134 }
135
next(int & tileIndex)136 bool next(int& tileIndex) final {
137 SkASSERT(tileIndex >= 0 && tileIndex < fTileCnt);
138 TileScheduleData& scheduleData = fScheduleData[tileIndex];
139 while (true) {
140 scheduleData.fSemaphore.wait();
141 int leftChild = (tileIndex + 1) * 2 - 1;
142 int rightChild = leftChild + 1;
143 if (leftChild < fTileCnt) {
144 fScheduleData[leftChild].fSemaphore.signal();
145 }
146 if (rightChild < fTileCnt) {
147 fScheduleData[rightChild].fSemaphore.signal();
148 }
149
150 bool isFinishing = fIsFinishing.load(std::memory_order_relaxed);
151 if (isFinishing && scheduleData.fDrawIndex >= fDrawCnt) {
152 return false;
153 } else {
154 SkASSERT(scheduleData.fDrawIndex < fDrawCnt);
155 fWork(tileIndex, scheduleData.fDrawIndex++);
156 return true;
157 }
158 }
159 }
160
161 private:
162 // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
163 struct alignas(MAX_CACHE_LINE) TileScheduleData {
TileScheduleDataTiledDrawSchedulerBySemaphores::TileScheduleData164 TileScheduleData() : fDrawIndex(0) {}
165
166 int fDrawIndex;
167 SkSemaphore fSemaphore;
168 };
169
signalRoot()170 void signalRoot() {
171 SkASSERT(fTileCnt > 0);
172 fScheduleData[0].fSemaphore.signal();
173 }
174
175 std::vector<TileScheduleData> fScheduleData;
176 };
177
startThreads()178 void SkThreadedBMPDevice::startThreads() {
179 SkASSERT(fThreadFutures.count() == 0);
180 SkASSERT(fQueueSize == 0);
181
182 TiledDrawScheduler::WorkFunc work = [this](int tileIndex, int drawIndex){
183 auto& element = fQueue[drawIndex];
184 if (SkIRect::Intersects(fTileBounds[tileIndex], element.fDrawBounds)) {
185 element.fDrawFn(fTileBounds[tileIndex]);
186 }
187 };
188
189 // using Scheduler = TiledDrawSchedulerBySemaphores;
190 // using Scheduler = TiledDrawSchedulerBySpinning;
191 using Scheduler = TiledDrawSchedulerFlexible;
192 fScheduler.reset(new Scheduler(fTileCnt, work));
193 for(int i = 0; i < fThreadCnt; ++i) {
194 fThreadFutures.push_back(std::async(std::launch::async, [this, i]() {
195 int tileIndex = i;
196 while (fScheduler->next(tileIndex)) {}
197 }));
198 }
199 }
200
finishThreads()201 void SkThreadedBMPDevice::finishThreads() {
202 fScheduler->finish();
203 for(auto& future : fThreadFutures) {
204 future.wait();
205 }
206 fThreadFutures.reset();
207 fQueueSize = 0;
208 fScheduler.reset(nullptr);
209 }
210
SkThreadedBMPDevice(const SkBitmap & bitmap,int tiles,int threads)211 SkThreadedBMPDevice::SkThreadedBMPDevice(const SkBitmap& bitmap, int tiles, int threads)
212 : INHERITED(bitmap)
213 , fTileCnt(tiles)
214 , fThreadCnt(threads <= 0 ? tiles : threads)
215 {
216 // Tiling using stripes for now; we'll explore better tiling in the future.
217 int h = (bitmap.height() + fTileCnt - 1) / SkTMax(fTileCnt, 1);
218 int w = bitmap.width();
219 int top = 0;
220 for(int tid = 0; tid < fTileCnt; ++tid, top += h) {
221 fTileBounds.push_back(SkIRect::MakeLTRB(0, top, w, top + h));
222 }
223 fQueueSize = 0;
224 startThreads();
225 }
226
flush()227 void SkThreadedBMPDevice::flush() {
228 finishThreads();
229 startThreads();
230 }
231
232 // Having this captured in lambda seems to be faster than saving this in DrawElement
233 struct SkThreadedBMPDevice::DrawState {
234 SkPixmap fDst;
235 SkMatrix fMatrix;
236 SkRasterClip fRC;
237
DrawStateSkThreadedBMPDevice::DrawState238 explicit DrawState(SkThreadedBMPDevice* dev) {
239 // we need fDst to be set, and if we're actually drawing, to dirty the genID
240 if (!dev->accessPixels(&fDst)) {
241 // NoDrawDevice uses us (why?) so we have to catch this case w/ no pixels
242 fDst.reset(dev->imageInfo(), nullptr, 0);
243 }
244 fMatrix = dev->ctm();
245 fRC = dev->fRCStack.rc();
246 }
247
getThreadDrawSkThreadedBMPDevice::DrawState248 SkDraw getThreadDraw(SkRasterClip& threadRC, const SkIRect& threadBounds) const {
249 SkDraw draw;
250 draw.fDst = fDst;
251 draw.fMatrix = &fMatrix;
252 threadRC = fRC;
253 threadRC.op(threadBounds, SkRegion::kIntersect_Op);
254 draw.fRC = &threadRC;
255 return draw;
256 }
257 };
258
transformDrawBounds(const SkRect & drawBounds) const259 SkIRect SkThreadedBMPDevice::transformDrawBounds(const SkRect& drawBounds) const {
260 if (drawBounds.isLargest()) {
261 return SkIRect::MakeLargest();
262 }
263 SkRect transformedBounds;
264 this->ctm().mapRect(&transformedBounds, drawBounds);
265 return transformedBounds.roundOut();
266 }
267
// Queues one draw and publishes it to the scheduler.
//   drawBounds:     SkRect bounds of the draw; mapped through the CTM by transformDrawBounds().
//   actualDrawCall: the SkDraw member call to run for each tile, e.g. drawRect(r, paint).
// The lambda captures by copy ([=]), so pointer arguments are captured as pointers only; the
// memory they point to must stay alive until the draw has run.
//
// If the queue is already full it is flushed first. flush() waits for the workers, resets
// fQueueSize to 0 and restarts them, so the write below always lands inside fQueue (before,
// only a debug-build assert stood between a full queue and an out-of-bounds write).
//
// The do {...} while (false) is to enforce trailing semicolon as suggested by mtklein@
#define THREADED_DRAW(drawBounds, actualDrawCall)                                   \
    do {                                                                            \
        if (fQueueSize >= MAX_QUEUE_SIZE) {                                         \
            this->flush();                                                          \
        }                                                                           \
        DrawState ds(this);                                                         \
        SkASSERT(fQueueSize < MAX_QUEUE_SIZE);                                      \
        fQueue[fQueueSize++] = {                                                    \
            this->transformDrawBounds(drawBounds),                                  \
            [=](const SkIRect& tileBounds) {                                        \
                SkRasterClip tileRC;                                                \
                SkDraw draw = ds.getThreadDraw(tileRC, tileBounds);                 \
                draw.actualDrawCall;                                                \
            },                                                                      \
        };                                                                          \
        fScheduler->signal();                                                       \
    } while (false)
283
// Returns the bounds of drawing `r` with paint `p` as reported by SkPaint::computeFastBounds,
// or the largest rect when the paint cannot compute fast bounds.
static inline SkRect get_fast_bounds(const SkRect& r, const SkPaint& p) {
    if (!p.canComputeFastBounds()) {
        return SkRect::MakeLargest();
    }
    SkRect storage;
    return p.computeFastBounds(r, &storage);
}
293
drawPaint(const SkPaint & paint)294 void SkThreadedBMPDevice::drawPaint(const SkPaint& paint) {
295 THREADED_DRAW(SkRect::MakeLargest(), drawPaint(paint));
296 }
297
drawPoints(SkCanvas::PointMode mode,size_t count,const SkPoint pts[],const SkPaint & paint)298 void SkThreadedBMPDevice::drawPoints(SkCanvas::PointMode mode, size_t count,
299 const SkPoint pts[], const SkPaint& paint) {
300 // TODO tighter drawBounds
301 SkRect drawBounds = SkRect::MakeLargest();
302 THREADED_DRAW(drawBounds, drawPoints(mode, count, pts, paint, nullptr));
303 }
304
drawRect(const SkRect & r,const SkPaint & paint)305 void SkThreadedBMPDevice::drawRect(const SkRect& r, const SkPaint& paint) {
306 SkRect drawBounds = get_fast_bounds(r, paint);
307 THREADED_DRAW(drawBounds, drawRect(r, paint));
308 }
309
drawRRect(const SkRRect & rrect,const SkPaint & paint)310 void SkThreadedBMPDevice::drawRRect(const SkRRect& rrect, const SkPaint& paint) {
311 #ifdef SK_IGNORE_BLURRED_RRECT_OPT
312 SkPath path;
313
314 path.addRRect(rrect);
315 // call the VIRTUAL version, so any subclasses who do handle drawPath aren't
316 // required to override drawRRect.
317 this->drawPath(path, paint, nullptr, false);
318 #else
319 SkRect drawBounds = get_fast_bounds(rrect.getBounds(), paint);
320 THREADED_DRAW(drawBounds, drawRRect(rrect, paint));
321 #endif
322 }
323
drawPath(const SkPath & path,const SkPaint & paint,const SkMatrix * prePathMatrix,bool pathIsMutable)324 void SkThreadedBMPDevice::drawPath(const SkPath& path, const SkPaint& paint,
325 const SkMatrix* prePathMatrix, bool pathIsMutable) {
326 SkRect drawBounds = path.isInverseFillType() ? SkRect::MakeLargest()
327 : get_fast_bounds(path.getBounds(), paint);
328 // For thread safety, make path imutable
329 THREADED_DRAW(drawBounds, drawPath(path, paint, prePathMatrix, false));
330 }
331
drawBitmap(const SkBitmap & bitmap,SkScalar x,SkScalar y,const SkPaint & paint)332 void SkThreadedBMPDevice::drawBitmap(const SkBitmap& bitmap, SkScalar x, SkScalar y,
333 const SkPaint& paint) {
334 SkMatrix matrix = SkMatrix::MakeTrans(x, y);
335 LogDrawScaleFactor(SkMatrix::Concat(this->ctm(), matrix), paint.getFilterQuality());
336 SkRect drawBounds = SkRect::MakeWH(bitmap.width(), bitmap.height());
337 matrix.mapRect(&drawBounds);
338 THREADED_DRAW(drawBounds, drawBitmap(bitmap, matrix, nullptr, paint));
339 }
340
drawSprite(const SkBitmap & bitmap,int x,int y,const SkPaint & paint)341 void SkThreadedBMPDevice::drawSprite(const SkBitmap& bitmap, int x, int y, const SkPaint& paint) {
342 SkRect drawBounds = SkRect::MakeXYWH(x, y, bitmap.width(), bitmap.height());
343 THREADED_DRAW(drawBounds, drawSprite(bitmap, x, y, paint));
344 }
345
drawText(const void * text,size_t len,SkScalar x,SkScalar y,const SkPaint & paint)346 void SkThreadedBMPDevice::drawText(const void* text, size_t len, SkScalar x, SkScalar y,
347 const SkPaint& paint) {
348 SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
349 THREADED_DRAW(drawBounds, drawText((const char*)text, len, x, y, paint, &this->surfaceProps()));
350 }
351
drawPosText(const void * text,size_t len,const SkScalar xpos[],int scalarsPerPos,const SkPoint & offset,const SkPaint & paint)352 void SkThreadedBMPDevice::drawPosText(const void* text, size_t len, const SkScalar xpos[],
353 int scalarsPerPos, const SkPoint& offset, const SkPaint& paint) {
354 SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
355 THREADED_DRAW(drawBounds, drawPosText((const char*)text, len, xpos, scalarsPerPos, offset,
356 paint, &surfaceProps()));
357 }
358
drawVertices(const SkVertices * vertices,SkBlendMode bmode,const SkPaint & paint)359 void SkThreadedBMPDevice::drawVertices(const SkVertices* vertices, SkBlendMode bmode,
360 const SkPaint& paint) {
361 SkRect drawBounds = SkRect::MakeLargest(); // TODO tighter drawBounds
362 THREADED_DRAW(drawBounds, drawVertices(vertices->mode(), vertices->vertexCount(),
363 vertices->positions(), vertices->texCoords(),
364 vertices->colors(), bmode, vertices->indices(),
365 vertices->indexCount(), paint));
366 }
367
drawDevice(SkBaseDevice * device,int x,int y,const SkPaint & paint)368 void SkThreadedBMPDevice::drawDevice(SkBaseDevice* device, int x, int y, const SkPaint& paint) {
369 SkASSERT(!paint.getImageFilter());
370 SkRect drawBounds = SkRect::MakeXYWH(x, y, device->width(), device->height());
371 THREADED_DRAW(drawBounds,
372 drawSprite(static_cast<SkBitmapDevice*>(device)->fBitmap, x, y, paint));
373 }
374