• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
18 #define ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
19 
20 #include <android-base/thread_annotations.h>
21 
22 #include <atomic>
23 #include <condition_variable>
24 #include <cstddef>
25 #include <mutex>
26 #include <thread>
27 #include <vector>
28 
29 namespace android {
30 namespace renderscript {
31 
/**
 * Description of the data to be processed for one Toolkit method call, e.g. one blur or one
 * blend operation.
 *
 * The data to be processed is a 2D array of cells. Each cell is a vector of 1 to 4 unsigned bytes.
 * The most typical configuration is a 2D array of uchar4 used to represent RGBA images.
 *
 * This is a base class. There will be a subclass for each Toolkit op.
 *
 * Typical usage of a derived class would look like:
 *    BlurTask task(in, out, sizeX, sizeY, vectorSize, etc);
 *    processor->doTask(&task);
 *
 * The TaskProcessor should call setTiling() and setUsesSimd() once, before calling processTile().
 * Other classes should not call setTiling(), setUsesSimd(), and processTile().
 */
class Task {
   protected:
    /**
     * Number of cells in the X direction.
     */
    const size_t mSizeX;
    /**
     * Number of cells in the Y direction.
     */
    const size_t mSizeY;
    /**
     * Number of elements in a vector (cell). From 1-4.
     */
    const size_t mVectorSize;
    /**
     * Whether the task prefers the processData call to represent the work to be done as
     * one line rather than a rectangle. This would be the case for work that doesn't involve
     * vertical neighbors, e.g. blend or histogram. A task would prefer this to minimize the
     * number of SIMD calls to make, i.e. have one call that covers all the rows.
     *
     * This setting will be used only when a tile covers the entire width of the data to be
     * processed.
     */
    const bool mPrefersDataAsOneRow;
    /**
     * Whether the processor we're working on supports SIMD operations.
     * Defaults to false until setUsesSimd() is called.
     */
    bool mUsesSimd = false;

   private:
    /**
     * If not null, we'll process a subset of the whole 2D array. This specifies the restriction.
     * The pointer is not owned; see the constructor for the lifetime requirement.
     */
    const struct Restriction* mRestriction;

    /**
     * We'll divide the work into rectangular tiles. See setTiling().
     */

    /**
     * Size of a tile in the X direction, as a number of cells.
     */
    size_t mCellsPerTileX = 0;
    /**
     * Size of a tile in the Y direction, as a number of cells.
     */
    size_t mCellsPerTileY = 0;
    /**
     * Number of tiles per row of the restricted area we're working on.
     */
    size_t mTilesPerRow = 0;
    /**
     * Number of tiles per column of the restricted area we're working on.
     */
    size_t mTilesPerColumn = 0;

   public:
    /**
     * Construct a task.
     *
     * sizeX and sizeY should be greater than 0. vectorSize should be between 1 and 4.
     * The restriction should outlive this instance. The Toolkit validates the
     * arguments so we won't do that again here.
     *
     * @param sizeX Number of cells in the X direction.
     * @param sizeY Number of cells in the Y direction.
     * @param vectorSize Number of elements in a cell.
     * @param prefersDataAsOneRow See mPrefersDataAsOneRow.
     * @param restriction Optional sub-region to process; may be null.
     */
    Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow,
         const Restriction* restriction)
        : mSizeX{sizeX},
          mSizeY{sizeY},
          mVectorSize{vectorSize},
          mPrefersDataAsOneRow{prefersDataAsOneRow},
          mRestriction{restriction} {}

    /**
     * Virtual so that a derived task can be destroyed through a Task pointer.
     */
    virtual ~Task() = default;

    /**
     * Records whether the processor running this task supports SIMD operations.
     */
    void setUsesSimd(bool uses) { mUsesSimd = uses; }

    /**
     * Divide the work into a number of tiles that can be distributed to the various threads.
     * A tile will be a rectangular region. To be robust, we'll want to handle regular cases
     * like 400x300 but also unusual ones like 1x120000, 120000x1, 1x1.
     *
     * We have a target size for the tiles, which corresponds roughly to how much data a thread
     * will want to process before checking for more work. If the target is set too low, we'll spend
     * more time in synchronization. If it's too large, some cores may not be used as efficiently.
     *
     * This method returns the number of tiles.
     *
     * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000.
     */
    int setTiling(unsigned int targetTileSizeInBytes);

    /**
     * This is called by the TaskProcessor to instruct the task to process a tile.
     *
     * @param threadIndex The index of the thread that's processing the tile.
     * @param tileIndex The index of the tile to process.
     */
    void processTile(unsigned int threadIndex, size_t tileIndex);

   private:
    /**
     * Call to the derived class to process the data bounded by the rectangle specified
     * by (startX, startY) and (endX, endY). The end values are EXCLUDED. This rectangle
     * will be contained within the restriction, if one is provided.
     *
     * Note that threadIndex is declared as int here but as unsigned int in processTile().
     */
    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
                             size_t endY) = 0;
};
155 
156 /**
157  * There's one instance of the task processor for the Toolkit. This class owns the thread pool,
158  * and dispatches the tiles of work to the threads.
159  */
160 class TaskProcessor {
161     /**
162      * Does this processor support SIMD-like instructions?
163      */
164     const bool mUsesSimd;
165     /**
166      * The number of separate threads we'll spawn. It's one less than the number of threads that
167      * do the work as the client thread that starts the work will also be used.
168      */
169     const unsigned int mNumberOfPoolThreads;
170     /**
171      * Ensures that only one task is done at a time.
172      */
173     std::mutex mTaskMutex;
174     /**
175      * Ensures consistent access to the shared queue state.
176      */
177     std::mutex mQueueMutex;
178     /**
179      * The thread pool workers.
180      */
181     std::vector<std::thread> mPoolThreads;
182     /**
183      * The task being processed, if any. We only do one task at a time. We could create a queue
184      * of tasks but using a mTaskMutex is sufficient for now.
185      */
186     Task* mCurrentTask GUARDED_BY(mTaskMutex) = nullptr;
187     /**
188      * Signals that the mPoolThreads should terminate.
189      */
190     bool mStopThreads GUARDED_BY(mQueueMutex) = false;
191     /**
192      * Signaled when work is available or the mPoolThreads need to shut down. mStopThreads is used
193      * to distinguish between the two.
194      */
195     std::condition_variable mWorkAvailableOrStop;
196     /**
197      * Signaled when the work for the task is finished.
198      */
199     std::condition_variable mWorkIsFinished;
200     /**
201      * A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts
202      * working on a new tile, it uses this count to identify which tile to work on. The tile
203      * number is sufficient to determine the boundaries of the data to process.
204      *
205      * The number of tiles left to process.
206      */
207     int mTilesNotYetStarted GUARDED_BY(mQueueMutex) = 0;
208     /**
209      * The number of tiles currently being processed. Must not be greater than
210      * mNumberOfPoolThreads + 1.
211      */
212     int mTilesInProcess GUARDED_BY(mQueueMutex) = 0;
213 
214     /**
215      * Determines how we'll tile the work and signals the thread pool of available work.
216      *
217      * @param task The task to be performed.
218      */
219     void startWork(Task* task) REQUIRES(mTaskMutex);
220 
221     /**
222      * Tells the thread to start processing work off the queue.
223      *
224      * The flag is used for prevent the main thread from blocking forever if the work is
225      * so trivial that the worker threads complete the work before the main thread calls this
226      * method.
227      *
228      * @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will referred by.
229      * @param returnWhenNoWork If there's no work, return immediately.
230      */
231     void processTilesOfWork(int threadIndex, bool returnWhenNoWork);
232 
233     /**
234      * Wait for the pool workers to complete the work on the current task.
235      */
236     void waitForPoolWorkersToComplete();
237 
238    public:
239     /**
240      * Create the processor.
241      *
242      * @param numThreads The total number of threads to use. If 0, we'll decided based on system
243      * properties.
244      */
245     explicit TaskProcessor(unsigned int numThreads = 0);
246 
247     ~TaskProcessor();
248 
249     /**
250      * Do the specified task. Returns only after the task has been completed.
251      */
252     void doTask(Task* task);
253 
254     /**
255      * Some Tasks need to allocate temporary storage for each worker thread.
256      * This provides the number of threads.
257      */
getNumberOfThreads()258     unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; }
259 };
260 
261 }  // namespace renderscript
262 }  // namespace android
263 
264 #endif  // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
265