1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H 18 #define ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H 19 20 #include <android-base/thread_annotations.h> 21 22 #include <atomic> 23 #include <condition_variable> 24 #include <cstddef> 25 #include <mutex> 26 #include <thread> 27 #include <vector> 28 29 namespace android { 30 namespace renderscript { 31 32 /** 33 * Description of the data to be processed for one Toolkit method call, e.g. one blur or one 34 * blend operation. 35 * 36 * The data to be processed is a 2D array of cells. Each cell is a vector of 1 to 4 unsigned bytes. 37 * The most typical configuration is a 2D array of uchar4 used to represent RGBA images. 38 * 39 * This is a base class. There will be a subclass for each Toolkit op. 40 * 41 * Typical usage of a derived class would look like: 42 * BlurTask task(in, out, sizeX, sizeY, vectorSize, etc); 43 * processor->doTask(&task); 44 * 45 * The TaskProcessor should call setTiling() and setUsesSimd() once, before calling processTile(). 46 * Other classes should not call setTiling(), setUsesSimd(), and processTile(). 47 */ 48 class Task { 49 protected: 50 /** 51 * Number of cells in the X direction. 52 */ 53 const size_t mSizeX; 54 /** 55 * Number of cells in the Y direction. 56 */ 57 const size_t mSizeY; 58 /** 59 * Number of elements in a vector (cell). From 1-4. 60 */ 61 const size_t mVectorSize; 62 /** 63 * Whether the task prefers the processData call to represent the work to be done as 64 * one line rather than a rectangle. This would be the case for work that don't involve 65 * vertical neighbors, e.g. blend or histogram. A task would prefer this to minimize the 66 * number of SIMD calls to make, i.e. have one call that covers all the rows. 67 * 68 * This setting will be used only when a tile covers the entire width of the data to be 69 * processed. 70 */ 71 const bool mPrefersDataAsOneRow; 72 /** 73 * Whether the processor we're working on supports SIMD operations. 74 */ 75 bool mUsesSimd = false; 76 77 private: 78 /** 79 * If not null, we'll process a subset of the whole 2D array. This specifies the restriction. 80 */ 81 const struct Restriction* mRestriction; 82 83 /** 84 * We'll divide the work into rectangular tiles. See setTiling(). 85 */ 86 87 /** 88 * Size of a tile in the X direction, as a number of cells. 89 */ 90 size_t mCellsPerTileX = 0; 91 /** 92 * Size of a tile in the Y direction, as a number of cells. 93 */ 94 size_t mCellsPerTileY = 0; 95 /** 96 * Number of tiles per row of the restricted area we're working on. 97 */ 98 size_t mTilesPerRow = 0; 99 /** 100 * Number of tiles per column of the restricted area we're working on. 101 */ 102 size_t mTilesPerColumn = 0; 103 104 public: 105 /** 106 * Construct a task. 107 * 108 * sizeX and sizeY should be greater than 0. vectorSize should be between 1 and 4. 109 * The restriction should outlive this instance. The Toolkit validates the 110 * arguments so we won't do that again here. 111 */ Task(size_t sizeX,size_t sizeY,size_t vectorSize,bool prefersDataAsOneRow,const Restriction * restriction)112 Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow, 113 const Restriction* restriction) 114 : mSizeX{sizeX}, 115 mSizeY{sizeY}, 116 mVectorSize{vectorSize}, 117 mPrefersDataAsOneRow{prefersDataAsOneRow}, 118 mRestriction{restriction} {} ~Task()119 virtual ~Task() {} 120 setUsesSimd(bool uses)121 void setUsesSimd(bool uses) { mUsesSimd = uses; } 122 123 /** 124 * Divide the work into a number of tiles that can be distributed to the various threads. 125 * A tile will be a rectangular region. To be robust, we'll want to handle regular cases 126 * like 400x300 but also unusual ones like 1x120000, 120000x1, 1x1. 127 * 128 * We have a target size for the tiles, which corresponds roughly to how much data a thread 129 * will want to process before checking for more work. If the target is set too low, we'll spend 130 * more time in synchronization. If it's too large, some cores may not be used as efficiently. 131 * 132 * This method returns the number of tiles. 133 * 134 * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000. 135 */ 136 int setTiling(unsigned int targetTileSizeInBytes); 137 138 /** 139 * This is called by the TaskProcessor to instruct the task to process a tile. 140 * 141 * @param threadIndex The index of the thread that's processing the tile. 142 * @param tileIndex The index of the tile to process. 143 */ 144 void processTile(unsigned int threadIndex, size_t tileIndex); 145 146 private: 147 /** 148 * Call to the derived class to process the data bounded by the rectangle specified 149 * by (startX, startY) and (endX, endY). The end values are EXCLUDED. This rectangle 150 * will be contained with the restriction, if one is provided. 151 */ 152 virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX, 153 size_t endY) = 0; 154 }; 155 156 /** 157 * There's one instance of the task processor for the Toolkit. This class owns the thread pool, 158 * and dispatches the tiles of work to the threads. 159 */ 160 class TaskProcessor { 161 /** 162 * Does this processor support SIMD-like instructions? 163 */ 164 const bool mUsesSimd; 165 /** 166 * The number of separate threads we'll spawn. It's one less than the number of threads that 167 * do the work as the client thread that starts the work will also be used. 168 */ 169 const unsigned int mNumberOfPoolThreads; 170 /** 171 * Ensures that only one task is done at a time. 172 */ 173 std::mutex mTaskMutex; 174 /** 175 * Ensures consistent access to the shared queue state. 176 */ 177 std::mutex mQueueMutex; 178 /** 179 * The thread pool workers. 180 */ 181 std::vector<std::thread> mPoolThreads; 182 /** 183 * The task being processed, if any. We only do one task at a time. We could create a queue 184 * of tasks but using a mTaskMutex is sufficient for now. 185 */ 186 Task* mCurrentTask GUARDED_BY(mTaskMutex) = nullptr; 187 /** 188 * Signals that the mPoolThreads should terminate. 189 */ 190 bool mStopThreads GUARDED_BY(mQueueMutex) = false; 191 /** 192 * Signaled when work is available or the mPoolThreads need to shut down. mStopThreads is used 193 * to distinguish between the two. 194 */ 195 std::condition_variable mWorkAvailableOrStop; 196 /** 197 * Signaled when the work for the task is finished. 198 */ 199 std::condition_variable mWorkIsFinished; 200 /** 201 * A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts 202 * working on a new tile, it uses this count to identify which tile to work on. The tile 203 * number is sufficient to determine the boundaries of the data to process. 204 * 205 * The number of tiles left to process. 206 */ 207 int mTilesNotYetStarted GUARDED_BY(mQueueMutex) = 0; 208 /** 209 * The number of tiles currently being processed. Must not be greater than 210 * mNumberOfPoolThreads + 1. 211 */ 212 int mTilesInProcess GUARDED_BY(mQueueMutex) = 0; 213 214 /** 215 * Determines how we'll tile the work and signals the thread pool of available work. 216 * 217 * @param task The task to be performed. 218 */ 219 void startWork(Task* task) REQUIRES(mTaskMutex); 220 221 /** 222 * Tells the thread to start processing work off the queue. 223 * 224 * The flag is used for prevent the main thread from blocking forever if the work is 225 * so trivial that the worker threads complete the work before the main thread calls this 226 * method. 227 * 228 * @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will referred by. 229 * @param returnWhenNoWork If there's no work, return immediately. 230 */ 231 void processTilesOfWork(int threadIndex, bool returnWhenNoWork); 232 233 /** 234 * Wait for the pool workers to complete the work on the current task. 235 */ 236 void waitForPoolWorkersToComplete(); 237 238 public: 239 /** 240 * Create the processor. 241 * 242 * @param numThreads The total number of threads to use. If 0, we'll decided based on system 243 * properties. 244 */ 245 explicit TaskProcessor(unsigned int numThreads = 0); 246 247 ~TaskProcessor(); 248 249 /** 250 * Do the specified task. Returns only after the task has been completed. 251 */ 252 void doTask(Task* task); 253 254 /** 255 * Some Tasks need to allocate temporary storage for each worker thread. 256 * This provides the number of threads. 257 */ getNumberOfThreads()258 unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; } 259 }; 260 261 } // namespace renderscript 262 } // namespace android 263 264 #endif // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H 265