/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
#define ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H

// #include <android-base/thread_annotations.h>

#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <thread>
#include <vector>

namespace renderscript {

/**
 * Optional restriction of the area to process. Only pointers to it are stored in this header,
 * so a forward declaration is sufficient; the definition lives elsewhere.
 */
struct Restriction;

/**
 * Description of the data to be processed for one Toolkit method call, e.g. one blur or one
 * blend operation.
 *
 * The data to be processed is a 2D array of cells. Each cell is a vector of 1 to 4 unsigned bytes.
 * The most typical configuration is a 2D array of uchar4 used to represent RGBA images.
 *
 * This is a base class. There will be a subclass for each Toolkit op.
 *
 * Typical usage of a derived class would look like:
 *    BlurTask task(in, out, sizeX, sizeY, vectorSize, etc);
 *    processor->doTask(&task);
 *
 * The TaskProcessor should call setTiling() and setUsesSimd() once, before calling processTile().
 * Other classes should not call setTiling(), setUsesSimd(), and processTile().
 */
class Task {
   protected:
    /**
     * Number of cells in the X direction.
     */
    const size_t mSizeX;
    /**
     * Number of cells in the Y direction.
     */
    const size_t mSizeY;
    /**
     * Number of elements in a vector (cell). From 1-4.
     */
    const size_t mVectorSize;
    /**
     * Whether the task prefers the processData call to represent the work to be done as
     * one line rather than a rectangle. This would be the case for work that doesn't involve
     * vertical neighbors, e.g. blend or histogram. A task would prefer this to minimize the
     * number of SIMD calls to make, i.e. have one call that covers all the rows.
     *
     * This setting will be used only when a tile covers the entire width of the data to be
     * processed.
     */
    const bool mPrefersDataAsOneRow;
    /**
     * Whether the processor we're working on supports SIMD operations.
     */
    bool mUsesSimd = false;

   private:
    /**
     * If not null, we'll process a subset of the whole 2D array. This specifies the restriction.
     * The pointer is not owned; see the constructor for the lifetime requirement.
     */
    const Restriction* mRestriction;

    /**
     * We'll divide the work into rectangular tiles. See setTiling().
     */

    /**
     * Size of a tile in the X direction, as a number of cells.
     */
    size_t mCellsPerTileX = 0;
    /**
     * Size of a tile in the Y direction, as a number of cells.
     */
    size_t mCellsPerTileY = 0;
    /**
     * Number of tiles per row of the restricted area we're working on.
     */
    size_t mTilesPerRow = 0;
    /**
     * Number of tiles per column of the restricted area we're working on.
     */
    size_t mTilesPerColumn = 0;

   public:
    /**
     * Construct a task.
     *
     * sizeX and sizeY should be greater than 0. vectorSize should be between 1 and 4.
     * The restriction should outlive this instance. The Toolkit validates the
     * arguments so we won't do that again here.
     *
     * @param sizeX Number of cells in the X direction.
     * @param sizeY Number of cells in the Y direction.
     * @param vectorSize Number of elements in a cell.
     * @param prefersDataAsOneRow See mPrefersDataAsOneRow.
     * @param restriction If not null, the subset of the 2D array to process.
     */
    Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow,
         const Restriction* restriction)
        : mSizeX{sizeX},
          mSizeY{sizeY},
          mVectorSize{vectorSize},
          mPrefersDataAsOneRow{prefersDataAsOneRow},
          mRestriction{restriction} {}

    /**
     * Virtual because this class is meant to be derived from, once per Toolkit op.
     */
    virtual ~Task() = default;

    /**
     * Records whether the processor running this task supports SIMD operations.
     */
    void setUsesSimd(bool uses) { mUsesSimd = uses; }

    /**
     * Divide the work into a number of tiles that can be distributed to the various threads.
     * A tile will be a rectangular region. To be robust, we'll want to handle regular cases
     * like 400x300 but also unusual ones like 1x120000, 120000x1, 1x1.
     *
     * We have a target size for the tiles, which corresponds roughly to how much data a thread
     * will want to process before checking for more work. If the target is set too low, we'll spend
     * more time in synchronization. If it's too large, some cores may not be used as efficiently.
     *
     * This method returns the number of tiles.
     *
     * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000.
     */
    int setTiling(unsigned int targetTileSizeInBytes);

    /**
     * This is called by the TaskProcessor to instruct the task to process a tile.
     *
     * @param threadIndex The index of the thread that's processing the tile.
     * @param tileIndex The index of the tile to process.
     */
    void processTile(unsigned int threadIndex, size_t tileIndex);

   private:
    /**
     * Call to the derived class to process the data bounded by the rectangle specified
     * by (startX, startY) and (endX, endY). The end values are EXCLUDED. This rectangle
     * will be contained within the restriction, if one is provided.
     */
    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
                             size_t endY) = 0;
};

/**
 * There's one instance of the task processor for the Toolkit. This class owns the thread pool,
 * and dispatches the tiles of work to the threads.
 */
class TaskProcessor {
    /**
     * Does this processor support SIMD-like instructions?
     */
    const bool mUsesSimd;
    /**
     * The number of separate threads we'll spawn. It's one less than the number of threads that
     * do the work as the client thread that starts the work will also be used.
     */
    const unsigned int mNumberOfPoolThreads;
    /**
     * Ensures that only one task is done at a time.
     */
    std::mutex mTaskMutex;
    /**
     * Ensures consistent access to the shared queue state.
     */
    std::mutex mQueueMutex;
    /**
     * The thread pool workers.
     */
    std::vector<std::thread> mPoolThreads;
    /**
     * The task being processed, if any. We only do one task at a time. We could create a queue
     * of tasks but using a mTaskMutex is sufficient for now.
     */
    Task* mCurrentTask /*GUARDED_BY(mTaskMutex)*/ = nullptr;
    /**
     * Signals that the mPoolThreads should terminate.
     */
    bool mStopThreads /*GUARDED_BY(mQueueMutex)*/ = false;
    /**
     * Signaled when work is available or the mPoolThreads need to shut down. mStopThreads is used
     * to distinguish between the two.
     */
    std::condition_variable mWorkAvailableOrStop;
    /**
     * Signaled when the work for the task is finished.
     */
    std::condition_variable mWorkIsFinished;
    /**
     * The number of tiles left to process.
     *
     * A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts
     * working on a new tile, it uses this count to identify which tile to work on. The tile
     * number is sufficient to determine the boundaries of the data to process.
     */
    int mTilesNotYetStarted /*GUARDED_BY(mQueueMutex)*/ = 0;
    /**
     * The number of tiles currently being processed. Must not be greater than
     * mNumberOfPoolThreads + 1.
     */
    int mTilesInProcess /*GUARDED_BY(mQueueMutex)*/ = 0;

    /**
     * Determines how we'll tile the work and signals the thread pool of available work.
     *
     * @param task The task to be performed.
     */
    void startWork(Task* task) /*REQUIRES(mTaskMutex)*/;

    /**
     * Tells the thread to start processing work off the queue.
     *
     * The flag is used to prevent the main thread from blocking forever if the work is
     * so trivial that the worker threads complete the work before the main thread calls this
     * method.
     *
     * @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will be
     *                    referred to by.
     * @param returnWhenNoWork If there's no work, return immediately.
     */
    void processTilesOfWork(int threadIndex, bool returnWhenNoWork);

    /**
     * Wait for the pool workers to complete the work on the current task.
     */
    void waitForPoolWorkersToComplete();

   public:
    /**
     * Create the processor.
     *
     * @param numThreads The total number of threads to use. If 0, we'll decide based on system
     * properties.
     */
    explicit TaskProcessor(unsigned int numThreads = 0);

    ~TaskProcessor();

    /**
     * Not copyable. The mutex members already prevent copying; deleting the operations
     * explicitly makes that part of the visible interface.
     */
    TaskProcessor(const TaskProcessor&) = delete;
    TaskProcessor& operator=(const TaskProcessor&) = delete;

    /**
     * Do the specified task. Returns only after the task has been completed.
     */
    void doTask(Task* task);

    /**
     * Some Tasks need to allocate temporary storage for each worker thread.
     * This provides the number of threads: the pool threads plus the calling thread.
     */
    unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; }
};

}  // namespace renderscript

#endif  // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H