• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/threading_strategy.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <memory>
20 #include <utility>
21 
22 #include "src/frame_scratch_buffer.h"
23 #include "src/utils/constants.h"
24 #include "src/utils/logging.h"
25 #include "src/utils/vector.h"
26 
27 namespace libgav1 {
28 namespace {
29 
30 #if !defined(LIBGAV1_FRAME_PARALLEL_THRESHOLD_MULTIPLIER)
31 constexpr int kFrameParallelThresholdMultiplier = 3;
32 #else
33 constexpr int kFrameParallelThresholdMultiplier =
34     LIBGAV1_FRAME_PARALLEL_THRESHOLD_MULTIPLIER;
35 #endif
36 
37 // Computes the number of frame threads to be used based on the following
38 // heuristic:
39 //   * If |thread_count| == 1, return 0.
40 //   * If |thread_count| <= |tile_count| * kFrameParallelThresholdMultiplier,
41 //     return 0.
42 //   * Otherwise, return the largest value of i which satisfies the following
43 //     condition: i + i * tile_columns <= thread_count. This ensures that there
44 //     are at least |tile_columns| worker threads for each frame thread.
45 //   * This function will never return 1 or a value > |thread_count|.
46 //
47 //  This heuristic is based on empirical performance data. The in-frame
48 //  threading model (combination of tile multithreading, superblock row
49 //  multithreading and post filter multithreading) performs better than the
50 //  frame parallel model until we reach the threshold of |thread_count| >
51 //  |tile_count| * kFrameParallelThresholdMultiplier.
52 //
53 //  It is a function of |tile_count| since tile threading and superblock row
54 //  multithreading will scale only as a factor of |tile_count|. The threshold
55 //  kFrameParallelThresholdMultiplier is arrived at based on empirical data.
56 //  The general idea is that superblock row multithreading plateaus at 4 *
57 //  |tile_count| because in most practical cases there aren't more than that
58 //  many superblock rows and columns available to work on in parallel.
ComputeFrameThreadCount(int thread_count,int tile_count,int tile_columns)59 int ComputeFrameThreadCount(int thread_count, int tile_count,
60                             int tile_columns) {
61   assert(thread_count > 0);
62   if (thread_count == 1) return 0;
63   return (thread_count <= tile_count * kFrameParallelThresholdMultiplier)
64              ? 0
65              : std::max(2, thread_count / (1 + tile_columns));
66 }
67 
68 }  // namespace
69 
Reset(const ObuFrameHeader & frame_header,int thread_count)70 bool ThreadingStrategy::Reset(const ObuFrameHeader& frame_header,
71                               int thread_count) {
72   assert(thread_count > 0);
73   frame_parallel_ = false;
74 
75   if (thread_count == 1) {
76     thread_pool_.reset(nullptr);
77     tile_thread_count_ = 0;
78     max_tile_index_for_row_threads_ = 0;
79     return true;
80   }
81 
82   // We do work in the current thread, so it is sufficient to create
83   // |thread_count|-1 threads in the threadpool.
84   thread_count = std::min(thread_count, static_cast<int>(kMaxThreads)) - 1;
85 
86   if (thread_pool_ == nullptr || thread_pool_->num_threads() != thread_count) {
87     thread_pool_ = ThreadPool::Create("libgav1", thread_count);
88     if (thread_pool_ == nullptr) {
89       LIBGAV1_DLOG(ERROR, "Failed to create a thread pool with %d threads.",
90                    thread_count);
91       tile_thread_count_ = 0;
92       max_tile_index_for_row_threads_ = 0;
93       return false;
94     }
95   }
96 
97   // Prefer tile threads first (but only if there is more than one tile).
98   const int tile_count = frame_header.tile_info.tile_count;
99   if (tile_count > 1) {
100     // We want 1 + tile_thread_count_ <= tile_count because the current thread
101     // is also used to decode tiles. This is equivalent to
102     // tile_thread_count_ <= tile_count - 1.
103     tile_thread_count_ = std::min(thread_count, tile_count - 1);
104     thread_count -= tile_thread_count_;
105     if (thread_count == 0) {
106       max_tile_index_for_row_threads_ = 0;
107       return true;
108     }
109   } else {
110     tile_thread_count_ = 0;
111   }
112 
113 #if defined(__ANDROID__)
114   // Assign the remaining threads for each Tile. The heuristic used here is that
115   // we will assign two threads for each Tile. So for example, if |thread_count|
116   // is 2, for a stream with 2 tiles the first tile would get both the threads
117   // and the second tile would have row multi-threading turned off. This
118   // heuristic is based on the fact that row multi-threading is fast enough only
119   // when there are at least two threads to do the decoding (since one thread
120   // always does the parsing).
121   //
122   // This heuristic might stop working when SIMD optimizations make the decoding
123   // much faster and the parsing thread is only as fast as the decoding threads.
124   // So we will have to revisit this later to make sure that this is still
125   // optimal.
126   //
127   // Note that while this heuristic significantly improves performance on high
128   // end devices (like the Pixel 3), there are some performance regressions in
129   // some lower end devices (in some cases) and that needs to be revisited as we
130   // bring in more optimizations. Overall, the gains because of this heuristic
131   // seems to be much larger than the regressions.
132   for (int i = 0; i < tile_count; ++i) {
133     max_tile_index_for_row_threads_ = i + 1;
134     thread_count -= 2;
135     if (thread_count <= 0) break;
136   }
137 #else   // !defined(__ANDROID__)
138   // Assign the remaining threads to each Tile.
139   for (int i = 0; i < tile_count; ++i) {
140     const int count = thread_count / tile_count +
141                       static_cast<int>(i < thread_count % tile_count);
142     if (count == 0) {
143       // Once we see a 0 value, all subsequent values will be 0 since it is
144       // supposed to be assigned in a round-robin fashion.
145       break;
146     }
147     max_tile_index_for_row_threads_ = i + 1;
148   }
149 #endif  // defined(__ANDROID__)
150   return true;
151 }
152 
Reset(int thread_count)153 bool ThreadingStrategy::Reset(int thread_count) {
154   assert(thread_count > 0);
155   frame_parallel_ = true;
156 
157   // In frame parallel mode, we simply access the underlying |thread_pool_|
158   // directly. So ensure all the other threadpool getter functions return
159   // nullptr. Also, superblock row multithreading is always disabled in frame
160   // parallel mode.
161   tile_thread_count_ = 0;
162   max_tile_index_for_row_threads_ = 0;
163 
164   if (thread_pool_ == nullptr || thread_pool_->num_threads() != thread_count) {
165     thread_pool_ = ThreadPool::Create("libgav1-fp", thread_count);
166     if (thread_pool_ == nullptr) {
167       LIBGAV1_DLOG(ERROR, "Failed to create a thread pool with %d threads.",
168                    thread_count);
169       return false;
170     }
171   }
172   return true;
173 }
174 
InitializeThreadPoolsForFrameParallel(int thread_count,int tile_count,int tile_columns,std::unique_ptr<ThreadPool> * const frame_thread_pool,FrameScratchBufferPool * const frame_scratch_buffer_pool)175 bool InitializeThreadPoolsForFrameParallel(
176     int thread_count, int tile_count, int tile_columns,
177     std::unique_ptr<ThreadPool>* const frame_thread_pool,
178     FrameScratchBufferPool* const frame_scratch_buffer_pool) {
179   assert(*frame_thread_pool == nullptr);
180   thread_count = std::min(thread_count, static_cast<int>(kMaxThreads));
181   const int frame_threads =
182       ComputeFrameThreadCount(thread_count, tile_count, tile_columns);
183   if (frame_threads == 0) return true;
184   *frame_thread_pool = ThreadPool::Create(frame_threads);
185   if (*frame_thread_pool == nullptr) {
186     LIBGAV1_DLOG(ERROR, "Failed to create frame thread pool with %d threads.",
187                  frame_threads);
188     return false;
189   }
190   int remaining_threads = thread_count - frame_threads;
191   if (remaining_threads == 0) return true;
192   int threads_per_frame = remaining_threads / frame_threads;
193   const int extra_threads = remaining_threads % frame_threads;
194   Vector<std::unique_ptr<FrameScratchBuffer>> frame_scratch_buffers;
195   if (!frame_scratch_buffers.reserve(frame_threads)) return false;
196   // Create the tile thread pools.
197   for (int i = 0; i < frame_threads && remaining_threads > 0; ++i) {
198     std::unique_ptr<FrameScratchBuffer> frame_scratch_buffer =
199         frame_scratch_buffer_pool->Get();
200     if (frame_scratch_buffer == nullptr) {
201       return false;
202     }
203     // If the number of tile threads cannot be divided equally amongst all the
204     // frame threads, assign one extra thread to the first |extra_threads| frame
205     // threads.
206     const int current_frame_thread_count =
207         threads_per_frame + static_cast<int>(i < extra_threads);
208     if (!frame_scratch_buffer->threading_strategy.Reset(
209             current_frame_thread_count)) {
210       return false;
211     }
212     remaining_threads -= current_frame_thread_count;
213     frame_scratch_buffers.push_back_unchecked(std::move(frame_scratch_buffer));
214   }
215   // We release the frame scratch buffers in reverse order so that the extra
216   // threads are allocated to buffers in the top of the stack.
217   for (int i = static_cast<int>(frame_scratch_buffers.size()) - 1; i >= 0;
218        --i) {
219     frame_scratch_buffer_pool->Release(std::move(frame_scratch_buffers[i]));
220   }
221   return true;
222 }
223 
224 }  // namespace libgav1
225