1 //
2 // Copyright 2022 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 // AstcDecompressorImpl.cpp: Decodes ASTC-encoded textures.
8
#include <algorithm>
#include <array>
#include <future>
#include <thread>
#include <unordered_map>

#include "astcenc.h"
#include "common/WorkerThread.h"
#include "image_util/AstcDecompressor.h"
16
17 namespace angle
18 {
19 namespace
20 {
21
22 const astcenc_swizzle kSwizzle = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A};
23
24 // Used by std::unique_ptr to release the context when the pointer is destroyed
25 struct AstcencContextDeleter
26 {
operator ()angle::__anon0cf1bde40111::AstcencContextDeleter27 void operator()(astcenc_context *c) { astcenc_context_free(c); }
28 };
29
30 using AstcencContextUniquePtr = std::unique_ptr<astcenc_context, AstcencContextDeleter>;
31
// Returns the max number of threads to use when using multithreaded decompression.
// The value is computed once and is always in the range [1, 16].
// std::thread::hardware_concurrency() is allowed to return 0 when the number of hardware threads
// cannot be determined; without the lower bound that would yield a thread count of 0, for which
// no decompression task would ever be posted.
uint32_t MaxThreads()
{
    static const uint32_t numThreads = std::clamp(std::thread::hardware_concurrency(), 1u, 16u);
    return numThreads;
}
38
39 // Creates a new astcenc_context and wraps it in a smart pointer.
40 // It is not needed to call astcenc_context_free() on the returned pointer.
41 // blockWith, blockSize: ASTC block size for the context
42 // Error: (output param) Where to put the error status. Must not be null.
43 // Returns nullptr in case of error.
MakeDecoderContext(uint32_t blockWidth,uint32_t blockHeight,astcenc_error * error)44 AstcencContextUniquePtr MakeDecoderContext(uint32_t blockWidth,
45 uint32_t blockHeight,
46 astcenc_error *error)
47 {
48 astcenc_config config = {};
49 *error =
50 // TODO(gregschlom): Do we need a special case for sRGB images? (And pass
51 // ASTCENC_PRF_LDR_SRGB here?)
52 astcenc_config_init(ASTCENC_PRF_LDR, blockWidth, blockHeight, 1, ASTCENC_PRE_FASTEST,
53 ASTCENC_FLG_DECOMPRESS_ONLY, &config);
54 if (*error != ASTCENC_SUCCESS)
55 {
56 return nullptr;
57 }
58
59 astcenc_context *context;
60 *error = astcenc_context_alloc(&config, MaxThreads(), &context);
61 if (*error != ASTCENC_SUCCESS)
62 {
63 return nullptr;
64 }
65 return AstcencContextUniquePtr(context);
66 }
67
68 // Returns whether the ASTC decompressor can be used on this machine. It might not be available if
69 // the CPU doesn't support AVX2 instructions for example. Since this call is a bit expensive and
70 // never changes, the result should be cached.
IsAstcDecompressorAvailable()71 bool IsAstcDecompressorAvailable()
72 {
73 astcenc_error error;
74 // Try getting an arbitrary context. If it works, the decompressor is available.
75 AstcencContextUniquePtr context = MakeDecoderContext(5, 5, &error);
76 return context != nullptr;
77 }
78
79 // Caches and manages astcenc_context objects.
80 //
81 // Each context is fairly large (around 30 MB) and takes a while to construct, so it's important to
82 // reuse them as much as possible.
83 //
84 // While context objects can be reused across multiple threads, they must be used sequentially. To
85 // avoid having to lock and manage access between threads, we keep one cache per thread. This avoids
86 // any concurrency issues, at the cost of extra memory.
87 //
88 // Currently, there is no eviction strategy. Each cache could grow to a maximum of ~400 MB in size
89 // since they are 13 possible ASTC block sizes.
90 //
91 // Thread-safety: not thread safe.
92 class AstcDecompressorContextCache
93 {
94 public:
95 // Returns a context object for a given ASTC block size, along with the error code if the
96 // context initialization failed.
97 // In this case, the context will be null, and the status code will be non-zero.
get(uint32_t blockWidth,uint32_t blockHeight)98 std::pair<astcenc_context *, astcenc_error> get(uint32_t blockWidth, uint32_t blockHeight)
99 {
100 Value &value = mContexts[{blockWidth, blockHeight}];
101 if (value.context == nullptr)
102 {
103 value.context = MakeDecoderContext(blockWidth, blockHeight, &value.error);
104 }
105 return {value.context.get(), value.error};
106 }
107
108 private:
109 // Holds the data we use as the cache key
110 struct Key
111 {
112 uint32_t blockWidth;
113 uint32_t blockHeight;
114
operator ==angle::__anon0cf1bde40111::AstcDecompressorContextCache::Key115 bool operator==(const Key &other) const
116 {
117 return blockWidth == other.blockWidth && blockHeight == other.blockHeight;
118 }
119 };
120
121 struct Value
122 {
123 AstcencContextUniquePtr context = nullptr;
124 astcenc_error error = ASTCENC_SUCCESS;
125 };
126
127 // Computes the hash of a Key
128 struct KeyHash
129 {
operator ()angle::__anon0cf1bde40111::AstcDecompressorContextCache::KeyHash130 std::size_t operator()(const Key &k) const
131 {
132 // blockWidth and blockHeight are < 256 (actually, < 16), so this is safe
133 return k.blockWidth << 8 | k.blockHeight;
134 }
135 };
136
137 std::unordered_map<Key, Value, KeyHash> mContexts;
138 };
139
140 struct DecompressTask : public Closure
141 {
DecompressTaskangle::__anon0cf1bde40111::DecompressTask142 DecompressTask(astcenc_context *context,
143 uint32_t threadIndex,
144 const uint8_t *data,
145 size_t dataLength,
146 astcenc_image *image)
147 : context(context),
148 threadIndex(threadIndex),
149 data(data),
150 dataLength(dataLength),
151 image(image)
152 {}
153
operator ()angle::__anon0cf1bde40111::DecompressTask154 void operator()() override
155 {
156 result = astcenc_decompress_image(context, data, dataLength, image, &kSwizzle, threadIndex);
157 }
158
159 astcenc_context *context;
160 uint32_t threadIndex;
161 const uint8_t *data;
162 size_t dataLength;
163 astcenc_image *image;
164 astcenc_error result;
165 };
166
167 // Performs ASTC decompression of an image on the CPU
168 class AstcDecompressorImpl : public AstcDecompressor
169 {
170 public:
AstcDecompressorImpl()171 AstcDecompressorImpl()
172 : AstcDecompressor(), mContextCache(std::make_unique<AstcDecompressorContextCache>())
173 {
174 mTasks.reserve(MaxThreads());
175 mWaitEvents.reserve(MaxThreads());
176 }
177
178 ~AstcDecompressorImpl() override = default;
179
available() const180 bool available() const override
181 {
182 static bool available = IsAstcDecompressorAvailable();
183 return available;
184 }
185
decompress(std::shared_ptr<WorkerThreadPool> singleThreadPool,std::shared_ptr<WorkerThreadPool> multiThreadPool,const uint32_t imgWidth,const uint32_t imgHeight,const uint32_t blockWidth,const uint32_t blockHeight,const uint8_t * input,size_t inputLength,uint8_t * output)186 int32_t decompress(std::shared_ptr<WorkerThreadPool> singleThreadPool,
187 std::shared_ptr<WorkerThreadPool> multiThreadPool,
188 const uint32_t imgWidth,
189 const uint32_t imgHeight,
190 const uint32_t blockWidth,
191 const uint32_t blockHeight,
192 const uint8_t *input,
193 size_t inputLength,
194 uint8_t *output) override
195 {
196 // A given astcenc context can only decompress one image at a time, which we why we keep
197 // this mutex locked the whole time.
198 std::lock_guard global_lock(mMutex);
199
200 auto [context, context_status] = mContextCache->get(blockWidth, blockHeight);
201 if (context_status != ASTCENC_SUCCESS)
202 return context_status;
203
204 astcenc_image image;
205 image.dim_x = imgWidth;
206 image.dim_y = imgHeight;
207 image.dim_z = 1;
208 image.data_type = ASTCENC_TYPE_U8;
209 image.data = reinterpret_cast<void **>(&output);
210
211 // For smaller images the overhead of multithreading exceeds the benefits.
212 const bool singleThreaded = (imgHeight <= 32 && imgWidth <= 32) || !multiThreadPool;
213
214 std::shared_ptr<WorkerThreadPool> &threadPool =
215 singleThreaded ? singleThreadPool : multiThreadPool;
216 const uint32_t threadCount = singleThreaded ? 1 : MaxThreads();
217
218 mTasks.clear();
219 mWaitEvents.clear();
220
221 for (uint32_t i = 0; i < threadCount; ++i)
222 {
223 mTasks.push_back(
224 std::make_shared<DecompressTask>(context, i, input, inputLength, &image));
225 mWaitEvents.push_back(threadPool->postWorkerTask(mTasks[i]));
226 }
227 WaitableEvent::WaitMany(&mWaitEvents);
228 astcenc_decompress_reset(context);
229
230 for (auto &task : mTasks)
231 {
232 if (task->result != ASTCENC_SUCCESS)
233 return task->result;
234 }
235 return ASTCENC_SUCCESS;
236 }
237
getStatusString(int32_t statusCode) const238 const char *getStatusString(int32_t statusCode) const override
239 {
240 const char *msg = astcenc_get_error_string((astcenc_error)statusCode);
241 return msg ? msg : "ASTCENC_UNKNOWN_STATUS";
242 }
243
244 private:
245 std::unique_ptr<AstcDecompressorContextCache> mContextCache;
246 std::mutex mMutex; // Locked while calling `decode()`
247 std::vector<std::shared_ptr<DecompressTask>> mTasks;
248 std::vector<std::shared_ptr<WaitableEvent>> mWaitEvents;
249 };
250
251 } // namespace
252
get()253 AstcDecompressor &AstcDecompressor::get()
254 {
255 static auto *instance = new AstcDecompressorImpl();
256 return *instance;
257 }
258
259 } // namespace angle
260