• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "AstcTexture.h"
16 
17 #include <atomic>
18 #include <chrono>
19 #include <cstring>
20 #include <optional>
21 #include <vector>
22 
23 #include "aemu/base/HealthMonitor.h"
24 #include "host-common/logging.h"
25 #include "stream-servers/vulkan/vk_util.h"
26 
27 namespace gfxstream {
28 namespace vk {
29 namespace {
30 
31 using std::chrono::milliseconds;
32 
// Throughput stats are logged once at least this many pixels have been
// decompressed since the previous log line.
constexpr uint64_t kProcessedPixelsLogInterval = 10'000'000;

// Running totals shared by every AstcTexture instance:
//  - pixels_processed / ms_elapsed: work done since the last stats log.
//  - bytes_used: size of the decompression buffers currently accounted for.
std::atomic<uint64_t> pixels_processed{0};
std::atomic<uint64_t> ms_elapsed{0};
std::atomic<int64_t> bytes_used{0};
39 
// Returns the extent of one image dimension at the given mip level.
//
// The base `size` is halved once per level and clamped so that the result is
// never smaller than 1.
uint32_t mipmapSize(uint32_t size, uint32_t mipLevel) {
    // Shifting a 32-bit value by 32 or more bits is undefined behaviour; any
    // such level has necessarily collapsed to the 1-texel minimum.
    if (mipLevel >= 32) {
        return 1;
    }
    const uint32_t scaled = size >> mipLevel;
    return scaled > 0 ? scaled : 1;
}
43 
isRegionValid(const VkBufferImageCopy & region,uint32_t width,uint32_t height)44 bool isRegionValid(const VkBufferImageCopy& region, uint32_t width, uint32_t height) {
45     // TODO(gregschlom) deal with those cases. See details at:
46     // https://registry.khronos.org/vulkan/specs/1.0-extensions/html/chap20.html#copies-buffers-images-addressing
47     // https://stackoverflow.com/questions/46501832/vulkan-vkbufferimagecopy-for-partial-transfer
48 
49     if (region.bufferRowLength != 0 || region.bufferImageHeight != 0) {
50         WARN("ASTC CPU decompression skipped: non-packed buffer");
51         return false;
52     }
53     if (region.imageOffset.x != 0 || region.imageOffset.y != 0) {
54         WARN("ASTC CPU decompression skipped: imageOffset is non-zero");
55         return false;
56     }
57     if (region.imageExtent.width != width || region.imageExtent.height != height) {
58         WARN("ASTC CPU decompression skipped: imageExtent is less than the entire image");
59         return false;
60     }
61     return true;
62 }
63 
64 }  // namespace
65 
AstcTexture(VulkanDispatch * vk,VkDevice device,VkPhysicalDevice physicalDevice,VkExtent3D imgSize,uint32_t blockWidth,uint32_t blockHeight,AstcCpuDecompressor * decompressor)66 AstcTexture::AstcTexture(VulkanDispatch* vk, VkDevice device, VkPhysicalDevice physicalDevice,
67                          VkExtent3D imgSize, uint32_t blockWidth, uint32_t blockHeight,
68                          AstcCpuDecompressor* decompressor)
69     : mVk(vk),
70       mDevice(device),
71       mPhysicalDevice(physicalDevice),
72       mImgSize(imgSize),
73       mBlockWidth(blockWidth),
74       mBlockHeight(blockHeight),
75       mDecompressor(decompressor) {}
76 
~AstcTexture()77 AstcTexture::~AstcTexture() { destroyVkBuffer(); }
78 
canDecompressOnCpu() const79 bool AstcTexture::canDecompressOnCpu() const { return mDecompressor->available(); }
80 
createVkBufferAndMapMemory(size_t bufferSize)81 uint8_t* AstcTexture::createVkBufferAndMapMemory(size_t bufferSize) {
82     VkResult res;
83     mBufferSize = bufferSize;  // Save the buffer size, for statistics purpose only
84     bytes_used += bufferSize;
85 
86     if (mDecompBuffer || mDecompBufferMemory) {
87         WARN(
88             "ASTC CPU decompression failed: tried to decompress same image more than once. Falling"
89             " back to GPU decompression");
90         return nullptr;
91     }
92 
93     VkBufferCreateInfo bufferInfo = {
94         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
95         .size = bufferSize,
96         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
97         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
98     };
99     res = mVk->vkCreateBuffer(mDevice, &bufferInfo, nullptr, &mDecompBuffer);
100     if (res != VK_SUCCESS) {
101         WARN("ASTC CPU decompression: vkCreateBuffer failed: %d", res);
102         mDecompBuffer = VK_NULL_HANDLE;
103         return nullptr;
104     }
105 
106     VkMemoryRequirements memRequirements;
107     mVk->vkGetBufferMemoryRequirements(mDevice, mDecompBuffer, &memRequirements);
108 
109     std::optional<uint32_t> memIndex = vk_util::findMemoryType(
110         mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
111         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
112             VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
113     if (!memIndex) {
114         // Do it again, but without VK_MEMORY_PROPERTY_HOST_CACHED_BIT this time
115         memIndex = vk_util::findMemoryType(
116             mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
117             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
118     }
119     if (!memIndex) {
120         WARN("ASTC CPU decompression: no suitable memory type to decompress the image");
121         return nullptr;
122     }
123 
124     VkMemoryAllocateInfo allocInfo = {
125         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
126         .allocationSize = memRequirements.size,
127         .memoryTypeIndex = *memIndex,
128     };
129     res = mVk->vkAllocateMemory(mDevice, &allocInfo, nullptr, &mDecompBufferMemory);
130     if (res != VK_SUCCESS) {
131         WARN("ASTC CPU decompression: vkAllocateMemory failed: %d", res);
132         mDecompBufferMemory = VK_NULL_HANDLE;
133         return nullptr;
134     }
135 
136     res = mVk->vkBindBufferMemory(mDevice, mDecompBuffer, mDecompBufferMemory, 0);
137     if (res != VK_SUCCESS) {
138         WARN("ASTC CPU decompression: vkBindBufferMemory failed: %d", res);
139         return nullptr;
140     }
141 
142     uint8_t* decompData;
143     res = mVk->vkMapMemory(mDevice, mDecompBufferMemory, 0, bufferSize, 0, (void**)&decompData);
144     if (res != VK_SUCCESS) {
145         WARN("ASTC CPU decompression: vkMapMemory failed: %d", res);
146         return nullptr;
147     }
148 
149     return decompData;
150 }
151 
destroyVkBuffer()152 void AstcTexture::destroyVkBuffer() {
153     bytes_used -= mBufferSize;
154     if (mVk && mDevice) {
155         mVk->vkDestroyBuffer(mDevice, mDecompBuffer, nullptr);
156         mVk->vkFreeMemory(mDevice, mDecompBufferMemory, nullptr);
157         mDecompBuffer = VK_NULL_HANDLE;
158         mDecompBufferMemory = VK_NULL_HANDLE;
159     }
160 }
161 
on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer,uint8_t * srcAstcData,size_t astcDataSize,VkImage dstImage,VkImageLayout dstImageLayout,uint32_t regionCount,const VkBufferImageCopy * pRegions,const VkDecoderContext & context)162 void AstcTexture::on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
163                                             size_t astcDataSize, VkImage dstImage,
164                                             VkImageLayout dstImageLayout, uint32_t regionCount,
165                                             const VkBufferImageCopy* pRegions,
166                                             const VkDecoderContext& context) {
167     auto watchdog =
168         WATCHDOG_BUILDER(context.healthMonitor, "AstcTexture::on_vkCmdCopyBufferToImage").build();
169     auto start_time = std::chrono::steady_clock::now();
170     mSuccess = false;
171     size_t decompSize = 0;  // How many bytes we need to hold the decompressed data
172 
173     // Holds extra data about the region
174     struct RegionInfo {
175         uint32_t width;           // actual width (ie: mipmap width)
176         uint32_t height;          // actual height (ie: mipmap height)
177         uint32_t compressedSize;  // size of ASTC data for that region
178     };
179 
180     std::vector<RegionInfo> regionInfos;
181     regionInfos.reserve(regionCount);
182 
183     // Make a copy of the regions and update the buffer offset of each to reflect the
184     // correct location of the decompressed data
185     std::vector<VkBufferImageCopy> decompRegions(pRegions, pRegions + regionCount);
186     for (auto& decompRegion : decompRegions) {
187         const uint32_t mipLevel = decompRegion.imageSubresource.mipLevel;
188         const uint32_t width = mipmapSize(mImgSize.width, mipLevel);
189         const uint32_t height = mipmapSize(mImgSize.height, mipLevel);
190         const uint32_t numAstcBlocks = ((width + mBlockWidth - 1) / mBlockWidth) *
191                                        ((height + mBlockHeight - 1) / mBlockHeight);
192         const uint32_t compressedSize = numAstcBlocks * 16;
193         // We haven't updated decompRegion.bufferOffset yet, so it's still the _compressed_ offset.
194         const uint32_t compressedDataOffset = decompRegion.bufferOffset;
195 
196         // Do all the precondition checks
197         if (!isRegionValid(decompRegion, width, height)) return;
198         if (compressedDataOffset + compressedSize > astcDataSize) {
199             WARN("ASTC CPU decompression: data out of bounds. Offset: %llu, Size: %llu, Total %llu",
200                  compressedDataOffset, compressedSize, astcDataSize);
201             return;
202         }
203 
204         decompRegion.bufferOffset = decompSize;
205         decompSize += width * height * 4;
206         regionInfos.push_back({width, height, compressedSize});
207     }
208 
209     // Create a new VkBuffer to hold the decompressed data
210     uint8_t* decompData = createVkBufferAndMapMemory(decompSize);
211     if (!decompData) {
212         destroyVkBuffer();  // The destructor would have done it anyway, but may as well do it early
213         return;
214     }
215 
216     // Decompress each region
217     for (int i = 0; i < regionCount; i++) {
218         const auto& compRegion = pRegions[i];
219         const auto& decompRegion = decompRegions[i];
220         const auto& regionInfo = regionInfos[i];
221 
222         int32_t status = mDecompressor->decompress(
223             regionInfo.width, regionInfo.height, mBlockWidth, mBlockHeight,
224             srcAstcData + compRegion.bufferOffset, regionInfo.compressedSize,
225             decompData + decompRegion.bufferOffset);
226 
227         if (status != 0) {
228             WARN("ASTC CPU decompression failed: %s - will try compute shader instead.",
229                  mDecompressor->getStatusString(status));
230             mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
231             destroyVkBuffer();
232             return;
233         }
234     }
235 
236     mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
237 
238     // Finally, actually copy the buffer to the image
239     mVk->vkCmdCopyBufferToImage(commandBuffer, mDecompBuffer, dstImage, dstImageLayout,
240                                 decompRegions.size(), decompRegions.data());
241 
242     mSuccess = true;
243     auto end_time = std::chrono::steady_clock::now();
244 
245     // Compute stats
246     pixels_processed += decompSize / 4;
247     ms_elapsed += std::chrono::duration_cast<milliseconds>(end_time - start_time).count();
248 
249     uint64_t total_pixels = pixels_processed.load();
250     uint64_t total_time = ms_elapsed.load();
251 
252     if (total_pixels >= kProcessedPixelsLogInterval && total_time > 0) {
253         pixels_processed.store(0);
254         ms_elapsed.store(0);
255         INFO("ASTC CPU decompression: %.2f Mpix in %.2f seconds (%.2f Mpix/s). Total mem: %.2f MB",
256              total_pixels / 1'000'000.0, total_time / 1000.0,
257              (float)total_pixels / total_time / 1000.0, bytes_used / 1000000.0);
258     }
259 }
260 
261 }  // namespace vk
262 }  // namespace gfxstream
263