1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <cmath>
17 #include <fstream>
18 #ifdef ENABLE_OPENCL
19 #include <securec.h>
20 #endif // ENABLE_OPENCL
21
22 #include "base/log/ace_trace.h"
23 #include "base/log/log.h"
24 #include "base/thread/background_task_executor.h"
25 #include "base/utils/system_properties.h"
26 #include "core/image/image_cache.h"
27 #include "core/image/image_compressor.h"
28
29 namespace OHOS::Ace {
/**
 * Load-time hook (GCC/Clang constructor attribute).
 *
 * When the build enables OpenCL and targets musl, the OpenCL wrapper is
 * initialized here; in every other configuration the function is empty.
 */
__attribute__((constructor)) void load(void)
{
#if defined(ENABLE_OPENCL) && defined(__MUSL__)
    OHOS::InitOpenCL();
#endif
}
38
39 std::shared_ptr<ImageCompressor> ImageCompressor::instance_ = nullptr;
40 std::mutex ImageCompressor::instanceMutex_;
GetInstance()41 std::shared_ptr<ImageCompressor> ImageCompressor::GetInstance()
42 {
43 if (instance_ == nullptr) {
44 std::lock_guard<std::mutex> lock(instanceMutex_);
45 if (instance_ == nullptr) {
46 instance_.reset(new ImageCompressor());
47 instance_->Init();
48 }
49 }
50 return instance_;
51 }
52
Init()53 void ImageCompressor::Init()
54 {
55 #ifdef ENABLE_OPENCL
56 switch_ = SystemProperties::IsAstcEnabled();
57 if (switch_) {
58 clOk_ = OHOS::InitOpenCL();
59 maxErr_ = SystemProperties::GetAstcMaxError();
60 psnr_ = SystemProperties::GetAstcPsnr();
61 InitPartition();
62 InitRecords();
63 }
64 #endif // ENABLE_OPENCL
65 }
66
CanCompress()67 bool ImageCompressor::CanCompress()
68 {
69 #ifdef UPLOAD_GPU_DISABLED
70 return false;
71 #else
72 if (switch_ && clOk_) {
73 return true;
74 }
75 return false;
76 #endif
77 }
78
79 #ifdef ENABLE_OPENCL
LoadShaderBin(cl_context context,cl_device_id device_id)80 cl_program ImageCompressor::LoadShaderBin(cl_context context, cl_device_id device_id)
81 {
82 ACE_FUNCTION_TRACE();
83 std::unique_ptr<FILE, decltype(&fclose)> file(fopen(shader_path_.c_str(), "rb"), fclose);
84 if (!file) {
85 LOGE("load cl shader failed");
86 return nullptr;
87 }
88 auto data = SkData::MakeFromFILE(file.get());
89 if (!data) {
90 return nullptr;
91 }
92 cl_int err;
93 size_t len = data->size();
94 auto ptr = (const unsigned char*) data->data();
95 cl_program p = clCreateProgramWithBinary(context, 1, &device_id, &len, &ptr, NULL, &err);
96 if (err) {
97 return nullptr;
98 }
99 LOGD("load cl shader");
100 return p;
101 }
102
CreateKernel()103 bool ImageCompressor::CreateKernel()
104 {
105 if (!context_ || !kernel_) {
106 cl_int err;
107 cl_platform_id platform_id;
108 cl_device_id device_id;
109 clGetPlatformIDs(1, &platform_id, NULL);
110 clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
111 context_ = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
112 queue_ = clCreateCommandQueueWithProperties(context_, device_id, 0, &err);
113
114 cl_program program = LoadShaderBin(context_, device_id);
115 clBuildProgram(program, 1, &device_id, compileOption_.c_str(), NULL, NULL);
116 ACE_SCOPED_TRACE("clCreateKernel");
117 kernel_ = clCreateKernel(program, "astc", &err);
118 clReleaseProgram(program);
119 }
120 if (!context_ || !kernel_ || !queue_) {
121 ReleaseResource();
122 LOGE("build opencl program failed");
123 clOk_ = false;
124 return false;
125 }
126 refCount_++;
127 return true;
128 }
129
CheckImageQuality(std::string key,uint32_t sumErr,uint32_t maxErr,int32_t width,int32_t height)130 bool ImageCompressor::CheckImageQuality(std::string key, uint32_t sumErr, uint32_t maxErr, int32_t width, int32_t height)
131 {
132 bool isOk = true;
133 float mse = (float)sumErr / (width * height);
134 float psnr = 10 * log10(255 * 255 / mse);
135 if (maxErr == 0 || psnr == 0 || maxErr > maxErr_ || (int32_t)psnr < psnr_) {
136 isOk = false;
137 std::lock_guard<std::mutex> mLock(recordsMutex_);
138 failedRecords_.insert(key);
139 }
140 LOGI("compress quality %{public}s [%{public}u, %{public}.2f] size(%{public}d×%{public}d) %{public}s",
141 key.c_str(), maxErr, psnr, width, height, isOk ? "ok" : "no");
142 return isOk;
143 }
144
ReleaseResource()145 void ImageCompressor::ReleaseResource()
146 {
147 ACE_FUNCTION_TRACE();
148 clReleaseKernel(kernel_);
149 kernel_ = NULL;
150 clReleaseCommandQueue(queue_);
151 queue_ = NULL;
152 clReleaseContext(context_);
153 context_ = NULL;
154 }
155 #endif // ENABLE_OPENCL
156
GpuCompress(std::string key,SkPixmap & pixmap,int32_t width,int32_t height)157 sk_sp<SkData> ImageCompressor::GpuCompress(std::string key, SkPixmap& pixmap, int32_t width, int32_t height)
158 {
159 #ifdef ENABLE_OPENCL
160 std::lock_guard<std::mutex> lock(instanceMutex_);
161 if (width <= 0 || height <= 0 || !clOk_ || IsFailedImage(key) || width > maxSize_ || height > maxSize_) {
162 return nullptr;
163 }
164 if (!CreateKernel()) {
165 return nullptr;
166 }
167 ACE_SCOPED_TRACE("GpuCompress %d×%d", width, height);
168
169 cl_int err;
170
171 // Number of work items in each local work group
172 int32_t blockX = ceil((width + DIM - 1) / DIM);
173 int32_t blockY = ceil((height + DIM - 1) / DIM);
174 int32_t numBlocks = blockX * blockY;
175 size_t local[] = { DIM, DIM };
176 size_t global[2];
177 global[0] = (width % local[0] == 0 ? width : (width + local[0] - width % local[0]));
178 global[1] = (height % local[1] == 0 ? height : (height + local[1] - height % local[1]));
179
180 size_t astc_size = numBlocks * DIM * DIM;
181
182 cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
183 cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height };
184 cl_mem inputImage = clCreateImage(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
185 &image_format, &desc, const_cast<void*>(pixmap.addr()), &err);
186 cl_mem astcResult = clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, astc_size, NULL, &err);
187 cl_mem partInfos = clCreateBuffer(context_, CL_MEM_COPY_HOST_PTR,
188 sizeof(PartInfo) * parts_.size(), &parts_[0], &err);
189
190 uint32_t* blockErrs = new uint32_t[numBlocks]{0};
191 cl_mem clErrs = clCreateBuffer(context_, CL_MEM_USE_HOST_PTR, sizeof(uint32_t) * numBlocks, blockErrs, &err);
192 err |= clSetKernelArg(kernel_, 0, sizeof(cl_mem), &inputImage);
193 err |= clSetKernelArg(kernel_, 1, sizeof(cl_mem), &astcResult);
194 err |= clSetKernelArg(kernel_, 2, sizeof(cl_mem), &partInfos);
195 err |= clSetKernelArg(kernel_, 3, sizeof(cl_mem), &clErrs);
196
197 err = clEnqueueNDRangeKernel(queue_, kernel_, 2, NULL, global, local, 0, NULL, NULL);
198
199 clFinish(queue_);
200
201 uint32_t max_val = 0, sum_val = 0;
202 err = clEnqueueReadBuffer(queue_, clErrs, CL_TRUE, 0, sizeof(uint32_t) * numBlocks, blockErrs, 0, NULL, NULL);
203 for (int32_t i = 0; i < numBlocks; i++) {
204 sum_val += blockErrs[i];
205 max_val = fmax(max_val, blockErrs[i]);
206 }
207
208 clReleaseMemObject(inputImage);
209 clReleaseMemObject(partInfos);
210 clReleaseMemObject(clErrs);
211 delete[] blockErrs;
212
213 if (!CheckImageQuality(key, sum_val, max_val, width, height)) {
214 clReleaseMemObject(astcResult);
215 return nullptr;
216 }
217
218 auto astc_data = SkData::MakeUninitialized(astc_size);
219 clEnqueueReadBuffer(queue_, astcResult, CL_TRUE, 0, astc_size, astc_data->writable_data(), 0, NULL, NULL);
220 clReleaseMemObject(astcResult);
221 return astc_data;
222 #else
223 return nullptr;
224 #endif // ENABLE_OPENCL
225 }
226
227
ScheduleReleaseTask()228 std::function<void()> ImageCompressor::ScheduleReleaseTask()
229 {
230 std::function<void()> task = [this]() {
231 #ifdef ENABLE_OPENCL
232 if (refCount_ > 0 && clOk_) {
233 refCount_--;
234 if (refCount_ <= 0) {
235 this->ReleaseResource();
236
237 // save failed records
238 std::ofstream saveFile(recordsPath_);
239 if (!saveFile.is_open()) {
240 return;
241 }
242 std::lock_guard<std::mutex> mLock(recordsMutex_);
243 for (auto s : failedRecords_) {
244 saveFile << s << "\n";
245 }
246 saveFile.close();
247 }
248 }
249 #endif // ENABLE_OPENCL
250 };
251 return task;
252 }
253
WriteToFile(std::string srcKey,sk_sp<SkData> compressedData,Size imgSize)254 void ImageCompressor::WriteToFile(std::string srcKey, sk_sp<SkData> compressedData, Size imgSize)
255 {
256 if (!compressedData || srcKey.empty()) {
257 return;
258 }
259 #ifdef ENABLE_OPENCL
260 BackgroundTaskExecutor::GetInstance().PostTask(
261 [srcKey, compressedData, imgSize]() {
262 AstcHeader header;
263 int32_t xsize = imgSize.Width();
264 int32_t ysize = imgSize.Height();
265 header.magic[0] = MAGIC_FILE_CONSTANT & 0xFF;
266 header.magic[1] = (MAGIC_FILE_CONSTANT >> 8) & 0xFF;
267 header.magic[2] = (MAGIC_FILE_CONSTANT >> 16) & 0xFF;
268 header.magic[3] = (MAGIC_FILE_CONSTANT >> 24) & 0xFF;
269 header.blockdimX = DIM;
270 header.blockdimY = DIM;
271 header.blockdimZ = 1;
272 header.xsize[0] = xsize & 0xFF;
273 header.xsize[1] = (xsize >> 8) & 0xFF;
274 header.xsize[2] = (xsize >> 16) & 0xFF;
275 header.ysize[0] = ysize & 0xFF;
276 header.ysize[1] = (ysize >> 8) & 0xFF;
277 header.ysize[2] = (ysize >> 16) & 0xFF;
278 header.zsize[0] = 1;
279 header.zsize[1] = 0;
280 header.zsize[2] = 0;
281 LOGD("astc write file %{public}s size(%{public}d×%{public}d) (%{public}.2f×%{public}.2f)",
282 srcKey.c_str(), xsize, ysize, imgSize.Width(), imgSize.Height());
283
284 int32_t fileSize = compressedData->size() + sizeof(header);
285 sk_sp<SkData> toWrite = SkData::MakeUninitialized(fileSize);
286 uint8_t* toWritePtr = (uint8_t*) toWrite->writable_data();
287 if (memcpy_s(toWritePtr, fileSize, &header, sizeof(header)) != EOK) {
288 LOGE("astc write file failed");
289 return;
290 }
291 if (memcpy_s(toWritePtr + sizeof(header), compressedData->size(),
292 compressedData->data(), compressedData->size()) != EOK) {
293 LOGE("astc write file failed");
294 return;
295 }
296
297 ImageCache::WriteCacheFile(srcKey, toWritePtr, fileSize, ".astc");
298 }, BgTaskPriority::LOW);
299 #endif
300 }
301
StripFileHeader(sk_sp<SkData> fileData)302 sk_sp<SkData> ImageCompressor::StripFileHeader(sk_sp<SkData> fileData)
303 {
304 if (fileData) {
305 auto imageData = SkData::MakeSubset(fileData.get(), sizeof(AstcHeader), fileData->size() - sizeof(AstcHeader));
306 if (!imageData->isEmpty()) {
307 return imageData;
308 }
309 }
310 return nullptr;
311 }
312
/**
 * @brief Bit-mixing hash that drives the procedural partition assignment.
 *
 * All arithmetic is 32-bit unsigned and wraps modulo 2^32.
 *
 * @param seed The hash seed.
 *
 * @return The hashed value.
 */
static uint32_t Hash52(uint32_t seed)
{
    uint32_t h = seed;
    h = h ^ (h >> 15);

    // 0xEEDE0891 is -(2^4 + 1) * (2^7 + 1) * (2^17 - 1) modulo 2^32
    h = h * 0xEEDE0891;
    h = h ^ (h >> 5);
    h = h + (h << 16);
    h = h ^ (h >> 7);
    h = h ^ (h >> 3);
    h = h ^ (h << 6);
    h = h ^ (h >> 17);
    return h;
}
334
335 /**
336 * @brief Select texel assignment for a single coordinate.
337 *
338 * @param seed The seed - the partition index from the block.
339 * @param x The texel X coordinate in the block.
340 * @param y The texel Y coordinate in the block.
341 * @param z The texel Z coordinate in the block.
342 * @param partitionCount The total partition count of this encoding.
343 * @param smallBlock @c true if the blockhas fewer than 32 texels.
344 *
345 * @return The assigned partition index for this texel.
346 */
SelectPartition(int32_t seed,int32_t x,int32_t y,int32_t z,int32_t partitionCount,bool smallBlock)347 static uint8_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, int32_t partitionCount, bool smallBlock)
348 {
349 // For small blocks bias the coordinates to get better distribution
350 if (smallBlock) {
351 x <<= 1;
352 y <<= 1;
353 z <<= 1;
354 }
355
356 seed += (partitionCount - 1) * 1024;
357
358 uint32_t num = Hash52(seed);
359
360 uint8_t seed1 = num & 0xF;
361 uint8_t seed2 = (num >> 4) & 0xF;
362 uint8_t seed3 = (num >> 8) & 0xF;
363 uint8_t seed4 = (num >> 12) & 0xF;
364 uint8_t seed5 = (num >> 16) & 0xF;
365 uint8_t seed6 = (num >> 20) & 0xF;
366 uint8_t seed7 = (num >> 24) & 0xF;
367 uint8_t seed8 = (num >> 28) & 0xF;
368 uint8_t seed9 = (num >> 18) & 0xF;
369 uint8_t seed10 = (num >> 22) & 0xF;
370 uint8_t seed11 = (num >> 26) & 0xF;
371 uint8_t seed12 = ((num >> 30) | (num << 2)) & 0xF;
372
373 // Squaring all the seeds in order to bias their distribution towards lower values.
374 seed1 *= seed1;
375 seed2 *= seed2;
376 seed3 *= seed3;
377 seed4 *= seed4;
378 seed5 *= seed5;
379 seed6 *= seed6;
380 seed7 *= seed7;
381 seed8 *= seed8;
382 seed9 *= seed9;
383 seed10 *= seed10;
384 seed11 *= seed11;
385 seed12 *= seed12;
386
387 int32_t sh1, sh2;
388 if (seed & 1) {
389 sh1 = (seed & 2 ? 4 : 5);
390 sh2 = (partitionCount == 3 ? 6 : 5);
391 } else {
392 sh1 = (partitionCount == 3 ? 6 : 5);
393 sh2 = (seed & 2 ? 4 : 5);
394 }
395
396 int32_t sh3 = (seed & 0x10) ? sh1 : sh2;
397
398 seed1 >>= sh1;
399 seed2 >>= sh2;
400 seed3 >>= sh1;
401 seed4 >>= sh2;
402 seed5 >>= sh1;
403 seed6 >>= sh2;
404 seed7 >>= sh1;
405 seed8 >>= sh2;
406
407 seed9 >>= sh3;
408 seed10 >>= sh3;
409 seed11 >>= sh3;
410 seed12 >>= sh3;
411
412 int32_t a = seed1 * x + seed2 * y + seed11 * z + (num >> 14);
413 int32_t b = seed3 * x + seed4 * y + seed12 * z + (num >> 10);
414 int32_t c = seed5 * x + seed6 * y + seed9 * z + (num >> 6);
415 int32_t d = seed7 * x + seed8 * y + seed10 * z + (num >> 2);
416
417 // Apply the saw
418 a &= 0x3F;
419 b &= 0x3F;
420 c &= 0x3F;
421 d &= 0x3F;
422
423 // Remove some of the components if we are to output < 4 partitions_.
424 if (partitionCount <= 3) {
425 d = 0;
426 }
427
428 if (partitionCount <= 2) {
429 c = 0;
430 }
431
432 if (partitionCount <= 1) {
433 b = 0;
434 }
435
436 uint8_t partition;
437 if (a >= b && a >= c && a >= d) {
438 partition = 0;
439 } else if (b >= c && b >= d) {
440 partition = 1;
441 } else if (c >= d) {
442 partition = 2;
443 } else {
444 partition = 3;
445 }
446
447 return partition;
448 }
449
InitPartitionInfo(PartInfo * partInfos,int32_t part_index,int32_t part_count)450 bool ImageCompressor::InitPartitionInfo(PartInfo *partInfos, int32_t part_index, int32_t part_count)
451 {
452 int32_t texIdx = 0;
453 int32_t counts[4] = {0};
454 for (int32_t y = 0; y < DIM; y++) {
455 for (int32_t x = 0; x < DIM; x++) {
456 int32_t part = SelectPartition(part_index, x, y, 0, part_count, true);
457 partInfos->bitmaps[part] |= 1u << texIdx;
458 counts[part]++;
459 texIdx++;
460 }
461 }
462 int32_t realPartCount = 0;
463 if (counts[0] == 0) {
464 realPartCount = 0;
465 } else if (counts[1] == 0) {
466 realPartCount = 1;
467 } else if (counts[2] == 0) {
468 realPartCount = 2;
469 } else if (counts[3] == 0) {
470 realPartCount = 3;
471 } else {
472 realPartCount = 4;
473 }
474 if (realPartCount == part_count) {
475 return true;
476 }
477 return false;
478 }
479
InitPartition()480 void ImageCompressor::InitPartition()
481 {
482 parts_.clear();
483 int32_t arrSize = sizeof(partitions_) / sizeof(partitions_[0]);
484 for (int32_t i = 0; i < arrSize; i++) {
485 PartInfo p = {};
486 if (InitPartitionInfo(&p, partitions_[i], 2)) {
487 p.partid = partitions_[i];
488 parts_.push_back(p);
489 LOGD("part id:%d %d %d", p.partid, p.bitmaps[0], p.bitmaps[1]);
490 }
491 }
492 compileOption_ = "-D PARTITION_SERACH_MAX=" + std::to_string(parts_.size());
493 }
494
495 #ifdef ENABLE_OPENCL
IsFailedImage(std::string key)496 bool ImageCompressor::IsFailedImage(std::string key)
497 {
498 std::lock_guard<std::mutex> mLock(recordsMutex_);
499 return failedRecords_.find(key) != failedRecords_.end();
500 }
501 #endif
502
InitRecords()503 void ImageCompressor::InitRecords()
504 {
505 recordsPath_ = ImageCache::GetImageCacheFilePath("record") + ".txt";
506 std::ifstream openFile(recordsPath_);
507 if (!openFile.is_open()) {
508 return;
509 }
510 std::string line;
511 std::lock_guard<std::mutex> mLock(recordsMutex_);
512 while (!openFile.eof()) {
513 std::getline(openFile, line);
514 failedRecords_.insert(line);
515 }
516 openFile.close();
517 }
518 #ifdef FUZZTEST
PartDoing()519 void ImageCompressor::PartDoing()
520 {
521 InitPartition();
522 InitRecords();
523 }
524 #endif
525 } // namespace OHOS::Ace
526