1 /*
2 * Copyright (c) 2022-2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifdef ENABLE_OPENCL
17 #include <securec.h>
18 #endif // ENABLE_OPENCL
19
20 #include "include/core/SkData.h"
21 #include "image/bitmap.h"
22 #include "utils/data.h"
23
24 #include "base/log/log.h"
25 #include "base/log/ace_trace.h"
26 #include "base/thread/background_task_executor.h"
27 #include "core/image/image_file_cache.h"
28 #include "core/image/image_compressor.h"
29
30 namespace OHOS::Ace {
// Library constructor: the constructor attribute makes this run before main().
// It only does work when built with OpenCL support for a musl target, where it
// asks the OpenCL wrapper to initialise itself.
__attribute__((constructor)) void load()
{
#if defined(ENABLE_OPENCL) && defined(__MUSL__)
    OHOS::InitOpenCL();
#endif
}
39
40 std::shared_ptr<ImageCompressor> ImageCompressor::instance_ = nullptr;
41 std::mutex ImageCompressor::instanceMutex_;
GetInstance()42 std::shared_ptr<ImageCompressor> ImageCompressor::GetInstance()
43 {
44 if (instance_ == nullptr) {
45 std::lock_guard<std::mutex> lock(instanceMutex_);
46 if (instance_ == nullptr) {
47 instance_.reset(new ImageCompressor());
48 instance_->Init();
49 }
50 }
51 return instance_;
52 }
53
Init()54 void ImageCompressor::Init()
55 {
56 #ifdef ENABLE_OPENCL
57 switch_ = SystemProperties::IsAstcEnabled();
58 if (switch_) {
59 clOk_ = OHOS::InitOpenCL();
60 maxErr_ = SystemProperties::GetAstcMaxError();
61 psnr_ = SystemProperties::GetAstcPsnr();
62 InitPartition();
63 InitRecords();
64 }
65 #endif // ENABLE_OPENCL
66 }
67
CanCompress()68 bool ImageCompressor::CanCompress()
69 {
70 #ifdef UPLOAD_GPU_DISABLED
71 return false;
72 #else
73 if (switch_ && clOk_) {
74 return true;
75 }
76 return false;
77 #endif
78 }
79
80 #ifdef ENABLE_OPENCL
LoadShaderBin(cl_context context,cl_device_id device_id)81 cl_program ImageCompressor::LoadShaderBin(cl_context context, cl_device_id device_id)
82 {
83 ACE_FUNCTION_TRACE();
84 std::unique_ptr<FILE, decltype(&fclose)> file(fopen(shader_path_.c_str(), "rb"), fclose);
85 if (!file) {
86 LOGE("load cl shader failed");
87 return nullptr;
88 }
89 auto data = SkData::MakeFromFILE(file.get());
90 if (!data) {
91 return nullptr;
92 }
93 cl_int err;
94 size_t len = data->size();
95 auto ptr = (const unsigned char*) data->data();
96 cl_program p = clCreateProgramWithBinary(context, 1, &device_id, &len, &ptr, NULL, &err);
97 if (err) {
98 return nullptr;
99 }
100 return p;
101 }
102
CreateKernel()103 bool ImageCompressor::CreateKernel()
104 {
105 if (!context_ || !kernel_) {
106 cl_int err;
107 cl_platform_id platform_id;
108 cl_device_id device_id;
109 clGetPlatformIDs(1, &platform_id, NULL);
110 clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
111 context_ = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
112 queue_ = clCreateCommandQueueWithProperties(context_, device_id, 0, &err);
113
114 cl_program program = LoadShaderBin(context_, device_id);
115 clBuildProgram(program, 1, &device_id, compileOption_.c_str(), NULL, NULL);
116 ACE_SCOPED_TRACE("clCreateKernel");
117 kernel_ = clCreateKernel(program, "astc", &err);
118 clReleaseProgram(program);
119 }
120 if (!context_ || !kernel_ || !queue_) {
121 ReleaseResource();
122 LOGE("build opencl program failed");
123 clOk_ = false;
124 return false;
125 }
126 refCount_++;
127 return true;
128 }
129
CheckImageQuality(std::string key,uint32_t sumErr,uint32_t maxErr,int32_t width,int32_t height)130 bool ImageCompressor::CheckImageQuality(std::string key, uint32_t sumErr, uint32_t maxErr, int32_t width, int32_t height)
131 {
132 bool isOk = true;
133 float mse = (float)sumErr / (width * height);
134 float psnr = 10 * log10(255 * 255 / mse);
135 if (maxErr == 0 || psnr == 0 || maxErr > static_cast<uint32_t>(maxErr_)|| static_cast<int32_t>(psnr) < psnr_) {
136 isOk = false;
137 std::lock_guard<std::mutex> mLock(recordsMutex_);
138 failedRecords_.insert(key);
139 }
140 LOGI("compress quality %{private}s [%{public}u, %{public}.2f] size(%{public}d×%{public}d) %{public}s",
141 key.c_str(), maxErr, psnr, width, height, isOk ? "ok" : "no");
142 return isOk;
143 }
144
ReleaseResource()145 void ImageCompressor::ReleaseResource()
146 {
147 ACE_FUNCTION_TRACE();
148 clReleaseKernel(kernel_);
149 kernel_ = NULL;
150 clReleaseCommandQueue(queue_);
151 queue_ = NULL;
152 clReleaseContext(context_);
153 context_ = NULL;
154 }
155 #endif // ENABLE_OPENCL
156
GpuCompress(std::string key,RSBitmap & bitmap,int32_t width,int32_t height)157 std::shared_ptr<RSData> ImageCompressor::GpuCompress(std::string key, RSBitmap& bitmap, int32_t width, int32_t height)
158 {
159 #ifdef ENABLE_OPENCL
160 std::lock_guard<std::mutex> lock(instanceMutex_);
161 if (width <= 0 || height <= 0 || !clOk_ || IsFailedImage(key) || width > maxSize_ || height > maxSize_) {
162 return nullptr;
163 }
164 if (!CreateKernel()) {
165 return nullptr;
166 }
167 ACE_SCOPED_TRACE("GpuCompress %d×%d", width, height);
168
169 cl_int err;
170
171 // Number of work items in each local work group
172 int32_t blockX = ceil((width + DIM - 1) / DIM);
173 int32_t blockY = ceil((height + DIM - 1) / DIM);
174 int32_t numBlocks = blockX * blockY;
175 uint32_t uWidth = static_cast<uint32_t>(width);
176 uint32_t uHeight = static_cast<uint32_t>(height);
177 size_t local[] = { DIM, DIM };
178 size_t global[2];
179 global[0] = (uWidth % local[0] == 0 ? uWidth : (uWidth + local[0] - uWidth % local[0]));
180 global[1] = (uHeight % local[1] == 0 ? uHeight : (uHeight + local[1] - uHeight % local[1]));
181
182 size_t astc_size = static_cast<size_t>(numBlocks) * DIM * DIM;
183
184 cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
185 cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height };
186 cl_mem inputImage = clCreateImage(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
187 &image_format, &desc, bitmap.GetPixels(), &err);
188 cl_mem astcResult = clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, astc_size, NULL, &err);
189 cl_mem partInfos = clCreateBuffer(context_, CL_MEM_COPY_HOST_PTR,
190 sizeof(PartInfo) * parts_.size(), &parts_[0], &err);
191
192 uint32_t* blockErrs = new uint32_t[numBlocks]{0};
193 cl_mem clErrs = clCreateBuffer(context_, CL_MEM_USE_HOST_PTR, sizeof(uint32_t) * numBlocks, blockErrs, &err);
194 err |= clSetKernelArg(kernel_, 0, sizeof(cl_mem), &inputImage);
195 err |= clSetKernelArg(kernel_, 1, sizeof(cl_mem), &astcResult);
196 err |= clSetKernelArg(kernel_, 2, sizeof(cl_mem), &partInfos);
197 err |= clSetKernelArg(kernel_, 3, sizeof(cl_mem), &clErrs);
198
199 err = clEnqueueNDRangeKernel(queue_, kernel_, 2, NULL, global, local, 0, NULL, NULL);
200
201 clFinish(queue_);
202
203 uint32_t max_val = 0;
204 uint32_t sum_val = 0;
205 err = clEnqueueReadBuffer(queue_, clErrs, CL_TRUE, 0, sizeof(uint32_t) * numBlocks, blockErrs, 0, NULL, NULL);
206 for (int32_t i = 0; i < numBlocks; i++) {
207 sum_val += blockErrs[i];
208 max_val = fmax(max_val, blockErrs[i]);
209 }
210
211 clReleaseMemObject(inputImage);
212 clReleaseMemObject(partInfos);
213 clReleaseMemObject(clErrs);
214 delete[] blockErrs;
215
216 if (!CheckImageQuality(key, sum_val, max_val, width, height)) {
217 clReleaseMemObject(astcResult);
218 return nullptr;
219 }
220
221 auto astc_data = std::make_shared<RSData>();
222 astc_data->BuildUninitialized(astc_size);
223 clEnqueueReadBuffer(queue_, astcResult, CL_TRUE, 0, astc_size, astc_data->WritableData(), 0, NULL, NULL);
224 clReleaseMemObject(astcResult);
225 return astc_data;
226 #else
227 return nullptr;
228 #endif // ENABLE_OPENCL
229 }
230
231
ScheduleReleaseTask()232 std::function<void()> ImageCompressor::ScheduleReleaseTask()
233 {
234 #ifdef ENABLE_OPENCL
235 std::function<void()> task = [this]() {
236 if (refCount_ > 0 && clOk_) {
237 refCount_--;
238 if (refCount_ <= 0) {
239 this->ReleaseResource();
240
241 // save failed records
242 std::ofstream saveFile(recordsPath_);
243 if (!saveFile.is_open()) {
244 return;
245 }
246 std::lock_guard<std::mutex> mLock(recordsMutex_);
247 for (auto s : failedRecords_) {
248 saveFile << s << "\n";
249 }
250 saveFile.close();
251 }
252 }
253 #else
254 std::function<void()> task = []() {
255 #endif // ENABLE_OPENCL
256 };
257
258 return task;
259 }
260
261 void ImageCompressor::WriteToFile(std::string srcKey, std::shared_ptr<RSData> compressedData, Size imgSize)
262 {
263 if (!compressedData || srcKey.empty()) {
264 return;
265 }
266 #ifdef ENABLE_OPENCL
267 BackgroundTaskExecutor::GetInstance().PostTask(
268 [srcKey, compressedData, imgSize]() {
269 AstcHeader header;
270 uint32_t xsize = static_cast<uint32_t>(imgSize.Width());
271 uint32_t ysize = static_cast<uint32_t>(imgSize.Height());
272 header.magic[0] = MAGIC_FILE_CONSTANT & 0xFF;
273 header.magic[1] = (MAGIC_FILE_CONSTANT >> 8) & 0xFF;
274 header.magic[2] = (MAGIC_FILE_CONSTANT >> 16) & 0xFF;
275 header.magic[3] = (MAGIC_FILE_CONSTANT >> 24) & 0xFF;
276 header.blockdimX = DIM;
277 header.blockdimY = DIM;
278 header.blockdimZ = 1;
279 header.xsize[0] = xsize & 0xFF;
280 header.xsize[1] = (xsize >> 8) & 0xFF;
281 header.xsize[2] = (xsize >> 16) & 0xFF;
282 header.ysize[0] = ysize & 0xFF;
283 header.ysize[1] = (ysize >> 8) & 0xFF;
284 header.ysize[2] = (ysize >> 16) & 0xFF;
285 header.zsize[0] = 1;
286 header.zsize[1] = 0;
287 header.zsize[2] = 0;
288
289 int32_t fileSize = compressedData->GetSize() + sizeof(header);
290 auto toWrite = std::shared_ptr<RSData>();
291 toWrite->BuildUninitialized(fileSize);
292 uint8_t* toWritePtr = (uint8_t*) toWrite->WritableData();
293 if (memcpy_s(toWritePtr, fileSize, &header, sizeof(header)) != EOK) {
294 LOGE("astc write file failed");
295 return;
296 }
297 if (memcpy_s(toWritePtr + sizeof(header), compressedData->GetSize(),
298 compressedData->GetData(), compressedData->GetSize()) != EOK) {
299 LOGE("astc write file failed");
300 return;
301 }
302
303 ImageFileCache::GetInstance().WriteCacheFile(srcKey, toWritePtr, fileSize, ".astc");
304 }, BgTaskPriority::LOW);
305 #endif
306 }
307
308 std::shared_ptr<RSData> ImageCompressor::StripFileHeader(std::shared_ptr<RSData> fileData)
309 {
310 if (fileData) {
311 if (fileData->GetSize() <= sizeof(AstcHeader)) {
312 return nullptr;
313 }
314 auto imageData = std::make_shared<RSData>();
315 if (imageData->BuildWithCopy(static_cast<const uint8_t*>(fileData->GetData()) + sizeof(AstcHeader),
316 fileData->GetSize() - sizeof(AstcHeader))) {
317 return imageData;
318 }
319 }
320 return nullptr;
321 }
322
/**
 * @brief Hash function used for procedural partition assignment.
 *
 * A fixed sequence of xor-shift, multiply and add-shift steps on 32-bit unsigned
 * arithmetic (wrapping modulo 2^32).
 *
 * @param seed The hash seed.
 *
 * @return The hashed value.
 */
static uint32_t Hash52(uint32_t seed)
{
    uint32_t mixed = seed ^ (seed >> 15);

    // 0xEEDE0891 is -(2^4 + 1) * (2^7 + 1) * (2^17 - 1) modulo 2^32
    mixed *= 0xEEDE0891u;
    mixed = mixed ^ (mixed >> 5);
    mixed = mixed + (mixed << 16);
    mixed = mixed ^ (mixed >> 7);
    mixed = mixed ^ (mixed >> 3);
    mixed = mixed ^ (mixed << 6);
    return mixed ^ (mixed >> 17);
}
344
345 /**
346 * @brief Select texel assignment for a single coordinate.
347 *
348 * @param seed The seed - the partition index from the block.
349 * @param x The texel X coordinate in the block.
350 * @param y The texel Y coordinate in the block.
351 * @param z The texel Z coordinate in the block.
352 * @param partitionCount The total partition count of this encoding.
353 * @param smallBlock @c true if the blockhas fewer than 32 texels.
354 *
355 * @return The assigned partition index for this texel.
356 */
357 static uint8_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, int32_t partitionCount, bool smallBlock)
358 {
359 // For small blocks bias the coordinates to get better distribution
360 if (smallBlock) {
361 x *= 2;
362 y *= 2;
363 z *= 2;
364 }
365
366 seed += (partitionCount - 1) * 1024;
367
368 uint32_t num = Hash52(seed);
369
370 uint8_t seed1 = num & 0xF;
371 uint8_t seed2 = (num >> 4) & 0xF;
372 uint8_t seed3 = (num >> 8) & 0xF;
373 uint8_t seed4 = (num >> 12) & 0xF;
374 uint8_t seed5 = (num >> 16) & 0xF;
375 uint8_t seed6 = (num >> 20) & 0xF;
376 uint8_t seed7 = (num >> 24) & 0xF;
377 uint8_t seed8 = (num >> 28) & 0xF;
378 uint8_t seed9 = (num >> 18) & 0xF;
379 uint8_t seed10 = (num >> 22) & 0xF;
380 uint8_t seed11 = (num >> 26) & 0xF;
381 uint8_t seed12 = ((num >> 30) | (num << 2)) & 0xF;
382
383 // Squaring all the seeds in order to bias their distribution towards lower values.
384 seed1 *= seed1;
385 seed2 *= seed2;
386 seed3 *= seed3;
387 seed4 *= seed4;
388 seed5 *= seed5;
389 seed6 *= seed6;
390 seed7 *= seed7;
391 seed8 *= seed8;
392 seed9 *= seed9;
393 seed10 *= seed10;
394 seed11 *= seed11;
395 seed12 *= seed12;
396
397 int32_t sh1, sh2;
398 if (seed & 1) {
399 sh1 = (seed & 2 ? 4 : 5);
400 sh2 = (partitionCount == 3 ? 6 : 5);
401 } else {
402 sh1 = (partitionCount == 3 ? 6 : 5);
403 sh2 = (seed & 2 ? 4 : 5);
404 }
405
406 int32_t sh3 = (seed & 0x10) ? sh1 : sh2;
407
408 seed1 >>= sh1;
409 seed2 >>= sh2;
410 seed3 >>= sh1;
411 seed4 >>= sh2;
412 seed5 >>= sh1;
413 seed6 >>= sh2;
414 seed7 >>= sh1;
415 seed8 >>= sh2;
416
417 seed9 >>= sh3;
418 seed10 >>= sh3;
419 seed11 >>= sh3;
420 seed12 >>= sh3;
421
422 uint32_t a = static_cast<uint32_t>(seed1 * x + seed2 * y + seed11 * z + (num >> 14));
423 uint32_t b = static_cast<uint32_t>(seed3 * x + seed4 * y + seed12 * z + (num >> 10));
424 uint32_t c = static_cast<uint32_t>(seed5 * x + seed6 * y + seed9 * z + (num >> 6));
425 uint32_t d = static_cast<uint32_t>(seed7 * x + seed8 * y + seed10 * z + (num >> 2));
426
427 // Apply the saw
428 a &= 0x3F;
429 b &= 0x3F;
430 c &= 0x3F;
431 d &= 0x3F;
432
433 // Remove some of the components if we are to output < 4 partitions_.
434 if (partitionCount <= 3) {
435 d = 0;
436 }
437
438 if (partitionCount <= 2) {
439 c = 0;
440 }
441
442 if (partitionCount <= 1) {
443 b = 0;
444 }
445
446 uint8_t partition;
447 if (a >= b && a >= c && a >= d) {
448 partition = 0;
449 } else if (b >= c && b >= d) {
450 partition = 1;
451 } else if (c >= d) {
452 partition = 2;
453 } else {
454 partition = 3;
455 }
456
457 return partition;
458 }
459
460 bool ImageCompressor::InitPartitionInfo(PartInfo *partInfos, int32_t part_index, int32_t part_count)
461 {
462 int32_t texIdx = 0;
463 int32_t counts[4] = {0};
464 for (int32_t y = 0; y < DIM; y++) {
465 for (int32_t x = 0; x < DIM; x++) {
466 int32_t part = SelectPartition(part_index, x, y, 0, part_count, true);
467 partInfos->bitmaps[part] |= 1u << texIdx;
468 counts[part]++;
469 texIdx++;
470 }
471 }
472 int32_t realPartCount = 0;
473 if (counts[0] == 0) {
474 realPartCount = 0;
475 } else if (counts[1] == 0) {
476 realPartCount = 1;
477 } else if (counts[2] == 0) {
478 realPartCount = 2;
479 } else if (counts[3] == 0) {
480 realPartCount = 3;
481 } else {
482 realPartCount = 4;
483 }
484 if (realPartCount == part_count) {
485 return true;
486 }
487 return false;
488 }
489
490 void ImageCompressor::InitPartition()
491 {
492 parts_.clear();
493 int32_t arrSize = sizeof(partitions_) / sizeof(partitions_[0]);
494 for (int32_t i = 0; i < arrSize; i++) {
495 PartInfo p = {};
496 if (InitPartitionInfo(&p, partitions_[i], 2)) {
497 p.partid = partitions_[i];
498 parts_.push_back(p);
499 }
500 }
501 compileOption_ = "-D PARTITION_SERACH_MAX=" + std::to_string(parts_.size());
502 }
503
504 #ifdef ENABLE_OPENCL
505 bool ImageCompressor::IsFailedImage(std::string key)
506 {
507 std::lock_guard<std::mutex> mLock(recordsMutex_);
508 return failedRecords_.find(key) != failedRecords_.end();
509 }
510 #endif
511
512 void ImageCompressor::InitRecords()
513 {
514 recordsPath_ = ImageFileCache::GetInstance().GetImageCacheFilePath("record") + ".txt";
515 std::ifstream openFile(recordsPath_);
516 if (!openFile.is_open()) {
517 return;
518 }
519 std::string line;
520 std::lock_guard<std::mutex> mLock(recordsMutex_);
521 while (!openFile.eof()) {
522 std::getline(openFile, line);
523 failedRecords_.insert(line);
524 }
525 openFile.close();
526 }
527 #ifdef FUZZTEST
528 void ImageCompressor::PartDoing()
529 {
530 InitPartition();
531 InitRecords();
532 }
533 #endif
534 } // namespace OHOS::Ace
535