• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <cmath>
17 #include <fstream>
18 #ifdef ENABLE_OPENCL
19 #include <securec.h>
20 #endif // ENABLE_OPENCL
21 
22 #include "base/log/ace_trace.h"
23 #include "base/log/log.h"
24 #include "base/thread/background_task_executor.h"
25 #include "base/utils/system_properties.h"
26 #include "core/image/image_cache.h"
27 #include "core/image/image_compressor.h"
28 
29 namespace OHOS::Ace {
/**
 * @brief Constructor-attribute hook that the toolchain runs at load time.
 *
 * Calls OHOS::InitOpenCL() only when the build both enables OpenCL and targets
 * musl; in every other configuration the body is empty.
 */
__attribute__((constructor)) void load(void)
{
#if defined(ENABLE_OPENCL) && defined(__MUSL__)
    OHOS::InitOpenCL();
#endif
}
38 
39 std::shared_ptr<ImageCompressor> ImageCompressor::instance_ = nullptr;
40 std::mutex ImageCompressor::instanceMutex_;
GetInstance()41 std::shared_ptr<ImageCompressor> ImageCompressor::GetInstance()
42 {
43     if (instance_ == nullptr) {
44         std::lock_guard<std::mutex> lock(instanceMutex_);
45         if (instance_ == nullptr) {
46             instance_.reset(new ImageCompressor());
47             instance_->Init();
48         }
49     }
50     return instance_;
51 }
52 
Init()53 void ImageCompressor::Init()
54 {
55 #ifdef ENABLE_OPENCL
56     switch_ = SystemProperties::IsAstcEnabled();
57     if (switch_) {
58         clOk_ = OHOS::InitOpenCL();
59         maxErr_ = SystemProperties::GetAstcMaxError();
60         psnr_ = SystemProperties::GetAstcPsnr();
61         InitPartition();
62         InitRecords();
63     }
64 #endif // ENABLE_OPENCL
65 }
66 
CanCompress()67 bool ImageCompressor::CanCompress()
68 {
69 #ifdef UPLOAD_GPU_DISABLED
70     return false;
71 #else
72     if (switch_ && clOk_) {
73         return true;
74     }
75     return false;
76 #endif
77 }
78 
79 #ifdef ENABLE_OPENCL
LoadShaderBin(cl_context context,cl_device_id device_id)80 cl_program ImageCompressor::LoadShaderBin(cl_context context, cl_device_id device_id)
81 {
82     ACE_FUNCTION_TRACE();
83     std::unique_ptr<FILE, decltype(&fclose)> file(fopen(shader_path_.c_str(), "rb"), fclose);
84     if (!file) {
85         LOGE("load cl shader failed");
86         return nullptr;
87     }
88     auto data = SkData::MakeFromFILE(file.get());
89     if (!data) {
90         return nullptr;
91     }
92     cl_int err;
93     size_t len = data->size();
94     auto ptr = (const unsigned char*) data->data();
95     cl_program p = clCreateProgramWithBinary(context, 1, &device_id, &len, &ptr, NULL, &err);
96     if (err) {
97         return nullptr;
98     }
99     LOGD("load cl shader");
100     return p;
101 }
102 
CreateKernel()103 bool ImageCompressor::CreateKernel()
104 {
105     if (!context_ || !kernel_) {
106         cl_int err;
107         cl_platform_id platform_id;
108         cl_device_id device_id;
109         clGetPlatformIDs(1, &platform_id, NULL);
110         clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
111         context_ = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
112         queue_ = clCreateCommandQueueWithProperties(context_, device_id, 0, &err);
113 
114         cl_program program = LoadShaderBin(context_, device_id);
115         clBuildProgram(program, 1, &device_id, compileOption_.c_str(), NULL, NULL);
116         ACE_SCOPED_TRACE("clCreateKernel");
117         kernel_ = clCreateKernel(program, "astc", &err);
118         clReleaseProgram(program);
119     }
120     if (!context_ || !kernel_ || !queue_) {
121         ReleaseResource();
122         LOGE("build opencl program failed");
123         clOk_ = false;
124         return false;
125     }
126     refCount_++;
127     return true;
128 }
129 
CheckImageQuality(std::string key,uint32_t sumErr,uint32_t maxErr,int32_t width,int32_t height)130 bool ImageCompressor::CheckImageQuality(std::string key, uint32_t sumErr, uint32_t maxErr, int32_t width, int32_t height)
131 {
132     bool isOk = true;
133     float mse = (float)sumErr / (width * height);
134     float psnr = 10 * log10(255 * 255 / mse);
135     if (maxErr == 0 || psnr == 0 || maxErr > maxErr_ || (int32_t)psnr < psnr_) {
136         isOk = false;
137         std::lock_guard<std::mutex> mLock(recordsMutex_);
138         failedRecords_.insert(key);
139     }
140     LOGI("compress quality %{public}s [%{public}u, %{public}.2f] size(%{public}d×%{public}d) %{public}s",
141         key.c_str(), maxErr, psnr, width, height, isOk ? "ok" : "no");
142     return isOk;
143 }
144 
ReleaseResource()145 void ImageCompressor::ReleaseResource()
146 {
147     ACE_FUNCTION_TRACE();
148     clReleaseKernel(kernel_);
149     kernel_ = NULL;
150     clReleaseCommandQueue(queue_);
151     queue_ = NULL;
152     clReleaseContext(context_);
153     context_ = NULL;
154 }
155 #endif // ENABLE_OPENCL
156 
GpuCompress(std::string key,SkPixmap & pixmap,int32_t width,int32_t height)157 sk_sp<SkData> ImageCompressor::GpuCompress(std::string key, SkPixmap& pixmap, int32_t width, int32_t height)
158 {
159 #ifdef ENABLE_OPENCL
160     std::lock_guard<std::mutex> lock(instanceMutex_);
161     if (width <= 0 || height <= 0 || !clOk_ || IsFailedImage(key) || width > maxSize_ || height > maxSize_) {
162         return nullptr;
163     }
164     if (!CreateKernel()) {
165         return nullptr;
166     }
167     ACE_SCOPED_TRACE("GpuCompress %d×%d", width, height);
168 
169     cl_int err;
170 
171     // Number of work items in each local work group
172     int32_t blockX = ceil((width + DIM - 1) / DIM);
173     int32_t blockY = ceil((height + DIM - 1) / DIM);
174     int32_t numBlocks = blockX * blockY;
175     size_t local[] = { DIM, DIM };
176     size_t global[2];
177     global[0] = (width % local[0] == 0 ? width : (width + local[0] - width % local[0]));
178     global[1] = (height % local[1] == 0 ? height : (height + local[1] - height % local[1]));
179 
180     size_t astc_size = numBlocks * DIM * DIM;
181 
182     cl_image_format image_format = { CL_RGBA, CL_UNORM_INT8 };
183     cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height };
184     cl_mem inputImage = clCreateImage(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
185         &image_format, &desc, const_cast<void*>(pixmap.addr()), &err);
186     cl_mem astcResult = clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, astc_size, NULL, &err);
187     cl_mem partInfos = clCreateBuffer(context_, CL_MEM_COPY_HOST_PTR,
188         sizeof(PartInfo) * parts_.size(), &parts_[0], &err);
189 
190     uint32_t* blockErrs = new uint32_t[numBlocks]{0};
191     cl_mem clErrs = clCreateBuffer(context_, CL_MEM_USE_HOST_PTR, sizeof(uint32_t) * numBlocks, blockErrs, &err);
192     err |= clSetKernelArg(kernel_, 0, sizeof(cl_mem), &inputImage);
193     err |= clSetKernelArg(kernel_, 1, sizeof(cl_mem), &astcResult);
194     err |= clSetKernelArg(kernel_, 2, sizeof(cl_mem), &partInfos);
195     err |= clSetKernelArg(kernel_, 3, sizeof(cl_mem), &clErrs);
196 
197     err = clEnqueueNDRangeKernel(queue_, kernel_, 2, NULL, global, local, 0, NULL, NULL);
198 
199     clFinish(queue_);
200 
201     uint32_t max_val = 0, sum_val = 0;
202     err = clEnqueueReadBuffer(queue_, clErrs, CL_TRUE, 0, sizeof(uint32_t) * numBlocks, blockErrs, 0, NULL, NULL);
203     for (int32_t i = 0; i < numBlocks; i++) {
204         sum_val += blockErrs[i];
205         max_val = fmax(max_val, blockErrs[i]);
206     }
207 
208     clReleaseMemObject(inputImage);
209     clReleaseMemObject(partInfos);
210     clReleaseMemObject(clErrs);
211     delete[] blockErrs;
212 
213     if (!CheckImageQuality(key, sum_val, max_val, width, height)) {
214         clReleaseMemObject(astcResult);
215         return nullptr;
216     }
217 
218     auto astc_data = SkData::MakeUninitialized(astc_size);
219     clEnqueueReadBuffer(queue_, astcResult, CL_TRUE, 0, astc_size, astc_data->writable_data(), 0, NULL, NULL);
220     clReleaseMemObject(astcResult);
221     return astc_data;
222 #else
223     return nullptr;
224 #endif // ENABLE_OPENCL
225 }
226 
227 
ScheduleReleaseTask()228 std::function<void()> ImageCompressor::ScheduleReleaseTask()
229 {
230     std::function<void()> task = [this]() {
231 #ifdef ENABLE_OPENCL
232         if (refCount_ > 0 && clOk_) {
233             refCount_--;
234             if (refCount_ <= 0) {
235                 this->ReleaseResource();
236 
237                 // save failed records
238                 std::ofstream saveFile(recordsPath_);
239                 if (!saveFile.is_open()) {
240                     return;
241                 }
242                 std::lock_guard<std::mutex> mLock(recordsMutex_);
243                 for (auto s : failedRecords_) {
244                     saveFile << s << "\n";
245                 }
246                 saveFile.close();
247             }
248         }
249 #endif // ENABLE_OPENCL
250     };
251     return task;
252 }
253 
WriteToFile(std::string srcKey,sk_sp<SkData> compressedData,Size imgSize)254 void ImageCompressor::WriteToFile(std::string srcKey, sk_sp<SkData> compressedData, Size imgSize)
255 {
256     if (!compressedData || srcKey.empty()) {
257         return;
258     }
259 #ifdef ENABLE_OPENCL
260     BackgroundTaskExecutor::GetInstance().PostTask(
261         [srcKey, compressedData, imgSize]() {
262             AstcHeader header;
263             int32_t xsize = imgSize.Width();
264             int32_t ysize = imgSize.Height();
265             header.magic[0] = MAGIC_FILE_CONSTANT & 0xFF;
266             header.magic[1] = (MAGIC_FILE_CONSTANT >> 8) & 0xFF;
267             header.magic[2] = (MAGIC_FILE_CONSTANT >> 16) & 0xFF;
268             header.magic[3] = (MAGIC_FILE_CONSTANT >> 24) & 0xFF;
269             header.blockdimX = DIM;
270             header.blockdimY = DIM;
271             header.blockdimZ = 1;
272             header.xsize[0] = xsize & 0xFF;
273             header.xsize[1] = (xsize >> 8) & 0xFF;
274             header.xsize[2] = (xsize >> 16) & 0xFF;
275             header.ysize[0] = ysize & 0xFF;
276             header.ysize[1] = (ysize >> 8) & 0xFF;
277             header.ysize[2] = (ysize >> 16) & 0xFF;
278             header.zsize[0] = 1;
279             header.zsize[1] = 0;
280             header.zsize[2] = 0;
281             LOGD("astc write file %{public}s size(%{public}d×%{public}d) (%{public}.2f×%{public}.2f)",
282                 srcKey.c_str(), xsize, ysize, imgSize.Width(), imgSize.Height());
283 
284             int32_t fileSize = compressedData->size() + sizeof(header);
285             sk_sp<SkData> toWrite = SkData::MakeUninitialized(fileSize);
286             uint8_t* toWritePtr = (uint8_t*) toWrite->writable_data();
287             if (memcpy_s(toWritePtr, fileSize, &header, sizeof(header)) != EOK) {
288                 LOGE("astc write file failed");
289                 return;
290             }
291             if (memcpy_s(toWritePtr + sizeof(header), compressedData->size(),
292                     compressedData->data(), compressedData->size()) != EOK) {
293                 LOGE("astc write file failed");
294                 return;
295             }
296 
297             ImageCache::WriteCacheFile(srcKey, toWritePtr, fileSize, ".astc");
298         }, BgTaskPriority::LOW);
299 #endif
300 }
301 
StripFileHeader(sk_sp<SkData> fileData)302 sk_sp<SkData> ImageCompressor::StripFileHeader(sk_sp<SkData> fileData)
303 {
304     if (fileData) {
305         auto imageData = SkData::MakeSubset(fileData.get(), sizeof(AstcHeader), fileData->size() - sizeof(AstcHeader));
306         if (!imageData->isEmpty()) {
307             return imageData;
308         }
309     }
310     return nullptr;
311 }
312 
/**
 * @brief Mix a 32-bit seed; used for procedural partition assignment.
 *
 * All arithmetic is unsigned 32-bit and wraps on overflow.
 *
 * @param seed The value to hash.
 *
 * @return The mixed value.
 */
static uint32_t Hash52(uint32_t seed)
{
    uint32_t mixed = seed;
    mixed = mixed ^ (mixed >> 15);

    // The multiplier equals 2^32 - (2^4 + 1) * (2^7 + 1) * (2^17 - 1).
    mixed = mixed * 0xEEDE0891;
    mixed = mixed ^ (mixed >> 5);
    mixed = mixed + (mixed << 16);
    mixed = mixed ^ (mixed >> 7);
    mixed = mixed ^ (mixed >> 3);
    mixed = mixed ^ (mixed << 6);
    mixed = mixed ^ (mixed >> 17);
    return mixed;
}
334 
335 /**
336  * @brief Select texel assignment for a single coordinate.
337  *
338  * @param seed              The seed - the partition index from the block.
339  * @param x                 The texel X coordinate in the block.
340  * @param y                 The texel Y coordinate in the block.
341  * @param z                 The texel Z coordinate in the block.
342  * @param partitionCount   The total partition count of this encoding.
343  * @param smallBlock       @c true if the blockhas fewer than 32 texels.
344  *
345  * @return The assigned partition index for this texel.
346  */
SelectPartition(int32_t seed,int32_t x,int32_t y,int32_t z,int32_t partitionCount,bool smallBlock)347 static uint8_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, int32_t partitionCount, bool smallBlock)
348 {
349     // For small blocks bias the coordinates to get better distribution
350     if (smallBlock) {
351         x <<= 1;
352         y <<= 1;
353         z <<= 1;
354     }
355 
356     seed += (partitionCount - 1) * 1024;
357 
358     uint32_t num = Hash52(seed);
359 
360     uint8_t seed1 = num & 0xF;
361     uint8_t seed2 = (num >> 4) & 0xF;
362     uint8_t seed3 = (num >> 8) & 0xF;
363     uint8_t seed4 = (num >> 12) & 0xF;
364     uint8_t seed5 = (num >> 16) & 0xF;
365     uint8_t seed6 = (num >> 20) & 0xF;
366     uint8_t seed7 = (num >> 24) & 0xF;
367     uint8_t seed8 = (num >> 28) & 0xF;
368     uint8_t seed9 = (num >> 18) & 0xF;
369     uint8_t seed10 = (num >> 22) & 0xF;
370     uint8_t seed11 = (num >> 26) & 0xF;
371     uint8_t seed12 = ((num >> 30) | (num << 2)) & 0xF;
372 
373     // Squaring all the seeds in order to bias their distribution towards lower values.
374     seed1 *= seed1;
375     seed2 *= seed2;
376     seed3 *= seed3;
377     seed4 *= seed4;
378     seed5 *= seed5;
379     seed6 *= seed6;
380     seed7 *= seed7;
381     seed8 *= seed8;
382     seed9 *= seed9;
383     seed10 *= seed10;
384     seed11 *= seed11;
385     seed12 *= seed12;
386 
387     int32_t sh1, sh2;
388     if (seed & 1) {
389         sh1 = (seed & 2 ? 4 : 5);
390         sh2 = (partitionCount == 3 ? 6 : 5);
391     } else {
392         sh1 = (partitionCount == 3 ? 6 : 5);
393         sh2 = (seed & 2 ? 4 : 5);
394     }
395 
396     int32_t sh3 = (seed & 0x10) ? sh1 : sh2;
397 
398     seed1 >>= sh1;
399     seed2 >>= sh2;
400     seed3 >>= sh1;
401     seed4 >>= sh2;
402     seed5 >>= sh1;
403     seed6 >>= sh2;
404     seed7 >>= sh1;
405     seed8 >>= sh2;
406 
407     seed9 >>= sh3;
408     seed10 >>= sh3;
409     seed11 >>= sh3;
410     seed12 >>= sh3;
411 
412     int32_t a = seed1 * x + seed2 * y + seed11 * z + (num >> 14);
413     int32_t b = seed3 * x + seed4 * y + seed12 * z + (num >> 10);
414     int32_t c = seed5 * x + seed6 * y + seed9 * z + (num >> 6);
415     int32_t d = seed7 * x + seed8 * y + seed10 * z + (num >> 2);
416 
417     // Apply the saw
418     a &= 0x3F;
419     b &= 0x3F;
420     c &= 0x3F;
421     d &= 0x3F;
422 
423     // Remove some of the components if we are to output < 4 partitions_.
424     if (partitionCount <= 3) {
425         d = 0;
426     }
427 
428     if (partitionCount <= 2) {
429         c = 0;
430     }
431 
432     if (partitionCount <= 1) {
433         b = 0;
434     }
435 
436     uint8_t partition;
437     if (a >= b && a >= c && a >= d) {
438         partition = 0;
439     } else if (b >= c && b >= d) {
440         partition = 1;
441     } else if (c >= d) {
442         partition = 2;
443     } else {
444         partition = 3;
445     }
446 
447     return partition;
448 }
449 
InitPartitionInfo(PartInfo * partInfos,int32_t part_index,int32_t part_count)450 bool ImageCompressor::InitPartitionInfo(PartInfo *partInfos, int32_t part_index, int32_t part_count)
451 {
452     int32_t texIdx = 0;
453     int32_t counts[4] = {0};
454     for (int32_t y = 0; y < DIM; y++) {
455         for (int32_t x = 0; x < DIM; x++) {
456             int32_t part = SelectPartition(part_index, x, y, 0, part_count, true);
457             partInfos->bitmaps[part] |= 1u << texIdx;
458             counts[part]++;
459             texIdx++;
460         }
461     }
462     int32_t realPartCount = 0;
463     if (counts[0] == 0) {
464         realPartCount = 0;
465     } else if (counts[1] == 0) {
466         realPartCount = 1;
467     } else if (counts[2] == 0) {
468         realPartCount = 2;
469     } else if (counts[3] == 0) {
470         realPartCount = 3;
471     } else {
472         realPartCount = 4;
473     }
474     if (realPartCount == part_count) {
475         return true;
476     }
477     return false;
478 }
479 
InitPartition()480 void ImageCompressor::InitPartition()
481 {
482     parts_.clear();
483     int32_t arrSize = sizeof(partitions_) / sizeof(partitions_[0]);
484     for (int32_t i = 0; i < arrSize; i++) {
485         PartInfo p = {};
486         if (InitPartitionInfo(&p, partitions_[i], 2)) {
487             p.partid = partitions_[i];
488             parts_.push_back(p);
489             LOGD("part id:%d %d %d", p.partid, p.bitmaps[0], p.bitmaps[1]);
490         }
491     }
492     compileOption_ = "-D PARTITION_SERACH_MAX=" + std::to_string(parts_.size());
493 }
494 
495 #ifdef ENABLE_OPENCL
IsFailedImage(std::string key)496 bool ImageCompressor::IsFailedImage(std::string key)
497 {
498     std::lock_guard<std::mutex> mLock(recordsMutex_);
499     return failedRecords_.find(key) != failedRecords_.end();
500 }
501 #endif
502 
InitRecords()503 void ImageCompressor::InitRecords()
504 {
505     recordsPath_ = ImageCache::GetImageCacheFilePath("record") + ".txt";
506     std::ifstream openFile(recordsPath_);
507     if (!openFile.is_open()) {
508         return;
509     }
510     std::string line;
511     std::lock_guard<std::mutex> mLock(recordsMutex_);
512     while (!openFile.eof()) {
513         std::getline(openFile, line);
514         failedRecords_.insert(line);
515     }
516     openFile.close();
517 }
518 #ifdef FUZZTEST
PartDoing()519 void ImageCompressor::PartDoing()
520 {
521     InitPartition();
522     InitRecords();
523 }
524 #endif
525 } // namespace OHOS::Ace
526