1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2024 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Functions for codec library front-end.
20 */
21
22 #include "astcenc.h"
23 #include "astcenccli_internal.h"
24
25 #if defined(_WIN32)
26 #include <io.h>
27 #define isatty _isatty
28 #else
29 #include <unistd.h>
30 #endif
31 #include <cassert>
32 #include <cstring>
33 #include <functional>
34 #include <string>
35 #include <sstream>
36 #include <vector>
37 #include <memory>
38
39 /* ============================================================================
40 Data structure definitions
41 ============================================================================ */
42
43 typedef unsigned int astcenc_operation;
44
45 struct mode_entry
46 {
47 const char* opt;
48 astcenc_operation operation;
49 astcenc_profile decode_mode;
50 };
51
52 /* ============================================================================
53 Constants and literals
54 ============================================================================ */
55
/** @brief Stage bit: a compressed image must be loaded. */
static const unsigned int ASTCENC_STAGE_LD_COMP = 1u << 0;

/** @brief Stage bit: a compressed image must be stored. */
static const unsigned int ASTCENC_STAGE_ST_COMP = 1u << 1;

/** @brief Stage bit: an uncompressed image must be loaded. */
static const unsigned int ASTCENC_STAGE_LD_NCOMP = 1u << 2;

/** @brief Stage bit: an uncompressed image must be stored. */
static const unsigned int ASTCENC_STAGE_ST_NCOMP = 1u << 3;

/** @brief Stage bit: an image must be compressed. */
static const unsigned int ASTCENC_STAGE_COMPRESS = 1u << 4;

/** @brief Stage bit: an image must be decompressed. */
static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1u << 5;

/** @brief Stage bit: an image must be compared with the original input. */
static const unsigned int ASTCENC_STAGE_COMPARE = 1u << 6;
76
77 /** @brief Operation indicating an unknown request (should never happen). */
78 static const astcenc_operation ASTCENC_OP_UNKNOWN = 0;
79
80 /** @brief Operation indicating the user wants to print long-form help text and version info. */
81 static const astcenc_operation ASTCENC_OP_HELP = 1 << 7;
82
83 /** @brief Operation indicating the user wants to print short-form help text and version info. */
84 static const astcenc_operation ASTCENC_OP_VERSION = 1 << 8;
85
86 /** @brief Operation indicating the user wants to compress and store an image. */
87 static const astcenc_operation ASTCENC_OP_COMPRESS =
88 ASTCENC_STAGE_LD_NCOMP |
89 ASTCENC_STAGE_COMPRESS |
90 ASTCENC_STAGE_ST_COMP;
91
92 /** @brief Operation indicating the user wants to decompress and store an image. */
93 static const astcenc_operation ASTCENC_OP_DECOMPRESS =
94 ASTCENC_STAGE_LD_COMP |
95 ASTCENC_STAGE_DECOMPRESS |
96 ASTCENC_STAGE_ST_NCOMP;
97
98 /** @brief Operation indicating the user wants to test a compression setting on an image. */
99 static const astcenc_operation ASTCENC_OP_TEST =
100 ASTCENC_STAGE_LD_NCOMP |
101 ASTCENC_STAGE_COMPRESS |
102 ASTCENC_STAGE_DECOMPRESS |
103 ASTCENC_STAGE_COMPARE |
104 ASTCENC_STAGE_ST_NCOMP;
105
/**
 * @brief Image preprocessing tasks that can run prior to encoding.
 */
enum astcenc_preprocess
{
    /** @brief No image preprocessing. */
    ASTCENC_PP_NONE = 0,

    /** @brief Normal vector unit-length normalization. */
    ASTCENC_PP_NORMALIZE = 1,

    /** @brief Color data alpha premultiplication. */
    ASTCENC_PP_PREMULTIPLY = 2
};
118
119 /** @brief Decode table for command line operation modes. */
120 static const mode_entry modes[] {
121 {"-cl", ASTCENC_OP_COMPRESS, ASTCENC_PRF_LDR},
122 {"-dl", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR},
123 {"-tl", ASTCENC_OP_TEST, ASTCENC_PRF_LDR},
124 {"-cs", ASTCENC_OP_COMPRESS, ASTCENC_PRF_LDR_SRGB},
125 {"-ds", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR_SRGB},
126 {"-ts", ASTCENC_OP_TEST, ASTCENC_PRF_LDR_SRGB},
127 {"-ch", ASTCENC_OP_COMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
128 {"-dh", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
129 {"-th", ASTCENC_OP_TEST, ASTCENC_PRF_HDR_RGB_LDR_A},
130 {"-cH", ASTCENC_OP_COMPRESS, ASTCENC_PRF_HDR},
131 {"-dH", ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR},
132 {"-tH", ASTCENC_OP_TEST, ASTCENC_PRF_HDR},
133 {"-h", ASTCENC_OP_HELP, ASTCENC_PRF_HDR},
134 {"-help", ASTCENC_OP_HELP, ASTCENC_PRF_HDR},
135 {"-v", ASTCENC_OP_VERSION, ASTCENC_PRF_HDR},
136 {"-version", ASTCENC_OP_VERSION, ASTCENC_PRF_HDR}
137 };
138
139 /**
140 * @brief Compression workload definition for worker threads.
141 */
142 struct compression_workload
143 {
144 astcenc_context* context;
145 astcenc_image* image;
146 astcenc_swizzle swizzle;
147 uint8_t* data_out;
148 size_t data_len;
149 astcenc_error error;
150 #if QUALITY_CONTROL
151 bool calQualityEnable;
152 int32_t *mse[RGBA_COM];
153 #endif
154 };
155
156 /**
157 * @brief Decompression workload definition for worker threads.
158 */
159 struct decompression_workload
160 {
161 astcenc_context* context;
162 uint8_t* data;
163 size_t data_len;
164 astcenc_image* image_out;
165 astcenc_swizzle swizzle;
166 astcenc_error error;
167 };
168
/**
 * @brief Callback emitting a progress bar.
 *
 * Prints a fixed-width text bar followed by the numeric value to stdout. The
 * line ends with a carriage return, not a newline, and stdout is flushed.
 *
 * @param value   The progress value, as a percentage. Values outside of the
 *                0 to 100 range (and NaN) are clamped when sizing the bar so
 *                that the bar buffer cannot be overrun; the numeric readout
 *                prints the value as supplied.
 */
extern "C" void progress_emitter(
    float value
) {
    const unsigned int bar_size = 25;

    // Each bar segment represents 4%. Clamp before converting so that
    // out-of-range and NaN inputs can neither overflow the buffer nor hit an
    // undefined float-to-integer conversion; NaN fails both comparisons and
    // leaves the bar empty.
    unsigned int parts = 0;
    if (value >= 100.0f)
    {
        parts = bar_size;
    }
    else if (value > 0.0f)
    {
        parts = static_cast<unsigned int>(value / 4.0f);
    }

    // Opening bracket + bar_size segments + closing bracket + NUL terminator
    char buffer[bar_size + 3];
    buffer[0] = '[';

    for (unsigned int i = 0; i < bar_size; i++)
    {
        buffer[i + 1] = (i < parts) ? '=' : ' ';
    }

    buffer[bar_size + 1] = ']';
    buffer[bar_size + 2] = '\0';

    printf("    Progress: %s %03.1f%%\r", buffer, static_cast<double>(value));
    fflush(stdout);
}
197
/**
 * @brief Test if a string argument is a well formed float.
 *
 * The whole string must be consumed by the parse; leading whitespace and
 * trailing characters both cause the test to fail.
 *
 * @param target   The string to test.
 *
 * @return @c true if the string parses completely as a float.
 */
static bool is_float(
    std::string target
) {
    std::istringstream parser(target);
    float parsed;

    // Disable whitespace skipping so that leading whitespace is a parse error
    if (!(parser >> std::noskipws >> parsed))
    {
        return false;
    }

    // A successful parse must also have consumed the entire string
    return parser.eof();
}
213
/**
 * @brief Test if a string ends with a given suffix.
 *
 * @param str      The string to test.
 * @param suffix   The suffix to look for; an empty suffix always matches.
 *
 * @return @c true if @c str ends with @c suffix.
 */
static bool ends_with(
    const std::string& str,
    const std::string& suffix
) {
    // A suffix longer than the string can never match
    if (suffix.size() > str.size())
    {
        return false;
    }

    // Compare the tail of the string, from the suffix start to the end
    const size_t tail_start = str.size() - suffix.size();
    return str.compare(tail_start, std::string::npos, suffix) == 0;
}
224
225 /**
226 * @brief Runner callback function for a compression worker thread.
227 *
228 * @param thread_count The number of threads in the worker pool.
229 * @param thread_id The index of this thread in the worker pool.
230 * @param payload The parameters for this thread.
231 */
compression_workload_runner(int thread_count,int thread_id,void * payload)232 static void compression_workload_runner(
233 int thread_count,
234 int thread_id,
235 void* payload
236 ) {
237 (void)thread_count;
238
239 compression_workload* work = static_cast<compression_workload*>(payload);
240 astcenc_error error = astcenc_compress_image(
241 work->context, work->image, &work->swizzle,
242 work->data_out, work->data_len,
243 #if QUALITY_CONTROL
244 work->calQualityEnable, work->mse,
245 #endif
246 thread_id);
247
248 // This is a racy update, so which error gets returned is a random, but it
249 // will reliably report an error if an error occurs
250 if (error != ASTCENC_SUCCESS)
251 {
252 work->error = error;
253 }
254 }
255
256 /**
257 * @brief Runner callback function for a decompression worker thread.
258 *
259 * @param thread_count The number of threads in the worker pool.
260 * @param thread_id The index of this thread in the worker pool.
261 * @param payload The parameters for this thread.
262 */
decompression_workload_runner(int thread_count,int thread_id,void * payload)263 static void decompression_workload_runner(
264 int thread_count,
265 int thread_id,
266 void* payload
267 ) {
268 (void)thread_count;
269
270 decompression_workload* work = static_cast<decompression_workload*>(payload);
271 astcenc_error error = astcenc_decompress_image(
272 work->context, work->data, work->data_len,
273 work->image_out, &work->swizzle, thread_id);
274
275 // This is a racy update, so which error gets returned is a random, but it
276 // will reliably report an error if an error occurs
277 if (error != ASTCENC_SUCCESS)
278 {
279 work->error = error;
280 }
281 }
282
/**
 * @brief Utility to generate a slice file name from a pattern.
 *
 * Convert "foo/bar.png" in to "foo/bar_<slice>.png"
 *
 * Only a '.' in the final path component counts as the extension separator,
 * so a dotted directory name such as "foo.d/bar" is reported as having no
 * extension rather than being split inside the directory name. Both '/' and
 * '\\' are treated as path separators.
 *
 * @param      basename   The base pattern; must contain a file extension.
 * @param      index      The slice index.
 * @param[out] error      Set to true on error (no extension found), false on
 *                        success.
 *
 * @return The slice file name, or an empty string on error.
 */
static std::string get_slice_filename(
    const std::string& basename,
    unsigned int index,
    bool& error
) {
    const size_t ext_sep = basename.find_last_of('.');
    const size_t dir_sep = basename.find_last_of("/\\");

    // The extension separator must exist and sit after any directory separator
    const bool has_extension =
        (ext_sep != std::string::npos) &&
        ((dir_sep == std::string::npos) || (ext_sep > dir_sep));

    if (!has_extension)
    {
        error = true;
        return "";
    }

    error = false;
    return basename.substr(0, ext_sep) + "_" + std::to_string(index) + basename.substr(ext_sep);
}
312
313 /**
314 * @brief Load a non-astc image file from memory.
315 *
316 * @param filename The file to load, or a pattern for array loads.
317 * @param dim_z The number of slices to load.
318 * @param y_flip Should this image be Y flipped?
319 * @param[out] is_hdr Is the loaded image HDR?
320 * @param[out] component_count The number of components in the loaded image.
321 *
322 * @return The astc image file, or nullptr on error.
323 */
load_uncomp_file(const char * filename,unsigned int dim_z,bool y_flip,bool & is_hdr,unsigned int & component_count)324 static astcenc_image* load_uncomp_file(
325 const char* filename,
326 unsigned int dim_z,
327 bool y_flip,
328 bool& is_hdr,
329 unsigned int& component_count
330 ) {
331 astcenc_image *image = nullptr;
332
333 // For a 2D image just load the image directly
334 if (dim_z == 1)
335 {
336 image = load_ncimage(filename, y_flip, is_hdr, component_count);
337 }
338 else
339 {
340 bool slice_is_hdr;
341 unsigned int slice_component_count;
342 astcenc_image* slice = nullptr;
343 std::vector<astcenc_image*> slices;
344
345 // For a 3D image load an array of slices
346 for (unsigned int image_index = 0; image_index < dim_z; image_index++)
347 {
348 bool error;
349 std::string slice_name = get_slice_filename(filename, image_index, error);
350 if (error)
351 {
352 print_error("ERROR: Image pattern does not contain file extension: %s\n", filename);
353 break;
354 }
355
356 slice = load_ncimage(slice_name.c_str(), y_flip,
357 slice_is_hdr, slice_component_count);
358 if (!slice)
359 {
360 break;
361 }
362
363 slices.push_back(slice);
364
365 // Check it is not a 3D image
366 if (slice->dim_z != 1)
367 {
368 print_error("ERROR: Image arrays do not support 3D sources: %s\n", slice_name.c_str());
369 break;
370 }
371
372 // Check slices are consistent with each other
373 if (image_index != 0)
374 {
375 if ((is_hdr != slice_is_hdr) || (component_count != slice_component_count))
376 {
377 print_error("ERROR: Image array[0] and [%d] are different formats\n", image_index);
378 break;
379 }
380
381 if ((slices[0]->dim_x != slice->dim_x) ||
382 (slices[0]->dim_y != slice->dim_y) ||
383 (slices[0]->dim_z != slice->dim_z))
384 {
385 print_error("ERROR: Image array[0] and [%d] are different dimensions\n", image_index);
386 break;
387 }
388 }
389 else
390 {
391 is_hdr = slice_is_hdr;
392 component_count = slice_component_count;
393 }
394 }
395
396 // If all slices loaded correctly then repack them into a single image
397 if (slices.size() == dim_z)
398 {
399 unsigned int dim_x = slices[0]->dim_x;
400 unsigned int dim_y = slices[0]->dim_y;
401 int bitness = is_hdr ? 16 : 8;
402 int slice_size = dim_x * dim_y;
403
404 image = alloc_image(bitness, dim_x, dim_y, dim_z);
405
406 // Combine 2D source images into one 3D image
407 for (unsigned int z = 0; z < dim_z; z++)
408 {
409 if (image->data_type == ASTCENC_TYPE_U8)
410 {
411 uint8_t* data8 = static_cast<uint8_t*>(image->data[z]);
412 uint8_t* data8src = static_cast<uint8_t*>(slices[z]->data[0]);
413 size_t copy_size = slice_size * 4 * sizeof(uint8_t);
414 memcpy(data8, data8src, copy_size);
415 }
416 else if (image->data_type == ASTCENC_TYPE_F16)
417 {
418 uint16_t* data16 = static_cast<uint16_t*>(image->data[z]);
419 uint16_t* data16src = static_cast<uint16_t*>(slices[z]->data[0]);
420 size_t copy_size = slice_size * 4 * sizeof(uint16_t);
421 memcpy(data16, data16src, copy_size);
422 }
423 else // if (image->data_type == ASTCENC_TYPE_F32)
424 {
425 assert(image->data_type == ASTCENC_TYPE_F32);
426 float* data32 = static_cast<float*>(image->data[z]);
427 float* data32src = static_cast<float*>(slices[z]->data[0]);
428 size_t copy_size = slice_size * 4 * sizeof(float);
429 memcpy(data32, data32src, copy_size);
430 }
431 }
432 }
433
434 for (auto &i : slices)
435 {
436 free_image(i);
437 }
438 }
439
440 return image;
441 }
442
443 /**
444 * @brief Parse the command line.
445 *
446 * @param argc Command line argument count.
447 * @param[in] argv Command line argument vector.
448 * @param[out] operation Codec operation mode.
449 * @param[out] profile Codec color profile.
450 *
451 * @return 0 if everything is okay, 1 if there is some error
452 */
parse_commandline_options(int argc,char ** argv,astcenc_operation & operation,astcenc_profile & profile)453 static int parse_commandline_options(
454 int argc,
455 char **argv,
456 astcenc_operation& operation,
457 astcenc_profile& profile
458 ) {
459 assert(argc >= 2); (void)argc;
460
461 profile = ASTCENC_PRF_LDR;
462 operation = ASTCENC_OP_UNKNOWN;
463
464 int modes_count = sizeof(modes) / sizeof(modes[0]);
465 for (int i = 0; i < modes_count; i++)
466 {
467 if (!strcmp(modes[i].opt, argv[1]))
468 {
469 operation = modes[i].operation;
470 profile = modes[i].decode_mode;
471 break;
472 }
473 }
474
475 if (operation == ASTCENC_OP_UNKNOWN)
476 {
477 print_error("ERROR: Unrecognized operation '%s'\n", argv[1]);
478 return 1;
479 }
480
481 return 0;
482 }
483
484 /**
485 * @brief Initialize the astcenc_config
486 *
487 * @param argc Command line argument count.
488 * @param[in] argv Command line argument vector.
489 * @param operation Codec operation mode.
490 * @param[out] profile Codec color profile.
491 * @param comp_image Compressed image if a decompress operation.
492 * @param[out] preprocess Image preprocess operation.
493 * @param[out] config Codec configuration.
494 *
495 * @return 0 if everything is okay, 1 if there is some error
496 */
init_astcenc_config(int argc,char ** argv,astcenc_profile profile,astcenc_operation operation,astc_compressed_image & comp_image,astcenc_preprocess & preprocess,astcenc_config & config)497 static int init_astcenc_config(
498 int argc,
499 char **argv,
500 astcenc_profile profile,
501 astcenc_operation operation,
502 astc_compressed_image& comp_image,
503 astcenc_preprocess& preprocess,
504 astcenc_config& config
505 ) {
506 unsigned int block_x = 0;
507 unsigned int block_y = 0;
508 unsigned int block_z = 1;
509
510 // For decode the block size is set by the incoming image.
511 if (operation == ASTCENC_OP_DECOMPRESS)
512 {
513 block_x = comp_image.block_x;
514 block_y = comp_image.block_y;
515 block_z = comp_image.block_z;
516 }
517
518 float quality = 0.0f;
519 preprocess = ASTCENC_PP_NONE;
520
521 // parse the command line's encoding options.
522 int argidx = 4;
523 if (operation & ASTCENC_STAGE_COMPRESS)
524 {
525 // Read and decode block size
526 if (argc < 5)
527 {
528 print_error("ERROR: Block size must be specified\n");
529 return 1;
530 }
531
532 int cnt2D, cnt3D;
533 int dimensions = sscanf(argv[4], "%ux%u%nx%u%n",
534 &block_x, &block_y, &cnt2D, &block_z, &cnt3D);
535 // Character after the last match should be a NUL
536 if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D])))
537 {
538 print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
539 return 1;
540 }
541
542 // Read and decode search quality
543 if (argc < 6)
544 {
545 print_error("ERROR: Search quality level must be specified\n");
546 return 1;
547 }
548
549 if (!strcmp(argv[5], "-fastest"))
550 {
551 quality = ASTCENC_PRE_FASTEST;
552 }
553 else if (!strcmp(argv[5], "-fast"))
554 {
555 quality = ASTCENC_PRE_FAST;
556 }
557 else if (!strcmp(argv[5], "-medium"))
558 {
559 quality = ASTCENC_PRE_MEDIUM;
560 }
561 else if (!strcmp(argv[5], "-thorough"))
562 {
563 quality = ASTCENC_PRE_THOROUGH;
564 }
565 else if (!strcmp(argv[5], "-verythorough"))
566 {
567 quality = ASTCENC_PRE_VERYTHOROUGH;
568 }
569 else if (!strcmp(argv[5], "-exhaustive"))
570 {
571 quality = ASTCENC_PRE_EXHAUSTIVE;
572 }
573 else if (is_float(argv[5]))
574 {
575 quality = static_cast<float>(atof(argv[5]));
576 }
577 else
578 {
579 print_error("ERROR: Search quality/preset '%s' is invalid\n", argv[5]);
580 return 1;
581 }
582
583 argidx = 6;
584 }
585
586 unsigned int flags = 0;
587
588 // Gather the flags that we need
589 while (argidx < argc)
590 {
591 if (!strcmp(argv[argidx], "-a"))
592 {
593 // Skip over the data value for now
594 argidx++;
595 flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT;
596 }
597 else if (!strcmp(argv[argidx], "-normal"))
598 {
599 flags |= ASTCENC_FLG_MAP_NORMAL;
600 }
601 else if (!strcmp(argv[argidx], "-decode_unorm8"))
602 {
603 flags |= ASTCENC_FLG_USE_DECODE_UNORM8;
604 }
605 else if (!strcmp(argv[argidx], "-rgbm"))
606 {
607 // Skip over the data value for now
608 argidx++;
609 flags |= ASTCENC_FLG_MAP_RGBM;
610 }
611 else if (!strcmp(argv[argidx], "-perceptual"))
612 {
613 flags |= ASTCENC_FLG_USE_PERCEPTUAL;
614 }
615 else if (!strcmp(argv[argidx], "-pp-normalize"))
616 {
617 if (preprocess != ASTCENC_PP_NONE)
618 {
619 print_error("ERROR: Only a single image preprocess can be used\n");
620 return 1;
621 }
622 preprocess = ASTCENC_PP_NORMALIZE;
623 }
624 else if (!strcmp(argv[argidx], "-pp-premultiply"))
625 {
626 if (preprocess != ASTCENC_PP_NONE)
627 {
628 print_error("ERROR: Only a single image preprocess can be used\n");
629 return 1;
630 }
631 preprocess = ASTCENC_PP_PREMULTIPLY;
632 }
633 argidx ++;
634 }
635
636 #if defined(ASTCENC_DECOMPRESS_ONLY)
637 flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
638 #else
639 // Decompression can skip some memory allocation, but need full tables
640 if (operation == ASTCENC_OP_DECOMPRESS)
641 {
642 flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
643 }
644 // Compression and test passes can skip some decimation initialization
645 // as we know we are decompressing images that were compressed using the
646 // same settings and heuristics ...
647 else
648 {
649 flags |= ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
650 }
651 #endif
652
653 astcenc_error status = astcenc_config_init(profile, block_x, block_y, block_z,
654 quality, flags, &config);
655 if (status == ASTCENC_ERR_BAD_BLOCK_SIZE)
656 {
657 print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
658 return 1;
659 }
660 else if (status == ASTCENC_ERR_BAD_DECODE_MODE)
661 {
662 print_error("ERROR: Decode_unorm8 is not supported by HDR profiles\n", argv[4]);
663 return 1;
664 }
665 else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
666 {
667 print_error("ERROR: astcenc must not be compiled with -ffast-math\n");
668 return 1;
669 }
670 else if (status != ASTCENC_SUCCESS)
671 {
672 print_error("ERROR: Init config failed with %s\n", astcenc_get_error_string(status));
673 return 1;
674 }
675
676 return 0;
677 }
678
679 /**
680 * @brief Edit the astcenc_config
681 *
682 * @param argc Command line argument count.
683 * @param[in] argv Command line argument vector.
684 * @param operation Codec operation.
685 * @param[out] cli_config Command line config.
686 * @param[in,out] config Codec configuration.
687 *
688 * @return 0 if everything is OK, 1 if there is some error
689 */
edit_astcenc_config(int argc,char ** argv,const astcenc_operation operation,cli_config_options & cli_config,astcenc_config & config)690 static int edit_astcenc_config(
691 int argc,
692 char **argv,
693 const astcenc_operation operation,
694 cli_config_options& cli_config,
695 astcenc_config& config
696 ) {
697
698 int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4;
699 config.privateProfile = HIGH_QUALITY_PROFILE;
700 while (argidx < argc)
701 {
702 if (!strcmp(argv[argidx], "-silent"))
703 {
704 argidx++;
705 cli_config.silentmode = 1;
706 }
707 else if (!strcmp(argv[argidx], "-cw"))
708 {
709 argidx += 5;
710 if (argidx > argc)
711 {
712 print_error("ERROR: -cw switch with less than 4 arguments\n");
713 return 1;
714 }
715
716 config.cw_r_weight = static_cast<float>(atof(argv[argidx - 4]));
717 config.cw_g_weight = static_cast<float>(atof(argv[argidx - 3]));
718 config.cw_b_weight = static_cast<float>(atof(argv[argidx - 2]));
719 config.cw_a_weight = static_cast<float>(atof(argv[argidx - 1]));
720 }
721 else if (!strcmp(argv[argidx], "-a"))
722 {
723 argidx += 2;
724 if (argidx > argc)
725 {
726 print_error("ERROR: -a switch with no argument\n");
727 return 1;
728 }
729
730 config.a_scale_radius = atoi(argv[argidx - 1]);
731 }
732 else if (!strcmp(argv[argidx], "-esw"))
733 {
734 argidx += 2;
735 if (argidx > argc)
736 {
737 print_error("ERROR: -esw switch with no argument\n");
738 return 1;
739 }
740
741 if (strlen(argv[argidx - 1]) != 4)
742 {
743 print_error("ERROR: -esw pattern does not contain 4 characters\n");
744 return 1;
745 }
746
747 astcenc_swz swizzle_components[4];
748 for (int i = 0; i < 4; i++)
749 {
750 switch (argv[argidx - 1][i])
751 {
752 case 'r':
753 swizzle_components[i] = ASTCENC_SWZ_R;
754 break;
755 case 'g':
756 swizzle_components[i] = ASTCENC_SWZ_G;
757 break;
758 case 'b':
759 swizzle_components[i] = ASTCENC_SWZ_B;
760 break;
761 case 'a':
762 swizzle_components[i] = ASTCENC_SWZ_A;
763 break;
764 case '0':
765 swizzle_components[i] = ASTCENC_SWZ_0;
766 break;
767 case '1':
768 swizzle_components[i] = ASTCENC_SWZ_1;
769 break;
770 default:
771 print_error("ERROR: -esw component '%c' is not valid\n", argv[argidx - 1][i]);
772 return 1;
773 }
774 }
775
776 cli_config.swz_encode.r = swizzle_components[0];
777 cli_config.swz_encode.g = swizzle_components[1];
778 cli_config.swz_encode.b = swizzle_components[2];
779 cli_config.swz_encode.a = swizzle_components[3];
780 }
781 else if (!strcmp(argv[argidx], "-ssw"))
782 {
783 argidx += 2;
784 if (argidx > argc)
785 {
786 print_error("ERROR: -ssw switch with no argument\n");
787 return 1;
788 }
789
790 size_t char_count = strlen(argv[argidx - 1]);
791 if (char_count == 0)
792 {
793 print_error("ERROR: -ssw pattern contains no characters\n");
794 return 1;
795 }
796
797 if (char_count > 4)
798 {
799 print_error("ERROR: -ssw pattern contains more than 4 characters\n");
800 return 1;
801 }
802
803 bool found_r = false;
804 bool found_g = false;
805 bool found_b = false;
806 bool found_a = false;
807
808 for (size_t i = 0; i < char_count; i++)
809 {
810 switch (argv[argidx - 1][i])
811 {
812 case 'r':
813 found_r = true;
814 break;
815 case 'g':
816 found_g = true;
817 break;
818 case 'b':
819 found_b = true;
820 break;
821 case 'a':
822 found_a = true;
823 break;
824 default:
825 print_error("ERROR: -ssw component '%c' is not valid\n", argv[argidx - 1][i]);
826 return 1;
827 }
828 }
829
830 config.cw_r_weight = found_r ? 1.0f : 0.0f;
831 config.cw_g_weight = found_g ? 1.0f : 0.0f;
832 config.cw_b_weight = found_b ? 1.0f : 0.0f;
833 config.cw_a_weight = found_a ? 1.0f : 0.0f;
834 }
835 else if (!strcmp(argv[argidx], "-dsw"))
836 {
837 argidx += 2;
838 if (argidx > argc)
839 {
840 print_error("ERROR: -dsw switch with no argument\n");
841 return 1;
842 }
843
844 if (strlen(argv[argidx - 1]) != 4)
845 {
846 print_error("ERROR: -dsw switch does not contain 4 characters\n");
847 return 1;
848 }
849
850 astcenc_swz swizzle_components[4];
851 for (int i = 0; i < 4; i++)
852 {
853 switch (argv[argidx - 1][i])
854 {
855 case 'r':
856 swizzle_components[i] = ASTCENC_SWZ_R;
857 break;
858 case 'g':
859 swizzle_components[i] = ASTCENC_SWZ_G;
860 break;
861 case 'b':
862 swizzle_components[i] = ASTCENC_SWZ_B;
863 break;
864 case 'a':
865 swizzle_components[i] = ASTCENC_SWZ_A;
866 break;
867 case '0':
868 swizzle_components[i] = ASTCENC_SWZ_0;
869 break;
870 case '1':
871 swizzle_components[i] = ASTCENC_SWZ_1;
872 break;
873 case 'z':
874 swizzle_components[i] = ASTCENC_SWZ_Z;
875 break;
876 default:
877 print_error("ERROR: ERROR: -dsw component '%c' is not valid\n", argv[argidx - 1][i]);
878 return 1;
879 }
880 }
881
882 cli_config.swz_decode.r = swizzle_components[0];
883 cli_config.swz_decode.g = swizzle_components[1];
884 cli_config.swz_decode.b = swizzle_components[2];
885 cli_config.swz_decode.a = swizzle_components[3];
886 }
887 // presets begin here
888 else if (!strcmp(argv[argidx], "-normal"))
889 {
890 argidx++;
891
892 cli_config.swz_encode.r = ASTCENC_SWZ_R;
893 cli_config.swz_encode.g = ASTCENC_SWZ_R;
894 cli_config.swz_encode.b = ASTCENC_SWZ_R;
895 cli_config.swz_encode.a = ASTCENC_SWZ_G;
896
897 cli_config.swz_decode.r = ASTCENC_SWZ_R;
898 cli_config.swz_decode.g = ASTCENC_SWZ_A;
899 cli_config.swz_decode.b = ASTCENC_SWZ_Z;
900 cli_config.swz_decode.a = ASTCENC_SWZ_1;
901 }
902 else if (!strcmp(argv[argidx], "-rgbm"))
903 {
904 argidx += 2;
905 if (argidx > argc)
906 {
907 print_error("ERROR: -rgbm switch with no argument\n");
908 return 1;
909 }
910
911 config.rgbm_m_scale = static_cast<float>(atof(argv[argidx - 1]));
912 config.cw_a_weight = 2.0f * config.rgbm_m_scale;
913 }
914 else if (!strcmp(argv[argidx], "-decode_unorm8"))
915 {
916 argidx++;
917 }
918 else if (!strcmp(argv[argidx], "-perceptual"))
919 {
920 argidx++;
921 }
922 else if (!strcmp(argv[argidx], "-pp-normalize"))
923 {
924 argidx++;
925 }
926 else if (!strcmp(argv[argidx], "-pp-premultiply"))
927 {
928 argidx++;
929 }
930 else if (!strcmp(argv[argidx], "-blockmodelimit"))
931 {
932 argidx += 2;
933 if (argidx > argc)
934 {
935 print_error("ERROR: -blockmodelimit switch with no argument\n");
936 return 1;
937 }
938
939 config.tune_block_mode_limit = atoi(argv[argidx - 1]);
940 }
941 else if (!strcmp(argv[argidx], "-partitioncountlimit"))
942 {
943 argidx += 2;
944 if (argidx > argc)
945 {
946 print_error("ERROR: -partitioncountlimit switch with no argument\n");
947 return 1;
948 }
949
950 config.tune_partition_count_limit = atoi(argv[argidx - 1]);
951 }
952 else if (!strcmp(argv[argidx], "-2partitionindexlimit"))
953 {
954 argidx += 2;
955 if (argidx > argc)
956 {
957 print_error("ERROR: -2partitionindexlimit switch with no argument\n");
958 return 1;
959 }
960
961 config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
962 }
963 else if (!strcmp(argv[argidx], "-3partitionindexlimit"))
964 {
965 argidx += 2;
966 if (argidx > argc)
967 {
968 print_error("ERROR: -3partitionindexlimit switch with no argument\n");
969 return 1;
970 }
971
972 config.tune_3partition_index_limit = atoi(argv[argidx - 1]);
973 }
974 else if (!strcmp(argv[argidx], "-4partitionindexlimit"))
975 {
976 argidx += 2;
977 if (argidx > argc)
978 {
979 print_error("ERROR: -4partitionindexlimit switch with no argument\n");
980 return 1;
981 }
982
983 config.tune_4partition_index_limit = atoi(argv[argidx - 1]);
984 }
985 else if (!strcmp(argv[argidx], "-2partitioncandidatelimit"))
986 {
987 argidx += 2;
988 if (argidx > argc)
989 {
990 print_error("ERROR: -2partitioncandidatelimit switch with no argument\n");
991 return 1;
992 }
993
994 config.tune_2partitioning_candidate_limit = atoi(argv[argidx - 1]);
995 }
996 else if (!strcmp(argv[argidx], "-3partitioncandidatelimit"))
997 {
998 argidx += 2;
999 if (argidx > argc)
1000 {
1001 print_error("ERROR: -3partitioncandidatelimit switch with no argument\n");
1002 return 1;
1003 }
1004
1005 config.tune_3partitioning_candidate_limit = atoi(argv[argidx - 1]);
1006 }
1007 else if (!strcmp(argv[argidx], "-4partitioncandidatelimit"))
1008 {
1009 argidx += 2;
1010 if (argidx > argc)
1011 {
1012 print_error("ERROR: -4partitioncandidatelimit switch with no argument\n");
1013 return 1;
1014 }
1015
1016 config.tune_4partitioning_candidate_limit = atoi(argv[argidx - 1]);
1017 }
1018 else if (!strcmp(argv[argidx], "-dblimit"))
1019 {
1020 argidx += 2;
1021 if (argidx > argc)
1022 {
1023 print_error("ERROR: -dblimit switch with no argument\n");
1024 return 1;
1025 }
1026
1027 if ((config.profile == ASTCENC_PRF_LDR) || (config.profile == ASTCENC_PRF_LDR_SRGB))
1028 {
1029 config.tune_db_limit = static_cast<float>(atof(argv[argidx - 1]));
1030 }
1031 }
1032 else if (!strcmp(argv[argidx], "-2partitionlimitfactor"))
1033 {
1034 argidx += 2;
1035 if (argidx > argc)
1036 {
1037 print_error("ERROR: -2partitionlimitfactor switch with no argument\n");
1038 return 1;
1039 }
1040
1041 config.tune_2partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1042 }
1043 else if (!strcmp(argv[argidx], "-3partitionlimitfactor"))
1044 {
1045 argidx += 2;
1046 if (argidx > argc)
1047 {
1048 print_error("ERROR: -3partitionlimitfactor switch with no argument\n");
1049 return 1;
1050 }
1051
1052 config.tune_3partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1053 }
1054 else if (!strcmp(argv[argidx], "-2planelimitcorrelation"))
1055 {
1056 argidx += 2;
1057 if (argidx > argc)
1058 {
1059 print_error("ERROR: -2planelimitcorrelation switch with no argument\n");
1060 return 1;
1061 }
1062
1063 config.tune_2plane_early_out_limit_correlation = static_cast<float>(atof(argv[argidx - 1]));
1064 }
1065 else if (!strcmp(argv[argidx], "-refinementlimit"))
1066 {
1067 argidx += 2;
1068 if (argidx > argc)
1069 {
1070 print_error("ERROR: -refinementlimit switch with no argument\n");
1071 return 1;
1072 }
1073
1074 config.tune_refinement_limit = atoi(argv[argidx - 1]);
1075 }
1076 else if (!strcmp(argv[argidx], "-candidatelimit"))
1077 {
1078 argidx += 2;
1079 if (argidx > argc)
1080 {
1081 print_error("ERROR: -candidatelimit switch with no argument\n");
1082 return 1;
1083 }
1084
1085 config.tune_candidate_limit = atoi(argv[argidx - 1]);
1086 }
1087 else if (!strcmp(argv[argidx], "-j"))
1088 {
1089 argidx += 2;
1090 if (argidx > argc)
1091 {
1092 print_error("ERROR: -j switch with no argument\n");
1093 return 1;
1094 }
1095
1096 cli_config.thread_count = atoi(argv[argidx - 1]);
1097 }
1098 else if (!strcmp(argv[argidx], "-repeats"))
1099 {
1100 argidx += 2;
1101 if (argidx > argc)
1102 {
1103 print_error("ERROR: -repeats switch with no argument\n");
1104 return 1;
1105 }
1106
1107 cli_config.repeat_count = atoi(argv[argidx - 1]);
1108 if (cli_config.repeat_count <= 0)
1109 {
1110 print_error("ERROR: -repeats value must be at least one\n");
1111 return 1;
1112 }
1113 }
1114 else if (!strcmp(argv[argidx], "-yflip"))
1115 {
1116 argidx++;
1117 cli_config.y_flip = 1;
1118 }
1119 else if (!strcmp(argv[argidx], "-mpsnr"))
1120 {
1121 argidx += 3;
1122 if (argidx > argc)
1123 {
1124 print_error("ERROR: -mpsnr switch with less than 2 arguments\n");
1125 return 1;
1126 }
1127
1128 cli_config.low_fstop = atoi(argv[argidx - 2]);
1129 cli_config.high_fstop = atoi(argv[argidx - 1]);
1130 if (cli_config.high_fstop < cli_config.low_fstop)
1131 {
1132 print_error("ERROR: -mpsnr switch <low> is greater than the <high>\n");
1133 return 1;
1134 }
1135 }
1136 // Option: Encode a 3D image from a sequence of 2D images.
1137 else if (!strcmp(argv[argidx], "-zdim"))
1138 {
1139 // Only supports compressing
1140 if (!(operation & ASTCENC_STAGE_COMPRESS))
1141 {
1142 print_error("ERROR: -zdim switch is only valid for compression\n");
1143 return 1;
1144 }
1145
1146 // Image depth must be specified.
1147 if (argidx + 2 > argc)
1148 {
1149 print_error("ERROR: -zdim switch with no argument\n");
1150 return 1;
1151 }
1152 argidx++;
1153
1154 // Read array size (image depth).
1155 if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0)
1156 {
1157 print_error("ERROR: -zdim size '%s' is invalid\n", argv[argidx]);
1158 return 1;
1159 }
1160
1161 if ((cli_config.array_size > 1) && (config.block_z == 1))
1162 {
1163 print_error("ERROR: -zdim with 3D input data for a 2D output format\n");
1164 return 1;
1165 }
1166 argidx++;
1167 }
1168 #if defined(ASTCENC_DIAGNOSTICS)
1169 else if (!strcmp(argv[argidx], "-dtrace"))
1170 {
1171 argidx += 2;
1172 if (argidx > argc)
1173 {
1174 print_error("ERROR: -dtrace switch with no argument\n");
1175 return 1;
1176 }
1177
1178 config.trace_file_path = argv[argidx - 1];
1179 }
1180 #endif
1181 else if (!strcmp(argv[argidx], "-privateProfile"))
1182 {
1183 argidx += 2; // skip 2 chatacters to get next parameter
1184 config.privateProfile = static_cast<QualityProfile>(atoi(argv[argidx - 1]));
1185 }
1186 else if (!strcmp(argv[argidx], "-dimage"))
1187 {
1188 argidx += 1;
1189 cli_config.diagnostic_images = true;
1190 }
1191 else // check others as well
1192 {
1193 print_error("ERROR: Argument '%s' not recognized\n", argv[argidx]);
1194 return 1;
1195 }
1196 }
1197
1198 if (cli_config.thread_count <= 0)
1199 {
1200 cli_config.thread_count = get_cpu_count();
1201 }
1202
1203 #if defined(ASTCENC_DIAGNOSTICS)
1204 // Force single threaded for diagnostic builds
1205 cli_config.thread_count = 1;
1206
1207 if (!config.trace_file_path)
1208 {
1209 print_error("ERROR: Diagnostics builds must set -dtrace\n");
1210 return 1;
1211 }
1212 #endif
1213
1214 return 0;
1215 }
1216
1217 /**
1218 * @brief Print the config settings in a human readable form.
1219 *
1220 * @param[in] cli_config Command line config.
1221 * @param[in] config Codec configuration.
1222 */
print_astcenc_config(const cli_config_options & cli_config,const astcenc_config & config)1223 static void print_astcenc_config(
1224 const cli_config_options& cli_config,
1225 const astcenc_config& config
1226 ) {
1227 // Print all encoding settings unless specifically told otherwise
1228 if (!cli_config.silentmode)
1229 {
1230 printf("Compressor settings\n");
1231 printf("===================\n\n");
1232
1233 switch (config.profile)
1234 {
1235 case ASTCENC_PRF_LDR:
1236 printf(" Color profile: LDR linear\n");
1237 break;
1238 case ASTCENC_PRF_LDR_SRGB:
1239 printf(" Color profile: LDR sRGB\n");
1240 break;
1241 case ASTCENC_PRF_HDR_RGB_LDR_A:
1242 printf(" Color profile: HDR RGB + LDR A\n");
1243 break;
1244 case ASTCENC_PRF_HDR:
1245 printf(" Color profile: HDR RGBA\n");
1246 break;
1247 }
1248
1249 if (config.block_z == 1)
1250 {
1251 printf(" Block size: %ux%u\n", config.block_x, config.block_y);
1252 }
1253 else
1254 {
1255 printf(" Block size: %ux%ux%u\n", config.block_x, config.block_y, config.block_z);
1256 }
1257
1258 printf(" Bitrate: %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z));
1259 printf(" RGB alpha scale weight: %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT));
1260 if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT))
1261 {
1262 printf(" Radius RGB alpha scale: %u texels\n", config.a_scale_radius);
1263 }
1264
1265 printf(" R component weight: %g\n", static_cast<double>(config.cw_r_weight));
1266 printf(" G component weight: %g\n", static_cast<double>(config.cw_g_weight));
1267 printf(" B component weight: %g\n", static_cast<double>(config.cw_b_weight));
1268 printf(" A component weight: %g\n", static_cast<double>(config.cw_a_weight));
1269 printf(" Partition cutoff: %u partitions\n", config.tune_partition_count_limit);
1270 printf(" 2 partition index cutoff: %u partition ids\n", config.tune_2partition_index_limit);
1271 printf(" 3 partition index cutoff: %u partition ids\n", config.tune_3partition_index_limit);
1272 printf(" 4 partition index cutoff: %u partition ids\n", config.tune_4partition_index_limit);
1273 printf(" PSNR cutoff: %g dB\n", static_cast<double>(config.tune_db_limit));
1274 printf(" 3 partition cutoff: %g\n", static_cast<double>(config.tune_2partition_early_out_limit_factor));
1275 printf(" 4 partition cutoff: %g\n", static_cast<double>(config.tune_3partition_early_out_limit_factor));
1276 printf(" 2 plane correlation cutoff: %g\n", static_cast<double>(config.tune_2plane_early_out_limit_correlation));
1277 printf(" Block mode centile cutoff: %g%%\n", static_cast<double>(config.tune_block_mode_limit));
1278 printf(" Candidate cutoff: %u candidates\n", config.tune_candidate_limit);
1279 printf(" Refinement cutoff: %u iterations\n", config.tune_refinement_limit);
1280 printf(" Compressor thread count: %d\n", cli_config.thread_count);
1281 printf("\n");
1282 }
1283 }
1284
1285 /**
1286 * @brief Get the value of a single pixel in an image.
1287 *
1288 * Note, this implementation is not particularly optimal as it puts format
1289 * checks in the inner-most loop. For the CLI preprocess passes this is deemed
1290 * acceptable as these are not performance critical paths.
1291 *
1292 * @param[in] img The output image.
1293 * @param x The pixel x coordinate.
1294 * @param y The pixel y coordinate.
1295 * @param z The pixel z coordinate.
1296 *
1297 * @return pixel The pixel color value to write.
1298 */
image_get_pixel(const astcenc_image & img,unsigned int x,unsigned int y,unsigned int z)1299 static vfloat4 image_get_pixel(
1300 const astcenc_image& img,
1301 unsigned int x,
1302 unsigned int y,
1303 unsigned int z
1304 ) {
1305 // We should never escape bounds
1306 assert(x < img.dim_x);
1307 assert(y < img.dim_y);
1308 assert(z < img.dim_z);
1309
1310 if (img.data_type == ASTCENC_TYPE_U8)
1311 {
1312 uint8_t* data = static_cast<uint8_t*>(img.data[z]);
1313
1314 float r = data[(4 * img.dim_x * y) + (4 * x )] / 255.0f;
1315 float g = data[(4 * img.dim_x * y) + (4 * x + 1)] / 255.0f;
1316 float b = data[(4 * img.dim_x * y) + (4 * x + 2)] / 255.0f;
1317 float a = data[(4 * img.dim_x * y) + (4 * x + 3)] / 255.0f;
1318
1319 return vfloat4(r, g, b, a);
1320 }
1321 else if (img.data_type == ASTCENC_TYPE_F16)
1322 {
1323 uint16_t* data = static_cast<uint16_t*>(img.data[z]);
1324
1325 vint4 colori(
1326 data[(4 * img.dim_x * y) + (4 * x )],
1327 data[(4 * img.dim_x * y) + (4 * x + 1)],
1328 data[(4 * img.dim_x * y) + (4 * x + 2)],
1329 data[(4 * img.dim_x * y) + (4 * x + 3)]
1330 );
1331
1332 return float16_to_float(colori);
1333 }
1334 else // if (img.data_type == ASTCENC_TYPE_F32)
1335 {
1336 assert(img.data_type == ASTCENC_TYPE_F32);
1337 float* data = static_cast<float*>(img.data[z]);
1338
1339 return vfloat4(
1340 data[(4 * img.dim_x * y) + (4 * x )],
1341 data[(4 * img.dim_x * y) + (4 * x + 1)],
1342 data[(4 * img.dim_x * y) + (4 * x + 2)],
1343 data[(4 * img.dim_x * y) + (4 * x + 3)]
1344 );
1345 }
1346 }
1347
1348 /**
1349 * @brief Set the value of a single pixel in an image.
1350 *
1351 * @param[out] img The output image; must use F32 texture components.
1352 * @param x The pixel x coordinate.
1353 * @param y The pixel y coordinate.
1354 * @param z The pixel z coordinate.
1355 * @param pixel The pixel color value to write.
1356 */
image_set_pixel(astcenc_image & img,unsigned int x,unsigned int y,unsigned int z,vfloat4 pixel)1357 static void image_set_pixel(
1358 astcenc_image& img,
1359 unsigned int x,
1360 unsigned int y,
1361 unsigned int z,
1362 vfloat4 pixel
1363 ) {
1364 // We should never escape bounds
1365 assert(x < img.dim_x);
1366 assert(y < img.dim_y);
1367 assert(z < img.dim_z);
1368 assert(img.data_type == ASTCENC_TYPE_F32);
1369
1370 float* data = static_cast<float*>(img.data[z]);
1371
1372 data[(4 * img.dim_x * y) + (4 * x )] = pixel.lane<0>();
1373 data[(4 * img.dim_x * y) + (4 * x + 1)] = pixel.lane<1>();
1374 data[(4 * img.dim_x * y) + (4 * x + 2)] = pixel.lane<2>();
1375 data[(4 * img.dim_x * y) + (4 * x + 3)] = pixel.lane<3>();
1376 }
1377
1378 /**
1379 * @brief Set the value of a single pixel in an image.
1380 *
1381 * @param[out] img The output image; must use F32 texture components.
1382 * @param x The pixel x coordinate.
1383 * @param y The pixel y coordinate.
1384 * @param pixel The pixel color value to write.
1385 */
image_set_pixel_u8(astcenc_image & img,size_t x,size_t y,vint4 pixel)1386 static void image_set_pixel_u8(
1387 astcenc_image& img,
1388 size_t x,
1389 size_t y,
1390 vint4 pixel
1391 ) {
1392 // We should never escape bounds
1393 assert(x < img.dim_x);
1394 assert(y < img.dim_y);
1395 assert(img.data_type == ASTCENC_TYPE_U8);
1396
1397 uint8_t* data = static_cast<uint8_t*>(img.data[0]);
1398 pixel = pack_low_bytes(pixel);
1399 store_nbytes(pixel, data + (4 * img.dim_x * y) + (4 * x ));
1400 }
1401
1402 /**
1403 * @brief Create a copy of @c input with forced unit-length normal vectors.
1404 *
1405 * It is assumed that all normal vectors are stored in the RGB components, and
1406 * stored in a packed unsigned range of [0,1] which must be unpacked prior
1407 * normalization. Data must then be repacked into this form for handing over to
1408 * the core codec.
1409 *
1410 * @param[in] input The input image.
1411 * @param[out] output The output image, must use F32 components.
1412 */
image_preprocess_normalize(const astcenc_image & input,astcenc_image & output)1413 static void image_preprocess_normalize(
1414 const astcenc_image& input,
1415 astcenc_image& output
1416 ) {
1417 for (unsigned int z = 0; z < input.dim_z; z++)
1418 {
1419 for (unsigned int y = 0; y < input.dim_y; y++)
1420 {
1421 for (unsigned int x = 0; x < input.dim_x; x++)
1422 {
1423 vfloat4 pixel = image_get_pixel(input, x, y, z);
1424
1425 // Stash alpha component and zero
1426 float a = pixel.lane<3>();
1427 pixel.set_lane<3>(0.0f);
1428
1429 // Decode [0,1] normals to [-1,1]
1430 pixel.set_lane<0>((pixel.lane<0>() * 2.0f) - 1.0f);
1431 pixel.set_lane<1>((pixel.lane<1>() * 2.0f) - 1.0f);
1432 pixel.set_lane<2>((pixel.lane<2>() * 2.0f) - 1.0f);
1433
1434 // Normalize pixel and restore alpha
1435 pixel = normalize(pixel);
1436 pixel.set_lane<3>(a);
1437
1438 // Encode [-1,1] normals to [0,1]
1439 pixel.set_lane<0>((pixel.lane<0>() + 1.0f) / 2.0f);
1440 pixel.set_lane<1>((pixel.lane<1>() + 1.0f) / 2.0f);
1441 pixel.set_lane<2>((pixel.lane<2>() + 1.0f) / 2.0f);
1442
1443 image_set_pixel(output, x, y, z, pixel);
1444 }
1445 }
1446 }
1447 }
1448
/**
 * @brief Linearize an sRGB value.
 *
 * @param a   The sRGB gamma-encoded value.
 *
 * @return The linearized value.
 */
static float srgb_to_linear(
	float a
) {
	// Values at or below the threshold lie on the linear segment of the
	// curve; everything above it follows the 2.4 power law
	return (a <= 0.04045f)
	     ? a * (1.0f / 12.92f)
	     : powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
}
1464
/**
 * @brief sRGB gamma-encode a linear value.
 *
 * @param a   The linear value.
 *
 * @return The gamma encoded value.
 */
static float linear_to_srgb(
	float a
) {
	// Values at or below the threshold lie on the linear segment of the
	// curve; everything above it follows the 1/2.4 power law
	return (a <= 0.0031308f)
	     ? a * 12.92f
	     : 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
}
1480
1481 /**
1482 * @brief Create a copy of @c input with premultiplied color data.
1483 *
1484 * If we are compressing sRGB data we linearize the data prior to
1485 * premultiplication and re-gamma-encode afterwards.
1486 *
1487 * @param[in] input The input image.
1488 * @param[out] output The output image, must use F32 components.
1489 * @param profile The encoding profile.
1490 */
image_preprocess_premultiply(const astcenc_image & input,astcenc_image & output,astcenc_profile profile)1491 static void image_preprocess_premultiply(
1492 const astcenc_image& input,
1493 astcenc_image& output,
1494 astcenc_profile profile
1495 ) {
1496 for (unsigned int z = 0; z < input.dim_z; z++)
1497 {
1498 for (unsigned int y = 0; y < input.dim_y; y++)
1499 {
1500 for (unsigned int x = 0; x < input.dim_x; x++)
1501 {
1502 vfloat4 pixel = image_get_pixel(input, x, y, z);
1503
1504 // Linearize sRGB
1505 if (profile == ASTCENC_PRF_LDR_SRGB)
1506 {
1507 pixel.set_lane<0>(srgb_to_linear(pixel.lane<0>()));
1508 pixel.set_lane<1>(srgb_to_linear(pixel.lane<1>()));
1509 pixel.set_lane<2>(srgb_to_linear(pixel.lane<2>()));
1510 }
1511
1512 // Premultiply pixel in linear-space
1513 pixel.set_lane<0>(pixel.lane<0>() * pixel.lane<3>());
1514 pixel.set_lane<1>(pixel.lane<1>() * pixel.lane<3>());
1515 pixel.set_lane<2>(pixel.lane<2>() * pixel.lane<3>());
1516
1517 // Gamma-encode sRGB
1518 if (profile == ASTCENC_PRF_LDR_SRGB)
1519 {
1520 pixel.set_lane<0>(linear_to_srgb(pixel.lane<0>()));
1521 pixel.set_lane<1>(linear_to_srgb(pixel.lane<1>()));
1522 pixel.set_lane<2>(linear_to_srgb(pixel.lane<2>()));
1523 }
1524
1525 image_set_pixel(output, x, y, z, pixel);
1526 }
1527 }
1528 }
1529 }
1530
1531 /**
1532 * @brief Populate a single diagnostic image showing aspects of the encoding.
1533 *
1534 * @param context The context to use.
1535 * @param image The compressed image to analyze.
1536 * @param diag_image The output visualization image to populate.
1537 * @param texel_func The per-texel callback used to determine output color.
1538 */
print_diagnostic_image(astcenc_context * context,const astc_compressed_image & image,astcenc_image & diag_image,std::function<vint4 (astcenc_block_info &,size_t,size_t)> texel_func)1539 static void print_diagnostic_image(
1540 astcenc_context* context,
1541 const astc_compressed_image& image,
1542 astcenc_image& diag_image,
1543 std::function<vint4(astcenc_block_info&, size_t, size_t)> texel_func
1544 ) {
1545 size_t block_cols = (image.dim_x + image.block_x - 1) / image.block_x;
1546 size_t block_rows = (image.dim_y + image.block_y - 1) / image.block_y;
1547
1548 uint8_t* data = image.data;
1549 for (size_t block_y = 0; block_y < block_rows; block_y++)
1550 {
1551 for (size_t block_x = 0; block_x < block_cols; block_x++)
1552 {
1553 astcenc_block_info block_info;
1554 astcenc_get_block_info(context, data, &block_info);
1555 data += 16;
1556
1557 size_t start_row = block_y * image.block_y;
1558 size_t start_col = block_x * image.block_x;
1559
1560 size_t end_row = astc::min(start_row + image.block_y, static_cast<size_t>(image.dim_y));
1561 size_t end_col = astc::min(start_col + image.block_x, static_cast<size_t>(image.dim_x));
1562
1563 for (size_t texel_y = start_row; texel_y < end_row; texel_y++)
1564 {
1565 for (size_t texel_x = start_col; texel_x < end_col; texel_x++)
1566 {
1567 vint4 color = texel_func(block_info, texel_x - start_col, texel_y - start_row);
1568 image_set_pixel_u8(diag_image, texel_x, texel_y, color);
1569 }
1570 }
1571 }
1572 }
1573 }
1574
1575 /**
1576 * @brief Print a set of diagnostic images showing aspects of the encoding.
1577 *
1578 * @param context The context to use.
1579 * @param image The compressed image to analyze.
1580 * @param output_file The output file name to use as a stem for new names.
1581 */
print_diagnostic_images(astcenc_context * context,const astc_compressed_image & image,const std::string & output_file)1582 static void print_diagnostic_images(
1583 astcenc_context* context,
1584 const astc_compressed_image& image,
1585 const std::string& output_file
1586 ) {
1587 if (image.dim_z != 1)
1588 {
1589 return;
1590 }
1591
1592 // Try to find a file extension we know about
1593 size_t index = output_file.find_last_of(".");
1594 std::string stem = output_file;
1595 if (index != std::string::npos)
1596 {
1597 stem = stem.substr(0, index);
1598 }
1599
1600 auto diag_image = alloc_image(8, image.dim_x, image.dim_y, image.dim_z);
1601
1602 // ---- ---- ---- ---- Partitioning ---- ---- ---- ----
1603 auto partition_func = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1604 const vint4 colors[] {
1605 vint4( 0, 0, 0, 255),
1606 vint4(255, 0, 0, 255),
1607 vint4( 0, 255, 0, 255),
1608 vint4( 0, 0, 255, 255),
1609 vint4(255, 255, 255, 255)
1610 };
1611
1612 size_t texel_index = texel_y * info.block_x + texel_x;
1613
1614 int partition { 0 };
1615 if (!info.is_constant_block)
1616 {
1617 partition = info.partition_assignment[texel_index] + 1;
1618 }
1619
1620 return colors[partition];
1621 };
1622
1623 print_diagnostic_image(context, image, *diag_image, partition_func);
1624 std::string fname = stem + "_diag_partitioning.png";
1625 store_ncimage(diag_image, fname.c_str(), false);
1626
1627 // ---- ---- ---- ---- Weight planes ---- ---- ---- ----
1628 auto texel_func1 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1629 (void)texel_x;
1630 (void)texel_y;
1631
1632 const vint4 colors[] {
1633 vint4( 0, 0, 0, 255),
1634 vint4(255, 0, 0, 255),
1635 vint4( 0, 255, 0, 255),
1636 vint4( 0, 0, 255, 255),
1637 vint4(255, 255, 255, 255)
1638 };
1639
1640 int component { 0 };
1641 if (info.is_dual_plane_block)
1642 {
1643 component = info.dual_plane_component + 1;
1644 }
1645
1646 return colors[component];
1647 };
1648
1649 print_diagnostic_image(context, image, *diag_image, texel_func1);
1650 fname = stem + "_diag_weight_plane2.png";
1651 store_ncimage(diag_image, fname.c_str(), false);
1652
1653 // ---- ---- ---- ---- Weight density ---- ---- ---- ----
1654 auto texel_func2 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1655 (void)texel_x;
1656 (void)texel_y;
1657
1658 float density = 0.0f;
1659 if (!info.is_constant_block)
1660 {
1661 float texel_count = static_cast<float>(info.block_x * info.block_y);
1662 float weight_count = static_cast<float>(info.weight_x * info.weight_y);
1663 density = weight_count / texel_count;
1664 }
1665
1666 int densityi = static_cast<int>(255.0f * density);
1667 return vint4(densityi, densityi, densityi, 255);
1668 };
1669
1670 print_diagnostic_image(context, image, *diag_image, texel_func2);
1671 fname = stem + "_diag_weight_density.png";
1672 store_ncimage(diag_image, fname.c_str(), false);
1673
1674 // ---- ---- ---- ---- Weight quant ---- ---- ---- ----
1675 auto texel_func3 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1676 (void)texel_x;
1677 (void)texel_y;
1678
1679 int quant { 0 };
1680 if (!info.is_constant_block)
1681 {
1682 quant = info.weight_level_count - 1;
1683 }
1684
1685 return vint4(quant, quant, quant, 255);
1686 };
1687
1688 print_diagnostic_image(context, image, *diag_image, texel_func3);
1689 fname = stem + "_diag_weight_quant.png";
1690 store_ncimage(diag_image, fname.c_str(), false);
1691
1692 // ---- ---- ---- ---- Color quant ---- ---- ---- ----
1693 auto texel_func4 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1694 (void)texel_x;
1695 (void)texel_y;
1696
1697 int quant { 0 };
1698 if (!info.is_constant_block)
1699 {
1700 quant = info.color_level_count - 1;
1701 }
1702
1703 return vint4(quant, quant, quant, 255);
1704 };
1705
1706 print_diagnostic_image(context, image, *diag_image, texel_func4);
1707 fname = stem + "_diag_color_quant.png";
1708 store_ncimage(diag_image, fname.c_str(), false);
1709
1710 // ---- ---- ---- ---- Color endpoint mode: Index ---- ---- ---- ----
1711 auto texel_func5 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1712 (void)texel_x;
1713 (void)texel_y;
1714
1715 size_t texel_index = texel_y * info.block_x + texel_x;
1716
1717 int cem { 255 };
1718 if (!info.is_constant_block)
1719 {
1720 uint8_t partition = info.partition_assignment[texel_index];
1721 cem = info.color_endpoint_modes[partition] * 16;
1722 }
1723
1724 return vint4(cem, cem, cem, 255);
1725 };
1726
1727 print_diagnostic_image(context, image, *diag_image, texel_func5);
1728 fname = stem + "_diag_cem_index.png";
1729 store_ncimage(diag_image, fname.c_str(), false);
1730
1731 // ---- ---- ---- ---- Color endpoint mode: Components ---- ---- ---- ----
1732 auto texel_func6 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1733 (void)texel_x;
1734 (void)texel_y;
1735
1736 const vint4 colors[] {
1737 vint4( 0, 0, 0, 255),
1738 vint4(255, 0, 0, 255),
1739 vint4( 0, 255, 0, 255),
1740 vint4( 0, 0, 255, 255),
1741 vint4(255, 255, 255, 255)
1742 };
1743
1744 size_t texel_index = texel_y * info.block_x + texel_x;
1745
1746 int components { 0 };
1747 if (!info.is_constant_block)
1748 {
1749 uint8_t partition = info.partition_assignment[texel_index];
1750 uint8_t cem = info.color_endpoint_modes[partition];
1751
1752 switch (cem)
1753 {
1754 case 0:
1755 case 1:
1756 case 2:
1757 case 3:
1758 components = 1;
1759 break;
1760 case 4:
1761 case 5:
1762 components = 2;
1763 break;
1764 case 6:
1765 case 7:
1766 case 8:
1767 case 9:
1768 case 11:
1769 components = 3;
1770 break;
1771 default:
1772 components = 4;
1773 break;
1774 }
1775 }
1776
1777 return colors[components];
1778 };
1779
1780 print_diagnostic_image(context, image, *diag_image, texel_func6);
1781 fname = stem + "_diag_cem_components.png";
1782 store_ncimage(diag_image, fname.c_str(), false);
1783
1784 // ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1785 auto texel_func7 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1786 (void)texel_x;
1787 (void)texel_y;
1788
1789 const vint4 colors[] {
1790 vint4( 0, 0, 0, 255),
1791 vint4(255, 0, 0, 255),
1792 vint4( 0, 255, 0, 255),
1793 vint4( 0, 0, 255, 255),
1794 };
1795
1796 size_t texel_index = texel_y * info.block_x + texel_x;
1797
1798 int style { 0 };
1799 if (!info.is_constant_block)
1800 {
1801 uint8_t partition = info.partition_assignment[texel_index];
1802 uint8_t cem = info.color_endpoint_modes[partition];
1803
1804 switch (cem)
1805 {
1806 // Direct - two absolute endpoints
1807 case 0:
1808 case 1:
1809 case 2:
1810 case 3:
1811 case 4:
1812 case 8:
1813 case 11:
1814 case 12:
1815 case 14:
1816 case 15:
1817 style = 1;
1818 break;
1819 // Offset - one absolute plus delta
1820 case 5:
1821 case 9:
1822 case 13:
1823 style = 2;
1824 break;
1825 // Scale - one absolute plus scale
1826 case 6:
1827 case 7:
1828 case 10:
1829 style = 3;
1830 break;
1831 // Shouldn't happen ...
1832 default:
1833 style = 0;
1834 break;
1835 }
1836 }
1837
1838 return colors[style];
1839 };
1840
1841 print_diagnostic_image(context, image, *diag_image, texel_func7);
1842 fname = stem + "_diag_cem_style.png";
1843 store_ncimage(diag_image, fname.c_str(), false);
1844
1845 // ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1846 auto texel_func8 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1847 (void)texel_x;
1848 (void)texel_y;
1849
1850 size_t texel_index = texel_y * info.block_x + texel_x;
1851
1852 int style { 0 };
1853 if (!info.is_constant_block)
1854 {
1855 uint8_t partition = info.partition_assignment[texel_index];
1856 uint8_t cem = info.color_endpoint_modes[partition];
1857
1858 switch (cem)
1859 {
1860 // LDR blocks
1861 case 0:
1862 case 1:
1863 case 4:
1864 case 5:
1865 case 6:
1866 case 8:
1867 case 9:
1868 case 10:
1869 case 12:
1870 case 13:
1871 style = 128;
1872 break;
1873 // HDR blocks
1874 default:
1875 style = 155;
1876 break;
1877 }
1878 }
1879
1880 return vint4(style, style, style, 255);
1881 };
1882
1883 print_diagnostic_image(context, image, *diag_image, texel_func8);
1884 fname = stem + "_diag_cem_hdr.png";
1885 store_ncimage(diag_image, fname.c_str(), false);
1886
1887 free_image(diag_image);
1888 }
1889
#if QUALITY_CONTROL
constexpr double MAX_PSNR = 99.9;
constexpr double MAX_VALUE = 255;
constexpr double THRESHOLD_R = 30.0;
constexpr double THRESHOLD_G = 30.0;
constexpr double THRESHOLD_B = 30.0;
constexpr double THRESHOLD_A = 30.0;
constexpr double THRESHOLD_RGB = 30.0;
constexpr double LOG_BASE = 10.0;

/**
 * @brief Compute per-component and combined RGB PSNR and test them against
 *        the quality thresholds.
 *
 * The values in each component array are summed, divided by the sample count
 * (@c blockNum * @c blockXYZ) and converted to a PSNR against a peak value of
 * @c MAX_VALUE. A component whose sum is zero reports @c MAX_PSNR. The
 * combined RGB figure pools every component except alpha. All five PSNR
 * values are printed to stdout.
 *
 * @param mseIn      One array per component, each holding @c blockNum values;
 *                   presumably the summed squared error of each block - TODO
 *                   confirm against the caller.
 * @param blockNum   The number of entries in each component array.
 * @param blockXYZ   The number of samples each entry covers; presumably the
 *                   texel count of a block - TODO confirm against the caller.
 *
 * @return @c true if every PSNR value is above its threshold.
 */
bool CheckQuality(int32_t* mseIn[RGBA_COM], int blockNum, int blockXYZ)
{
	const double threshold[RGBA_COM + 1] = { THRESHOLD_R, THRESHOLD_G, THRESHOLD_B, THRESHOLD_A, THRESHOLD_RGB};

	// Slot RGBA_COM accumulates the combined total of the non-alpha components
	uint64_t mseTotal[RGBA_COM + 1] = {};
	for (int i = R_COM; i < RGBA_COM; i++)
	{
		const int32_t* mse = mseIn[i];
		for (int j = 0; j < blockNum; j++)
		{
			mseTotal[i] += mse[j];
			if (i != A_COM)
			{
				mseTotal[RGBA_COM] += mse[j];
			}
		}
	}

	// Sample counts are computed in double so that large images cannot
	// overflow int arithmetic
	const double sampleCount = static_cast<double>(blockNum) * static_cast<double>(blockXYZ);
	const double sampleCountRgb = sampleCount * (RGBA_COM - 1);

	// Convert an accumulated error total over a number of samples to PSNR
	auto toPsnr = [](uint64_t total, double samples) -> double {
		if (total == 0)
		{
			return MAX_PSNR;
		}

		double mean = static_cast<double>(total) / samples;
		return LOG_BASE * log((MAX_VALUE * MAX_VALUE) / mean) / log(LOG_BASE);
	};

	double psnr[RGBA_COM + 1];
	for (int i = R_COM; i < RGBA_COM; i++)
	{
		psnr[i] = toPsnr(mseTotal[i], sampleCount);
	}
	psnr[RGBA_COM] = toPsnr(mseTotal[RGBA_COM], sampleCountRgb);

	printf("astc psnr r%f g%f b%f a%f rgb%f\n",
	       psnr[R_COM], psnr[G_COM], psnr[B_COM], psnr[A_COM],
	       psnr[RGBA_COM]);

	return (psnr[R_COM] > threshold[R_COM]) && (psnr[G_COM] > threshold[G_COM])
	    && (psnr[B_COM] > threshold[B_COM]) && (psnr[A_COM] > threshold[A_COM])
	    && (psnr[RGBA_COM] > threshold[RGBA_COM]);
}
#endif
1936
1937 /**
1938 * @brief The main entry point.
1939 *
1940 * @param argc The number of arguments.
1941 * @param argv The vector of arguments.
1942 *
1943 * @return 0 on success, non-zero otherwise.
1944 */
astcenc_main(int argc,char ** argv)1945 int astcenc_main(
1946 int argc,
1947 char **argv
1948 ) {
1949 double start_time = get_time();
1950
1951 if (argc < 2)
1952 {
1953 astcenc_print_shorthelp();
1954 return 0;
1955 }
1956
1957 astcenc_operation operation;
1958 astcenc_profile profile;
1959 int error = parse_commandline_options(argc, argv, operation, profile);
1960 if (error)
1961 {
1962 return 1;
1963 }
1964
1965 switch (operation)
1966 {
1967 case ASTCENC_OP_HELP:
1968 astcenc_print_longhelp();
1969 return 0;
1970 case ASTCENC_OP_VERSION:
1971 astcenc_print_header();
1972 return 0;
1973 default:
1974 break;
1975 }
1976
1977 std::string input_filename = argc >= 3 ? argv[2] : "";
1978 std::string output_filename = argc >= 4 ? argv[3] : "";
1979
1980 if (input_filename.empty())
1981 {
1982 print_error("ERROR: Input file not specified\n");
1983 return 1;
1984 }
1985
1986 if (output_filename.empty())
1987 {
1988 print_error("ERROR: Output file not specified\n");
1989 return 1;
1990 }
1991
1992 // TODO: Handle RAII resources so they get freed when out of scope
1993 // Load the compressed input file if needed
1994
1995 // This has to come first, as the block size is in the file header
1996 astc_compressed_image image_comp {};
1997 if (operation & ASTCENC_STAGE_LD_COMP)
1998 {
1999 if (ends_with(input_filename, ".astc"))
2000 {
2001 error = load_cimage(input_filename.c_str(), image_comp);
2002 if (error)
2003 {
2004 return 1;
2005 }
2006 }
2007 else if (ends_with(input_filename, ".ktx"))
2008 {
2009 bool is_srgb;
2010 error = load_ktx_compressed_image(input_filename.c_str(), is_srgb, image_comp);
2011 if (error)
2012 {
2013 return 1;
2014 }
2015
2016 if (is_srgb && (profile != ASTCENC_PRF_LDR_SRGB))
2017 {
2018 printf("WARNING: Input file is sRGB, but decompressing as linear\n");
2019 }
2020
2021 if (!is_srgb && (profile == ASTCENC_PRF_LDR_SRGB))
2022 {
2023 printf("WARNING: Input file is linear, but decompressing as sRGB\n");
2024 }
2025 }
2026 else
2027 {
2028 print_error("ERROR: Unknown compressed input file type\n");
2029 return 1;
2030 }
2031 }
2032
2033 astcenc_config config {};
2034 astcenc_preprocess preprocess;
2035 error = init_astcenc_config(argc, argv, profile, operation, image_comp, preprocess, config);
2036 if (error)
2037 {
2038 return 1;
2039 }
2040
2041 // Initialize cli_config_options with default values
2042 cli_config_options cli_config { 0, 1, 1, false, false, false, -10, 10,
2043 { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A },
2044 { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A } };
2045
2046 error = edit_astcenc_config(argc, argv, operation, cli_config, config);
2047 if (error)
2048 {
2049 return 1;
2050 }
2051
2052 // Enable progress callback if not in silent mode and using a terminal
2053 #if defined(_WIN32)
2054 int stdoutfno = _fileno(stdout);
2055 #else
2056 int stdoutfno = STDOUT_FILENO;
2057 #endif
2058
2059 if ((!cli_config.silentmode) && isatty(stdoutfno))
2060 {
2061 config.progress_callback = progress_emitter;
2062 }
2063
2064 astcenc_image* image_uncomp_in = nullptr ;
2065 unsigned int image_uncomp_in_component_count = 0;
2066 bool image_uncomp_in_is_hdr = false;
2067 astcenc_image* image_decomp_out = nullptr;
2068
2069 // Determine decompression output bitness, if limited by file type
2070 int out_bitness = 0;
2071 if (operation & ASTCENC_STAGE_DECOMPRESS)
2072 {
2073 out_bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2074 if (out_bitness == 0)
2075 {
2076 bool is_hdr = (config.profile == ASTCENC_PRF_HDR) ||
2077 (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A);
2078 out_bitness = is_hdr ? 16 : 8;
2079 }
2080
2081 // If decompressed output is unorm8 then force the decode_unorm8 heuristics for compression
2082 if (out_bitness == 8)
2083 {
2084 config.flags |= ASTCENC_FLG_USE_DECODE_UNORM8;
2085 }
2086 }
2087
2088 // TODO: Handle RAII resources so they get freed when out of scope
2089 astcenc_error codec_status;
2090 astcenc_context* codec_context;
2091
2092 // Preflight - check we have valid extensions for storing a file
2093 if (operation & ASTCENC_STAGE_ST_NCOMP)
2094 {
2095 int bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2096 if (bitness < 0)
2097 {
2098 const char *eptr = strrchr(output_filename.c_str(), '.');
2099 eptr = eptr ? eptr : "";
2100 print_error("ERROR: Unknown uncompressed output file type '%s'\n", eptr);
2101 return 1;
2102 }
2103 }
2104
2105 if (operation & ASTCENC_STAGE_ST_COMP)
2106 {
2107 #if defined(_WIN32)
2108 bool is_null = output_filename == "NUL" || output_filename == "nul";
2109 #else
2110 bool is_null = output_filename == "/dev/null";
2111 #endif
2112
2113 if (!(is_null || ends_with(output_filename, ".astc") || ends_with(output_filename, ".ktx")))
2114 {
2115 const char *eptr = strrchr(output_filename.c_str(), '.');
2116 eptr = eptr ? eptr : "";
2117 print_error("ERROR: Unknown compressed output file type '%s'\n", eptr);
2118 return 1;
2119 }
2120 }
2121
2122 codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context);
2123 if (codec_status != ASTCENC_SUCCESS)
2124 {
2125 print_error("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status));
2126 return 1;
2127 }
2128
2129 // Load the uncompressed input file if needed
2130 if (operation & ASTCENC_STAGE_LD_NCOMP)
2131 {
2132 image_uncomp_in = load_uncomp_file(
2133 input_filename.c_str(), cli_config.array_size, cli_config.y_flip,
2134 image_uncomp_in_is_hdr, image_uncomp_in_component_count);
2135 if (!image_uncomp_in)
2136 {
2137 print_error("ERROR: Failed to load uncompressed image file\n");
2138 return 1;
2139 }
2140
2141
2142 if (preprocess != ASTCENC_PP_NONE)
2143 {
2144 // Allocate a float image so we can avoid additional quantization,
2145 // as e.g. premultiplication can result in fractional color values
2146 astcenc_image* image_pp = alloc_image(32,
2147 image_uncomp_in->dim_x,
2148 image_uncomp_in->dim_y,
2149 image_uncomp_in->dim_z);
2150 if (!image_pp)
2151 {
2152 print_error("ERROR: Failed to allocate preprocessed image\n");
2153 return 1;
2154 }
2155
2156 if (preprocess == ASTCENC_PP_NORMALIZE)
2157 {
2158 image_preprocess_normalize(*image_uncomp_in, *image_pp);
2159 }
2160
2161 if (preprocess == ASTCENC_PP_PREMULTIPLY)
2162 {
2163 image_preprocess_premultiply(*image_uncomp_in, *image_pp,
2164 config.profile);
2165 }
2166
2167 // Delete the original as we no longer need it
2168 free_image(image_uncomp_in);
2169 image_uncomp_in = image_pp;
2170 }
2171
2172 if (!cli_config.silentmode)
2173 {
2174 printf("Source image\n");
2175 printf("============\n\n");
2176 printf(" Source: %s\n", input_filename.c_str());
2177 printf(" Color profile: %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR");
2178 if (image_uncomp_in->dim_z > 1)
2179 {
2180 printf(" Dimensions: 3D, %ux%ux%u\n",
2181 image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z);
2182 }
2183 else
2184 {
2185 printf(" Dimensions: 2D, %ux%u\n",
2186 image_uncomp_in->dim_x, image_uncomp_in->dim_y);
2187 }
2188 printf(" Components: %d\n\n", image_uncomp_in_component_count);
2189 }
2190 }
2191
2192 double image_size = 0.0;
2193 if (image_uncomp_in)
2194 {
2195 image_size = static_cast<double>(image_uncomp_in->dim_x) *
2196 static_cast<double>(image_uncomp_in->dim_y) *
2197 static_cast<double>(image_uncomp_in->dim_z);
2198 }
2199 else
2200 {
2201 image_size = static_cast<double>(image_comp.dim_x) *
2202 static_cast<double>(image_comp.dim_y) *
2203 static_cast<double>(image_comp.dim_z);
2204 }
2205
2206 // Compress an image
2207 double best_compression_time = 100000.0;
2208 double total_compression_time = 0.0;
2209 if (operation & ASTCENC_STAGE_COMPRESS)
2210 {
2211 print_astcenc_config(cli_config, config);
2212
2213 unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x;
2214 unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y;
2215 unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z;
2216 size_t buffer_size = blocks_x * blocks_y * blocks_z * 16;
2217 uint8_t* buffer = new uint8_t[buffer_size];
2218
2219 compression_workload work;
2220 work.context = codec_context;
2221 image_uncomp_in->dim_stride = image_uncomp_in->dim_x;
2222 work.image = image_uncomp_in;
2223 work.swizzle = cli_config.swz_encode;
2224 work.data_out = buffer;
2225 work.data_len = buffer_size;
2226 work.error = ASTCENC_SUCCESS;
2227 #if QUALITY_CONTROL
2228 work.calQualityEnable = true;
2229 work.mse[R_COM] = work.mse[G_COM] = work.mse[B_COM] = work.mse[A_COM] = nullptr;
2230 if (work.calQualityEnable) {
2231 for (int i = R_COM; i < RGBA_COM; i++) {
2232 work.mse[i] = (int32_t*)calloc(blocks_x * blocks_y, sizeof(int32_t));
2233 if (!work.mse[i]) {
2234 printf("quality control calloc failed");
2235 return -1;
2236 }
2237 }
2238 }
2239 #endif
2240 // Only launch worker threads for multi-threaded use - it makes basic
2241 // single-threaded profiling and debugging a little less convoluted
2242 double start_compression_time = get_time();
2243 for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2244 {
2245 if (config.progress_callback)
2246 {
2247 printf("Compression\n");
2248 printf("===========\n");
2249 printf("\n");
2250 }
2251
2252 double start_iter_time = get_time();
2253 if (cli_config.thread_count > 1)
2254 {
2255 launch_threads("Compression", cli_config.thread_count, compression_workload_runner, &work);
2256 }
2257 else
2258 {
2259 work.error = astcenc_compress_image(
2260 work.context, work.image, &work.swizzle,
2261 work.data_out, work.data_len,
2262 #if QUALITY_CONTROL
2263 work.calQualityEnable, work.mse,
2264 #endif
2265 0);
2266 }
2267
2268 astcenc_compress_reset(codec_context);
2269
2270 if (config.progress_callback)
2271 {
2272 printf("\n\n");
2273 }
2274
2275 double iter_time = get_time() - start_iter_time;
2276 best_compression_time = astc::min(iter_time, best_compression_time);
2277 }
2278 total_compression_time = get_time() - start_compression_time;
2279
2280 if (work.error != ASTCENC_SUCCESS)
2281 {
2282 print_error("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(work.error));
2283 return 1;
2284 }
2285 #if QUALITY_CONTROL
2286 if (work.calQualityEnable && !CheckQuality(work.mse, blocks_x * blocks_y, config.block_x * config.block_y)) {
2287 work.error = ASTCENC_ERR_BAD_QUALITY_CHECK;
2288 }
2289 if (work.calQualityEnable) {
2290 for (int i = R_COM; i < RGBA_COM; i++) {
2291 if (work.mse[i]) {
2292 free(work.mse[i]);
2293 }
2294 }
2295 }
2296 #endif
2297 image_comp.block_x = config.block_x;
2298 image_comp.block_y = config.block_y;
2299 image_comp.block_z = config.block_z;
2300 image_comp.dim_x = image_uncomp_in->dim_x;
2301 image_comp.dim_y = image_uncomp_in->dim_y;
2302 image_comp.dim_z = image_uncomp_in->dim_z;
2303 image_comp.data = buffer;
2304 image_comp.data_len = buffer_size;
2305 }
2306
2307 // Decompress an image
2308 double best_decompression_time = 100000.0;
2309 double total_decompression_time = 0.0;
2310 if (operation & ASTCENC_STAGE_DECOMPRESS)
2311 {
2312 image_decomp_out = alloc_image(
2313 out_bitness, image_comp.dim_x, image_comp.dim_y, image_comp.dim_z);
2314
2315 decompression_workload work;
2316 work.context = codec_context;
2317 work.data = image_comp.data;
2318 work.data_len = image_comp.data_len;
2319 work.image_out = image_decomp_out;
2320 work.swizzle = cli_config.swz_decode;
2321 work.error = ASTCENC_SUCCESS;
2322
2323 // Only launch worker threads for multi-threaded use - it makes basic
2324 // single-threaded profiling and debugging a little less convoluted
2325 double start_decompression_time = get_time();
2326 for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2327 {
2328 double start_iter_time = get_time();
2329 if (cli_config.thread_count > 1)
2330 {
2331 launch_threads("Decompression", cli_config.thread_count, decompression_workload_runner, &work);
2332 }
2333 else
2334 {
2335 work.error = astcenc_decompress_image(
2336 work.context, work.data, work.data_len,
2337 work.image_out, &work.swizzle, 0);
2338 }
2339
2340 astcenc_decompress_reset(codec_context);
2341
2342 double iter_time = get_time() - start_iter_time;
2343 best_decompression_time = astc::min(iter_time, best_decompression_time);
2344 }
2345 total_decompression_time = get_time() - start_decompression_time;
2346
2347 if (work.error != ASTCENC_SUCCESS)
2348 {
2349 print_error("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(codec_status));
2350 return 1;
2351 }
2352 }
2353
2354 #if defined(_WIN32)
2355 bool is_null = output_filename == "NUL" || output_filename == "nul";
2356 #else
2357 bool is_null = output_filename == "/dev/null";
2358 #endif
2359
2360 // Print metrics in comparison mode
2361 if (operation & ASTCENC_STAGE_COMPARE)
2362 {
2363 bool is_normal_map = config.flags & ASTCENC_FLG_MAP_NORMAL;
2364
2365 compute_error_metrics(
2366 image_uncomp_in_is_hdr, is_normal_map, image_uncomp_in_component_count,
2367 image_uncomp_in, image_decomp_out, cli_config.low_fstop, cli_config.high_fstop);
2368 }
2369
2370 // Store compressed image
2371 if (operation & ASTCENC_STAGE_ST_COMP)
2372 {
2373 if (ends_with(output_filename, ".astc"))
2374 {
2375 error = store_cimage(image_comp, output_filename.c_str());
2376 if (error)
2377 {
2378 print_error("ERROR: Failed to store compressed image\n");
2379 return 1;
2380 }
2381 }
2382 else if (ends_with(output_filename, ".ktx"))
2383 {
2384 bool srgb = profile == ASTCENC_PRF_LDR_SRGB;
2385 error = store_ktx_compressed_image(image_comp, output_filename.c_str(), srgb);
2386 if (error)
2387 {
2388 print_error("ERROR: Failed to store compressed image\n");
2389 return 1;
2390 }
2391 }
2392 else
2393 {
2394 if (!is_null)
2395 {
2396 print_error("ERROR: Unknown compressed output file type\n");
2397 return 1;
2398 }
2399 }
2400 }
2401
2402 // Store decompressed image
2403 if (operation & ASTCENC_STAGE_ST_NCOMP)
2404 {
2405 if (!is_null)
2406 {
2407 bool store_result = store_ncimage(image_decomp_out, output_filename.c_str(),
2408 cli_config.y_flip);
2409 if (!store_result)
2410 {
2411 print_error("ERROR: Failed to write output image %s\n", output_filename.c_str());
2412 return 1;
2413 }
2414 }
2415 }
2416
2417 // Store diagnostic images
2418 if (cli_config.diagnostic_images && !is_null)
2419 {
2420 print_diagnostic_images(codec_context, image_comp, output_filename);
2421 }
2422
2423 free_image(image_uncomp_in);
2424 free_image(image_decomp_out);
2425 astcenc_context_free(codec_context);
2426
2427 delete[] image_comp.data;
2428
2429 if ((operation & ASTCENC_STAGE_COMPARE) || (!cli_config.silentmode))
2430 {
2431 double end_time = get_time();
2432
2433 double repeats = static_cast<double>(cli_config.repeat_count);
2434 double avg_compression_time = total_compression_time / repeats;
2435 double avg_decompression_time = total_decompression_time / repeats;
2436 double total_time = (end_time - start_time) - ((repeats - 1.0) * avg_compression_time) - ((repeats - 1.0) * avg_decompression_time);
2437
2438 printf("Performance metrics\n");
2439 printf("===================\n\n");
2440 printf(" Total time: %8.4f s\n", total_time);
2441
2442 if (operation & ASTCENC_STAGE_COMPRESS)
2443 {
2444 double compression_rate = image_size / (best_compression_time * 1000000.0);
2445
2446 printf(" Coding time: %8.4f s\n", best_compression_time);
2447 printf(" Coding rate: %8.4f MT/s\n", compression_rate);
2448 }
2449
2450 if (operation & ASTCENC_STAGE_DECOMPRESS)
2451 {
2452 double decompression_rate = image_size / (best_decompression_time * 1000000.0);
2453 printf(" Decoding time: %8.4f s\n", best_decompression_time);
2454 printf(" Decoding rate: %8.4f MT/s\n", decompression_rate);
2455 }
2456 }
2457
2458 return 0;
2459 }
2460