• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for codec library front-end.
20  */
21 
22 #include "astcenc.h"
23 #include "astcenccli_internal.h"
24 
25 #include <cassert>
26 #include <cstring>
27 #include <string>
28 #include <sstream>
29 #include <vector>
30 
31 /* ============================================================================
32 	Data structure definitions
33 ============================================================================ */
34 
35 typedef unsigned int astcenc_operation;
36 
37 struct mode_entry
38 {
39 	const char* opt;
40 	astcenc_operation operation;
41 	astcenc_profile decode_mode;
42 };
43 
44 /* ============================================================================
45 	Constants and literals
46 ============================================================================ */
47 
48 /** @brief Stage bit indicating we need to load a compressed image. */
49 static const unsigned int ASTCENC_STAGE_LD_COMP    = 1 << 0;
50 
51 /** @brief Stage bit indicating we need to store a compressed image. */
52 static const unsigned int ASTCENC_STAGE_ST_COMP    = 1 << 1;
53 
54 /** @brief Stage bit indicating we need to load an uncompressed image. */
55 static const unsigned int ASTCENC_STAGE_LD_NCOMP   = 1 << 2;
56 
57 /** @brief Stage bit indicating we need to store an uncompressed image. */
58 static const unsigned int ASTCENC_STAGE_ST_NCOMP   = 1 << 3;
59 
60 /** @brief Stage bit indicating we need compress an image. */
61 static const unsigned int ASTCENC_STAGE_COMPRESS   = 1 << 4;
62 
63 /** @brief Stage bit indicating we need to decompress an image. */
64 static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1 << 5;
65 
66 /** @brief Stage bit indicating we need to compare an image with the original input. */
67 static const unsigned int ASTCENC_STAGE_COMPARE    = 1 << 6;
68 
69 /** @brief Operation indicating an unknown request (should never happen). */
70 static const astcenc_operation ASTCENC_OP_UNKNOWN  = 0;
71 
72 /** @brief Operation indicating the user wants to print long-form help text and version info. */
73 static const astcenc_operation ASTCENC_OP_HELP     = 1 << 7;
74 
75 /** @brief Operation indicating the user wants to print short-form help text and version info. */
76 static const astcenc_operation ASTCENC_OP_VERSION  = 1 << 8;
77 
78 /** @brief Operation indicating the user wants to compress and store an image. */
79 static const astcenc_operation ASTCENC_OP_COMPRESS =
80                                ASTCENC_STAGE_LD_NCOMP |
81                                ASTCENC_STAGE_COMPRESS |
82                                ASTCENC_STAGE_ST_COMP;
83 
84 /** @brief Operation indicating the user wants to decompress and store an image. */
85 static const astcenc_operation ASTCENC_OP_DECOMPRESS =
86                                ASTCENC_STAGE_LD_COMP |
87                                ASTCENC_STAGE_DECOMPRESS |
88                                ASTCENC_STAGE_ST_NCOMP;
89 
90 /** @brief Operation indicating the user wants to test a compression setting on an image. */
91 static const astcenc_operation ASTCENC_OP_TEST =
92                                ASTCENC_STAGE_LD_NCOMP |
93                                ASTCENC_STAGE_COMPRESS |
94                                ASTCENC_STAGE_DECOMPRESS |
95                                ASTCENC_STAGE_COMPARE |
96                                ASTCENC_STAGE_ST_NCOMP;
97 
/**
 * @brief Image preprocessing tasks prior to encoding.
 */
enum astcenc_preprocess
{
	/** @brief No image preprocessing. */
	ASTCENC_PP_NONE = 0,
	/** @brief Normal vector unit-length normalization. */
	ASTCENC_PP_NORMALIZE = 1,
	/** @brief Color data alpha premultiplication. */
	ASTCENC_PP_PREMULTIPLY = 2
};
110 
111 /** @brief Decode table for command line operation modes. */
112 static const mode_entry modes[] {
113 	{"-cl",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR},
114 	{"-dl",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR},
115 	{"-tl",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR},
116 	{"-cs",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR_SRGB},
117 	{"-ds",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR_SRGB},
118 	{"-ts",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR_SRGB},
119 	{"-ch",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR_RGB_LDR_A},
120 	{"-dh",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
121 	{"-th",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR_RGB_LDR_A},
122 	{"-cH",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR},
123 	{"-dH",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR},
124 	{"-tH",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR},
125 	{"-h",       ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
126 	{"-help",    ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
127 	{"-v",       ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR},
128 	{"-version", ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR}
129 };
130 
131 /**
132  * @brief Compression workload definition for worker threads.
133  */
134 struct compression_workload
135 {
136 	astcenc_context* context;
137 	astcenc_image* image;
138 	astcenc_swizzle swizzle;
139 	uint8_t* data_out;
140 	size_t data_len;
141 	astcenc_error error;
142 #if QUALITY_CONTROL
143 	bool calQualityEnable;
144 	int32_t *mse[RGBA_COM];
145 #endif
146 };
147 
148 /**
149  * @brief Decompression workload definition for worker threads.
150  */
151 struct decompression_workload
152 {
153 	astcenc_context* context;
154 	uint8_t* data;
155 	size_t data_len;
156 	astcenc_image* image_out;
157 	astcenc_swizzle swizzle;
158 	astcenc_error error;
159 };
160 
/**
 * @brief Test if a string argument is a well formed float.
 *
 * The whole string must be consumed by the parse; leading whitespace and
 * trailing characters both cause the test to fail.
 *
 * @param target   The string to test.
 *
 * @return @c true if the string is a single well formed float, @c false otherwise.
 */
static bool is_float(
	std::string target
) {
	std::istringstream parser(target);

	// Disable whitespace skipping so that leading whitespace fails the parse
	parser >> std::noskipws;

	float parsed;
	parser >> parsed;

	if (parser.fail())
	{
		return false;
	}

	// Hitting end-of-stream means no characters were left over after the number
	return parser.eof();
}
176 
/**
 * @brief Test if a string ends with a given suffix.
 *
 * @param str      The string to test.
 * @param suffix   The suffix to look for; an empty suffix always matches.
 *
 * @return @c true if @c str ends with @c suffix, @c false otherwise.
 */
static bool ends_with(
	const std::string& str,
	const std::string& suffix
) {
	const size_t str_len = str.size();
	const size_t suffix_len = suffix.size();

	// A suffix longer than the string can never match
	if (suffix_len > str_len)
	{
		return false;
	}

	return str.compare(str_len - suffix_len, suffix_len, suffix) == 0;
}
187 
188 /**
189  * @brief Runner callback function for a compression worker thread.
190  *
191  * @param thread_count   The number of threads in the worker pool.
192  * @param thread_id      The index of this thread in the worker pool.
193  * @param payload        The parameters for this thread.
194  */
compression_workload_runner(int thread_count,int thread_id,void * payload)195 static void compression_workload_runner(
196 	int thread_count,
197 	int thread_id,
198 	void* payload
199 ) {
200 	(void)thread_count;
201 
202 	compression_workload* work = static_cast<compression_workload*>(payload);
203 	astcenc_error error = astcenc_compress_image(
204 	                       work->context, work->image, &work->swizzle,
205 	                       work->data_out, work->data_len,
206 #if QUALITY_CONTROL
207 	                       work->calQualityEnable, work->mse,
208 #endif
209 	                       thread_id);
210 
211 	// This is a racy update, so which error gets returned is a random, but it
212 	// will reliably report an error if an error occurs
213 	if (error != ASTCENC_SUCCESS)
214 	{
215 		work->error = error;
216 	}
217 }
218 
219 /**
220  * @brief Runner callback function for a decompression worker thread.
221  *
222  * @param thread_count   The number of threads in the worker pool.
223  * @param thread_id      The index of this thread in the worker pool.
224  * @param payload        The parameters for this thread.
225  */
decompression_workload_runner(int thread_count,int thread_id,void * payload)226 static void decompression_workload_runner(
227 	int thread_count,
228 	int thread_id,
229 	void* payload
230 ) {
231 	(void)thread_count;
232 
233 	decompression_workload* work = static_cast<decompression_workload*>(payload);
234 	astcenc_error error = astcenc_decompress_image(
235 	                       work->context, work->data, work->data_len,
236 	                       work->image_out, &work->swizzle, thread_id);
237 
238 	// This is a racy update, so which error gets returned is a random, but it
239 	// will reliably report an error if an error occurs
240 	if (error != ASTCENC_SUCCESS)
241 	{
242 		work->error = error;
243 	}
244 }
245 
/**
 * @brief Utility to generate a slice file name from a pattern.
 *
 * Convert "foo/bar.png" in to "foo/bar_<slice>.png"
 *
 * A dot only counts as the start of the file extension if it appears in the
 * final path component; "foo.d/bar" is treated as having no extension.
 *
 * @param basename The base pattern; must contain a file extension.
 * @param index    The slice index.
 * @param error    Set to false on success, true on error (no extension found).
 *
 * @return The slice file name, or an empty string on error.
 */
static std::string get_slice_filename(
	const std::string& basename,
	unsigned int index,
	bool& error
) {
	size_t sep = basename.find_last_of('.');

	// A dot that sits before the last path separator belongs to a directory
	// name, not to the file name, so it does not mark a file extension
	size_t dir_sep = basename.find_last_of("/\\");
	bool dot_in_directory = (sep != std::string::npos) &&
	                        (dir_sep != std::string::npos) &&
	                        (dir_sep > sep);

	if ((sep == std::string::npos) || dot_in_directory)
	{
		error = true;
		return "";
	}

	std::string base = basename.substr(0, sep);
	std::string ext = basename.substr(sep);
	std::string name = base + "_" + std::to_string(index) + ext;
	error = false;
	return name;
}
275 
276 /**
277  * @brief Load a non-astc image file from memory.
278  *
279  * @param filename            The file to load, or a pattern for array loads.
280  * @param dim_z               The number of slices to load.
281  * @param y_flip              Should this image be Y flipped?
282  * @param[out] is_hdr         Is the loaded image HDR?
283  * @param[out] component_count The number of components in the loaded image.
284  *
285  * @return The astc image file, or nullptr on error.
286  */
load_uncomp_file(const char * filename,unsigned int dim_z,bool y_flip,bool & is_hdr,unsigned int & component_count)287 static astcenc_image* load_uncomp_file(
288 	const char* filename,
289 	unsigned int dim_z,
290 	bool y_flip,
291 	bool& is_hdr,
292 	unsigned int& component_count
293 ) {
294 	astcenc_image *image = nullptr;
295 
296 	// For a 2D image just load the image directly
297 	if (dim_z == 1)
298 	{
299 		image = load_ncimage(filename, y_flip, is_hdr, component_count);
300 	}
301 	else
302 	{
303 		bool slice_is_hdr;
304 		unsigned int slice_component_count;
305 		astcenc_image* slice = nullptr;
306 		std::vector<astcenc_image*> slices;
307 
308 		// For a 3D image load an array of slices
309 		for (unsigned int image_index = 0; image_index < dim_z; image_index++)
310 		{
311 			bool error;
312 			std::string slice_name = get_slice_filename(filename, image_index, error);
313 			if (error)
314 			{
315 				printf("ERROR: Image pattern does not contain file extension: %s\n", filename);
316 				break;
317 			}
318 
319 			slice = load_ncimage(slice_name.c_str(), y_flip,
320 			                     slice_is_hdr, slice_component_count);
321 			if (!slice)
322 			{
323 				break;
324 			}
325 
326 			slices.push_back(slice);
327 
328 			// Check it is not a 3D image
329 			if (slice->dim_z != 1)
330 			{
331 				printf("ERROR: Image arrays do not support 3D sources: %s\n", slice_name.c_str());
332 				break;
333 			}
334 
335 			// Check slices are consistent with each other
336 			if (image_index != 0)
337 			{
338 				if ((is_hdr != slice_is_hdr) || (component_count != slice_component_count))
339 				{
340 					printf("ERROR: Image array[0] and [%d] are different formats\n", image_index);
341 					break;
342 				}
343 
344 				if ((slices[0]->dim_x != slice->dim_x) ||
345 				    (slices[0]->dim_y != slice->dim_y) ||
346 				    (slices[0]->dim_z != slice->dim_z))
347 				{
348 					printf("ERROR: Image array[0] and [%d] are different dimensions\n", image_index);
349 					break;
350 				}
351 			}
352 			else
353 			{
354 				is_hdr = slice_is_hdr;
355 				component_count = slice_component_count;
356 			}
357 		}
358 
359 		// If all slices loaded correctly then repack them into a single image
360 		if (slices.size() == dim_z)
361 		{
362 			unsigned int dim_x = slices[0]->dim_x;
363 			unsigned int dim_y = slices[0]->dim_y;
364 			int bitness = is_hdr ? 16 : 8;
365 			int slice_size = dim_x * dim_y;
366 
367 			image = alloc_image(bitness, dim_x, dim_y, dim_z);
368 
369 			// Combine 2D source images into one 3D image
370 			for (unsigned int z = 0; z < dim_z; z++)
371 			{
372 				if (image->data_type == ASTCENC_TYPE_U8)
373 				{
374 					uint8_t* data8 = static_cast<uint8_t*>(image->data[z]);
375 					uint8_t* data8src = static_cast<uint8_t*>(slices[z]->data[0]);
376 					size_t copy_size = slice_size * 4 * sizeof(uint8_t);
377 					memcpy(data8, data8src, copy_size);
378 				}
379 				else if (image->data_type == ASTCENC_TYPE_F16)
380 				{
381 					uint16_t* data16 = static_cast<uint16_t*>(image->data[z]);
382 					uint16_t* data16src = static_cast<uint16_t*>(slices[z]->data[0]);
383 					size_t copy_size = slice_size * 4 * sizeof(uint16_t);
384 					memcpy(data16, data16src, copy_size);
385 				}
386 				else // if (image->data_type == ASTCENC_TYPE_F32)
387 				{
388 					assert(image->data_type == ASTCENC_TYPE_F32);
389 					float* data32 = static_cast<float*>(image->data[z]);
390 					float* data32src = static_cast<float*>(slices[z]->data[0]);
391 					size_t copy_size = slice_size * 4 * sizeof(float);
392 					memcpy(data32, data32src, copy_size);
393 				}
394 			}
395 		}
396 
397 		for (auto &i : slices)
398 		{
399 			free_image(i);
400 		}
401 	}
402 
403 	return image;
404 }
405 
406 /**
407  * @brief Parse the command line.
408  *
409  * @param      argc        Command line argument count.
410  * @param[in]  argv        Command line argument vector.
411  * @param[out] operation   Codec operation mode.
412  * @param[out] profile     Codec color profile.
413  *
414  * @return 0 if everything is okay, 1 if there is some error
415  */
parse_commandline_options(int argc,char ** argv,astcenc_operation & operation,astcenc_profile & profile)416 static int parse_commandline_options(
417 	int argc,
418 	char **argv,
419 	astcenc_operation& operation,
420 	astcenc_profile& profile
421 ) {
422 	assert(argc >= 2); (void)argc;
423 
424 	profile = ASTCENC_PRF_LDR;
425 	operation = ASTCENC_OP_UNKNOWN;
426 
427 	int modes_count = sizeof(modes) / sizeof(modes[0]);
428 	for (int i = 0; i < modes_count; i++)
429 	{
430 		if (!strcmp(modes[i].opt, argv[1]))
431 		{
432 			operation = modes[i].operation;
433 			profile = modes[i].decode_mode;
434 			break;
435 		}
436 	}
437 
438 	if (operation == ASTCENC_OP_UNKNOWN)
439 	{
440 		printf("ERROR: Unrecognized operation '%s'\n", argv[1]);
441 		return 1;
442 	}
443 
444 	return 0;
445 }
446 
447 /**
448  * @brief Initialize the astcenc_config
449  *
450  * @param      argc         Command line argument count.
451  * @param[in]  argv         Command line argument vector.
452  * @param      operation    Codec operation mode.
453  * @param[out] profile      Codec color profile.
454  * @param      comp_image   Compressed image if a decompress operation.
455  * @param[out] preprocess   Image preprocess operation.
456  * @param[out] config       Codec configuration.
457  *
458  * @return 0 if everything is okay, 1 if there is some error
459  */
init_astcenc_config(int argc,char ** argv,astcenc_profile profile,astcenc_operation operation,astc_compressed_image & comp_image,astcenc_preprocess & preprocess,astcenc_config & config)460 static int init_astcenc_config(
461 	int argc,
462 	char **argv,
463 	astcenc_profile profile,
464 	astcenc_operation operation,
465 	astc_compressed_image& comp_image,
466 	astcenc_preprocess& preprocess,
467 	astcenc_config& config
468 ) {
469 	unsigned int block_x = 0;
470 	unsigned int block_y = 0;
471 	unsigned int block_z = 1;
472 
473 	// For decode the block size is set by the incoming image.
474 	if (operation == ASTCENC_OP_DECOMPRESS)
475 	{
476 		block_x = comp_image.block_x;
477 		block_y = comp_image.block_y;
478 		block_z = comp_image.block_z;
479 	}
480 
481 	float quality = 0.0f;
482 	preprocess = ASTCENC_PP_NONE;
483 
484 	// parse the command line's encoding options.
485 	int argidx = 4;
486 	if (operation & ASTCENC_STAGE_COMPRESS)
487 	{
488 		// Read and decode block size
489 		if (argc < 5)
490 		{
491 			printf("ERROR: Block size must be specified\n");
492 			return 1;
493 		}
494 
495 		int cnt2D, cnt3D;
496 		int dimensions = sscanf(argv[4], "%ux%u%nx%u%n",
497 		                        &block_x, &block_y, &cnt2D, &block_z, &cnt3D);
498 		// Character after the last match should be a NUL
499 		if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D])))
500 		{
501 			printf("ERROR: Block size '%s' is invalid\n", argv[4]);
502 			return 1;
503 		}
504 
505 		// Read and decode search quality
506 		if (argc < 6)
507 		{
508 			printf("ERROR: Search quality level must be specified\n");
509 			return 1;
510 		}
511 
512 		if (!strcmp(argv[5], "-fastest"))
513 		{
514 			quality = ASTCENC_PRE_FASTEST;
515 		}
516 		else if (!strcmp(argv[5], "-fast"))
517 		{
518 			quality = ASTCENC_PRE_FAST;
519 		}
520 		else if (!strcmp(argv[5], "-medium"))
521 		{
522 			quality = ASTCENC_PRE_MEDIUM;
523 		}
524 		else if (!strcmp(argv[5], "-thorough"))
525 		{
526 			quality = ASTCENC_PRE_THOROUGH;
527 		}
528 		else if (!strcmp(argv[5], "-exhaustive"))
529 		{
530 			quality = ASTCENC_PRE_EXHAUSTIVE;
531 		}
532 		else if (is_float(argv[5]))
533 		{
534 			quality = static_cast<float>(atof(argv[5]));
535 		}
536 		else
537 		{
538 			printf("ERROR: Search quality/preset '%s' is invalid\n", argv[5]);
539 			return 1;
540 		}
541 
542 		argidx = 6;
543 	}
544 
545 	unsigned int flags = 0;
546 
547 	// Gather the flags that we need
548 	while (argidx < argc)
549 	{
550 		if (!strcmp(argv[argidx], "-a"))
551 		{
552 			// Skip over the data value for now
553 			argidx++;
554 			flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT;
555 		}
556 		else if (!strcmp(argv[argidx], "-mask"))
557 		{
558 			flags |= ASTCENC_FLG_MAP_MASK;
559 		}
560 		else if (!strcmp(argv[argidx], "-normal"))
561 		{
562 			flags |= ASTCENC_FLG_MAP_NORMAL;
563 		}
564 		else if (!strcmp(argv[argidx], "-rgbm"))
565 		{
566 			// Skip over the data value for now
567 			argidx++;
568 			flags |= ASTCENC_FLG_MAP_RGBM;
569 		}
570 		else if (!strcmp(argv[argidx], "-perceptual"))
571 		{
572 			flags |= ASTCENC_FLG_USE_PERCEPTUAL;
573 		}
574 		else if (!strcmp(argv[argidx], "-pp-normalize"))
575 		{
576 			if (preprocess != ASTCENC_PP_NONE)
577 			{
578 				printf("ERROR: Only a single image preprocess can be used\n");
579 				return 1;
580 			}
581 			preprocess = ASTCENC_PP_NORMALIZE;
582 		}
583 		else if (!strcmp(argv[argidx], "-pp-premultiply"))
584 		{
585 			if (preprocess != ASTCENC_PP_NONE)
586 			{
587 				printf("ERROR: Only a single image preprocess can be used\n");
588 				return 1;
589 			}
590 			preprocess = ASTCENC_PP_PREMULTIPLY;
591 		}
592 		argidx ++;
593 	}
594 
595 #if defined(ASTCENC_DECOMPRESS_ONLY)
596 	flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
597 #else
598 	// Decompression can skip some memory allocation, but need full tables
599 	if (operation == ASTCENC_OP_DECOMPRESS)
600 	{
601 		flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
602 	}
603 	// Compression and test passes can skip some decimation initialization
604 	// as we know we are decompressing images that were compressed using the
605 	// same settings and heuristics ...
606 	else
607 	{
608 		flags |= ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
609 	}
610 #endif
611 
612 	astcenc_error status = astcenc_config_init(profile, block_x, block_y, block_z,
613 	                                           quality, flags, &config);
614 	if (status == ASTCENC_ERR_BAD_BLOCK_SIZE)
615 	{
616 		printf("ERROR: Block size '%s' is invalid\n", argv[4]);
617 		return 1;
618 	}
619 	else if (status == ASTCENC_ERR_BAD_CPU_ISA)
620 	{
621 		printf("ERROR: Required SIMD ISA support missing on this CPU\n");
622 		return 1;
623 	}
624 	else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
625 	{
626 		printf("ERROR: astcenc must not be compiled with -ffast-math\n");
627 		return 1;
628 	}
629 	else if (status != ASTCENC_SUCCESS)
630 	{
631 		printf("ERROR: Init config failed with %s\n", astcenc_get_error_string(status));
632 		return 1;
633 	}
634 
635 	return 0;
636 }
637 
638 /**
639  * @brief Edit the astcenc_config
640  *
641  * @param         argc         Command line argument count.
642  * @param[in]     argv         Command line argument vector.
643  * @param         operation    Codec operation.
644  * @param[out]    cli_config   Command line config.
645  * @param[in,out] config       Codec configuration.
646  *
647  * @return 0 if everything is OK, 1 if there is some error
648  */
edit_astcenc_config(int argc,char ** argv,const astcenc_operation operation,cli_config_options & cli_config,astcenc_config & config)649 static int edit_astcenc_config(
650 	int argc,
651 	char **argv,
652 	const astcenc_operation operation,
653 	cli_config_options& cli_config,
654 	astcenc_config& config
655 ) {
656 
657 	int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4;
658 	config.privateProfile = HIGH_QUALITY_PROFILE;
659 	while (argidx < argc)
660 	{
661 		if (!strcmp(argv[argidx], "-silent"))
662 		{
663 			argidx++;
664 			cli_config.silentmode = 1;
665 		}
666 		else if (!strcmp(argv[argidx], "-cw"))
667 		{
668 			argidx += 5;
669 			if (argidx > argc)
670 			{
671 				printf("ERROR: -cw switch with less than 4 arguments\n");
672 				return 1;
673 			}
674 
675 			config.cw_r_weight = static_cast<float>(atof(argv[argidx - 4]));
676 			config.cw_g_weight = static_cast<float>(atof(argv[argidx - 3]));
677 			config.cw_b_weight = static_cast<float>(atof(argv[argidx - 2]));
678 			config.cw_a_weight = static_cast<float>(atof(argv[argidx - 1]));
679 		}
680 		else if (!strcmp(argv[argidx], "-a"))
681 		{
682 			argidx += 2;
683 			if (argidx > argc)
684 			{
685 				printf("ERROR: -a switch with no argument\n");
686 				return 1;
687 			}
688 
689 			config.a_scale_radius = atoi(argv[argidx - 1]);
690 		}
691 		else if (!strcmp(argv[argidx], "-esw"))
692 		{
693 			argidx += 2;
694 			if (argidx > argc)
695 			{
696 				printf("ERROR: -esw switch with no argument\n");
697 				return 1;
698 			}
699 
700 			if (strlen(argv[argidx - 1]) != 4)
701 			{
702 				printf("ERROR: -esw pattern does not contain 4 characters\n");
703 				return 1;
704 			}
705 
706 			astcenc_swz swizzle_components[4];
707 			for (int i = 0; i < 4; i++)
708 			{
709 				switch (argv[argidx - 1][i])
710 				{
711 				case 'r':
712 					swizzle_components[i] = ASTCENC_SWZ_R;
713 					break;
714 				case 'g':
715 					swizzle_components[i] = ASTCENC_SWZ_G;
716 					break;
717 				case 'b':
718 					swizzle_components[i] = ASTCENC_SWZ_B;
719 					break;
720 				case 'a':
721 					swizzle_components[i] = ASTCENC_SWZ_A;
722 					break;
723 				case '0':
724 					swizzle_components[i] = ASTCENC_SWZ_0;
725 					break;
726 				case '1':
727 					swizzle_components[i] = ASTCENC_SWZ_1;
728 					break;
729 				default:
730 					printf("ERROR: -esw component '%c' is not valid\n", argv[argidx - 1][i]);
731 					return 1;
732 				}
733 			}
734 
735 			cli_config.swz_encode.r = swizzle_components[0];
736 			cli_config.swz_encode.g = swizzle_components[1];
737 			cli_config.swz_encode.b = swizzle_components[2];
738 			cli_config.swz_encode.a = swizzle_components[3];
739 		}
740 		else if (!strcmp(argv[argidx], "-dsw"))
741 		{
742 			argidx += 2;
743 			if (argidx > argc)
744 			{
745 				printf("ERROR: -dsw switch with no argument\n");
746 				return 1;
747 			}
748 
749 			if (strlen(argv[argidx - 1]) != 4)
750 			{
751 				printf("ERROR: -dsw switch does not contain 4 characters\n");
752 				return 1;
753 			}
754 
755 			astcenc_swz swizzle_components[4];
756 			for (int i = 0; i < 4; i++)
757 			{
758 				switch (argv[argidx - 1][i])
759 				{
760 				case 'r':
761 					swizzle_components[i] = ASTCENC_SWZ_R;
762 					break;
763 				case 'g':
764 					swizzle_components[i] = ASTCENC_SWZ_G;
765 					break;
766 				case 'b':
767 					swizzle_components[i] = ASTCENC_SWZ_B;
768 					break;
769 				case 'a':
770 					swizzle_components[i] = ASTCENC_SWZ_A;
771 					break;
772 				case '0':
773 					swizzle_components[i] = ASTCENC_SWZ_0;
774 					break;
775 				case '1':
776 					swizzle_components[i] = ASTCENC_SWZ_1;
777 					break;
778 				case 'z':
779 					swizzle_components[i] =  ASTCENC_SWZ_Z;
780 					break;
781 				default:
782 					printf("ERROR: ERROR: -dsw component '%c' is not valid\n", argv[argidx - 1][i]);
783 					return 1;
784 				}
785 			}
786 
787 			cli_config.swz_decode.r = swizzle_components[0];
788 			cli_config.swz_decode.g = swizzle_components[1];
789 			cli_config.swz_decode.b = swizzle_components[2];
790 			cli_config.swz_decode.a = swizzle_components[3];
791 		}
792 		// presets begin here
793 		else if (!strcmp(argv[argidx], "-mask"))
794 		{
795 			argidx++;
796 		}
797 		else if (!strcmp(argv[argidx], "-normal"))
798 		{
799 			argidx++;
800 
801 			cli_config.swz_encode.r = ASTCENC_SWZ_R;
802 			cli_config.swz_encode.g = ASTCENC_SWZ_R;
803 			cli_config.swz_encode.b = ASTCENC_SWZ_R;
804 			cli_config.swz_encode.a = ASTCENC_SWZ_G;
805 
806 			cli_config.swz_decode.r = ASTCENC_SWZ_R;
807 			cli_config.swz_decode.g = ASTCENC_SWZ_A;
808 			cli_config.swz_decode.b = ASTCENC_SWZ_Z;
809 			cli_config.swz_decode.a = ASTCENC_SWZ_1;
810 		}
811 		else if (!strcmp(argv[argidx], "-rgbm"))
812 		{
813 			argidx += 2;
814 			if (argidx > argc)
815 			{
816 				printf("ERROR: -rgbm switch with no argument\n");
817 				return 1;
818 			}
819 
820 			config.rgbm_m_scale = static_cast<float>(atof(argv[argidx - 1]));
821 			config.cw_a_weight = 2.0f * config.rgbm_m_scale;
822 		}
823 		else if (!strcmp(argv[argidx], "-perceptual"))
824 		{
825 			argidx++;
826 		}
827 		else if (!strcmp(argv[argidx], "-pp-normalize"))
828 		{
829 			argidx++;
830 		}
831 		else if (!strcmp(argv[argidx], "-pp-premultiply"))
832 		{
833 			argidx++;
834 		}
835 		else if (!strcmp(argv[argidx], "-blockmodelimit"))
836 		{
837 			argidx += 2;
838 			if (argidx > argc)
839 			{
840 				printf("ERROR: -blockmodelimit switch with no argument\n");
841 				return 1;
842 			}
843 
844 			config.tune_block_mode_limit = atoi(argv[argidx - 1]);
845 		}
846 		else if (!strcmp(argv[argidx], "-partitioncountlimit"))
847 		{
848 			argidx += 2;
849 			if (argidx > argc)
850 			{
851 				printf("ERROR: -partitioncountlimit switch with no argument\n");
852 				return 1;
853 			}
854 
855 			config.tune_partition_count_limit = atoi(argv[argidx - 1]);
856 		}
857 		else if (!strcmp(argv[argidx], "-partitionindexlimit"))
858 		{
859 			argidx += 2;
860 			if (argidx > argc)
861 			{
862 				printf("ERROR: -partitionindexlimit switch with no argument\n");
863 				return 1;
864 			}
865 
866 			config.tune_partition_index_limit = atoi(argv[argidx - 1]);
867 		}
868 		else if (!strcmp(argv[argidx], "-dblimit"))
869 		{
870 			argidx += 2;
871 			if (argidx > argc)
872 			{
873 				printf("ERROR: -dblimit switch with no argument\n");
874 				return 1;
875 			}
876 
877 			if ((config.profile == ASTCENC_PRF_LDR) || (config.profile == ASTCENC_PRF_LDR_SRGB))
878 			{
879 				config.tune_db_limit = static_cast<float>(atof(argv[argidx - 1]));
880 			}
881 		}
882 		else if (!strcmp(argv[argidx], "-2partitionlimitfactor"))
883 		{
884 			argidx += 2;
885 			if (argidx > argc)
886 			{
887 				printf("ERROR: -2partitionlimitfactor switch with no argument\n");
888 				return 1;
889 			}
890 
891 			config.tune_2_partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
892 		}
893 		else if (!strcmp(argv[argidx], "-3partitionlimitfactor"))
894 		{
895 			argidx += 2;
896 			if (argidx > argc)
897 			{
898 				printf("ERROR: -3partitionlimitfactor switch with no argument\n");
899 				return 1;
900 			}
901 
902 			config.tune_3_partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
903 		}
904 		else if (!strcmp(argv[argidx], "-2planelimitcorrelation"))
905 		{
906 			argidx += 2;
907 			if (argidx > argc)
908 			{
909 				printf("ERROR: -2planelimitcorrelation switch with no argument\n");
910 				return 1;
911 			}
912 
913 			config.tune_2_plane_early_out_limit_correlation = static_cast<float>(atof(argv[argidx - 1]));
914 		}
915 		else if (!strcmp(argv[argidx], "-lowweightmodelimit"))
916 		{
917 			argidx += 2;
918 			if (argidx > argc)
919 			{
920 				printf("ERROR: -lowweightmodelimit switch with no argument\n");
921 				return 1;
922 			}
923 
924 			config.tune_low_weight_count_limit = atoi(argv[argidx - 1]);
925 		}
926 		else if (!strcmp(argv[argidx], "-refinementlimit"))
927 		{
928 			argidx += 2;
929 			if (argidx > argc)
930 			{
931 				printf("ERROR: -refinementlimit switch with no argument\n");
932 				return 1;
933 			}
934 
935 			config.tune_refinement_limit = atoi(argv[argidx - 1]);
936 		}
937 		else if (!strcmp(argv[argidx], "-candidatelimit"))
938 		{
939 			argidx += 2;
940 			if (argidx > argc)
941 			{
942 				printf("ERROR: -candidatelimit switch with no argument\n");
943 				return 1;
944 			}
945 
946 			config.tune_candidate_limit = atoi(argv[argidx - 1]);
947 		}
948 		else if (!strcmp(argv[argidx], "-j"))
949 		{
950 			argidx += 2;
951 			if (argidx > argc)
952 			{
953 				printf("ERROR: -j switch with no argument\n");
954 				return 1;
955 			}
956 
957 			cli_config.thread_count = atoi(argv[argidx - 1]);
958 		}
959 		else if (!strcmp(argv[argidx], "-yflip"))
960 		{
961 			argidx++;
962 			cli_config.y_flip = 1;
963 		}
964 		else if (!strcmp(argv[argidx], "-mpsnr"))
965 		{
966 			argidx += 3;
967 			if (argidx > argc)
968 			{
969 				printf("ERROR: -mpsnr switch with less than 2 arguments\n");
970 				return 1;
971 			}
972 
973 			cli_config.low_fstop = atoi(argv[argidx - 2]);
974 			cli_config.high_fstop = atoi(argv[argidx - 1]);
975 			if (cli_config.high_fstop < cli_config.low_fstop)
976 			{
977 				printf("ERROR: -mpsnr switch <low> is greater than the <high>\n");
978 				return 1;
979 			}
980 		}
981 		// Option: Encode a 3D image from an array of 2D images.
982 		else if (!strcmp(argv[argidx], "-array"))
983 		{
984 			// Only supports compressing
985 			if (!(operation & ASTCENC_STAGE_COMPRESS))
986 			{
987 				printf("ERROR: -array switch is only valid for compression\n");
988 				return 1;
989 			}
990 
991 			// Image depth must be specified.
992 			if (argidx + 2 > argc)
993 			{
994 				printf("ERROR: -array switch with no argument\n");
995 				return 1;
996 			}
997 			argidx++;
998 
999 			// Read array size (image depth).
1000 			if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0)
1001 			{
1002 				printf("ERROR: -array size '%s' is invalid\n", argv[argidx]);
1003 				return 1;
1004 			}
1005 
1006 			if ((cli_config.array_size > 1) && (config.block_z == 1))
1007 			{
1008 				printf("ERROR: -array with 3D input data for a 2D output format\n");
1009 				return 1;
1010 			}
1011 			argidx++;
1012 		}
1013 #if defined(ASTCENC_DIAGNOSTICS)
1014 		else if (!strcmp(argv[argidx], "-dtrace-out"))
1015 		{
1016 			argidx += 2;
1017 			if (argidx > argc)
1018 			{
1019 				printf("ERROR: -dtrace-out switch with no argument\n");
1020 				return 1;
1021 			}
1022 
1023 			config.trace_file_path = argv[argidx - 1];
1024 		}
1025 #endif
1026 		else if (!strcmp(argv[argidx], "-privateProfile"))
1027 		{
1028 			argidx += 2; // skip 2 chatacters to get next parameter
1029 			config.privateProfile = static_cast<QualityProfile>(atoi(argv[argidx - 1]));
1030 		}
1031 		else // check others as well
1032 		{
1033 			printf("ERROR: Argument '%s' not recognized\n", argv[argidx]);
1034 			return 1;
1035 		}
1036 	}
1037 
1038 	if (cli_config.thread_count <= 0)
1039 	{
1040 		cli_config.thread_count = get_cpu_count();
1041 	}
1042 
1043 #if defined(ASTCENC_DIAGNOSTICS)
1044 	// Force single threaded for diagnostic builds
1045 	cli_config.thread_count = 1;
1046 
1047 	if (!config.trace_file_path)
1048 	{
1049 		printf("ERROR: Diagnostics builds must set -dtrace-out\n");
1050 		return 1;
1051 	}
1052 #endif
1053 
1054 	return 0;
1055 }
1056 
1057 /**
1058  * @brief Print the config settings in a human readable form.
1059  *
1060  * @param[in] cli_config   Command line config.
1061  * @param[in] config       Codec configuration.
1062  */
print_astcenc_config(const cli_config_options & cli_config,const astcenc_config & config)1063 static void print_astcenc_config(
1064 	const cli_config_options& cli_config,
1065 	const astcenc_config& config
1066 ) {
1067 	// Print all encoding settings unless specifically told otherwise
1068 	if (!cli_config.silentmode)
1069 	{
1070 		printf("Compressor settings\n");
1071 		printf("===================\n\n");
1072 
1073 		switch (config.profile)
1074 		{
1075 		case ASTCENC_PRF_LDR:
1076 			printf("    Color profile:              LDR linear\n");
1077 			break;
1078 		case ASTCENC_PRF_LDR_SRGB:
1079 			printf("    Color profile:              LDR sRGB\n");
1080 			break;
1081 		case ASTCENC_PRF_HDR_RGB_LDR_A:
1082 			printf("    Color profile:              HDR RGB + LDR A\n");
1083 			break;
1084 		case ASTCENC_PRF_HDR:
1085 			printf("    Color profile:              HDR RGBA\n");
1086 			break;
1087 		}
1088 
1089 		if (config.block_z == 1)
1090 		{
1091 			printf("    Block size:                 %ux%u\n", config.block_x, config.block_y);
1092 		}
1093 		else
1094 		{
1095 			printf("    Block size:                 %ux%ux%u\n", config.block_x, config.block_y, config.block_z);
1096 		}
1097 
1098 		printf("    Bitrate:                    %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z));
1099 		printf("    RGB alpha scale weight:     %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT));
1100 		if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT))
1101 		{
1102 			printf("    Radius RGB alpha scale:     %u texels\n", config.a_scale_radius);
1103 		}
1104 
1105 		printf("    R component weight:         %g\n", static_cast<double>(config.cw_r_weight));
1106 		printf("    G component weight:         %g\n", static_cast<double>(config.cw_g_weight));
1107 		printf("    B component weight:         %g\n", static_cast<double>(config.cw_b_weight));
1108 		printf("    A component weight:         %g\n", static_cast<double>(config.cw_a_weight));
1109 		printf("    Partition cutoff:           %u partitions\n", config.tune_partition_count_limit);
1110 		printf("    Partition index cutoff:     %u partition ids\n", config.tune_partition_index_limit);
1111 		printf("    PSNR cutoff:                %g dB\n", static_cast<double>(config.tune_db_limit));
1112 		printf("    3 partition cutoff:         %g\n", static_cast<double>(config.tune_2_partition_early_out_limit_factor));
1113 		printf("    4 partition cutoff:         %g\n", static_cast<double>(config.tune_3_partition_early_out_limit_factor));
1114 		printf("    2 plane correlation cutoff: %g\n", static_cast<double>(config.tune_2_plane_early_out_limit_correlation));
1115 		printf("    Block mode centile cutoff:  %g%%\n", static_cast<double>(config.tune_block_mode_limit));
1116 		printf("    Candidate cutoff:           %u candidates\n", config.tune_candidate_limit);
1117 		printf("    Refinement cutoff:          %u iterations\n", config.tune_refinement_limit);
1118 		printf("    Compressor thread count:    %d\n", cli_config.thread_count);
1119 		printf("\n");
1120 	}
1121 }
1122 
1123 /**
1124  * @brief Get the value of a single pixel in an image.
1125  *
1126  * Note, this implementation is not particularly optimal as it puts format
1127  * checks in the inner-most loop. For the CLI preprocess passes this is deemed
1128  * acceptable as these are not performance critical paths.
1129  *
1130  * @param[in] img   The output image.
1131  * @param     x     The pixel x coordinate.
1132  * @param     y     The pixel y coordinate.
1133  * @param     z     The pixel z coordinate.
1134  *
1135  * @return      pixel   The pixel color value to write.
1136  */
image_get_pixel(const astcenc_image & img,unsigned int x,unsigned int y,unsigned int z)1137 static vfloat4 image_get_pixel(
1138 	const astcenc_image& img,
1139 	unsigned int x,
1140 	unsigned int y,
1141 	unsigned int z
1142 ) {
1143 	// We should never escape bounds
1144 	assert(x < img.dim_x);
1145 	assert(y < img.dim_y);
1146 	assert(z < img.dim_z);
1147 
1148 	if (img.data_type == ASTCENC_TYPE_U8)
1149 	{
1150 		uint8_t* data = static_cast<uint8_t*>(img.data[z]);
1151 
1152 		float r = data[(4 * img.dim_x * y) + (4 * x    )] / 255.0f;
1153 		float g = data[(4 * img.dim_x * y) + (4 * x + 1)] / 255.0f;
1154 		float b = data[(4 * img.dim_x * y) + (4 * x + 2)] / 255.0f;
1155 		float a = data[(4 * img.dim_x * y) + (4 * x + 3)] / 255.0f;
1156 
1157 		return vfloat4(r, g, b, a);
1158 	}
1159 	else if (img.data_type == ASTCENC_TYPE_F16)
1160 	{
1161 		uint16_t* data = static_cast<uint16_t*>(img.data[z]);
1162 
1163 		vint4 colori(
1164 			data[(4 * img.dim_x * y) + (4 * x    )],
1165 			data[(4 * img.dim_x * y) + (4 * x + 1)],
1166 			data[(4 * img.dim_x * y) + (4 * x + 2)],
1167 			data[(4 * img.dim_x * y) + (4 * x + 3)]
1168 		);
1169 
1170 		return float16_to_float(colori);
1171 	}
1172 	else // if (img.data_type == ASTCENC_TYPE_F32)
1173 	{
1174 		assert(img.data_type == ASTCENC_TYPE_F32);
1175 		float* data = static_cast<float*>(img.data[z]);
1176 
1177 		return vfloat4(
1178 			data[(4 * img.dim_x * y) + (4 * x    )],
1179 			data[(4 * img.dim_x * y) + (4 * x + 1)],
1180 			data[(4 * img.dim_x * y) + (4 * x + 2)],
1181 			data[(4 * img.dim_x * y) + (4 * x + 3)]
1182 		);
1183 	}
1184 }
1185 
1186 /**
1187  * @brief Set the value of a single pixel in an image.
1188  *
1189  * @param[out] img     The output image; must use F32 texture components.
1190  * @param      x       The pixel x coordinate.
1191  * @param      y       The pixel y coordinate.
1192  * @param      z       The pixel z coordinate.
1193  * @param      pixel   The pixel color value to write.
1194  */
image_set_pixel(astcenc_image & img,unsigned int x,unsigned int y,unsigned int z,vfloat4 pixel)1195 static void image_set_pixel(
1196 	astcenc_image& img,
1197 	unsigned int x,
1198 	unsigned int y,
1199 	unsigned int z,
1200 	vfloat4 pixel
1201 ) {
1202 	// We should never escape bounds
1203 	assert(x < img.dim_x);
1204 	assert(y < img.dim_y);
1205 	assert(z < img.dim_z);
1206 	assert(img.data_type == ASTCENC_TYPE_F32);
1207 
1208 	float* data = static_cast<float*>(img.data[z]);
1209 
1210 	data[(4 * img.dim_x * y) + (4 * x    )] = pixel.lane<0>();
1211 	data[(4 * img.dim_x * y) + (4 * x + 1)] = pixel.lane<1>();
1212 	data[(4 * img.dim_x * y) + (4 * x + 2)] = pixel.lane<2>();
1213 	data[(4 * img.dim_x * y) + (4 * x + 3)] = pixel.lane<3>();
1214 }
1215 
1216 /**
1217  * @brief Create a copy of @c input with forced unit-length normal vectors.
1218  *
1219  * It is assumed that all normal vectors are stored in the RGB components, and
1220  * stored in a packed unsigned range of [0,1] which must be unpacked prior
1221  * normalization. Data must then be repacked into this form for handing over to
1222  * the core codec.
1223  *
1224  * @param[in]  input    The input image.
1225  * @param[out] output   The output image, must use F32 components.
1226  */
image_preprocess_normalize(const astcenc_image & input,astcenc_image & output)1227 static void image_preprocess_normalize(
1228 	const astcenc_image& input,
1229 	astcenc_image& output
1230 ) {
1231 	for (unsigned int z = 0; z < input.dim_z; z++)
1232 	{
1233 		for (unsigned int y = 0; y < input.dim_y; y++)
1234 		{
1235 			for (unsigned int x = 0; x < input.dim_x; x++)
1236 			{
1237 				vfloat4 pixel = image_get_pixel(input, x, y, z);
1238 
1239 				// Stash alpha component and zero
1240 				float a = pixel.lane<3>();
1241 				pixel.set_lane<3>(0.0f);
1242 
1243 				// Decode [0,1] normals to [-1,1]
1244 				pixel.set_lane<0>((pixel.lane<0>() * 2.0f) - 1.0f);
1245 				pixel.set_lane<1>((pixel.lane<1>() * 2.0f) - 1.0f);
1246 				pixel.set_lane<2>((pixel.lane<2>() * 2.0f) - 1.0f);
1247 
1248 				// Normalize pixel and restore alpha
1249 				pixel = normalize(pixel);
1250 				pixel.set_lane<3>(a);
1251 
1252 				// Encode [-1,1] normals to [0,1]
1253 				pixel.set_lane<0>((pixel.lane<0>() + 1.0f) / 2.0f);
1254 				pixel.set_lane<1>((pixel.lane<1>() + 1.0f) / 2.0f);
1255 				pixel.set_lane<2>((pixel.lane<2>() + 1.0f) / 2.0f);
1256 
1257 				image_set_pixel(output, x, y, z, pixel);
1258 			}
1259 		}
1260 	}
1261 }
1262 
/**
 * @brief Linearize an sRGB value.
 *
 * Values at or below the 0.04045 breakpoint use the linear segment of the
 * transfer function; larger values use the power-law segment.
 *
 * @param a   The sRGB gamma-encoded value.
 *
 * @return The linearized value.
 */
static float srgb_to_linear(
	float a
) {
	const float breakpoint = 0.04045f;

	return (a <= breakpoint)
	     ? a * (1.0f / 12.92f)
	     : powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
}
1278 
/**
 * @brief sRGB gamma-encode a linear value.
 *
 * Values at or below the 0.0031308 breakpoint use the linear segment of the
 * transfer function; larger values use the power-law segment.
 *
 * @param a   The linear value.
 *
 * @return The gamma encoded value.
 */
static float linear_to_srgb(
	float a
) {
	const float breakpoint = 0.0031308f;

	return (a <= breakpoint)
	     ? a * 12.92f
	     : 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
}
1294 
1295 /**
1296  * @brief Create a copy of @c input with premultiplied color data.
1297  *
1298  * If we are compressing sRGB data we linearize the data prior to
1299  * premultiplication and re-gamma-encode afterwards.
1300  *
1301  * @param[in]  input     The input image.
1302  * @param[out] output    The output image, must use F32 components.
1303  * @param      profile   The encoding profile.
1304  */
image_preprocess_premultiply(const astcenc_image & input,astcenc_image & output,astcenc_profile profile)1305 static void image_preprocess_premultiply(
1306 	const astcenc_image& input,
1307 	astcenc_image& output,
1308 	astcenc_profile profile
1309 ) {
1310 	for (unsigned int z = 0; z < input.dim_z; z++)
1311 	{
1312 		for (unsigned int y = 0; y < input.dim_y; y++)
1313 		{
1314 			for (unsigned int x = 0; x < input.dim_x; x++)
1315 			{
1316 				vfloat4 pixel = image_get_pixel(input, x, y, z);
1317 
1318 				// Linearize sRGB
1319 				if (profile == ASTCENC_PRF_LDR_SRGB)
1320 				{
1321 					pixel.set_lane<0>(srgb_to_linear(pixel.lane<0>()));
1322 					pixel.set_lane<1>(srgb_to_linear(pixel.lane<1>()));
1323 					pixel.set_lane<2>(srgb_to_linear(pixel.lane<2>()));
1324 				}
1325 
1326 				// Premultiply pixel in linear-space
1327 				pixel.set_lane<0>(pixel.lane<0>() * pixel.lane<3>());
1328 				pixel.set_lane<1>(pixel.lane<1>() * pixel.lane<3>());
1329 				pixel.set_lane<2>(pixel.lane<2>() * pixel.lane<3>());
1330 
1331 				// Gamma-encode sRGB
1332 				if (profile == ASTCENC_PRF_LDR_SRGB)
1333 				{
1334 					pixel.set_lane<0>(linear_to_srgb(pixel.lane<0>()));
1335 					pixel.set_lane<1>(linear_to_srgb(pixel.lane<1>()));
1336 					pixel.set_lane<2>(linear_to_srgb(pixel.lane<2>()));
1337 				}
1338 
1339 				image_set_pixel(output, x, y, z, pixel);
1340 			}
1341 		}
1342 	}
1343 }
1344 
#if QUALITY_CONTROL
/** @brief PSNR reported for a channel whose accumulated error is zero. */
constexpr double MAX_PSNR = 99.9;

/** @brief Peak sample value used as the signal level in the PSNR formula. */
constexpr double MAX_VALUE = 255;

/** @brief PSNR the red channel must exceed for the check to pass. */
constexpr double THRESHOLD_R = 30.0;

/** @brief PSNR the green channel must exceed for the check to pass. */
constexpr double THRESHOLD_G = 30.0;

/** @brief PSNR the blue channel must exceed for the check to pass. */
constexpr double THRESHOLD_B = 30.0;

/** @brief PSNR the alpha channel must exceed for the check to pass. */
constexpr double THRESHOLD_A = 30.0;

/** @brief PSNR the combined RGB channels must exceed for the check to pass. */
constexpr double THRESHOLD_RGB = 30.0;

/** @brief Logarithm base used when converting an error ratio into PSNR. */
constexpr double LOG_BASE = 10.0;

/**
 * @brief Convert an accumulated error total into a PSNR value.
 *
 * @param error_total    The accumulated error.
 * @param sample_count   The number of samples the error is averaged over.
 *
 * @return @c MAX_PSNR if the total is zero, else the PSNR computed from the
 *         mean error and @c MAX_VALUE.
 */
static double psnr_from_error_total(
	uint64_t error_total,
	double sample_count
) {
	// A zero error would mean dividing by zero below; report the cap instead
	if (error_total == 0)
	{
		return MAX_PSNR;
	}

	double mean_error = static_cast<double>(error_total) / sample_count;

	// log(x) / log(base) is the logarithm of x in LOG_BASE
	return LOG_BASE * log((MAX_VALUE * MAX_VALUE) / mean_error) / log(LOG_BASE);
}

/**
 * @brief Check per-channel and combined RGB PSNR against fixed thresholds.
 *
 * Sums the per-block error values of each channel, converts each total to a
 * PSNR, prints the five results to stdout, and compares them against the
 * @c THRESHOLD_* constants. The combined RGB figure uses the sum of the R, G
 * and B totals averaged over three times as many samples.
 *
 * @param mseIn      One array per channel, each holding @c blockNum error
 *                   values. Presumably each value is the summed squared error
 *                   of one block (TODO confirm against the codec).
 * @param blockNum   The number of entries in each channel array.
 * @param blockXYZ   The number of texels covered by one entry.
 *
 * @return @c true if every PSNR is strictly above its threshold.
 */
bool CheckQuality(int32_t* mseIn[RGBA_COM], int blockNum, int blockXYZ)
{
	const double threshold[RGBA_COM + 1] = { THRESHOLD_R, THRESHOLD_G, THRESHOLD_B, THRESHOLD_A, THRESHOLD_RGB};
	double psnr[RGBA_COM + 1];

	// Slot RGBA_COM accumulates the combined R + G + B total
	uint64_t mseTotal[RGBA_COM + 1] = { 0, 0, 0, 0, 0};

	for (int i = R_COM; i < RGBA_COM; i++)
	{
		const int32_t* mse = mseIn[i];
		for (int j = 0; j < blockNum; j++)
		{
			mseTotal[i] += mse[j];
			if (i != A_COM)
			{
				mseTotal[RGBA_COM] += mse[j];
			}
		}
	}

	// Widen before multiplying so the texel count of a very large image
	// cannot overflow the int arithmetic used by the arguments
	const double texelCount = static_cast<double>(blockNum) * static_cast<double>(blockXYZ);

	for (int i = R_COM; i < RGBA_COM; i++)
	{
		psnr[i] = psnr_from_error_total(mseTotal[i], texelCount);
	}

	// The combined total spans every channel except alpha
	psnr[RGBA_COM] = psnr_from_error_total(mseTotal[RGBA_COM], texelCount * (RGBA_COM - 1));

	printf("astc psnr r%f g%f b%f a%f rgb%f\n",
	       psnr[R_COM], psnr[G_COM], psnr[B_COM], psnr[A_COM],
	       psnr[RGBA_COM]);

	return (psnr[R_COM] > threshold[R_COM]) && (psnr[G_COM] > threshold[G_COM])
	    && (psnr[B_COM] > threshold[B_COM]) && (psnr[A_COM] > threshold[A_COM])
	    && (psnr[RGBA_COM] > threshold[RGBA_COM]);
}
#endif
1391 
1392 /**
1393  * @brief The main entry point.
1394  *
1395  * @param argc   The number of arguments.
1396  * @param argv   The vector of arguments.
1397  *
1398  * @return 0 on success, non-zero otherwise.
1399  */
main(int argc,char ** argv)1400 int main(
1401 	int argc,
1402 	char **argv
1403 ) {
1404 	double start_time = get_time();
1405 
1406 	if (argc < 2)
1407 	{
1408 		astcenc_print_shorthelp();
1409 		return 0;
1410 	}
1411 
1412 	astcenc_operation operation;
1413 	astcenc_profile profile;
1414 	int error = parse_commandline_options(argc, argv, operation, profile);
1415 	if (error)
1416 	{
1417 		return 1;
1418 	}
1419 
1420 	switch (operation)
1421 	{
1422 	case ASTCENC_OP_HELP:
1423 		astcenc_print_longhelp();
1424 		return 0;
1425 	case ASTCENC_OP_VERSION:
1426 		astcenc_print_header();
1427 		return 0;
1428 	default:
1429 		break;
1430 	}
1431 
1432 
1433 	std::string input_filename = argc >= 3 ? argv[2] : "";
1434 	std::string output_filename = argc >= 4 ? argv[3] : "";
1435 
1436 	if (input_filename.empty())
1437 	{
1438 		printf("ERROR: Input file not specified\n");
1439 		return 1;
1440 	}
1441 
1442 	if (output_filename.empty())
1443 	{
1444 		printf("ERROR: Output file not specified\n");
1445 		return 1;
1446 	}
1447 
1448 	// TODO: Handle RAII resources so they get freed when out of scope
1449 	// Load the compressed input file if needed
1450 
1451 	// This has to come first, as the block size is in the file header
1452 	astc_compressed_image image_comp {};
1453 	if (operation & ASTCENC_STAGE_LD_COMP)
1454 	{
1455 		if (ends_with(input_filename, ".astc"))
1456 		{
1457 			error = load_cimage(input_filename.c_str(), image_comp);
1458 			if (error)
1459 			{
1460 				return 1;
1461 			}
1462 		}
1463 		else if (ends_with(input_filename, ".ktx"))
1464 		{
1465 			bool is_srgb;
1466 			error = load_ktx_compressed_image(input_filename.c_str(), is_srgb, image_comp);
1467 			if (error)
1468 			{
1469 				return 1;
1470 			}
1471 
1472 			if (is_srgb && (profile != ASTCENC_PRF_LDR_SRGB))
1473 			{
1474 				printf("WARNING: Input file is sRGB, but decompressing as linear\n");
1475 			}
1476 
1477 			if (!is_srgb && (profile == ASTCENC_PRF_LDR_SRGB))
1478 			{
1479 				printf("WARNING: Input file is linear, but decompressing as sRGB\n");
1480 			}
1481 		}
1482 		else
1483 		{
1484 			printf("ERROR: Unknown compressed input file type\n");
1485 			return 1;
1486 		}
1487 	}
1488 
1489 	astcenc_config config {};
1490 	astcenc_preprocess preprocess;
1491 	error = init_astcenc_config(argc, argv, profile, operation, image_comp, preprocess, config);
1492 	if (error)
1493 	{
1494 		return 1;
1495 	}
1496 
1497 	// Initialize cli_config_options with default values
1498 	cli_config_options cli_config { 0, 1, false, false, -10, 10,
1499 		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A },
1500 		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A } };
1501 
1502 	error = edit_astcenc_config(argc, argv, operation, cli_config, config);
1503 	if (error)
1504 	{
1505 		return 1;
1506 	}
1507 
1508 	astcenc_image* image_uncomp_in = nullptr ;
1509 	unsigned int image_uncomp_in_component_count = 0;
1510 	bool image_uncomp_in_is_hdr = false;
1511 	astcenc_image* image_decomp_out = nullptr;
1512 
1513 	// TODO: Handle RAII resources so they get freed when out of scope
1514 	astcenc_error    codec_status;
1515 	astcenc_context* codec_context;
1516 
1517 
1518 	// Preflight - check we have valid extensions for storing a file
1519 	if (operation & ASTCENC_STAGE_ST_NCOMP)
1520 	{
1521 		int bitness = get_output_filename_enforced_bitness(output_filename.c_str());
1522 		if (bitness < 0)
1523 		{
1524 			const char *eptr = strrchr(output_filename.c_str(), '.');
1525 			eptr = eptr ? eptr : "";
1526 			printf("ERROR: Unknown uncompressed output file type '%s'\n", eptr);
1527 			return 1;
1528 		}
1529 	}
1530 
1531 	if (operation & ASTCENC_STAGE_ST_COMP)
1532 	{
1533 #if defined(_WIN32)
1534 		bool is_null = output_filename == "NUL" || output_filename == "nul";
1535 #else
1536 		bool is_null = output_filename == "/dev/null";
1537 #endif
1538 
1539 		if (!(is_null || ends_with(output_filename, ".astc") || ends_with(output_filename, ".ktx")))
1540 		{
1541 			const char *eptr = strrchr(output_filename.c_str(), '.');
1542 			eptr = eptr ? eptr : "";
1543 			printf("ERROR: Unknown compressed output file type '%s'\n", eptr);
1544 			return 1;
1545 		}
1546 	}
1547 
1548 	codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context);
1549 	if (codec_status != ASTCENC_SUCCESS)
1550 	{
1551 		printf("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status));
1552 		return 1;
1553 	}
1554 
1555 	// Load the uncompressed input file if needed
1556 	if (operation & ASTCENC_STAGE_LD_NCOMP)
1557 	{
1558 		image_uncomp_in = load_uncomp_file(
1559 		    input_filename.c_str(), cli_config.array_size, cli_config.y_flip,
1560 		    image_uncomp_in_is_hdr, image_uncomp_in_component_count);
1561 		if (!image_uncomp_in)
1562 		{
1563 			printf ("ERROR: Failed to load uncompressed image file\n");
1564 			return 1;
1565 		}
1566 
1567 
1568 		if (preprocess != ASTCENC_PP_NONE)
1569 		{
1570 			// Allocate a float image so we can avoid additional quantization,
1571 			// as e.g. premultiplication can result in fractional color values
1572 			astcenc_image* image_pp = alloc_image(32,
1573 			                                      image_uncomp_in->dim_x,
1574 			                                      image_uncomp_in->dim_y,
1575 			                                      image_uncomp_in->dim_z);
1576 			if (!image_pp)
1577 			{
1578 				printf ("ERROR: Failed to allocate preprocessed image\n");
1579 				return 1;
1580 			}
1581 
1582 			if (preprocess == ASTCENC_PP_NORMALIZE)
1583 			{
1584 				image_preprocess_normalize(*image_uncomp_in, *image_pp);
1585 			}
1586 
1587 			if (preprocess == ASTCENC_PP_PREMULTIPLY)
1588 			{
1589 				image_preprocess_premultiply(*image_uncomp_in, *image_pp,
1590 				                             config.profile);
1591 			}
1592 
1593 			// Delete the original as we no longer need it
1594 			free_image(image_uncomp_in);
1595 			image_uncomp_in = image_pp;
1596 		}
1597 
1598 		if (!cli_config.silentmode)
1599 		{
1600 			printf("Source image\n");
1601 			printf("============\n\n");
1602 			printf("    Source:                     %s\n", input_filename.c_str());
1603 			printf("    Color profile:              %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR");
1604 			if (image_uncomp_in->dim_z > 1)
1605 			{
1606 				printf("    Dimensions:                 3D, %ux%ux%u\n",
1607 				       image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z);
1608 			}
1609 			else
1610 			{
1611 				printf("    Dimensions:                 2D, %ux%u\n",
1612 				       image_uncomp_in->dim_x, image_uncomp_in->dim_y);
1613 			}
1614 			printf("    Components:                 %d\n\n", image_uncomp_in_component_count);
1615 		}
1616 	}
1617 
1618 	double start_coding_time = get_time();
1619 
1620 	double image_size = 0.0;
1621 	if (image_uncomp_in)
1622 	{
1623 		image_size = static_cast<double>(image_uncomp_in->dim_x) *
1624 		             static_cast<double>(image_uncomp_in->dim_y) *
1625 		             static_cast<double>(image_uncomp_in->dim_z);
1626 	}
1627 	else
1628 	{
1629 		image_size = static_cast<double>(image_comp.dim_x) *
1630 		             static_cast<double>(image_comp.dim_y) *
1631 		             static_cast<double>(image_comp.dim_z);
1632 	}
1633 
1634 	// Compress an image
1635 	if (operation & ASTCENC_STAGE_COMPRESS)
1636 	{
1637 		print_astcenc_config(cli_config, config);
1638 
1639 		unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x;
1640 		unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y;
1641 		unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z;
1642 		size_t buffer_size = blocks_x * blocks_y * blocks_z * 16;
1643 		uint8_t* buffer = new uint8_t[buffer_size];
1644 
1645 		compression_workload work;
1646 		work.context = codec_context;
1647 		image_uncomp_in->dim_stride = image_uncomp_in->dim_x;
1648 		work.image = image_uncomp_in;
1649 		work.swizzle = cli_config.swz_encode;
1650 		work.data_out = buffer;
1651 		work.data_len = buffer_size;
1652 		work.error = ASTCENC_SUCCESS;
1653 #if QUALITY_CONTROL
1654 		work.calQualityEnable = true;
1655 		work.mse[R_COM] = work.mse[G_COM] = work.mse[B_COM] = work.mse[A_COM] = nullptr;
1656 		if (work.calQualityEnable) {
1657 		for (int i = R_COM; i < RGBA_COM; i++) {
1658 				work.mse[i] = (int32_t*)calloc(blocks_x * blocks_y, sizeof(int32_t));
1659 				if (!work.mse[i]) {
1660 					printf("quality control calloc failed");
1661 					return -1;
1662 				}
1663 			}
1664 		}
1665 #endif
1666 		// Only launch worker threads for multi-threaded use - it makes basic
1667 		// single-threaded profiling and debugging a little less convoluted
1668 		if (cli_config.thread_count > 1)
1669 		{
1670 			launch_threads(cli_config.thread_count, compression_workload_runner, &work);
1671 		}
1672 		else
1673 		{
1674 			work.error = astcenc_compress_image(
1675 			    work.context, work.image, &work.swizzle,
1676 			    work.data_out, work.data_len,
1677 #if QUALITY_CONTROL
1678 			    work.calQualityEnable, work.mse,
1679 #endif
1680 			    0);
1681 		}
1682 
1683 		if (work.error != ASTCENC_SUCCESS)
1684 		{
1685 			printf("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(work.error));
1686 			return 1;
1687 		}
1688 #if QUALITY_CONTROL
1689 		if (work.calQualityEnable && !CheckQuality(work.mse, blocks_x * blocks_y, config.block_x * config.block_y)) {
1690 		    work.error = ASTCENC_ERR_BAD_QUALITY_CHECK;
1691 		}
1692 		if (work.calQualityEnable) {
1693 			for (int i = R_COM; i < RGBA_COM; i++) {
1694 				if (work.mse[i]) {
1695 					free(work.mse[i]);
1696 				}
1697 			}
1698 		}
1699 #endif
1700 		image_comp.block_x = config.block_x;
1701 		image_comp.block_y = config.block_y;
1702 		image_comp.block_z = config.block_z;
1703 		image_comp.dim_x = image_uncomp_in->dim_x;
1704 		image_comp.dim_y = image_uncomp_in->dim_y;
1705 		image_comp.dim_z = image_uncomp_in->dim_z;
1706 		image_comp.data = buffer;
1707 		image_comp.data_len = buffer_size;
1708 	}
1709 
1710 	// Decompress an image
1711 	if (operation & ASTCENC_STAGE_DECOMPRESS)
1712 	{
1713 		int out_bitness = get_output_filename_enforced_bitness(output_filename.c_str());
1714 		if (out_bitness == 0)
1715 		{
1716 			bool is_hdr = (config.profile == ASTCENC_PRF_HDR) || (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A);
1717 			out_bitness = is_hdr ? 16 : 8;
1718 		}
1719 
1720 		image_decomp_out = alloc_image(
1721 		    out_bitness, image_comp.dim_x, image_comp.dim_y, image_comp.dim_z);
1722 
1723 		decompression_workload work;
1724 		work.context = codec_context;
1725 		work.data = image_comp.data;
1726 		work.data_len = image_comp.data_len;
1727 		work.image_out = image_decomp_out;
1728 		work.swizzle = cli_config.swz_decode;
1729 		work.error = ASTCENC_SUCCESS;
1730 
1731 		// Only launch worker threads for multi-threaded use - it makes basic
1732 		// single-threaded profiling and debugging a little less convoluted
1733 		if (cli_config.thread_count > 1)
1734 		{
1735 			launch_threads(cli_config.thread_count, decompression_workload_runner, &work);
1736 		}
1737 		else
1738 		{
1739 			work.error = astcenc_decompress_image(
1740 			    work.context, work.data, work.data_len,
1741 			    work.image_out, &work.swizzle, 0);
1742 		}
1743 
1744 		if (work.error != ASTCENC_SUCCESS)
1745 		{
1746 			printf("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(codec_status));
1747 			return 1;
1748 		}
1749 	}
1750 
1751 	double end_coding_time = get_time();
1752 
1753 	// Print metrics in comparison mode
1754 	if (operation & ASTCENC_STAGE_COMPARE)
1755 	{
1756 		bool is_normal_map = config.flags & ASTCENC_FLG_MAP_NORMAL;
1757 
1758 		compute_error_metrics(
1759 		    image_uncomp_in_is_hdr, is_normal_map, image_uncomp_in_component_count,
1760 		    image_uncomp_in, image_decomp_out, cli_config.low_fstop, cli_config.high_fstop);
1761 	}
1762 
1763 	// Store compressed image
1764 	if (operation & ASTCENC_STAGE_ST_COMP)
1765 	{
1766 		if (ends_with(output_filename, ".astc"))
1767 		{
1768 			error = store_cimage(image_comp, output_filename.c_str());
1769 			if (error)
1770 			{
1771 				printf ("ERROR: Failed to store compressed image\n");
1772 				return 1;
1773 			}
1774 		}
1775 		else if (ends_with(output_filename, ".ktx"))
1776 		{
1777 			bool srgb = profile == ASTCENC_PRF_LDR_SRGB;
1778 			error = store_ktx_compressed_image(image_comp, output_filename.c_str(), srgb);
1779 			if (error)
1780 			{
1781 				printf ("ERROR: Failed to store compressed image\n");
1782 				return 1;
1783 			}
1784 		}
1785 		else
1786 		{
1787 #if defined(_WIN32)
1788 			bool is_null = output_filename == "NUL" || output_filename == "nul";
1789 #else
1790 			bool is_null = output_filename == "/dev/null";
1791 #endif
1792 			if (!is_null)
1793 			{
1794 				printf("ERROR: Unknown compressed output file type\n");
1795 				return 1;
1796 			}
1797 		}
1798 	}
1799 
1800 	// Store decompressed image
1801 	if (operation & ASTCENC_STAGE_ST_NCOMP)
1802 	{
1803 #if defined(_WIN32)
1804 		bool is_null = output_filename == "NUL" || output_filename == "nul";
1805 #else
1806 		bool is_null = output_filename == "/dev/null";
1807 #endif
1808 
1809 		if (!is_null)
1810 		{
1811 			bool store_result = store_ncimage(image_decomp_out, output_filename.c_str(),
1812 			                                  cli_config.y_flip);
1813 			if (!store_result)
1814 			{
1815 				printf("ERROR: Failed to write output image %s\n", output_filename.c_str());
1816 				return 1;
1817 			}
1818 		}
1819 	}
1820 
1821 	free_image(image_uncomp_in);
1822 	free_image(image_decomp_out);
1823 	astcenc_context_free(codec_context);
1824 
1825 	delete[] image_comp.data;
1826 
1827 	if ((operation & ASTCENC_STAGE_COMPARE) || (!cli_config.silentmode))
1828 	{
1829 		double end_time = get_time();
1830 		double tex_rate = image_size / (end_coding_time - start_coding_time);
1831 		tex_rate = tex_rate / 1000000.0;
1832 
1833 		printf("Performance metrics\n");
1834 		printf("===================\n\n");
1835 		printf("    Total time:                %8.4f s\n", end_time - start_time);
1836 		printf("    Coding time:               %8.4f s\n", end_coding_time - start_coding_time);
1837 		printf("    Coding rate:               %8.4f MT/s\n", tex_rate);
1838 	}
1839 
1840 	return 0;
1841 }
1842