• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2024 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for codec library front-end.
20  */
21 
22 #include "astcenc.h"
23 #include "astcenccli_internal.h"
24 
25 #if defined(_WIN32)
26 	#include <io.h>
27 	#define isatty _isatty
28 #else
29 	#include <unistd.h>
30 #endif
31 #include <cassert>
32 #include <cstring>
33 #include <functional>
34 #include <string>
35 #include <sstream>
36 #include <vector>
37 #include <memory>
38 
39 /* ============================================================================
40 	Data structure definitions
41 ============================================================================ */
42 
/** @brief Bitfield describing a command line operation as a set of stage bits. */
using astcenc_operation = unsigned int;
44 
45 struct mode_entry
46 {
47 	const char* opt;
48 	astcenc_operation operation;
49 	astcenc_profile decode_mode;
50 };
51 
52 /* ============================================================================
53 	Constants and literals
54 ============================================================================ */
55 
56 /** @brief Stage bit indicating we need to load a compressed image. */
57 static const unsigned int ASTCENC_STAGE_LD_COMP    = 1 << 0;
58 
59 /** @brief Stage bit indicating we need to store a compressed image. */
60 static const unsigned int ASTCENC_STAGE_ST_COMP    = 1 << 1;
61 
62 /** @brief Stage bit indicating we need to load an uncompressed image. */
63 static const unsigned int ASTCENC_STAGE_LD_NCOMP   = 1 << 2;
64 
65 /** @brief Stage bit indicating we need to store an uncompressed image. */
66 static const unsigned int ASTCENC_STAGE_ST_NCOMP   = 1 << 3;
67 
68 /** @brief Stage bit indicating we need compress an image. */
69 static const unsigned int ASTCENC_STAGE_COMPRESS   = 1 << 4;
70 
71 /** @brief Stage bit indicating we need to decompress an image. */
72 static const unsigned int ASTCENC_STAGE_DECOMPRESS = 1 << 5;
73 
74 /** @brief Stage bit indicating we need to compare an image with the original input. */
75 static const unsigned int ASTCENC_STAGE_COMPARE    = 1 << 6;
76 
77 /** @brief Operation indicating an unknown request (should never happen). */
78 static const astcenc_operation ASTCENC_OP_UNKNOWN  = 0;
79 
80 /** @brief Operation indicating the user wants to print long-form help text and version info. */
81 static const astcenc_operation ASTCENC_OP_HELP     = 1 << 7;
82 
83 /** @brief Operation indicating the user wants to print short-form help text and version info. */
84 static const astcenc_operation ASTCENC_OP_VERSION  = 1 << 8;
85 
86 /** @brief Operation indicating the user wants to compress and store an image. */
87 static const astcenc_operation ASTCENC_OP_COMPRESS =
88                                ASTCENC_STAGE_LD_NCOMP |
89                                ASTCENC_STAGE_COMPRESS |
90                                ASTCENC_STAGE_ST_COMP;
91 
92 /** @brief Operation indicating the user wants to decompress and store an image. */
93 static const astcenc_operation ASTCENC_OP_DECOMPRESS =
94                                ASTCENC_STAGE_LD_COMP |
95                                ASTCENC_STAGE_DECOMPRESS |
96                                ASTCENC_STAGE_ST_NCOMP;
97 
98 /** @brief Operation indicating the user wants to test a compression setting on an image. */
99 static const astcenc_operation ASTCENC_OP_TEST =
100                                ASTCENC_STAGE_LD_NCOMP |
101                                ASTCENC_STAGE_COMPRESS |
102                                ASTCENC_STAGE_DECOMPRESS |
103                                ASTCENC_STAGE_COMPARE |
104                                ASTCENC_STAGE_ST_NCOMP;
105 
/**
 * @brief Image preprocessing tasks prior to encoding.
 */
enum astcenc_preprocess
{
	/** @brief No image preprocessing. */
	ASTCENC_PP_NONE = 0,

	/** @brief Normal vector unit-length normalization. */
	ASTCENC_PP_NORMALIZE = 1,

	/** @brief Color data alpha premultiplication. */
	ASTCENC_PP_PREMULTIPLY = 2
};
118 
119 /** @brief Decode table for command line operation modes. */
120 static const mode_entry modes[] {
121 	{"-cl",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR},
122 	{"-dl",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR},
123 	{"-tl",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR},
124 	{"-cs",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_LDR_SRGB},
125 	{"-ds",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_LDR_SRGB},
126 	{"-ts",      ASTCENC_OP_TEST,       ASTCENC_PRF_LDR_SRGB},
127 	{"-ch",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR_RGB_LDR_A},
128 	{"-dh",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR_RGB_LDR_A},
129 	{"-th",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR_RGB_LDR_A},
130 	{"-cH",      ASTCENC_OP_COMPRESS,   ASTCENC_PRF_HDR},
131 	{"-dH",      ASTCENC_OP_DECOMPRESS, ASTCENC_PRF_HDR},
132 	{"-tH",      ASTCENC_OP_TEST,       ASTCENC_PRF_HDR},
133 	{"-h",       ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
134 	{"-help",    ASTCENC_OP_HELP,       ASTCENC_PRF_HDR},
135 	{"-v",       ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR},
136 	{"-version", ASTCENC_OP_VERSION,    ASTCENC_PRF_HDR}
137 };
138 
139 /**
140  * @brief Compression workload definition for worker threads.
141  */
142 struct compression_workload
143 {
144 	astcenc_context* context;
145 	astcenc_image* image;
146 	astcenc_swizzle swizzle;
147 	uint8_t* data_out;
148 	size_t data_len;
149 	astcenc_error error;
150 #if QUALITY_CONTROL
151 	bool calQualityEnable;
152 	int32_t *mse[RGBA_COM];
153 #endif
154 };
155 
156 /**
157  * @brief Decompression workload definition for worker threads.
158  */
159 struct decompression_workload
160 {
161 	astcenc_context* context;
162 	uint8_t* data;
163 	size_t data_len;
164 	astcenc_image* image_out;
165 	astcenc_swizzle swizzle;
166 	astcenc_error error;
167 };
168 
/**
 * @brief Callback emitting a progress bar.
 *
 * Prints a 25 cell text bar followed by the numeric value to stdout, ending
 * the line with a carriage return so the next call overwrites it.
 *
 * @param value   The progress value as a percentage; one bar cell is filled
 *                per 4 units. Values outside 0 to 100 (including NaN) are
 *                clamped for the bar, but printed numerically unchanged.
 */
extern "C" void progress_emitter(
	float value
) {
	const unsigned int bar_size = 25;

	// Clamp the fill count so out-of-range input cannot index past the buffer;
	// NaN fails both comparisons and therefore draws an empty bar
	unsigned int parts = 0;
	if (value >= 100.0f)
	{
		parts = bar_size;
	}
	else if (value > 0.0f)
	{
		parts = static_cast<unsigned int>(value / 4.0f);
	}

	// Layout is '[' + bar_size cells + ']' + NUL terminator
	char buffer[bar_size + 3];
	buffer[0] = '[';

	for (unsigned int i = 0; i < bar_size; i++)
	{
		buffer[i + 1] = (i < parts) ? '=' : ' ';
	}

	buffer[bar_size + 1] = ']';
	buffer[bar_size + 2] = '\0';

	printf("    Progress: %s %03.1f%%\r", buffer, static_cast<double>(value));
	fflush(stdout);
}
197 
/**
 * @brief Test if a string argument is a well formed float.
 *
 * @param target   The string to test.
 *
 * @return @c true if the whole string parses as a single float with no
 *         leading whitespace and no trailing characters.
 */
static bool is_float(
	std::string target
) {
	std::istringstream parser(target);

	// Disable whitespace skipping so that leading whitespace fails the parse
	parser >> std::noskipws;

	float parsed;
	parser >> parsed;

	if (parser.fail())
	{
		return false;
	}

	// A successful parse must also have consumed the entire string
	return parser.eof();
}
213 
/**
 * @brief Test if a string ends with a given suffix.
 *
 * @param str      The string to test.
 * @param suffix   The suffix to look for; an empty suffix always matches.
 *
 * @return @c true if the final characters of @c str equal @c suffix.
 */
static bool ends_with(
	const std::string& str,
	const std::string& suffix
) {
	// A suffix longer than the string can never match
	if (suffix.size() > str.size())
	{
		return false;
	}

	size_t tail_start = str.size() - suffix.size();
	return str.compare(tail_start, std::string::npos, suffix) == 0;
}
224 
225 /**
226  * @brief Runner callback function for a compression worker thread.
227  *
228  * @param thread_count   The number of threads in the worker pool.
229  * @param thread_id      The index of this thread in the worker pool.
230  * @param payload        The parameters for this thread.
231  */
compression_workload_runner(int thread_count,int thread_id,void * payload)232 static void compression_workload_runner(
233 	int thread_count,
234 	int thread_id,
235 	void* payload
236 ) {
237 	(void)thread_count;
238 
239 	compression_workload* work = static_cast<compression_workload*>(payload);
240 	astcenc_error error = astcenc_compress_image(
241 	                       work->context, work->image, &work->swizzle,
242 	                       work->data_out, work->data_len,
243 #if QUALITY_CONTROL
244 	                       work->calQualityEnable, work->mse,
245 #endif
246 	                       thread_id);
247 
248 	// This is a racy update, so which error gets returned is a random, but it
249 	// will reliably report an error if an error occurs
250 	if (error != ASTCENC_SUCCESS)
251 	{
252 		work->error = error;
253 	}
254 }
255 
256 /**
257  * @brief Runner callback function for a decompression worker thread.
258  *
259  * @param thread_count   The number of threads in the worker pool.
260  * @param thread_id      The index of this thread in the worker pool.
261  * @param payload        The parameters for this thread.
262  */
decompression_workload_runner(int thread_count,int thread_id,void * payload)263 static void decompression_workload_runner(
264 	int thread_count,
265 	int thread_id,
266 	void* payload
267 ) {
268 	(void)thread_count;
269 
270 	decompression_workload* work = static_cast<decompression_workload*>(payload);
271 	astcenc_error error = astcenc_decompress_image(
272 	                       work->context, work->data, work->data_len,
273 	                       work->image_out, &work->swizzle, thread_id);
274 
275 	// This is a racy update, so which error gets returned is a random, but it
276 	// will reliably report an error if an error occurs
277 	if (error != ASTCENC_SUCCESS)
278 	{
279 		work->error = error;
280 	}
281 }
282 
/**
 * @brief Utility to generate a slice file name from a pattern.
 *
 * Convert "foo/bar.png" in to "foo/bar_<slice>.png"
 *
 * Only a '.' in the final path component counts as the extension separator,
 * so a dotted directory name such as "foo.d/bar" is reported as an error
 * rather than being split inside the directory name.
 *
 * @param basename   The base pattern; must contain a file extension.
 * @param index      The slice index.
 * @param error      Set to true on error (no extension found), false on success.
 *
 * @return The slice file name, or an empty string on error.
 */
static std::string get_slice_filename(
	const std::string& basename,
	unsigned int index,
	bool& error
) {
#if defined(_WIN32)
	const char* dir_separators = "/\\";
#else
	const char* dir_separators = "/";
#endif

	size_t sep = basename.find_last_of('.');
	size_t dir = basename.find_last_of(dir_separators);

	// A dot that precedes the last directory separator is not an extension
	bool dot_in_directory = (sep != std::string::npos) &&
	                        (dir != std::string::npos) &&
	                        (sep < dir);

	if ((sep == std::string::npos) || dot_in_directory)
	{
		error = true;
		return "";
	}

	std::string base = basename.substr(0, sep);
	std::string ext = basename.substr(sep);
	error = false;
	return base + "_" + std::to_string(index) + ext;
}
312 
313 /**
314  * @brief Load a non-astc image file from memory.
315  *
316  * @param filename            The file to load, or a pattern for array loads.
317  * @param dim_z               The number of slices to load.
318  * @param y_flip              Should this image be Y flipped?
319  * @param[out] is_hdr         Is the loaded image HDR?
320  * @param[out] component_count The number of components in the loaded image.
321  *
322  * @return The astc image file, or nullptr on error.
323  */
load_uncomp_file(const char * filename,unsigned int dim_z,bool y_flip,bool & is_hdr,unsigned int & component_count)324 static astcenc_image* load_uncomp_file(
325 	const char* filename,
326 	unsigned int dim_z,
327 	bool y_flip,
328 	bool& is_hdr,
329 	unsigned int& component_count
330 ) {
331 	astcenc_image *image = nullptr;
332 
333 	// For a 2D image just load the image directly
334 	if (dim_z == 1)
335 	{
336 		image = load_ncimage(filename, y_flip, is_hdr, component_count);
337 	}
338 	else
339 	{
340 		bool slice_is_hdr;
341 		unsigned int slice_component_count;
342 		astcenc_image* slice = nullptr;
343 		std::vector<astcenc_image*> slices;
344 
345 		// For a 3D image load an array of slices
346 		for (unsigned int image_index = 0; image_index < dim_z; image_index++)
347 		{
348 			bool error;
349 			std::string slice_name = get_slice_filename(filename, image_index, error);
350 			if (error)
351 			{
352 				print_error("ERROR: Image pattern does not contain file extension: %s\n", filename);
353 				break;
354 			}
355 
356 			slice = load_ncimage(slice_name.c_str(), y_flip,
357 			                     slice_is_hdr, slice_component_count);
358 			if (!slice)
359 			{
360 				break;
361 			}
362 
363 			slices.push_back(slice);
364 
365 			// Check it is not a 3D image
366 			if (slice->dim_z != 1)
367 			{
368 				print_error("ERROR: Image arrays do not support 3D sources: %s\n", slice_name.c_str());
369 				break;
370 			}
371 
372 			// Check slices are consistent with each other
373 			if (image_index != 0)
374 			{
375 				if ((is_hdr != slice_is_hdr) || (component_count != slice_component_count))
376 				{
377 					print_error("ERROR: Image array[0] and [%d] are different formats\n", image_index);
378 					break;
379 				}
380 
381 				if ((slices[0]->dim_x != slice->dim_x) ||
382 				    (slices[0]->dim_y != slice->dim_y) ||
383 				    (slices[0]->dim_z != slice->dim_z))
384 				{
385 					print_error("ERROR: Image array[0] and [%d] are different dimensions\n", image_index);
386 					break;
387 				}
388 			}
389 			else
390 			{
391 				is_hdr = slice_is_hdr;
392 				component_count = slice_component_count;
393 			}
394 		}
395 
396 		// If all slices loaded correctly then repack them into a single image
397 		if (slices.size() == dim_z)
398 		{
399 			unsigned int dim_x = slices[0]->dim_x;
400 			unsigned int dim_y = slices[0]->dim_y;
401 			int bitness = is_hdr ? 16 : 8;
402 			int slice_size = dim_x * dim_y;
403 
404 			image = alloc_image(bitness, dim_x, dim_y, dim_z);
405 
406 			// Combine 2D source images into one 3D image
407 			for (unsigned int z = 0; z < dim_z; z++)
408 			{
409 				if (image->data_type == ASTCENC_TYPE_U8)
410 				{
411 					uint8_t* data8 = static_cast<uint8_t*>(image->data[z]);
412 					uint8_t* data8src = static_cast<uint8_t*>(slices[z]->data[0]);
413 					size_t copy_size = slice_size * 4 * sizeof(uint8_t);
414 					memcpy(data8, data8src, copy_size);
415 				}
416 				else if (image->data_type == ASTCENC_TYPE_F16)
417 				{
418 					uint16_t* data16 = static_cast<uint16_t*>(image->data[z]);
419 					uint16_t* data16src = static_cast<uint16_t*>(slices[z]->data[0]);
420 					size_t copy_size = slice_size * 4 * sizeof(uint16_t);
421 					memcpy(data16, data16src, copy_size);
422 				}
423 				else // if (image->data_type == ASTCENC_TYPE_F32)
424 				{
425 					assert(image->data_type == ASTCENC_TYPE_F32);
426 					float* data32 = static_cast<float*>(image->data[z]);
427 					float* data32src = static_cast<float*>(slices[z]->data[0]);
428 					size_t copy_size = slice_size * 4 * sizeof(float);
429 					memcpy(data32, data32src, copy_size);
430 				}
431 			}
432 		}
433 
434 		for (auto &i : slices)
435 		{
436 			free_image(i);
437 		}
438 	}
439 
440 	return image;
441 }
442 
443 /**
444  * @brief Parse the command line.
445  *
446  * @param      argc        Command line argument count.
447  * @param[in]  argv        Command line argument vector.
448  * @param[out] operation   Codec operation mode.
449  * @param[out] profile     Codec color profile.
450  *
451  * @return 0 if everything is okay, 1 if there is some error
452  */
parse_commandline_options(int argc,char ** argv,astcenc_operation & operation,astcenc_profile & profile)453 static int parse_commandline_options(
454 	int argc,
455 	char **argv,
456 	astcenc_operation& operation,
457 	astcenc_profile& profile
458 ) {
459 	assert(argc >= 2); (void)argc;
460 
461 	profile = ASTCENC_PRF_LDR;
462 	operation = ASTCENC_OP_UNKNOWN;
463 
464 	int modes_count = sizeof(modes) / sizeof(modes[0]);
465 	for (int i = 0; i < modes_count; i++)
466 	{
467 		if (!strcmp(modes[i].opt, argv[1]))
468 		{
469 			operation = modes[i].operation;
470 			profile = modes[i].decode_mode;
471 			break;
472 		}
473 	}
474 
475 	if (operation == ASTCENC_OP_UNKNOWN)
476 	{
477 		print_error("ERROR: Unrecognized operation '%s'\n", argv[1]);
478 		return 1;
479 	}
480 
481 	return 0;
482 }
483 
484 /**
485  * @brief Initialize the astcenc_config
486  *
487  * @param      argc         Command line argument count.
488  * @param[in]  argv         Command line argument vector.
489  * @param      operation    Codec operation mode.
490  * @param[out] profile      Codec color profile.
491  * @param      comp_image   Compressed image if a decompress operation.
492  * @param[out] preprocess   Image preprocess operation.
493  * @param[out] config       Codec configuration.
494  *
495  * @return 0 if everything is okay, 1 if there is some error
496  */
init_astcenc_config(int argc,char ** argv,astcenc_profile profile,astcenc_operation operation,astc_compressed_image & comp_image,astcenc_preprocess & preprocess,astcenc_config & config)497 static int init_astcenc_config(
498 	int argc,
499 	char **argv,
500 	astcenc_profile profile,
501 	astcenc_operation operation,
502 	astc_compressed_image& comp_image,
503 	astcenc_preprocess& preprocess,
504 	astcenc_config& config
505 ) {
506 	unsigned int block_x = 0;
507 	unsigned int block_y = 0;
508 	unsigned int block_z = 1;
509 
510 	// For decode the block size is set by the incoming image.
511 	if (operation == ASTCENC_OP_DECOMPRESS)
512 	{
513 		block_x = comp_image.block_x;
514 		block_y = comp_image.block_y;
515 		block_z = comp_image.block_z;
516 	}
517 
518 	float quality = 0.0f;
519 	preprocess = ASTCENC_PP_NONE;
520 
521 	// parse the command line's encoding options.
522 	int argidx = 4;
523 	if (operation & ASTCENC_STAGE_COMPRESS)
524 	{
525 		// Read and decode block size
526 		if (argc < 5)
527 		{
528 			print_error("ERROR: Block size must be specified\n");
529 			return 1;
530 		}
531 
532 		int cnt2D, cnt3D;
533 		int dimensions = sscanf(argv[4], "%ux%u%nx%u%n",
534 		                        &block_x, &block_y, &cnt2D, &block_z, &cnt3D);
535 		// Character after the last match should be a NUL
536 		if (!(((dimensions == 2) && !argv[4][cnt2D]) || ((dimensions == 3) && !argv[4][cnt3D])))
537 		{
538 			print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
539 			return 1;
540 		}
541 
542 		// Read and decode search quality
543 		if (argc < 6)
544 		{
545 			print_error("ERROR: Search quality level must be specified\n");
546 			return 1;
547 		}
548 
549 		if (!strcmp(argv[5], "-fastest"))
550 		{
551 			quality = ASTCENC_PRE_FASTEST;
552 		}
553 		else if (!strcmp(argv[5], "-fast"))
554 		{
555 			quality = ASTCENC_PRE_FAST;
556 		}
557 		else if (!strcmp(argv[5], "-medium"))
558 		{
559 			quality = ASTCENC_PRE_MEDIUM;
560 		}
561 		else if (!strcmp(argv[5], "-thorough"))
562 		{
563 			quality = ASTCENC_PRE_THOROUGH;
564 		}
565 		else if (!strcmp(argv[5], "-verythorough"))
566 		{
567 			quality = ASTCENC_PRE_VERYTHOROUGH;
568 		}
569 		else if (!strcmp(argv[5], "-exhaustive"))
570 		{
571 			quality = ASTCENC_PRE_EXHAUSTIVE;
572 		}
573 		else if (is_float(argv[5]))
574 		{
575 			quality = static_cast<float>(atof(argv[5]));
576 		}
577 		else
578 		{
579 			print_error("ERROR: Search quality/preset '%s' is invalid\n", argv[5]);
580 			return 1;
581 		}
582 
583 		argidx = 6;
584 	}
585 
586 	unsigned int flags = 0;
587 
588 	// Gather the flags that we need
589 	while (argidx < argc)
590 	{
591 		if (!strcmp(argv[argidx], "-a"))
592 		{
593 			// Skip over the data value for now
594 			argidx++;
595 			flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT;
596 		}
597 		else if (!strcmp(argv[argidx], "-normal"))
598 		{
599 			flags |= ASTCENC_FLG_MAP_NORMAL;
600 		}
601 		else if (!strcmp(argv[argidx], "-decode_unorm8"))
602 		{
603 			flags |= ASTCENC_FLG_USE_DECODE_UNORM8;
604 		}
605 		else if (!strcmp(argv[argidx], "-rgbm"))
606 		{
607 			// Skip over the data value for now
608 			argidx++;
609 			flags |= ASTCENC_FLG_MAP_RGBM;
610 		}
611 		else if (!strcmp(argv[argidx], "-perceptual"))
612 		{
613 			flags |= ASTCENC_FLG_USE_PERCEPTUAL;
614 		}
615 		else if (!strcmp(argv[argidx], "-pp-normalize"))
616 		{
617 			if (preprocess != ASTCENC_PP_NONE)
618 			{
619 				print_error("ERROR: Only a single image preprocess can be used\n");
620 				return 1;
621 			}
622 			preprocess = ASTCENC_PP_NORMALIZE;
623 		}
624 		else if (!strcmp(argv[argidx], "-pp-premultiply"))
625 		{
626 			if (preprocess != ASTCENC_PP_NONE)
627 			{
628 				print_error("ERROR: Only a single image preprocess can be used\n");
629 				return 1;
630 			}
631 			preprocess = ASTCENC_PP_PREMULTIPLY;
632 		}
633 		argidx ++;
634 	}
635 
636 #if defined(ASTCENC_DECOMPRESS_ONLY)
637 	flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
638 #else
639 	// Decompression can skip some memory allocation, but need full tables
640 	if (operation == ASTCENC_OP_DECOMPRESS)
641 	{
642 		flags |= ASTCENC_FLG_DECOMPRESS_ONLY;
643 	}
644 	// Compression and test passes can skip some decimation initialization
645 	// as we know we are decompressing images that were compressed using the
646 	// same settings and heuristics ...
647 	else
648 	{
649 		flags |= ASTCENC_FLG_SELF_DECOMPRESS_ONLY;
650 	}
651 #endif
652 
653 	astcenc_error status = astcenc_config_init(profile, block_x, block_y, block_z,
654 	                                           quality, flags, &config);
655 	if (status == ASTCENC_ERR_BAD_BLOCK_SIZE)
656 	{
657 		print_error("ERROR: Block size '%s' is invalid\n", argv[4]);
658 		return 1;
659 	}
660 	else if (status == ASTCENC_ERR_BAD_DECODE_MODE)
661 	{
662 		print_error("ERROR: Decode_unorm8 is not supported by HDR profiles\n", argv[4]);
663 		return 1;
664 	}
665 	else if (status == ASTCENC_ERR_BAD_CPU_FLOAT)
666 	{
667 		print_error("ERROR: astcenc must not be compiled with -ffast-math\n");
668 		return 1;
669 	}
670 	else if (status != ASTCENC_SUCCESS)
671 	{
672 		print_error("ERROR: Init config failed with %s\n", astcenc_get_error_string(status));
673 		return 1;
674 	}
675 
676 	return 0;
677 }
678 
679 /**
680  * @brief Edit the astcenc_config
681  *
682  * @param         argc         Command line argument count.
683  * @param[in]     argv         Command line argument vector.
684  * @param         operation    Codec operation.
685  * @param[out]    cli_config   Command line config.
686  * @param[in,out] config       Codec configuration.
687  *
688  * @return 0 if everything is OK, 1 if there is some error
689  */
edit_astcenc_config(int argc,char ** argv,const astcenc_operation operation,cli_config_options & cli_config,astcenc_config & config)690 static int edit_astcenc_config(
691 	int argc,
692 	char **argv,
693 	const astcenc_operation operation,
694 	cli_config_options& cli_config,
695 	astcenc_config& config
696 ) {
697 
698 	int argidx = (operation & ASTCENC_STAGE_COMPRESS) ? 6 : 4;
699 	config.privateProfile = HIGH_QUALITY_PROFILE;
700 	while (argidx < argc)
701 	{
702 		if (!strcmp(argv[argidx], "-silent"))
703 		{
704 			argidx++;
705 			cli_config.silentmode = 1;
706 		}
707 		else if (!strcmp(argv[argidx], "-cw"))
708 		{
709 			argidx += 5;
710 			if (argidx > argc)
711 			{
712 				print_error("ERROR: -cw switch with less than 4 arguments\n");
713 				return 1;
714 			}
715 
716 			config.cw_r_weight = static_cast<float>(atof(argv[argidx - 4]));
717 			config.cw_g_weight = static_cast<float>(atof(argv[argidx - 3]));
718 			config.cw_b_weight = static_cast<float>(atof(argv[argidx - 2]));
719 			config.cw_a_weight = static_cast<float>(atof(argv[argidx - 1]));
720 		}
721 		else if (!strcmp(argv[argidx], "-a"))
722 		{
723 			argidx += 2;
724 			if (argidx > argc)
725 			{
726 				print_error("ERROR: -a switch with no argument\n");
727 				return 1;
728 			}
729 
730 			config.a_scale_radius = atoi(argv[argidx - 1]);
731 		}
732 		else if (!strcmp(argv[argidx], "-esw"))
733 		{
734 			argidx += 2;
735 			if (argidx > argc)
736 			{
737 				print_error("ERROR: -esw switch with no argument\n");
738 				return 1;
739 			}
740 
741 			if (strlen(argv[argidx - 1]) != 4)
742 			{
743 				print_error("ERROR: -esw pattern does not contain 4 characters\n");
744 				return 1;
745 			}
746 
747 			astcenc_swz swizzle_components[4];
748 			for (int i = 0; i < 4; i++)
749 			{
750 				switch (argv[argidx - 1][i])
751 				{
752 				case 'r':
753 					swizzle_components[i] = ASTCENC_SWZ_R;
754 					break;
755 				case 'g':
756 					swizzle_components[i] = ASTCENC_SWZ_G;
757 					break;
758 				case 'b':
759 					swizzle_components[i] = ASTCENC_SWZ_B;
760 					break;
761 				case 'a':
762 					swizzle_components[i] = ASTCENC_SWZ_A;
763 					break;
764 				case '0':
765 					swizzle_components[i] = ASTCENC_SWZ_0;
766 					break;
767 				case '1':
768 					swizzle_components[i] = ASTCENC_SWZ_1;
769 					break;
770 				default:
771 					print_error("ERROR: -esw component '%c' is not valid\n", argv[argidx - 1][i]);
772 					return 1;
773 				}
774 			}
775 
776 			cli_config.swz_encode.r = swizzle_components[0];
777 			cli_config.swz_encode.g = swizzle_components[1];
778 			cli_config.swz_encode.b = swizzle_components[2];
779 			cli_config.swz_encode.a = swizzle_components[3];
780 		}
781 		else if (!strcmp(argv[argidx], "-ssw"))
782 		{
783 			argidx += 2;
784 			if (argidx > argc)
785 			{
786 				print_error("ERROR: -ssw switch with no argument\n");
787 				return 1;
788 			}
789 
790 			size_t char_count = strlen(argv[argidx - 1]);
791 			if (char_count == 0)
792 			{
793 				print_error("ERROR: -ssw pattern contains no characters\n");
794 				return 1;
795 			}
796 
797 			if (char_count > 4)
798 			{
799 				print_error("ERROR: -ssw pattern contains more than 4 characters\n");
800 				return 1;
801 			}
802 
803 			bool found_r = false;
804 			bool found_g = false;
805 			bool found_b = false;
806 			bool found_a = false;
807 
808 			for (size_t i = 0; i < char_count; i++)
809 			{
810 				switch (argv[argidx - 1][i])
811 				{
812 				case 'r':
813 					found_r = true;
814 					break;
815 				case 'g':
816 					found_g = true;
817 					break;
818 				case 'b':
819 					found_b = true;
820 					break;
821 				case 'a':
822 					found_a = true;
823 					break;
824 				default:
825 					print_error("ERROR: -ssw component '%c' is not valid\n", argv[argidx - 1][i]);
826 					return 1;
827 				}
828 			}
829 
830 			config.cw_r_weight = found_r ? 1.0f : 0.0f;
831 			config.cw_g_weight = found_g ? 1.0f : 0.0f;
832 			config.cw_b_weight = found_b ? 1.0f : 0.0f;
833 			config.cw_a_weight = found_a ? 1.0f : 0.0f;
834 		}
835 		else if (!strcmp(argv[argidx], "-dsw"))
836 		{
837 			argidx += 2;
838 			if (argidx > argc)
839 			{
840 				print_error("ERROR: -dsw switch with no argument\n");
841 				return 1;
842 			}
843 
844 			if (strlen(argv[argidx - 1]) != 4)
845 			{
846 				print_error("ERROR: -dsw switch does not contain 4 characters\n");
847 				return 1;
848 			}
849 
850 			astcenc_swz swizzle_components[4];
851 			for (int i = 0; i < 4; i++)
852 			{
853 				switch (argv[argidx - 1][i])
854 				{
855 				case 'r':
856 					swizzle_components[i] = ASTCENC_SWZ_R;
857 					break;
858 				case 'g':
859 					swizzle_components[i] = ASTCENC_SWZ_G;
860 					break;
861 				case 'b':
862 					swizzle_components[i] = ASTCENC_SWZ_B;
863 					break;
864 				case 'a':
865 					swizzle_components[i] = ASTCENC_SWZ_A;
866 					break;
867 				case '0':
868 					swizzle_components[i] = ASTCENC_SWZ_0;
869 					break;
870 				case '1':
871 					swizzle_components[i] = ASTCENC_SWZ_1;
872 					break;
873 				case 'z':
874 					swizzle_components[i] =  ASTCENC_SWZ_Z;
875 					break;
876 				default:
877 					print_error("ERROR: ERROR: -dsw component '%c' is not valid\n", argv[argidx - 1][i]);
878 					return 1;
879 				}
880 			}
881 
882 			cli_config.swz_decode.r = swizzle_components[0];
883 			cli_config.swz_decode.g = swizzle_components[1];
884 			cli_config.swz_decode.b = swizzle_components[2];
885 			cli_config.swz_decode.a = swizzle_components[3];
886 		}
887 		// presets begin here
888 		else if (!strcmp(argv[argidx], "-normal"))
889 		{
890 			argidx++;
891 
892 			cli_config.swz_encode.r = ASTCENC_SWZ_R;
893 			cli_config.swz_encode.g = ASTCENC_SWZ_R;
894 			cli_config.swz_encode.b = ASTCENC_SWZ_R;
895 			cli_config.swz_encode.a = ASTCENC_SWZ_G;
896 
897 			cli_config.swz_decode.r = ASTCENC_SWZ_R;
898 			cli_config.swz_decode.g = ASTCENC_SWZ_A;
899 			cli_config.swz_decode.b = ASTCENC_SWZ_Z;
900 			cli_config.swz_decode.a = ASTCENC_SWZ_1;
901 		}
902 		else if (!strcmp(argv[argidx], "-rgbm"))
903 		{
904 			argidx += 2;
905 			if (argidx > argc)
906 			{
907 				print_error("ERROR: -rgbm switch with no argument\n");
908 				return 1;
909 			}
910 
911 			config.rgbm_m_scale = static_cast<float>(atof(argv[argidx - 1]));
912 			config.cw_a_weight = 2.0f * config.rgbm_m_scale;
913 		}
914 		else if (!strcmp(argv[argidx], "-decode_unorm8"))
915 		{
916 			argidx++;
917 		}
918 		else if (!strcmp(argv[argidx], "-perceptual"))
919 		{
920 			argidx++;
921 		}
922 		else if (!strcmp(argv[argidx], "-pp-normalize"))
923 		{
924 			argidx++;
925 		}
926 		else if (!strcmp(argv[argidx], "-pp-premultiply"))
927 		{
928 			argidx++;
929 		}
930 		else if (!strcmp(argv[argidx], "-blockmodelimit"))
931 		{
932 			argidx += 2;
933 			if (argidx > argc)
934 			{
935 				print_error("ERROR: -blockmodelimit switch with no argument\n");
936 				return 1;
937 			}
938 
939 			config.tune_block_mode_limit = atoi(argv[argidx - 1]);
940 		}
941 		else if (!strcmp(argv[argidx], "-partitioncountlimit"))
942 		{
943 			argidx += 2;
944 			if (argidx > argc)
945 			{
946 				print_error("ERROR: -partitioncountlimit switch with no argument\n");
947 				return 1;
948 			}
949 
950 			config.tune_partition_count_limit = atoi(argv[argidx - 1]);
951 		}
952 		else if (!strcmp(argv[argidx], "-2partitionindexlimit"))
953 		{
954 			argidx += 2;
955 			if (argidx > argc)
956 			{
957 				print_error("ERROR: -2partitionindexlimit switch with no argument\n");
958 				return 1;
959 			}
960 
961 			config.tune_2partition_index_limit = atoi(argv[argidx - 1]);
962 		}
963 		else if (!strcmp(argv[argidx], "-3partitionindexlimit"))
964 		{
965 			argidx += 2;
966 			if (argidx > argc)
967 			{
968 				print_error("ERROR: -3partitionindexlimit switch with no argument\n");
969 				return 1;
970 			}
971 
972 			config.tune_3partition_index_limit = atoi(argv[argidx - 1]);
973 		}
974 		else if (!strcmp(argv[argidx], "-4partitionindexlimit"))
975 		{
976 			argidx += 2;
977 			if (argidx > argc)
978 			{
979 				print_error("ERROR: -4partitionindexlimit switch with no argument\n");
980 				return 1;
981 			}
982 
983 			config.tune_4partition_index_limit = atoi(argv[argidx - 1]);
984 		}
985 		else if (!strcmp(argv[argidx], "-2partitioncandidatelimit"))
986 		{
987 			argidx += 2;
988 			if (argidx > argc)
989 			{
990 				print_error("ERROR: -2partitioncandidatelimit switch with no argument\n");
991 				return 1;
992 			}
993 
994 			config.tune_2partitioning_candidate_limit = atoi(argv[argidx - 1]);
995 		}
996 		else if (!strcmp(argv[argidx], "-3partitioncandidatelimit"))
997 		{
998 			argidx += 2;
999 			if (argidx > argc)
1000 			{
1001 				print_error("ERROR: -3partitioncandidatelimit switch with no argument\n");
1002 				return 1;
1003 			}
1004 
1005 			config.tune_3partitioning_candidate_limit = atoi(argv[argidx - 1]);
1006 		}
1007 		else if (!strcmp(argv[argidx], "-4partitioncandidatelimit"))
1008 		{
1009 			argidx += 2;
1010 			if (argidx > argc)
1011 			{
1012 				print_error("ERROR: -4partitioncandidatelimit switch with no argument\n");
1013 				return 1;
1014 			}
1015 
1016 			config.tune_4partitioning_candidate_limit = atoi(argv[argidx - 1]);
1017 		}
1018 		else if (!strcmp(argv[argidx], "-dblimit"))
1019 		{
1020 			argidx += 2;
1021 			if (argidx > argc)
1022 			{
1023 				print_error("ERROR: -dblimit switch with no argument\n");
1024 				return 1;
1025 			}
1026 
1027 			if ((config.profile == ASTCENC_PRF_LDR) || (config.profile == ASTCENC_PRF_LDR_SRGB))
1028 			{
1029 				config.tune_db_limit = static_cast<float>(atof(argv[argidx - 1]));
1030 			}
1031 		}
1032 		else if (!strcmp(argv[argidx], "-2partitionlimitfactor"))
1033 		{
1034 			argidx += 2;
1035 			if (argidx > argc)
1036 			{
1037 				print_error("ERROR: -2partitionlimitfactor switch with no argument\n");
1038 				return 1;
1039 			}
1040 
1041 			config.tune_2partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1042 		}
1043 		else if (!strcmp(argv[argidx], "-3partitionlimitfactor"))
1044 		{
1045 			argidx += 2;
1046 			if (argidx > argc)
1047 			{
1048 				print_error("ERROR: -3partitionlimitfactor switch with no argument\n");
1049 				return 1;
1050 			}
1051 
1052 			config.tune_3partition_early_out_limit_factor = static_cast<float>(atof(argv[argidx - 1]));
1053 		}
1054 		else if (!strcmp(argv[argidx], "-2planelimitcorrelation"))
1055 		{
1056 			argidx += 2;
1057 			if (argidx > argc)
1058 			{
1059 				print_error("ERROR: -2planelimitcorrelation switch with no argument\n");
1060 				return 1;
1061 			}
1062 
1063 			config.tune_2plane_early_out_limit_correlation = static_cast<float>(atof(argv[argidx - 1]));
1064 		}
1065 		else if (!strcmp(argv[argidx], "-refinementlimit"))
1066 		{
1067 			argidx += 2;
1068 			if (argidx > argc)
1069 			{
1070 				print_error("ERROR: -refinementlimit switch with no argument\n");
1071 				return 1;
1072 			}
1073 
1074 			config.tune_refinement_limit = atoi(argv[argidx - 1]);
1075 		}
1076 		else if (!strcmp(argv[argidx], "-candidatelimit"))
1077 		{
1078 			argidx += 2;
1079 			if (argidx > argc)
1080 			{
1081 				print_error("ERROR: -candidatelimit switch with no argument\n");
1082 				return 1;
1083 			}
1084 
1085 			config.tune_candidate_limit = atoi(argv[argidx - 1]);
1086 		}
1087 		else if (!strcmp(argv[argidx], "-j"))
1088 		{
1089 			argidx += 2;
1090 			if (argidx > argc)
1091 			{
1092 				print_error("ERROR: -j switch with no argument\n");
1093 				return 1;
1094 			}
1095 
1096 			cli_config.thread_count = atoi(argv[argidx - 1]);
1097 		}
1098 		else if (!strcmp(argv[argidx], "-repeats"))
1099 		{
1100 			argidx += 2;
1101 			if (argidx > argc)
1102 			{
1103 				print_error("ERROR: -repeats switch with no argument\n");
1104 				return 1;
1105 			}
1106 
1107 			cli_config.repeat_count = atoi(argv[argidx - 1]);
1108 			if (cli_config.repeat_count <= 0)
1109 			{
1110 				print_error("ERROR: -repeats value must be at least one\n");
1111 				return 1;
1112 			}
1113 		}
1114 		else if (!strcmp(argv[argidx], "-yflip"))
1115 		{
1116 			argidx++;
1117 			cli_config.y_flip = 1;
1118 		}
1119 		else if (!strcmp(argv[argidx], "-mpsnr"))
1120 		{
1121 			argidx += 3;
1122 			if (argidx > argc)
1123 			{
1124 				print_error("ERROR: -mpsnr switch with less than 2 arguments\n");
1125 				return 1;
1126 			}
1127 
1128 			cli_config.low_fstop = atoi(argv[argidx - 2]);
1129 			cli_config.high_fstop = atoi(argv[argidx - 1]);
1130 			if (cli_config.high_fstop < cli_config.low_fstop)
1131 			{
1132 				print_error("ERROR: -mpsnr switch <low> is greater than the <high>\n");
1133 				return 1;
1134 			}
1135 		}
1136 		// Option: Encode a 3D image from a sequence of 2D images.
1137 		else if (!strcmp(argv[argidx], "-zdim"))
1138 		{
1139 			// Only supports compressing
1140 			if (!(operation & ASTCENC_STAGE_COMPRESS))
1141 			{
1142 				print_error("ERROR: -zdim switch is only valid for compression\n");
1143 				return 1;
1144 			}
1145 
1146 			// Image depth must be specified.
1147 			if (argidx + 2 > argc)
1148 			{
1149 				print_error("ERROR: -zdim switch with no argument\n");
1150 				return 1;
1151 			}
1152 			argidx++;
1153 
1154 			// Read array size (image depth).
1155 			if (!sscanf(argv[argidx], "%u", &cli_config.array_size) || cli_config.array_size == 0)
1156 			{
1157 				print_error("ERROR: -zdim size '%s' is invalid\n", argv[argidx]);
1158 				return 1;
1159 			}
1160 
1161 			if ((cli_config.array_size > 1) && (config.block_z == 1))
1162 			{
1163 				print_error("ERROR: -zdim with 3D input data for a 2D output format\n");
1164 				return 1;
1165 			}
1166 			argidx++;
1167 		}
1168 #if defined(ASTCENC_DIAGNOSTICS)
1169 		else if (!strcmp(argv[argidx], "-dtrace"))
1170 		{
1171 			argidx += 2;
1172 			if (argidx > argc)
1173 			{
1174 				print_error("ERROR: -dtrace switch with no argument\n");
1175 				return 1;
1176 			}
1177 
1178 			config.trace_file_path = argv[argidx - 1];
1179 		}
1180 #endif
1181 		else if (!strcmp(argv[argidx], "-privateProfile"))
1182 		{
1183 			argidx += 2; // skip 2 chatacters to get next parameter
1184 			config.privateProfile = static_cast<QualityProfile>(atoi(argv[argidx - 1]));
1185 		}
1186 		else if (!strcmp(argv[argidx], "-dimage"))
1187 		{
1188 			argidx += 1;
1189 			cli_config.diagnostic_images = true;
1190 		}
1191 		else // check others as well
1192 		{
1193 			print_error("ERROR: Argument '%s' not recognized\n", argv[argidx]);
1194 			return 1;
1195 		}
1196 	}
1197 
1198 	if (cli_config.thread_count <= 0)
1199 	{
1200 		cli_config.thread_count = get_cpu_count();
1201 	}
1202 
1203 #if defined(ASTCENC_DIAGNOSTICS)
1204 	// Force single threaded for diagnostic builds
1205 	cli_config.thread_count = 1;
1206 
1207 	if (!config.trace_file_path)
1208 	{
1209 		print_error("ERROR: Diagnostics builds must set -dtrace\n");
1210 		return 1;
1211 	}
1212 #endif
1213 
1214 	return 0;
1215 }
1216 
1217 /**
1218  * @brief Print the config settings in a human readable form.
1219  *
1220  * @param[in] cli_config   Command line config.
1221  * @param[in] config       Codec configuration.
1222  */
print_astcenc_config(const cli_config_options & cli_config,const astcenc_config & config)1223 static void print_astcenc_config(
1224 	const cli_config_options& cli_config,
1225 	const astcenc_config& config
1226 ) {
1227 	// Print all encoding settings unless specifically told otherwise
1228 	if (!cli_config.silentmode)
1229 	{
1230 		printf("Compressor settings\n");
1231 		printf("===================\n\n");
1232 
1233 		switch (config.profile)
1234 		{
1235 		case ASTCENC_PRF_LDR:
1236 			printf("    Color profile:              LDR linear\n");
1237 			break;
1238 		case ASTCENC_PRF_LDR_SRGB:
1239 			printf("    Color profile:              LDR sRGB\n");
1240 			break;
1241 		case ASTCENC_PRF_HDR_RGB_LDR_A:
1242 			printf("    Color profile:              HDR RGB + LDR A\n");
1243 			break;
1244 		case ASTCENC_PRF_HDR:
1245 			printf("    Color profile:              HDR RGBA\n");
1246 			break;
1247 		}
1248 
1249 		if (config.block_z == 1)
1250 		{
1251 			printf("    Block size:                 %ux%u\n", config.block_x, config.block_y);
1252 		}
1253 		else
1254 		{
1255 			printf("    Block size:                 %ux%ux%u\n", config.block_x, config.block_y, config.block_z);
1256 		}
1257 
1258 		printf("    Bitrate:                    %3.2f bpp\n", 128.0 / (config.block_x * config.block_y * config.block_z));
1259 		printf("    RGB alpha scale weight:     %d\n", (config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT));
1260 		if ((config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT))
1261 		{
1262 			printf("    Radius RGB alpha scale:     %u texels\n", config.a_scale_radius);
1263 		}
1264 
1265 		printf("    R component weight:         %g\n", static_cast<double>(config.cw_r_weight));
1266 		printf("    G component weight:         %g\n", static_cast<double>(config.cw_g_weight));
1267 		printf("    B component weight:         %g\n", static_cast<double>(config.cw_b_weight));
1268 		printf("    A component weight:         %g\n", static_cast<double>(config.cw_a_weight));
1269 		printf("    Partition cutoff:           %u partitions\n", config.tune_partition_count_limit);
1270 		printf("    2 partition index cutoff:   %u partition ids\n", config.tune_2partition_index_limit);
1271 		printf("    3 partition index cutoff:   %u partition ids\n", config.tune_3partition_index_limit);
1272 		printf("    4 partition index cutoff:   %u partition ids\n", config.tune_4partition_index_limit);
1273 		printf("    PSNR cutoff:                %g dB\n", static_cast<double>(config.tune_db_limit));
1274 		printf("    3 partition cutoff:         %g\n", static_cast<double>(config.tune_2partition_early_out_limit_factor));
1275 		printf("    4 partition cutoff:         %g\n", static_cast<double>(config.tune_3partition_early_out_limit_factor));
1276 		printf("    2 plane correlation cutoff: %g\n", static_cast<double>(config.tune_2plane_early_out_limit_correlation));
1277 		printf("    Block mode centile cutoff:  %g%%\n", static_cast<double>(config.tune_block_mode_limit));
1278 		printf("    Candidate cutoff:           %u candidates\n", config.tune_candidate_limit);
1279 		printf("    Refinement cutoff:          %u iterations\n", config.tune_refinement_limit);
1280 		printf("    Compressor thread count:    %d\n", cli_config.thread_count);
1281 		printf("\n");
1282 	}
1283 }
1284 
1285 /**
1286  * @brief Get the value of a single pixel in an image.
1287  *
1288  * Note, this implementation is not particularly optimal as it puts format
1289  * checks in the inner-most loop. For the CLI preprocess passes this is deemed
1290  * acceptable as these are not performance critical paths.
1291  *
1292  * @param[in] img   The output image.
1293  * @param     x     The pixel x coordinate.
1294  * @param     y     The pixel y coordinate.
1295  * @param     z     The pixel z coordinate.
1296  *
1297  * @return      pixel   The pixel color value to write.
1298  */
image_get_pixel(const astcenc_image & img,unsigned int x,unsigned int y,unsigned int z)1299 static vfloat4 image_get_pixel(
1300 	const astcenc_image& img,
1301 	unsigned int x,
1302 	unsigned int y,
1303 	unsigned int z
1304 ) {
1305 	// We should never escape bounds
1306 	assert(x < img.dim_x);
1307 	assert(y < img.dim_y);
1308 	assert(z < img.dim_z);
1309 
1310 	if (img.data_type == ASTCENC_TYPE_U8)
1311 	{
1312 		uint8_t* data = static_cast<uint8_t*>(img.data[z]);
1313 
1314 		float r = data[(4 * img.dim_x * y) + (4 * x    )] / 255.0f;
1315 		float g = data[(4 * img.dim_x * y) + (4 * x + 1)] / 255.0f;
1316 		float b = data[(4 * img.dim_x * y) + (4 * x + 2)] / 255.0f;
1317 		float a = data[(4 * img.dim_x * y) + (4 * x + 3)] / 255.0f;
1318 
1319 		return vfloat4(r, g, b, a);
1320 	}
1321 	else if (img.data_type == ASTCENC_TYPE_F16)
1322 	{
1323 		uint16_t* data = static_cast<uint16_t*>(img.data[z]);
1324 
1325 		vint4 colori(
1326 			data[(4 * img.dim_x * y) + (4 * x    )],
1327 			data[(4 * img.dim_x * y) + (4 * x + 1)],
1328 			data[(4 * img.dim_x * y) + (4 * x + 2)],
1329 			data[(4 * img.dim_x * y) + (4 * x + 3)]
1330 		);
1331 
1332 		return float16_to_float(colori);
1333 	}
1334 	else // if (img.data_type == ASTCENC_TYPE_F32)
1335 	{
1336 		assert(img.data_type == ASTCENC_TYPE_F32);
1337 		float* data = static_cast<float*>(img.data[z]);
1338 
1339 		return vfloat4(
1340 			data[(4 * img.dim_x * y) + (4 * x    )],
1341 			data[(4 * img.dim_x * y) + (4 * x + 1)],
1342 			data[(4 * img.dim_x * y) + (4 * x + 2)],
1343 			data[(4 * img.dim_x * y) + (4 * x + 3)]
1344 		);
1345 	}
1346 }
1347 
1348 /**
1349  * @brief Set the value of a single pixel in an image.
1350  *
1351  * @param[out] img     The output image; must use F32 texture components.
1352  * @param      x       The pixel x coordinate.
1353  * @param      y       The pixel y coordinate.
1354  * @param      z       The pixel z coordinate.
1355  * @param      pixel   The pixel color value to write.
1356  */
image_set_pixel(astcenc_image & img,unsigned int x,unsigned int y,unsigned int z,vfloat4 pixel)1357 static void image_set_pixel(
1358 	astcenc_image& img,
1359 	unsigned int x,
1360 	unsigned int y,
1361 	unsigned int z,
1362 	vfloat4 pixel
1363 ) {
1364 	// We should never escape bounds
1365 	assert(x < img.dim_x);
1366 	assert(y < img.dim_y);
1367 	assert(z < img.dim_z);
1368 	assert(img.data_type == ASTCENC_TYPE_F32);
1369 
1370 	float* data = static_cast<float*>(img.data[z]);
1371 
1372 	data[(4 * img.dim_x * y) + (4 * x    )] = pixel.lane<0>();
1373 	data[(4 * img.dim_x * y) + (4 * x + 1)] = pixel.lane<1>();
1374 	data[(4 * img.dim_x * y) + (4 * x + 2)] = pixel.lane<2>();
1375 	data[(4 * img.dim_x * y) + (4 * x + 3)] = pixel.lane<3>();
1376 }
1377 
1378 /**
1379  * @brief Set the value of a single pixel in an image.
1380  *
1381  * @param[out] img     The output image; must use F32 texture components.
1382  * @param      x       The pixel x coordinate.
1383  * @param      y       The pixel y coordinate.
1384  * @param      pixel   The pixel color value to write.
1385  */
image_set_pixel_u8(astcenc_image & img,size_t x,size_t y,vint4 pixel)1386 static void image_set_pixel_u8(
1387 	astcenc_image& img,
1388 	size_t x,
1389 	size_t y,
1390 	vint4 pixel
1391 ) {
1392 	// We should never escape bounds
1393 	assert(x < img.dim_x);
1394 	assert(y < img.dim_y);
1395 	assert(img.data_type == ASTCENC_TYPE_U8);
1396 
1397 	uint8_t* data = static_cast<uint8_t*>(img.data[0]);
1398 	pixel = pack_low_bytes(pixel);
1399 	store_nbytes(pixel, data + (4 * img.dim_x * y) + (4 * x    ));
1400 }
1401 
1402 /**
1403  * @brief Create a copy of @c input with forced unit-length normal vectors.
1404  *
1405  * It is assumed that all normal vectors are stored in the RGB components, and
1406  * stored in a packed unsigned range of [0,1] which must be unpacked prior
1407  * normalization. Data must then be repacked into this form for handing over to
1408  * the core codec.
1409  *
1410  * @param[in]  input    The input image.
1411  * @param[out] output   The output image, must use F32 components.
1412  */
image_preprocess_normalize(const astcenc_image & input,astcenc_image & output)1413 static void image_preprocess_normalize(
1414 	const astcenc_image& input,
1415 	astcenc_image& output
1416 ) {
1417 	for (unsigned int z = 0; z < input.dim_z; z++)
1418 	{
1419 		for (unsigned int y = 0; y < input.dim_y; y++)
1420 		{
1421 			for (unsigned int x = 0; x < input.dim_x; x++)
1422 			{
1423 				vfloat4 pixel = image_get_pixel(input, x, y, z);
1424 
1425 				// Stash alpha component and zero
1426 				float a = pixel.lane<3>();
1427 				pixel.set_lane<3>(0.0f);
1428 
1429 				// Decode [0,1] normals to [-1,1]
1430 				pixel.set_lane<0>((pixel.lane<0>() * 2.0f) - 1.0f);
1431 				pixel.set_lane<1>((pixel.lane<1>() * 2.0f) - 1.0f);
1432 				pixel.set_lane<2>((pixel.lane<2>() * 2.0f) - 1.0f);
1433 
1434 				// Normalize pixel and restore alpha
1435 				pixel = normalize(pixel);
1436 				pixel.set_lane<3>(a);
1437 
1438 				// Encode [-1,1] normals to [0,1]
1439 				pixel.set_lane<0>((pixel.lane<0>() + 1.0f) / 2.0f);
1440 				pixel.set_lane<1>((pixel.lane<1>() + 1.0f) / 2.0f);
1441 				pixel.set_lane<2>((pixel.lane<2>() + 1.0f) / 2.0f);
1442 
1443 				image_set_pixel(output, x, y, z, pixel);
1444 			}
1445 		}
1446 	}
1447 }
1448 
/**
 * @brief Convert an sRGB gamma-encoded value to a linear value.
 *
 * @param a   The gamma-encoded value.
 *
 * @return The linearized value.
 */
static float srgb_to_linear(
	float a
) {
	// Values at or below this limit lie on the linear toe of the curve
	const float toe_limit = 0.04045f;

	return (a <= toe_limit)
	     ? a * (1.0f / 12.92f)
	     : powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
}
1464 
/**
 * @brief Convert a linear value to an sRGB gamma-encoded value.
 *
 * @param a   The linear value.
 *
 * @return The gamma encoded value.
 */
static float linear_to_srgb(
	float a
) {
	// Values at or below this limit lie on the linear toe of the curve
	const float toe_limit = 0.0031308f;

	return (a <= toe_limit)
	     ? a * 12.92f
	     : 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
}
1480 
1481 /**
1482  * @brief Create a copy of @c input with premultiplied color data.
1483  *
1484  * If we are compressing sRGB data we linearize the data prior to
1485  * premultiplication and re-gamma-encode afterwards.
1486  *
1487  * @param[in]  input     The input image.
1488  * @param[out] output    The output image, must use F32 components.
1489  * @param      profile   The encoding profile.
1490  */
image_preprocess_premultiply(const astcenc_image & input,astcenc_image & output,astcenc_profile profile)1491 static void image_preprocess_premultiply(
1492 	const astcenc_image& input,
1493 	astcenc_image& output,
1494 	astcenc_profile profile
1495 ) {
1496 	for (unsigned int z = 0; z < input.dim_z; z++)
1497 	{
1498 		for (unsigned int y = 0; y < input.dim_y; y++)
1499 		{
1500 			for (unsigned int x = 0; x < input.dim_x; x++)
1501 			{
1502 				vfloat4 pixel = image_get_pixel(input, x, y, z);
1503 
1504 				// Linearize sRGB
1505 				if (profile == ASTCENC_PRF_LDR_SRGB)
1506 				{
1507 					pixel.set_lane<0>(srgb_to_linear(pixel.lane<0>()));
1508 					pixel.set_lane<1>(srgb_to_linear(pixel.lane<1>()));
1509 					pixel.set_lane<2>(srgb_to_linear(pixel.lane<2>()));
1510 				}
1511 
1512 				// Premultiply pixel in linear-space
1513 				pixel.set_lane<0>(pixel.lane<0>() * pixel.lane<3>());
1514 				pixel.set_lane<1>(pixel.lane<1>() * pixel.lane<3>());
1515 				pixel.set_lane<2>(pixel.lane<2>() * pixel.lane<3>());
1516 
1517 				// Gamma-encode sRGB
1518 				if (profile == ASTCENC_PRF_LDR_SRGB)
1519 				{
1520 					pixel.set_lane<0>(linear_to_srgb(pixel.lane<0>()));
1521 					pixel.set_lane<1>(linear_to_srgb(pixel.lane<1>()));
1522 					pixel.set_lane<2>(linear_to_srgb(pixel.lane<2>()));
1523 				}
1524 
1525 				image_set_pixel(output, x, y, z, pixel);
1526 			}
1527 		}
1528 	}
1529 }
1530 
1531 /**
1532  * @brief Populate a single diagnostic image showing aspects of the encoding.
1533  *
1534  * @param context      The context to use.
1535  * @param image        The compressed image to analyze.
1536  * @param diag_image   The output visualization image to populate.
1537  * @param texel_func   The per-texel callback used to determine output color.
1538  */
print_diagnostic_image(astcenc_context * context,const astc_compressed_image & image,astcenc_image & diag_image,std::function<vint4 (astcenc_block_info &,size_t,size_t)> texel_func)1539 static void print_diagnostic_image(
1540 	astcenc_context* context,
1541 	const astc_compressed_image& image,
1542 	astcenc_image& diag_image,
1543 	std::function<vint4(astcenc_block_info&, size_t, size_t)> texel_func
1544 ) {
1545 	size_t block_cols = (image.dim_x + image.block_x - 1) / image.block_x;
1546 	size_t block_rows = (image.dim_y + image.block_y - 1) / image.block_y;
1547 
1548 	uint8_t* data = image.data;
1549 	for (size_t block_y = 0; block_y < block_rows; block_y++)
1550 	{
1551 		for (size_t block_x = 0; block_x < block_cols; block_x++)
1552 		{
1553 			astcenc_block_info block_info;
1554 			astcenc_get_block_info(context, data, &block_info);
1555 			data += 16;
1556 
1557 			size_t start_row = block_y * image.block_y;
1558 			size_t start_col = block_x * image.block_x;
1559 
1560 			size_t end_row = astc::min(start_row + image.block_y, static_cast<size_t>(image.dim_y));
1561 			size_t end_col = astc::min(start_col + image.block_x, static_cast<size_t>(image.dim_x));
1562 
1563 			for (size_t texel_y = start_row; texel_y < end_row; texel_y++)
1564 			{
1565 				for (size_t texel_x = start_col; texel_x < end_col; texel_x++)
1566 				{
1567 					vint4 color = texel_func(block_info, texel_x - start_col, texel_y - start_row);
1568 					image_set_pixel_u8(diag_image, texel_x, texel_y, color);
1569 				}
1570 			}
1571 		}
1572 	}
1573 }
1574 
1575 /**
1576  * @brief Print a set of diagnostic images showing aspects of the encoding.
1577  *
1578  * @param context       The context to use.
1579  * @param image         The compressed image to analyze.
1580  * @param output_file   The output file name to use as a stem for new names.
1581  */
print_diagnostic_images(astcenc_context * context,const astc_compressed_image & image,const std::string & output_file)1582 static void print_diagnostic_images(
1583 	astcenc_context* context,
1584 	const astc_compressed_image& image,
1585 	const std::string& output_file
1586 ) {
1587 	if (image.dim_z != 1)
1588 	{
1589 		return;
1590 	}
1591 
1592 	// Try to find a file extension we know about
1593 	size_t index = output_file.find_last_of(".");
1594 	std::string stem = output_file;
1595 	if (index != std::string::npos)
1596 	{
1597 		stem = stem.substr(0, index);
1598 	}
1599 
1600 	auto diag_image = alloc_image(8, image.dim_x, image.dim_y, image.dim_z);
1601 
1602 	// ---- ---- ---- ---- Partitioning ---- ---- ---- ----
1603 	auto partition_func = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1604 		const vint4 colors[] {
1605 			vint4(  0,   0,   0, 255),
1606 			vint4(255,   0,   0, 255),
1607 			vint4(  0, 255,   0, 255),
1608 			vint4(  0,   0, 255, 255),
1609 			vint4(255, 255, 255, 255)
1610 		};
1611 
1612 		size_t texel_index = texel_y * info.block_x + texel_x;
1613 
1614 		int partition { 0 };
1615 		if (!info.is_constant_block)
1616 		{
1617 			partition = info.partition_assignment[texel_index] + 1;
1618 		}
1619 
1620 		return colors[partition];
1621 	};
1622 
1623 	print_diagnostic_image(context, image, *diag_image, partition_func);
1624 	std::string fname = stem + "_diag_partitioning.png";
1625 	store_ncimage(diag_image, fname.c_str(), false);
1626 
1627 	// ---- ---- ---- ---- Weight planes  ---- ---- ---- ----
1628 	auto texel_func1 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1629 		(void)texel_x;
1630 		(void)texel_y;
1631 
1632 		const vint4 colors[] {
1633 			vint4(  0,   0,   0, 255),
1634 			vint4(255,   0,   0, 255),
1635 			vint4(  0, 255,   0, 255),
1636 			vint4(  0,   0, 255, 255),
1637 			vint4(255, 255, 255, 255)
1638 		};
1639 
1640 		int component { 0 };
1641 		if (info.is_dual_plane_block)
1642 		{
1643 			component = info.dual_plane_component + 1;
1644 		}
1645 
1646 		return colors[component];
1647 	};
1648 
1649 	print_diagnostic_image(context, image, *diag_image, texel_func1);
1650 	fname = stem + "_diag_weight_plane2.png";
1651 	store_ncimage(diag_image, fname.c_str(), false);
1652 
1653 	// ---- ---- ---- ---- Weight density  ---- ---- ---- ----
1654 	auto texel_func2 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1655 		(void)texel_x;
1656 		(void)texel_y;
1657 
1658 		float density = 0.0f;
1659 		if (!info.is_constant_block)
1660 		{
1661 			float texel_count = static_cast<float>(info.block_x * info.block_y);
1662 			float weight_count = static_cast<float>(info.weight_x * info.weight_y);
1663 			density = weight_count / texel_count;
1664 		}
1665 
1666 		int densityi = static_cast<int>(255.0f * density);
1667 		return vint4(densityi, densityi, densityi, 255);
1668 	};
1669 
1670 	print_diagnostic_image(context, image, *diag_image, texel_func2);
1671 	fname = stem + "_diag_weight_density.png";
1672 	store_ncimage(diag_image, fname.c_str(), false);
1673 
1674 	// ---- ---- ---- ---- Weight quant  ---- ---- ---- ----
1675 	auto texel_func3 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1676 		(void)texel_x;
1677 		(void)texel_y;
1678 
1679 		int quant { 0 };
1680 		if (!info.is_constant_block)
1681 		{
1682 			quant = info.weight_level_count - 1;
1683 		}
1684 
1685 		return vint4(quant, quant, quant, 255);
1686 	};
1687 
1688 	print_diagnostic_image(context, image, *diag_image, texel_func3);
1689 	fname = stem + "_diag_weight_quant.png";
1690 	store_ncimage(diag_image, fname.c_str(), false);
1691 
1692 	// ---- ---- ---- ---- Color quant  ---- ---- ---- ----
1693 	auto texel_func4 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1694 		(void)texel_x;
1695 		(void)texel_y;
1696 
1697 		int quant { 0 };
1698 		if (!info.is_constant_block)
1699 		{
1700 			quant = info.color_level_count - 1;
1701 		}
1702 
1703 		return vint4(quant, quant, quant, 255);
1704 	};
1705 
1706 	print_diagnostic_image(context, image, *diag_image, texel_func4);
1707 	fname = stem + "_diag_color_quant.png";
1708 	store_ncimage(diag_image, fname.c_str(), false);
1709 
1710 	// ---- ---- ---- ---- Color endpoint mode: Index ---- ---- ---- ----
1711 	auto texel_func5 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1712 		(void)texel_x;
1713 		(void)texel_y;
1714 
1715 		size_t texel_index = texel_y * info.block_x + texel_x;
1716 
1717 		int cem { 255 };
1718 		if (!info.is_constant_block)
1719 		{
1720 			uint8_t partition = info.partition_assignment[texel_index];
1721 			cem = info.color_endpoint_modes[partition] * 16;
1722 		}
1723 
1724 		return vint4(cem, cem, cem, 255);
1725 	};
1726 
1727 	print_diagnostic_image(context, image, *diag_image, texel_func5);
1728 	fname = stem + "_diag_cem_index.png";
1729 	store_ncimage(diag_image, fname.c_str(), false);
1730 
1731 	// ---- ---- ---- ---- Color endpoint mode: Components ---- ---- ---- ----
1732 	auto texel_func6 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1733 		(void)texel_x;
1734 		(void)texel_y;
1735 
1736 		const vint4 colors[] {
1737 			vint4(  0,   0,   0, 255),
1738 			vint4(255,   0,   0, 255),
1739 			vint4(  0, 255,   0, 255),
1740 			vint4(  0,   0, 255, 255),
1741 			vint4(255, 255, 255, 255)
1742 		};
1743 
1744 		size_t texel_index = texel_y * info.block_x + texel_x;
1745 
1746 		int components { 0 };
1747 		if (!info.is_constant_block)
1748 		{
1749 			uint8_t partition = info.partition_assignment[texel_index];
1750 			uint8_t cem = info.color_endpoint_modes[partition];
1751 
1752 			switch (cem)
1753 			{
1754 				case 0:
1755 				case 1:
1756 				case 2:
1757 				case 3:
1758 					components = 1;
1759 					break;
1760 				case 4:
1761 				case 5:
1762 					components = 2;
1763 					break;
1764 				case 6:
1765 				case 7:
1766 				case 8:
1767 				case 9:
1768 				case 11:
1769 					components = 3;
1770 					break;
1771 				default:
1772 					components = 4;
1773 					break;
1774 			}
1775 		}
1776 
1777 		return colors[components];
1778 	};
1779 
1780 	print_diagnostic_image(context, image, *diag_image, texel_func6);
1781 	fname = stem + "_diag_cem_components.png";
1782 	store_ncimage(diag_image, fname.c_str(), false);
1783 
1784 	// ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1785 	auto texel_func7 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1786 		(void)texel_x;
1787 		(void)texel_y;
1788 
1789 		const vint4 colors[] {
1790 			vint4(  0,   0,   0, 255),
1791 			vint4(255,   0,   0, 255),
1792 			vint4(  0, 255,   0, 255),
1793 			vint4(  0,   0, 255, 255),
1794 		};
1795 
1796 		size_t texel_index = texel_y * info.block_x + texel_x;
1797 
1798 		int style { 0 };
1799 		if (!info.is_constant_block)
1800 		{
1801 			uint8_t partition = info.partition_assignment[texel_index];
1802 			uint8_t cem = info.color_endpoint_modes[partition];
1803 
1804 			switch (cem)
1805 			{
1806 				// Direct - two absolute endpoints
1807 				case 0:
1808 				case 1:
1809 				case 2:
1810 				case 3:
1811 				case 4:
1812 				case 8:
1813 				case 11:
1814 				case 12:
1815 				case 14:
1816 				case 15:
1817 					style = 1;
1818 					break;
1819 				// Offset - one absolute plus delta
1820 				case 5:
1821 				case 9:
1822 				case 13:
1823 					style = 2;
1824 					break;
1825 				// Scale - one absolute plus scale
1826 				case 6:
1827 				case 7:
1828 				case 10:
1829 					style = 3;
1830 					break;
1831 				// Shouldn't happen ...
1832 				default:
1833 					style = 0;
1834 					break;
1835 			}
1836 		}
1837 
1838 		return colors[style];
1839 	};
1840 
1841 	print_diagnostic_image(context, image, *diag_image, texel_func7);
1842 	fname = stem + "_diag_cem_style.png";
1843 	store_ncimage(diag_image, fname.c_str(), false);
1844 
1845 	// ---- ---- ---- ---- Color endpoint mode: Style ---- ---- ---- ----
1846 	auto texel_func8 = [](astcenc_block_info& info, size_t texel_x, size_t texel_y) {
1847 		(void)texel_x;
1848 		(void)texel_y;
1849 
1850 		size_t texel_index = texel_y * info.block_x + texel_x;
1851 
1852 		int style { 0 };
1853 		if (!info.is_constant_block)
1854 		{
1855 			uint8_t partition = info.partition_assignment[texel_index];
1856 			uint8_t cem = info.color_endpoint_modes[partition];
1857 
1858 			switch (cem)
1859 			{
1860 				// LDR blocks
1861 				case 0:
1862 				case 1:
1863 				case 4:
1864 				case 5:
1865 				case 6:
1866 				case 8:
1867 				case 9:
1868 				case 10:
1869 				case 12:
1870 				case 13:
1871 					style = 128;
1872 					break;
1873 				// HDR blocks
1874 				default:
1875 					style = 155;
1876 					break;
1877 			}
1878 		}
1879 
1880 		return vint4(style, style, style, 255);
1881 	};
1882 
1883 	print_diagnostic_image(context, image, *diag_image, texel_func8);
1884 	fname = stem + "_diag_cem_hdr.png";
1885 	store_ncimage(diag_image, fname.c_str(), false);
1886 
1887 	free_image(diag_image);
1888 }
1889 
#if QUALITY_CONTROL
constexpr double MAX_PSNR = 99.9;
constexpr double MAX_VALUE = 255;
constexpr double THRESHOLD_R = 30.0;
constexpr double THRESHOLD_G = 30.0;
constexpr double THRESHOLD_B = 30.0;
constexpr double THRESHOLD_A = 30.0;
constexpr double THRESHOLD_RGB = 30.0;
constexpr double LOG_BASE = 10.0;

/**
 * @brief Compute PSNR figures from per-block error sums and test them
 *        against the quality thresholds.
 *
 * The per-block values of each channel are summed and converted to a PSNR
 * against a peak of MAX_VALUE. A combined figure is computed over every
 * channel except alpha. A channel whose error sum is zero is reported as
 * MAX_PSNR. The five figures are printed to stdout.
 *
 * @param mseIn      Per-channel arrays of per-block error sums; each array
 *                   must hold at least @c blockNum entries.
 * @param blockNum   The number of blocks in each array.
 * @param blockXYZ   The divisor applied per block; presumably the number of
 *                   texels in one block - TODO confirm against the caller.
 *
 * @return @c true if every PSNR figure is strictly above its threshold.
 */
bool CheckQuality(int32_t* mseIn[RGBA_COM], int blockNum, int blockXYZ)
{
    double psnr[RGBA_COM + 1];
    const double threshold[RGBA_COM + 1] = { THRESHOLD_R, THRESHOLD_G, THRESHOLD_B, THRESHOLD_A, THRESHOLD_RGB};
    uint64_t mseTotal[RGBA_COM + 1] = { 0, 0, 0, 0, 0};

    // Accumulate per-channel totals; slot RGBA_COM collects all but alpha
    for (int i = R_COM; i < RGBA_COM; i++) {
        const int32_t* mse = mseIn[i];
        for (int j = 0; j < blockNum; j++) {
            mseTotal[i] += mse[j];
            if (i != A_COM) {
                mseTotal[RGBA_COM] += mse[j];
            }
        }
    }

    // Form the sample counts in double: the int products blockNum * blockXYZ
    // (and three times that for the combined figure) can overflow for very
    // large images, which is undefined behavior and corrupts the PSNR
    const double sampleCount = static_cast<double>(blockNum) * static_cast<double>(blockXYZ);
    const double sampleCountRgb = sampleCount * static_cast<double>(RGBA_COM - 1);
    const double peakSquared = MAX_VALUE * MAX_VALUE;

    for (int i = R_COM; i < RGBA_COM; i++) {
        if (mseTotal[i] == 0) {
            psnr[i] = MAX_PSNR;
            continue;
        }
        double mseChannel = static_cast<double>(mseTotal[i]) / sampleCount;
        psnr[i] = LOG_BASE * log(peakSquared / mseChannel) / log(LOG_BASE);
    }

    if (mseTotal[RGBA_COM] == 0) {
        psnr[RGBA_COM] = MAX_PSNR;
    }
    else {
        double mseRgb = static_cast<double>(mseTotal[RGBA_COM]) / sampleCountRgb;
        psnr[RGBA_COM] = LOG_BASE * log(peakSquared / mseRgb) / log(LOG_BASE);
    }

    printf("astc psnr r%f g%f b%f a%f rgb%f\n",
        psnr[R_COM], psnr[G_COM], psnr[B_COM], psnr[A_COM],
        psnr[RGBA_COM]);

    return (psnr[R_COM] > threshold[R_COM]) && (psnr[G_COM] > threshold[G_COM])
        && (psnr[B_COM] > threshold[B_COM]) && (psnr[A_COM] > threshold[A_COM])
        && (psnr[RGBA_COM] > threshold[RGBA_COM]);
}
#endif
1936 
1937 /**
1938  * @brief The main entry point.
1939  *
1940  * @param argc   The number of arguments.
1941  * @param argv   The vector of arguments.
1942  *
1943  * @return 0 on success, non-zero otherwise.
1944  */
astcenc_main(int argc,char ** argv)1945 int astcenc_main(
1946 	int argc,
1947 	char **argv
1948 ) {
1949 	double start_time = get_time();
1950 
1951 	if (argc < 2)
1952 	{
1953 		astcenc_print_shorthelp();
1954 		return 0;
1955 	}
1956 
1957 	astcenc_operation operation;
1958 	astcenc_profile profile;
1959 	int error = parse_commandline_options(argc, argv, operation, profile);
1960 	if (error)
1961 	{
1962 		return 1;
1963 	}
1964 
1965 	switch (operation)
1966 	{
1967 	case ASTCENC_OP_HELP:
1968 		astcenc_print_longhelp();
1969 		return 0;
1970 	case ASTCENC_OP_VERSION:
1971 		astcenc_print_header();
1972 		return 0;
1973 	default:
1974 		break;
1975 	}
1976 
1977 	std::string input_filename = argc >= 3 ? argv[2] : "";
1978 	std::string output_filename = argc >= 4 ? argv[3] : "";
1979 
1980 	if (input_filename.empty())
1981 	{
1982 		print_error("ERROR: Input file not specified\n");
1983 		return 1;
1984 	}
1985 
1986 	if (output_filename.empty())
1987 	{
1988 		print_error("ERROR: Output file not specified\n");
1989 		return 1;
1990 	}
1991 
1992 	// TODO: Handle RAII resources so they get freed when out of scope
1993 	// Load the compressed input file if needed
1994 
1995 	// This has to come first, as the block size is in the file header
1996 	astc_compressed_image image_comp {};
1997 	if (operation & ASTCENC_STAGE_LD_COMP)
1998 	{
1999 		if (ends_with(input_filename, ".astc"))
2000 		{
2001 			error = load_cimage(input_filename.c_str(), image_comp);
2002 			if (error)
2003 			{
2004 				return 1;
2005 			}
2006 		}
2007 		else if (ends_with(input_filename, ".ktx"))
2008 		{
2009 			bool is_srgb;
2010 			error = load_ktx_compressed_image(input_filename.c_str(), is_srgb, image_comp);
2011 			if (error)
2012 			{
2013 				return 1;
2014 			}
2015 
2016 			if (is_srgb && (profile != ASTCENC_PRF_LDR_SRGB))
2017 			{
2018 				printf("WARNING: Input file is sRGB, but decompressing as linear\n");
2019 			}
2020 
2021 			if (!is_srgb && (profile == ASTCENC_PRF_LDR_SRGB))
2022 			{
2023 				printf("WARNING: Input file is linear, but decompressing as sRGB\n");
2024 			}
2025 		}
2026 		else
2027 		{
2028 			print_error("ERROR: Unknown compressed input file type\n");
2029 			return 1;
2030 		}
2031 	}
2032 
2033 	astcenc_config config {};
2034 	astcenc_preprocess preprocess;
2035 	error = init_astcenc_config(argc, argv, profile, operation, image_comp, preprocess, config);
2036 	if (error)
2037 	{
2038 		return 1;
2039 	}
2040 
2041 	// Initialize cli_config_options with default values
2042 	cli_config_options cli_config { 0, 1, 1, false, false, false, -10, 10,
2043 		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A },
2044 		{ ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A } };
2045 
2046 	error = edit_astcenc_config(argc, argv, operation, cli_config, config);
2047 	if (error)
2048 	{
2049 		return 1;
2050 	}
2051 
2052 	// Enable progress callback if not in silent mode and using a terminal
2053 	#if defined(_WIN32)
2054 		int stdoutfno = _fileno(stdout);
2055 	#else
2056 		int stdoutfno = STDOUT_FILENO;
2057 	#endif
2058 
2059 	if ((!cli_config.silentmode) && isatty(stdoutfno))
2060 	{
2061 		config.progress_callback = progress_emitter;
2062 	}
2063 
2064 	astcenc_image* image_uncomp_in = nullptr ;
2065 	unsigned int image_uncomp_in_component_count = 0;
2066 	bool image_uncomp_in_is_hdr = false;
2067 	astcenc_image* image_decomp_out = nullptr;
2068 
2069 	// Determine decompression output bitness, if limited by file type
2070 	int out_bitness = 0;
2071 	if (operation & ASTCENC_STAGE_DECOMPRESS)
2072 	{
2073 		out_bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2074 		if (out_bitness == 0)
2075 		{
2076 			bool is_hdr = (config.profile == ASTCENC_PRF_HDR) ||
2077 			              (config.profile == ASTCENC_PRF_HDR_RGB_LDR_A);
2078 			out_bitness = is_hdr ? 16 : 8;
2079 		}
2080 
2081 		// If decompressed output is unorm8 then force the decode_unorm8 heuristics for compression
2082 		if (out_bitness == 8)
2083 		{
2084 			config.flags |= ASTCENC_FLG_USE_DECODE_UNORM8;
2085 		}
2086 	}
2087 
2088 	// TODO: Handle RAII resources so they get freed when out of scope
2089 	astcenc_error    codec_status;
2090 	astcenc_context* codec_context;
2091 
2092 	// Preflight - check we have valid extensions for storing a file
2093 	if (operation & ASTCENC_STAGE_ST_NCOMP)
2094 	{
2095 		int bitness = get_output_filename_enforced_bitness(output_filename.c_str());
2096 		if (bitness < 0)
2097 		{
2098 			const char *eptr = strrchr(output_filename.c_str(), '.');
2099 			eptr = eptr ? eptr : "";
2100 			print_error("ERROR: Unknown uncompressed output file type '%s'\n", eptr);
2101 			return 1;
2102 		}
2103 	}
2104 
2105 	if (operation & ASTCENC_STAGE_ST_COMP)
2106 	{
2107 #if defined(_WIN32)
2108 		bool is_null = output_filename == "NUL" || output_filename == "nul";
2109 #else
2110 		bool is_null = output_filename == "/dev/null";
2111 #endif
2112 
2113 		if (!(is_null || ends_with(output_filename, ".astc") || ends_with(output_filename, ".ktx")))
2114 		{
2115 			const char *eptr = strrchr(output_filename.c_str(), '.');
2116 			eptr = eptr ? eptr : "";
2117 			print_error("ERROR: Unknown compressed output file type '%s'\n", eptr);
2118 			return 1;
2119 		}
2120 	}
2121 
2122 	codec_status = astcenc_context_alloc(&config, cli_config.thread_count, &codec_context);
2123 	if (codec_status != ASTCENC_SUCCESS)
2124 	{
2125 		print_error("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(codec_status));
2126 		return 1;
2127 	}
2128 
2129 	// Load the uncompressed input file if needed
2130 	if (operation & ASTCENC_STAGE_LD_NCOMP)
2131 	{
2132 		image_uncomp_in = load_uncomp_file(
2133 		    input_filename.c_str(), cli_config.array_size, cli_config.y_flip,
2134 		    image_uncomp_in_is_hdr, image_uncomp_in_component_count);
2135 		if (!image_uncomp_in)
2136 		{
2137 			print_error("ERROR: Failed to load uncompressed image file\n");
2138 			return 1;
2139 		}
2140 
2141 
2142 		if (preprocess != ASTCENC_PP_NONE)
2143 		{
2144 			// Allocate a float image so we can avoid additional quantization,
2145 			// as e.g. premultiplication can result in fractional color values
2146 			astcenc_image* image_pp = alloc_image(32,
2147 			                                      image_uncomp_in->dim_x,
2148 			                                      image_uncomp_in->dim_y,
2149 			                                      image_uncomp_in->dim_z);
2150 			if (!image_pp)
2151 			{
2152 				print_error("ERROR: Failed to allocate preprocessed image\n");
2153 				return 1;
2154 			}
2155 
2156 			if (preprocess == ASTCENC_PP_NORMALIZE)
2157 			{
2158 				image_preprocess_normalize(*image_uncomp_in, *image_pp);
2159 			}
2160 
2161 			if (preprocess == ASTCENC_PP_PREMULTIPLY)
2162 			{
2163 				image_preprocess_premultiply(*image_uncomp_in, *image_pp,
2164 				                             config.profile);
2165 			}
2166 
2167 			// Delete the original as we no longer need it
2168 			free_image(image_uncomp_in);
2169 			image_uncomp_in = image_pp;
2170 		}
2171 
2172 		if (!cli_config.silentmode)
2173 		{
2174 			printf("Source image\n");
2175 			printf("============\n\n");
2176 			printf("    Source:                     %s\n", input_filename.c_str());
2177 			printf("    Color profile:              %s\n", image_uncomp_in_is_hdr ? "HDR" : "LDR");
2178 			if (image_uncomp_in->dim_z > 1)
2179 			{
2180 				printf("    Dimensions:                 3D, %ux%ux%u\n",
2181 				       image_uncomp_in->dim_x, image_uncomp_in->dim_y, image_uncomp_in->dim_z);
2182 			}
2183 			else
2184 			{
2185 				printf("    Dimensions:                 2D, %ux%u\n",
2186 				       image_uncomp_in->dim_x, image_uncomp_in->dim_y);
2187 			}
2188 			printf("    Components:                 %d\n\n", image_uncomp_in_component_count);
2189 		}
2190 	}
2191 
2192 	double image_size = 0.0;
2193 	if (image_uncomp_in)
2194 	{
2195 		image_size = static_cast<double>(image_uncomp_in->dim_x) *
2196 		             static_cast<double>(image_uncomp_in->dim_y) *
2197 		             static_cast<double>(image_uncomp_in->dim_z);
2198 	}
2199 	else
2200 	{
2201 		image_size = static_cast<double>(image_comp.dim_x) *
2202 		             static_cast<double>(image_comp.dim_y) *
2203 		             static_cast<double>(image_comp.dim_z);
2204 	}
2205 
2206 	// Compress an image
2207 	double best_compression_time = 100000.0;
2208 	double total_compression_time = 0.0;
2209 	if (operation & ASTCENC_STAGE_COMPRESS)
2210 	{
2211 		print_astcenc_config(cli_config, config);
2212 
2213 		unsigned int blocks_x = (image_uncomp_in->dim_x + config.block_x - 1) / config.block_x;
2214 		unsigned int blocks_y = (image_uncomp_in->dim_y + config.block_y - 1) / config.block_y;
2215 		unsigned int blocks_z = (image_uncomp_in->dim_z + config.block_z - 1) / config.block_z;
2216 		size_t buffer_size = blocks_x * blocks_y * blocks_z * 16;
2217 		uint8_t* buffer = new uint8_t[buffer_size];
2218 
2219 		compression_workload work;
2220 		work.context = codec_context;
2221 		image_uncomp_in->dim_stride = image_uncomp_in->dim_x;
2222 		work.image = image_uncomp_in;
2223 		work.swizzle = cli_config.swz_encode;
2224 		work.data_out = buffer;
2225 		work.data_len = buffer_size;
2226 		work.error = ASTCENC_SUCCESS;
2227 #if QUALITY_CONTROL
2228 		work.calQualityEnable = true;
2229 		work.mse[R_COM] = work.mse[G_COM] = work.mse[B_COM] = work.mse[A_COM] = nullptr;
2230 		if (work.calQualityEnable) {
2231 		for (int i = R_COM; i < RGBA_COM; i++) {
2232 				work.mse[i] = (int32_t*)calloc(blocks_x * blocks_y, sizeof(int32_t));
2233 				if (!work.mse[i]) {
2234 					printf("quality control calloc failed");
2235 					return -1;
2236 				}
2237 			}
2238 		}
2239 #endif
2240 		// Only launch worker threads for multi-threaded use - it makes basic
2241 		// single-threaded profiling and debugging a little less convoluted
2242 		double start_compression_time = get_time();
2243 		for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2244 		{
2245 			if (config.progress_callback)
2246 			{
2247 				printf("Compression\n");
2248 				printf("===========\n");
2249 				printf("\n");
2250 			}
2251 
2252 			double start_iter_time = get_time();
2253 			if (cli_config.thread_count > 1)
2254 			{
2255 				launch_threads("Compression", cli_config.thread_count, compression_workload_runner, &work);
2256 			}
2257 			else
2258 			{
2259 				work.error = astcenc_compress_image(
2260 					work.context, work.image, &work.swizzle,
2261 					work.data_out, work.data_len,
2262 #if QUALITY_CONTROL
2263 			    	work.calQualityEnable, work.mse,
2264 #endif
2265 			    	0);
2266 			}
2267 
2268 			astcenc_compress_reset(codec_context);
2269 
2270 			if (config.progress_callback)
2271 			{
2272 				printf("\n\n");
2273 			}
2274 
2275 			double iter_time = get_time() - start_iter_time;
2276 			best_compression_time = astc::min(iter_time, best_compression_time);
2277 		}
2278 		total_compression_time = get_time() - start_compression_time;
2279 
2280 		if (work.error != ASTCENC_SUCCESS)
2281 		{
2282 			print_error("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(work.error));
2283 			return 1;
2284 		}
2285 #if QUALITY_CONTROL
2286 		if (work.calQualityEnable && !CheckQuality(work.mse, blocks_x * blocks_y, config.block_x * config.block_y)) {
2287 		    work.error = ASTCENC_ERR_BAD_QUALITY_CHECK;
2288 		}
2289 		if (work.calQualityEnable) {
2290 			for (int i = R_COM; i < RGBA_COM; i++) {
2291 				if (work.mse[i]) {
2292 					free(work.mse[i]);
2293 				}
2294 			}
2295 		}
2296 #endif
2297 		image_comp.block_x = config.block_x;
2298 		image_comp.block_y = config.block_y;
2299 		image_comp.block_z = config.block_z;
2300 		image_comp.dim_x = image_uncomp_in->dim_x;
2301 		image_comp.dim_y = image_uncomp_in->dim_y;
2302 		image_comp.dim_z = image_uncomp_in->dim_z;
2303 		image_comp.data = buffer;
2304 		image_comp.data_len = buffer_size;
2305 	}
2306 
2307 	// Decompress an image
2308 	double best_decompression_time = 100000.0;
2309 	double total_decompression_time = 0.0;
2310 	if (operation & ASTCENC_STAGE_DECOMPRESS)
2311 	{
2312 		image_decomp_out = alloc_image(
2313 		    out_bitness, image_comp.dim_x, image_comp.dim_y, image_comp.dim_z);
2314 
2315 		decompression_workload work;
2316 		work.context = codec_context;
2317 		work.data = image_comp.data;
2318 		work.data_len = image_comp.data_len;
2319 		work.image_out = image_decomp_out;
2320 		work.swizzle = cli_config.swz_decode;
2321 		work.error = ASTCENC_SUCCESS;
2322 
2323 		// Only launch worker threads for multi-threaded use - it makes basic
2324 		// single-threaded profiling and debugging a little less convoluted
2325 		double start_decompression_time = get_time();
2326 		for (unsigned int i = 0; i < cli_config.repeat_count; i++)
2327 		{
2328 			double start_iter_time = get_time();
2329 			if (cli_config.thread_count > 1)
2330 			{
2331 				launch_threads("Decompression", cli_config.thread_count, decompression_workload_runner, &work);
2332 			}
2333 			else
2334 			{
2335 				work.error = astcenc_decompress_image(
2336 				    work.context, work.data, work.data_len,
2337 				    work.image_out, &work.swizzle, 0);
2338 			}
2339 
2340 			astcenc_decompress_reset(codec_context);
2341 
2342 			double iter_time = get_time() - start_iter_time;
2343 			best_decompression_time = astc::min(iter_time, best_decompression_time);
2344 		}
2345 		total_decompression_time = get_time() - start_decompression_time;
2346 
2347 		if (work.error != ASTCENC_SUCCESS)
2348 		{
2349 			print_error("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(codec_status));
2350 			return 1;
2351 		}
2352 	}
2353 
2354 #if defined(_WIN32)
2355 	bool is_null = output_filename == "NUL" || output_filename == "nul";
2356 #else
2357 	bool is_null = output_filename == "/dev/null";
2358 #endif
2359 
2360    // Print metrics in comparison mode
2361 	if (operation & ASTCENC_STAGE_COMPARE)
2362 	{
2363 		bool is_normal_map = config.flags & ASTCENC_FLG_MAP_NORMAL;
2364 
2365 		compute_error_metrics(
2366 		    image_uncomp_in_is_hdr, is_normal_map, image_uncomp_in_component_count,
2367 		    image_uncomp_in, image_decomp_out, cli_config.low_fstop, cli_config.high_fstop);
2368 	}
2369 
2370 	// Store compressed image
2371 	if (operation & ASTCENC_STAGE_ST_COMP)
2372 	{
2373 		if (ends_with(output_filename, ".astc"))
2374 		{
2375 			error = store_cimage(image_comp, output_filename.c_str());
2376 			if (error)
2377 			{
2378 				print_error("ERROR: Failed to store compressed image\n");
2379 				return 1;
2380 			}
2381 		}
2382 		else if (ends_with(output_filename, ".ktx"))
2383 		{
2384 			bool srgb = profile == ASTCENC_PRF_LDR_SRGB;
2385 			error = store_ktx_compressed_image(image_comp, output_filename.c_str(), srgb);
2386 			if (error)
2387 			{
2388 				print_error("ERROR: Failed to store compressed image\n");
2389 				return 1;
2390 			}
2391 		}
2392 		else
2393 		{
2394 			if (!is_null)
2395 			{
2396 				print_error("ERROR: Unknown compressed output file type\n");
2397 				return 1;
2398 			}
2399 		}
2400 	}
2401 
2402 	// Store decompressed image
2403 	if (operation & ASTCENC_STAGE_ST_NCOMP)
2404 	{
2405 		if (!is_null)
2406 		{
2407 			bool store_result = store_ncimage(image_decomp_out, output_filename.c_str(),
2408 			                                  cli_config.y_flip);
2409 			if (!store_result)
2410 			{
2411 				print_error("ERROR: Failed to write output image %s\n", output_filename.c_str());
2412 				return 1;
2413 			}
2414 		}
2415 	}
2416 
2417 	// Store diagnostic images
2418 	if (cli_config.diagnostic_images && !is_null)
2419 	{
2420 		print_diagnostic_images(codec_context, image_comp, output_filename);
2421 	}
2422 
2423 	free_image(image_uncomp_in);
2424 	free_image(image_decomp_out);
2425 	astcenc_context_free(codec_context);
2426 
2427 	delete[] image_comp.data;
2428 
2429 	if ((operation & ASTCENC_STAGE_COMPARE) || (!cli_config.silentmode))
2430 	{
2431 		double end_time = get_time();
2432 
2433 		double repeats = static_cast<double>(cli_config.repeat_count);
2434 		double avg_compression_time = total_compression_time / repeats;
2435 		double avg_decompression_time = total_decompression_time / repeats;
2436 		double total_time = (end_time - start_time) - ((repeats - 1.0) * avg_compression_time)  - ((repeats - 1.0) * avg_decompression_time);
2437 
2438 		printf("Performance metrics\n");
2439 		printf("===================\n\n");
2440 		printf("    Total time:                %8.4f s\n", total_time);
2441 
2442 		if (operation & ASTCENC_STAGE_COMPRESS)
2443 		{
2444 			double compression_rate = image_size / (best_compression_time * 1000000.0);
2445 
2446 			printf("    Coding time:               %8.4f s\n", best_compression_time);
2447 			printf("    Coding rate:               %8.4f MT/s\n", compression_rate);
2448 		}
2449 
2450 		if (operation & ASTCENC_STAGE_DECOMPRESS)
2451 		{
2452 			double decompression_rate = image_size / (best_decompression_time * 1000000.0);
2453 			printf("    Decoding time:             %8.4f s\n", best_decompression_time);
2454 			printf("    Decoding rate:             %8.4f MT/s\n", decompression_rate);
2455 		}
2456 	}
2457 
2458 	return 0;
2459 }
2460