• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for loading/storing uncompressed and compressed images.
20  */
21 
22 #include <array>
23 #include <cassert>
24 #include <cstdio>
25 #include <cstdlib>
26 #include <cstring>
27 #include <fstream>
28 #include <iomanip>
29 #include <sstream>
30 
31 #include "astcenccli_internal.h"
32 
33 #include "stb_image.h"
34 #include "stb_image_write.h"
35 #include "tinyexr.h"
36 
37 /**
38  * @brief Determine the output file name to use for a sliced image write.
39  *
40  * @param img        The source data for the image.
41  * @param filename   The base name of the file to save.
42  * @param index      The slice index to write.
43  *
44  * @return The file name to use when saving the file.
45  */
get_output_filename(const astcenc_image * img,const char * filename,unsigned int index)46 static std::string get_output_filename(
47 	const astcenc_image* img,
48 	const char* filename,
49 	unsigned int index
50 ) {
51 	if (img->dim_z <= 1)
52 	{
53 		return filename;
54 	}
55 
56 	std::string fnmod(filename);
57 	std::string fnext = fnmod.substr(fnmod.find_last_of("."));
58 
59 	// Remove the extension
60 	fnmod = fnmod.erase(fnmod.length() - fnext.size());
61 
62 	// Insert the file index into the base name, then append the extension
63 	std::stringstream ss;
64 	ss << fnmod << "_" << std::setw(3) << std::setfill('0') << index << fnext;
65 	return ss.str();
66 }
67 
68 /* ============================================================================
69   Image load and store through the stb_image and tinyexr libraries
70 ============================================================================ */
71 
72 /**
73  * @brief Load a .exr image using TinyExr to provide the loader.
74  *
75  * @param      filename          The name of the file to load.
76  * @param      y_flip            Should the image be vertically flipped?
77  * @param[out] is_hdr            Is this an HDR image load? Always @c true for this function.
78  * @param[out] component_count   The number of components in the data.
79  *
80  * @return The loaded image data in a canonical 4 channel format.
81  */
load_image_with_tinyexr(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)82 static astcenc_image* load_image_with_tinyexr(
83 	const char* filename,
84 	bool y_flip,
85 	bool& is_hdr,
86 	unsigned int& component_count
87 ) {
88 	int dim_x, dim_y;
89 	float* image;
90 	const char* err;
91 
92 	int load_res = LoadEXR(&image, &dim_x, &dim_y, filename, &err);
93 	if (load_res != TINYEXR_SUCCESS)
94 	{
95 		print_error("ERROR: Failed to load image %s (%s)\n", filename, err);
96 		free(reinterpret_cast<void*>(const_cast<char*>(err)));
97 		return nullptr;
98 	}
99 
100 	astcenc_image* res_img = astc_img_from_floatx4_array(image, dim_x, dim_y, y_flip);
101 	free(image);
102 
103 	is_hdr = true;
104 	component_count = 4;
105 	return res_img;
106 }
107 
108 /**
109  * @brief Load an image using STBImage to provide the loader.
110  *
111  * @param      filename          The name of the file to load.
112  * @param      y_flip            Should the image be vertically flipped?
113  * @param[out] is_hdr            Is this an HDR image load?
114  * @param[out] component_count   The number of components in the data.
115  *
116  * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
117  */
load_image_with_stb(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)118 static astcenc_image* load_image_with_stb(
119 	const char* filename,
120 	bool y_flip,
121 	bool& is_hdr,
122 	unsigned int& component_count
123 ) {
124 	int dim_x, dim_y;
125 
126 	if (stbi_is_hdr(filename))
127 	{
128 		float* data = stbi_loadf(filename, &dim_x, &dim_y, nullptr, STBI_rgb_alpha);
129 		if (data)
130 		{
131 			astcenc_image* img = astc_img_from_floatx4_array(data, dim_x, dim_y, y_flip);
132 			stbi_image_free(data);
133 			is_hdr = true;
134 			component_count = 4;
135 			return img;
136 		}
137 	}
138 	else
139 	{
140 		uint8_t* data = stbi_load(filename, &dim_x, &dim_y, nullptr, STBI_rgb_alpha);
141 		if (data)
142 		{
143 			astcenc_image* img = astc_img_from_unorm8x4_array(data, dim_x, dim_y, y_flip);
144 			stbi_image_free(data);
145 			is_hdr = false;
146 			component_count = 4;
147 			return img;
148 		}
149 	}
150 
151 	print_error("ERROR: Failed to load image %s (%s)\n", filename, stbi_failure_reason());
152 	return nullptr;
153 }
154 
155 /**
156  * @brief Save an EXR image using TinyExr to provide the store routine.
157  *
158  * @param img        The source data for the image.
159  * @param filename   The name of the file to save.
160  * @param y_flip     Should the image be vertically flipped?
161  *
162  * @return @c true if the image saved OK, @c false on error.
163  */
store_exr_image_with_tinyexr(const astcenc_image * img,const char * filename,int y_flip)164 static bool store_exr_image_with_tinyexr(
165 	const astcenc_image* img,
166 	const char* filename,
167 	int y_flip
168 ) {
169 	int res { 0 };
170 
171 	for (unsigned int i = 0; i < img->dim_z; i++)
172 	{
173 		std::string fnmod = get_output_filename(img, filename, i);
174 		float* buf = floatx4_array_from_astc_img(img, y_flip, i);
175 
176 		res = SaveEXR(buf, img->dim_x, img->dim_y, 4, 1, fnmod.c_str(), nullptr);
177 		delete[] buf;
178 		if (res < 0)
179 		{
180 			break;
181 		}
182 	}
183 
184 	return res >= 0;
185 }
186 
187 /**
188  * @brief Save a PNG image using STBImageWrite to provide the store routine.
189  *
190  * @param img        The source data for the image.
191  * @param filename   The name of the file to save.
192  * @param y_flip     Should the image be vertically flipped?
193  *
194  * @return @c true if the image saved OK, @c false on error.
195  */
store_png_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)196 static bool store_png_image_with_stb(
197 	const astcenc_image* img,
198 	const char* filename,
199 	int y_flip
200 ) {
201 	int res { 0 };
202 
203 	assert(img->data_type == ASTCENC_TYPE_U8);
204 
205 	for (unsigned int i = 0; i < img->dim_z; i++)
206 	{
207 		std::string fnmod = get_output_filename(img, filename, i);
208 		uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[i]);
209 
210 		stbi_flip_vertically_on_write(y_flip);
211 		res = stbi_write_png(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf, img->dim_x * 4);
212 		if (res == 0)
213 		{
214 			break;
215 		}
216 	}
217 
218 	return res != 0;
219 }
220 
221 /**
222  * @brief Save a TGA image using STBImageWrite to provide the store routine.
223  *
224  * @param img        The source data for the image.
225  * @param filename   The name of the file to save.
226  * @param y_flip     Should the image be vertically flipped?
227  *
228  * @return @c true if the image saved OK, @c false on error.
229  */
store_tga_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)230 static bool store_tga_image_with_stb(
231 	const astcenc_image* img,
232 	const char* filename,
233 	int y_flip
234 ) {
235 	int res { 0 };
236 
237 	assert(img->data_type == ASTCENC_TYPE_U8);
238 
239 	for (unsigned int i = 0; i < img->dim_z; i++)
240 	{
241 		std::string fnmod = get_output_filename(img, filename, i);
242 		uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[i]);
243 
244 		stbi_flip_vertically_on_write(y_flip);
245 		res = stbi_write_tga(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf);
246 		if (res == 0)
247 		{
248 			break;
249 		}
250 	}
251 
252 	return res != 0;
253 }
254 
255 /**
256  * @brief Save a BMP image using STBImageWrite to provide the store routine.
257  *
258  * @param img        The source data for the image.
259  * @param filename   The name of the file to save.
260  * @param y_flip     Should the image be vertically flipped?
261  *
262  * @return @c true if the image saved OK, @c false on error.
263  */
store_bmp_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)264 static bool store_bmp_image_with_stb(
265 	const astcenc_image* img,
266 	const char* filename,
267 	int y_flip
268 ) {
269 	int res { 0 };
270 
271 	assert(img->data_type == ASTCENC_TYPE_U8);
272 
273 	for (unsigned int i = 0; i < img->dim_z; i++)
274 	{
275 		std::string fnmod = get_output_filename(img, filename, i);
276 		uint8_t* buf = reinterpret_cast<uint8_t*>(img->data[i]);
277 
278 		stbi_flip_vertically_on_write(y_flip);
279 		res = stbi_write_bmp(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf);
280 		if (res == 0)
281 		{
282 			break;
283 		}
284 	}
285 
286 	return res != 0;
287 }
288 
289 /**
290  * @brief Save a HDR image using STBImageWrite to provide the store routine.
291  *
292  * @param img        The source data for the image.
293  * @param filename   The name of the file to save.
294  * @param y_flip     Should the image be vertically flipped?
295  *
296  * @return @c true if the image saved OK, @c false on error.
297  */
store_hdr_image_with_stb(const astcenc_image * img,const char * filename,int y_flip)298 static bool store_hdr_image_with_stb(
299 	const astcenc_image* img,
300 	const char* filename,
301 	int y_flip
302 ) {
303 	int res { 0 };
304 
305 	for (unsigned int i = 0; i < img->dim_z; i++)
306 	{
307 		std::string fnmod = get_output_filename(img, filename, i);
308 		float* buf = floatx4_array_from_astc_img(img, y_flip, i);
309 
310 		res = stbi_write_hdr(fnmod.c_str(), img->dim_x, img->dim_y, 4, buf);
311 		delete[] buf;
312 		if (res == 0)
313 		{
314 			break;
315 		}
316 	}
317 
318 	return res != 0;
319 }
320 
321 /* ============================================================================
322 Native Load and store of KTX and DDS file formats.
323 
324 Unlike "regular" 2D image formats, which are mostly supported through stb_image
325 and tinyexr, these formats are supported directly; this involves a relatively
326 large number of pixel formats.
327 
328 The following restrictions apply to loading of these file formats:
329 
330     * Only uncompressed data supported
331     * Only first mipmap in mipmap pyramid supported
332     * KTX: Cube-map arrays are not supported
333 ============================================================================ */
/**
 * @brief The supported scanline conversions, named SOURCE_TO_DESTINATION.
 *
 * The destination is always four component RGBA, stored either as 8-bit values
 * or as FP16 values.
 */
enum scanline_transfer
{
	// 8-bit sources, 8-bit output
	R8_TO_RGBA8,
	RG8_TO_RGBA8,
	RGB8_TO_RGBA8,
	RGBA8_TO_RGBA8,
	BGR8_TO_RGBA8,
	BGRA8_TO_RGBA8,
	L8_TO_RGBA8,
	LA8_TO_RGBA8,

	// 8-bit sources with an ignored fourth component, 8-bit output
	RGBX8_TO_RGBA8,
	BGRX8_TO_RGBA8,

	// 16-bit integer sources, FP16 output
	R16_TO_RGBA16F,
	RG16_TO_RGBA16F,
	RGB16_TO_RGBA16F,
	RGBA16_TO_RGBA16F,
	BGR16_TO_RGBA16F,
	BGRA16_TO_RGBA16F,
	L16_TO_RGBA16F,
	LA16_TO_RGBA16F,

	// FP16 sources, FP16 output
	R16F_TO_RGBA16F,
	RG16F_TO_RGBA16F,
	RGB16F_TO_RGBA16F,
	RGBA16F_TO_RGBA16F,
	BGR16F_TO_RGBA16F,
	BGRA16F_TO_RGBA16F,
	L16F_TO_RGBA16F,
	LA16F_TO_RGBA16F,

	// FP32 sources, FP16 output
	R32F_TO_RGBA16F,
	RG32F_TO_RGBA16F,
	RGB32F_TO_RGBA16F,
	RGBA32F_TO_RGBA16F,
	BGR32F_TO_RGBA16F,
	BGRA32F_TO_RGBA16F,
	L32F_TO_RGBA16F,
	LA32F_TO_RGBA16F
};
375 
376 /**
377  * @brief Copy a scanline from a source file and expand to a canonical format.
378  *
379  * Outputs are always 4 component RGBA, stored as U8 (LDR) or FP16 (HDR).
380  *
381  * @param[out] dst           The start of the line to store to.
382  * @param      src           The start of the line to load.
383  * @param      pixel_count   The number of pixels in the scanline.
384  * @param      method        The conversion function.
385  */
copy_scanline(void * dst,const void * src,int pixel_count,scanline_transfer method)386 static void copy_scanline(
387 	void* dst,
388 	const void* src,
389 	int pixel_count,
390 	scanline_transfer method
391 ) {
392 
393 #define id(x) (x)
394 #define u16_sf16(x) float_to_float16(x * (1.0f/65535.0f))
395 #define f32_sf16(x) float_to_float16(x)
396 
397 #define COPY_R(dsttype, srctype, convfunc, oneval) \
398 	do { \
399 		const srctype* s = reinterpret_cast<const srctype*>(src); \
400 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
401 		for (int i = 0; i < pixel_count; i++) \
402 		{ \
403 			d[4 * i    ] = convfunc(s[i]); \
404 			d[4 * i + 1] = 0;              \
405 			d[4 * i + 2] = 0;              \
406 			d[4 * i + 3] = oneval;         \
407 		} \
408 	} while (0); \
409 	break
410 
411 #define COPY_RG(dsttype, srctype, convfunc, oneval) \
412 	do { \
413 		const srctype* s = reinterpret_cast<const srctype*>(src); \
414 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
415 		for (int i = 0; i < pixel_count; i++) \
416 		{ \
417 			d[4 * i    ] = convfunc(s[2 * i    ]); \
418 			d[4 * i + 1] = convfunc(s[2 * i + 1]); \
419 			d[4 * i + 2] = 0;                      \
420 			d[4 * i + 3] = oneval;                 \
421 		} \
422 	} while (0); \
423 	break
424 
425 #define COPY_RGB(dsttype, srctype, convfunc, oneval) \
426 	do { \
427 		const srctype* s = reinterpret_cast<const srctype*>(src); \
428 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
429 		for (int i = 0; i < pixel_count; i++) \
430 		{ \
431 			d[4 * i    ] = convfunc(s[3 * i    ]); \
432 			d[4 * i + 1] = convfunc(s[3 * i + 1]); \
433 			d[4 * i + 2] = convfunc(s[3 * i + 2]); \
434 			d[4 * i + 3] = oneval;                 \
435 		} \
436 	} while (0); \
437 	break
438 
439 #define COPY_BGR(dsttype, srctype, convfunc, oneval) \
440 	do { \
441 		const srctype* s = reinterpret_cast<const srctype*>(src); \
442 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
443 		for (int i = 0; i < pixel_count; i++)\
444 		{ \
445 			d[4 * i    ] = convfunc(s[3 * i + 2]); \
446 			d[4 * i + 1] = convfunc(s[3 * i + 1]); \
447 			d[4 * i + 2] = convfunc(s[3 * i    ]); \
448 			d[4 * i + 3] = oneval;                 \
449 		} \
450 	} while (0); \
451 	break
452 
453 #define COPY_RGBX(dsttype, srctype, convfunc, oneval) \
454 	do { \
455 		const srctype* s = reinterpret_cast<const srctype*>(src); \
456 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
457 		for (int i = 0; i < pixel_count; i++)\
458 		{ \
459 			d[4 * i    ] = convfunc(s[4 * i    ]); \
460 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
461 			d[4 * i + 2] = convfunc(s[4 * i + 2]); \
462 			d[4 * i + 3] = oneval;                 \
463 		} \
464 	} while (0); \
465 	break
466 
467 #define COPY_BGRX(dsttype, srctype, convfunc, oneval) \
468 	do { \
469 		const srctype* s = reinterpret_cast<const srctype*>(src); \
470 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
471 		for (int i = 0; i < pixel_count; i++)\
472 		{ \
473 			d[4 * i    ] = convfunc(s[4 * i + 2]); \
474 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
475 			d[4 * i + 2] = convfunc(s[4 * i    ]); \
476 			d[4 * i + 3] = oneval;                 \
477 		} \
478 	} while (0); \
479 	break
480 
481 #define COPY_RGBA(dsttype, srctype, convfunc, oneval) \
482 	do { \
483 		const srctype* s = reinterpret_cast<const srctype*>(src); \
484 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
485 		for (int i = 0; i < pixel_count; i++) \
486 		{ \
487 			d[4 * i    ] = convfunc(s[4 * i    ]); \
488 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
489 			d[4 * i + 2] = convfunc(s[4 * i + 2]); \
490 			d[4 * i + 3] = convfunc(s[4 * i + 3]); \
491 		} \
492 	} while (0); \
493 	break
494 
495 #define COPY_BGRA(dsttype, srctype, convfunc, oneval) \
496 	do { \
497 		const srctype* s = reinterpret_cast<const srctype*>(src); \
498 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
499 		for (int i = 0; i < pixel_count; i++) \
500 		{ \
501 			d[4 * i    ] = convfunc(s[4 * i + 2]); \
502 			d[4 * i + 1] = convfunc(s[4 * i + 1]); \
503 			d[4 * i + 2] = convfunc(s[4 * i    ]); \
504 			d[4 * i + 3] = convfunc(s[4 * i + 3]); \
505 		} \
506 	} while (0); \
507 	break
508 
509 #define COPY_L(dsttype, srctype, convfunc, oneval) \
510 	do { \
511 		const srctype* s = reinterpret_cast<const srctype*>(src); \
512 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
513 		for (int i = 0; i < pixel_count; i++) \
514 		{ \
515 			d[4 * i    ] = convfunc(s[i]); \
516 			d[4 * i + 1] = convfunc(s[i]); \
517 			d[4 * i + 2] = convfunc(s[i]); \
518 			d[4 * i + 3] = oneval;         \
519 		} \
520 	} while (0); \
521 	break
522 
523 #define COPY_LA(dsttype, srctype, convfunc, oneval) \
524 	do { \
525 		const srctype* s = reinterpret_cast<const srctype*>(src); \
526 		dsttype* d = reinterpret_cast<dsttype*>(dst); \
527 		for (int i = 0; i < pixel_count; i++) \
528 		{ \
529 			d[4 * i    ] = convfunc(s[2 * i    ]); \
530 			d[4 * i + 1] = convfunc(s[2 * i    ]); \
531 			d[4 * i + 2] = convfunc(s[2 * i    ]); \
532 			d[4 * i + 3] = convfunc(s[2 * i + 1]); \
533 		} \
534 	} while (0); \
535 	break
536 
537 	switch (method)
538 	{
539 	case R8_TO_RGBA8:
540 		COPY_R(uint8_t, uint8_t, id, 0xFF);
541 	case RG8_TO_RGBA8:
542 		COPY_RG(uint8_t, uint8_t, id, 0xFF);
543 	case RGB8_TO_RGBA8:
544 		COPY_RGB(uint8_t, uint8_t, id, 0xFF);
545 	case RGBA8_TO_RGBA8:
546 		COPY_RGBA(uint8_t, uint8_t, id, 0xFF);
547 	case BGR8_TO_RGBA8:
548 		COPY_BGR(uint8_t, uint8_t, id, 0xFF);
549 	case BGRA8_TO_RGBA8:
550 		COPY_BGRA(uint8_t, uint8_t, id, 0xFF);
551 	case RGBX8_TO_RGBA8:
552 		COPY_RGBX(uint8_t, uint8_t, id, 0xFF);
553 	case BGRX8_TO_RGBA8:
554 		COPY_BGRX(uint8_t, uint8_t, id, 0xFF);
555 	case L8_TO_RGBA8:
556 		COPY_L(uint8_t, uint8_t, id, 0xFF);
557 	case LA8_TO_RGBA8:
558 		COPY_LA(uint8_t, uint8_t, id, 0xFF);
559 
560 	case R16F_TO_RGBA16F:
561 		COPY_R(uint16_t, uint16_t, id, 0x3C00);
562 	case RG16F_TO_RGBA16F:
563 		COPY_RG(uint16_t, uint16_t, id, 0x3C00);
564 	case RGB16F_TO_RGBA16F:
565 		COPY_RGB(uint16_t, uint16_t, id, 0x3C00);
566 	case RGBA16F_TO_RGBA16F:
567 		COPY_RGBA(uint16_t, uint16_t, id, 0x3C00);
568 	case BGR16F_TO_RGBA16F:
569 		COPY_BGR(uint16_t, uint16_t, id, 0x3C00);
570 	case BGRA16F_TO_RGBA16F:
571 		COPY_BGRA(uint16_t, uint16_t, id, 0x3C00);
572 	case L16F_TO_RGBA16F:
573 		COPY_L(uint16_t, uint16_t, id, 0x3C00);
574 	case LA16F_TO_RGBA16F:
575 		COPY_LA(uint16_t, uint16_t, id, 0x3C00);
576 
577 	case R16_TO_RGBA16F:
578 		COPY_R(uint16_t, uint16_t, u16_sf16, 0x3C00);
579 	case RG16_TO_RGBA16F:
580 		COPY_RG(uint16_t, uint16_t, u16_sf16, 0x3C00);
581 	case RGB16_TO_RGBA16F:
582 		COPY_RGB(uint16_t, uint16_t, u16_sf16, 0x3C00);
583 	case RGBA16_TO_RGBA16F:
584 		COPY_RGBA(uint16_t, uint16_t, u16_sf16, 0x3C00);
585 	case BGR16_TO_RGBA16F:
586 		COPY_BGR(uint16_t, uint16_t, u16_sf16, 0x3C00);
587 	case BGRA16_TO_RGBA16F:
588 		COPY_BGRA(uint16_t, uint16_t, u16_sf16, 0x3C00);
589 	case L16_TO_RGBA16F:
590 		COPY_L(uint16_t, uint16_t, u16_sf16, 0x3C00);
591 	case LA16_TO_RGBA16F:
592 		COPY_LA(uint16_t, uint16_t, u16_sf16, 0x3C00);
593 
594 	case R32F_TO_RGBA16F:
595 		COPY_R(uint16_t, float, f32_sf16, 0x3C00);
596 	case RG32F_TO_RGBA16F:
597 		COPY_RG(uint16_t, float, f32_sf16, 0x3C00);
598 	case RGB32F_TO_RGBA16F:
599 		COPY_RGB(uint16_t, float, f32_sf16, 0x3C00);
600 	case RGBA32F_TO_RGBA16F:
601 		COPY_RGBA(uint16_t, float, f32_sf16, 0x3C00);
602 	case BGR32F_TO_RGBA16F:
603 		COPY_BGR(uint16_t, float, f32_sf16, 0x3C00);
604 	case BGRA32F_TO_RGBA16F:
605 		COPY_BGRA(uint16_t, float, f32_sf16, 0x3C00);
606 	case L32F_TO_RGBA16F:
607 		COPY_L(uint16_t, float, f32_sf16, 0x3C00);
608 	case LA32F_TO_RGBA16F:
609 		COPY_LA(uint16_t, float, f32_sf16, 0x3C00);
610 	}
611 }
612 
/**
 * @brief Reverse the byte order of each two byte value in a buffer.
 *
 * A trailing odd byte, if any, is left untouched.
 *
 * @param[in,out] dataptr      The data to convert.
 * @param         byte_count   The number of bytes to convert.
 */
static void switch_endianness2(
	void* dataptr,
	int byte_count
) {
	uint8_t* bytes = static_cast<uint8_t*>(dataptr);
	const int value_count = byte_count / 2;

	for (int v = 0; v < value_count; v++)
	{
		uint8_t* value = bytes + 2 * v;
		const uint8_t first = value[0];
		value[0] = value[1];
		value[1] = first;
	}
}
633 
/**
 * @brief Reverse the byte order of each four byte value in a buffer.
 *
 * Trailing bytes that do not make up a whole four byte value are left untouched.
 *
 * @param[in,out] dataptr      The data to convert.
 * @param         byte_count   The number of bytes to convert.
 */
static void switch_endianness4(
	void* dataptr,
	int byte_count
) {
	uint8_t* bytes = static_cast<uint8_t*>(dataptr);
	const int value_count = byte_count / 4;

	for (int v = 0; v < value_count; v++)
	{
		uint8_t* value = bytes + 4 * v;

		// Exchange the outer pair, then the inner pair
		const uint8_t outer = value[0];
		value[0] = value[3];
		value[3] = outer;

		const uint8_t inner = value[1];
		value[1] = value[2];
		value[2] = inner;
	}
}
658 
/**
 * @brief Reverse the byte order of a 32-bit value.
 *
 * @param v   The data to convert.
 *
 * @return The converted value.
 */
static uint32_t u32_byterev(uint32_t v)
{
	// Pull out each byte, numbered from the least significant end
	const uint32_t byte0 = v & 0xFFu;
	const uint32_t byte1 = (v >> 8) & 0xFFu;
	const uint32_t byte2 = (v >> 16) & 0xFFu;
	const uint32_t byte3 = v >> 24;

	// Reassemble them in the opposite order
	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
670 
671 /*
672  Notes about KTX:
673 
674  After the header and the key/value data area, the actual image data follows.
675  Each image starts with a 4-byte "imageSize" value indicating the number of bytes of image data follow.
676  (For cube-maps, this value appears only after first image; the remaining 5 images are all of equal size.)
677  If the size of an image is not a multiple of 4, then it is padded to the next multiple of 4.
678  Note that this padding is NOT included in the "imageSize" field.
679  In a cubemap, the padding appears after each face; note that in a 2D/3D texture, padding does
680  NOT appear between the lines/planes of the texture!
681 
682  In a KTX file, there may be multiple images; they are organized as follows:
683 
684  For each mipmap_level in numberOfMipmapLevels
685  	UInt32 imageSize;
686  	For each array_element in numberOfArrayElements
687  	* for each face in numberOfFaces
688  		* for each z_slice in pixelDepth
689  			* for each row or row_of_blocks in pixelHeight
690  				* for each pixel or block_of_pixels in pixelWidth
691  					Byte data[format-specific-number-of-bytes]
692  				* end
693  			* end
694  		*end
695  		Byte cubePadding[0-3]
696  	*end
697  	Byte mipPadding[3 - ((imageSize+ 3) % 4)]
698  *end
699 
700  In the ASTC codec, we will, for the time being only harvest the first image,
701  and we will support only a limited set of formats:
702 
703  gl_type: UNSIGNED_BYTE UNSIGNED_SHORT HALF_FLOAT FLOAT UNSIGNED_INT_8_8_8_8 UNSIGNED_INT_8_8_8_8_REV
704  gl_format: RED, RG, RGB, RGBA, BGR, BGRA
705  gl_internal_format: used for upload to OpenGL; we can ignore it on uncompressed-load, but
706  	need to provide a reasonable value on store: RGB8 RGBA8 RGB16F RGBA16F
707  gl_base_internal_format: same as gl_format unless texture is compressed (well, BGR is turned into RGB)
708  	RED, RG, RGB, RGBA
709 */
710 
// Khronos enum values used in the KTX header fields

// Unsized pixel formats
#define GL_RED                                      0x1903
#define GL_RG                                       0x8227
#define GL_RGB                                      0x1907
#define GL_RGBA                                     0x1908
#define GL_BGR                                      0x80E0
#define GL_BGRA                                     0x80E1
#define GL_LUMINANCE                                0x1909
#define GL_LUMINANCE_ALPHA                          0x190A

// Sized 8-bit internal formats
#define GL_R8                                       0x8229
#define GL_RG8                                      0x822B
#define GL_RGB8                                     0x8051
#define GL_RGBA8                                    0x8058

// Sized FP16 internal formats
#define GL_R16F                                     0x822D
#define GL_RG16F                                    0x822F
#define GL_RGB16F                                   0x881B
#define GL_RGBA16F                                  0x881A

// Component data types
#define GL_UNSIGNED_BYTE                            0x1401
#define GL_UNSIGNED_SHORT                           0x1403
#define GL_HALF_FLOAT                               0x140B
#define GL_FLOAT                                    0x1406

// ASTC 2D block formats, linear color
#define GL_COMPRESSED_RGBA_ASTC_4x4                 0x93B0
#define GL_COMPRESSED_RGBA_ASTC_5x4                 0x93B1
#define GL_COMPRESSED_RGBA_ASTC_5x5                 0x93B2
#define GL_COMPRESSED_RGBA_ASTC_6x5                 0x93B3
#define GL_COMPRESSED_RGBA_ASTC_6x6                 0x93B4
#define GL_COMPRESSED_RGBA_ASTC_8x5                 0x93B5
#define GL_COMPRESSED_RGBA_ASTC_8x6                 0x93B6
#define GL_COMPRESSED_RGBA_ASTC_8x8                 0x93B7
#define GL_COMPRESSED_RGBA_ASTC_10x5                0x93B8
#define GL_COMPRESSED_RGBA_ASTC_10x6                0x93B9
#define GL_COMPRESSED_RGBA_ASTC_10x8                0x93BA
#define GL_COMPRESSED_RGBA_ASTC_10x10               0x93BB
#define GL_COMPRESSED_RGBA_ASTC_12x10               0x93BC
#define GL_COMPRESSED_RGBA_ASTC_12x12               0x93BD

// ASTC 2D block formats, sRGB color
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4         0x93D0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4         0x93D1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5         0x93D2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5         0x93D3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6         0x93D4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5         0x93D5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6         0x93D6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8         0x93D7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5        0x93D8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6        0x93D9
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8        0x93DA
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10       0x93DB
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10       0x93DC
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12       0x93DD

// ASTC 3D block formats, linear color
#define GL_COMPRESSED_RGBA_ASTC_3x3x3_OES           0x93C0
#define GL_COMPRESSED_RGBA_ASTC_4x3x3_OES           0x93C1
#define GL_COMPRESSED_RGBA_ASTC_4x4x3_OES           0x93C2
#define GL_COMPRESSED_RGBA_ASTC_4x4x4_OES           0x93C3
#define GL_COMPRESSED_RGBA_ASTC_5x4x4_OES           0x93C4
#define GL_COMPRESSED_RGBA_ASTC_5x5x4_OES           0x93C5
#define GL_COMPRESSED_RGBA_ASTC_5x5x5_OES           0x93C6
#define GL_COMPRESSED_RGBA_ASTC_6x5x5_OES           0x93C7
#define GL_COMPRESSED_RGBA_ASTC_6x6x5_OES           0x93C8
#define GL_COMPRESSED_RGBA_ASTC_6x6x6_OES           0x93C9

// ASTC 3D block formats, sRGB color
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES   0x93E0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES   0x93E1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES   0x93E2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES   0x93E3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES   0x93E4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES   0x93E5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES   0x93E6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES   0x93E7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES   0x93E8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES   0x93E9
787 
/**
 * @brief One row of the ASTC format table, pairing a block size and color
 *        space with its GL format enum.
 */
struct format_entry
{
	/** @brief The block size in the X dimension. */
	unsigned int x;

	/** @brief The block size in the Y dimension. */
	unsigned int y;

	/** @brief The block size in the Z dimension; 1 for the 2D formats. */
	unsigned int z;

	/** @brief Is this the sRGB variant of the format? */
	bool is_srgb;

	/** @brief The GL enum value for this format. */
	unsigned int format;
};
796 
797 static const std::array<format_entry, 48> ASTC_FORMATS =
798 {{
799 	// 2D Linear RGB
800 	{ 4,  4,  1, false, GL_COMPRESSED_RGBA_ASTC_4x4},
801 	{ 5,  4,  1, false, GL_COMPRESSED_RGBA_ASTC_5x4},
802 	{ 5,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_5x5},
803 	{ 6,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_6x5},
804 	{ 6,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_6x6},
805 	{ 8,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_8x5},
806 	{ 8,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_8x6},
807 	{ 8,  8,  1, false, GL_COMPRESSED_RGBA_ASTC_8x8},
808 	{10,  5,  1, false, GL_COMPRESSED_RGBA_ASTC_10x5},
809 	{10,  6,  1, false, GL_COMPRESSED_RGBA_ASTC_10x6},
810 	{10,  8,  1, false, GL_COMPRESSED_RGBA_ASTC_10x8},
811 	{10, 10,  1, false, GL_COMPRESSED_RGBA_ASTC_10x10},
812 	{12, 10,  1, false, GL_COMPRESSED_RGBA_ASTC_12x10},
813 	{12, 12,  1, false, GL_COMPRESSED_RGBA_ASTC_12x12},
814 	// 2D SRGB
815 	{ 4,  4,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4},
816 	{ 5,  4,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4},
817 	{ 5,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5},
818 	{ 6,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5},
819 	{ 6,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6},
820 	{ 8,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5},
821 	{ 8,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6},
822 	{ 8,  8,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8},
823 	{10,  5,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5},
824 	{10,  6,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6},
825 	{10,  8,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8},
826 	{10, 10,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10},
827 	{12, 10,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10},
828 	{12, 12,  1,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12},
829 	// 3D Linear RGB
830 	{ 3,  3,  3, false, GL_COMPRESSED_RGBA_ASTC_3x3x3_OES},
831 	{ 4,  3,  3, false, GL_COMPRESSED_RGBA_ASTC_4x3x3_OES},
832 	{ 4,  4,  3, false, GL_COMPRESSED_RGBA_ASTC_4x4x3_OES},
833 	{ 4,  4,  4, false, GL_COMPRESSED_RGBA_ASTC_4x4x4_OES},
834 	{ 5,  4,  4, false, GL_COMPRESSED_RGBA_ASTC_5x4x4_OES},
835 	{ 5,  5,  4, false, GL_COMPRESSED_RGBA_ASTC_5x5x4_OES},
836 	{ 5,  5,  5, false, GL_COMPRESSED_RGBA_ASTC_5x5x5_OES},
837 	{ 6,  5,  5, false, GL_COMPRESSED_RGBA_ASTC_6x5x5_OES},
838 	{ 6,  6,  5, false, GL_COMPRESSED_RGBA_ASTC_6x6x5_OES},
839 	{ 6,  6,  6, false, GL_COMPRESSED_RGBA_ASTC_6x6x6_OES},
840 	// 3D SRGB
841 	{ 3,  3,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES},
842 	{ 4,  3,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES},
843 	{ 4,  4,  3,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES},
844 	{ 4,  4,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES},
845 	{ 5,  4,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES},
846 	{ 5,  5,  4,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES},
847 	{ 5,  5,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES},
848 	{ 6,  5,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES},
849 	{ 6,  6,  5,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES},
850 	{ 6,  6,  6,  true, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES}
851 }};
852 
get_format(unsigned int format)853 static const format_entry* get_format(
854 	unsigned int format
855 ) {
856 	for (auto& it : ASTC_FORMATS)
857 	{
858 		if (it.format == format)
859 		{
860 			return &it;
861 		}
862 	}
863 	return nullptr;
864 }
865 
get_format(unsigned int x,unsigned int y,unsigned int z,bool is_srgb)866 static unsigned int get_format(
867 	unsigned int x,
868 	unsigned int y,
869 	unsigned int z,
870 	bool is_srgb
871 ) {
872 	for (auto& it : ASTC_FORMATS)
873 	{
874 		if ((it.x == x) && (it.y == y) && (it.z == z) && (it.is_srgb == is_srgb))
875 		{
876 			return it.format;
877 		}
878 	}
879 	return 0;
880 }
881 
/**
 * @brief The fixed-size header at the start of a KTX file.
 */
struct ktx_header
{
	/** @brief File identifier bytes. */
	uint8_t magic[12];

	/** @brief Should be 0x04030201; if it is instead 0x01020304 the endianness of everything must be switched. */
	uint32_t endianness;

	/** @brief 0 for compressed textures, otherwise value from table 3.2 (page 162) of OpenGL 4.0 spec. */
	uint32_t gl_type;

	/** @brief Size of data elements to do endianness swap on (1 = endian-neutral data). */
	uint32_t gl_type_size;

	/** @brief 0 for compressed textures, otherwise value from table 3.3 (page 163) of OpenGL spec. */
	uint32_t gl_format;

	/** @brief Sized internal format, corresponding to table 3.12 to 3.14 (pages 182-185) of OpenGL spec. */
	uint32_t gl_internal_format;

	/** @brief Unsized internal format, corresponding to table 3.11 (page 179) of OpenGL spec. */
	uint32_t gl_base_internal_format;

	/** @brief Texture width; not rounded up to block size for compressed. */
	uint32_t pixel_width;

	/** @brief Texture height; must be 0 for 1D textures. */
	uint32_t pixel_height;

	/** @brief Texture depth; must be 0 for 1D, 2D and cubemap textures. */
	uint32_t pixel_depth;

	/** @brief 0 if not a texture array. */
	uint32_t number_of_array_elements;

	/** @brief 6 for cubemaps, 1 for non-cubemaps. */
	uint32_t number_of_faces;

	/** @brief 0 or 1 for non-mipmapped textures; 0 indicates that auto-mipmap-gen should be done at load time. */
	uint32_t number_of_mipmap_levels;

	/** @brief Size in bytes of the key-and-value area immediately following the header. */
	uint32_t bytes_of_key_value_data;
};
899 
// Magic 12-byte sequence that must appear at the beginning of every KTX file.
// Declared const: it is only ever compared against or copied from, and an accidental write would
// break every later header check.
static const uint8_t ktx_magic[12] {
	0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A
};
904 
ktx_header_switch_endianness(ktx_header * kt)905 static void ktx_header_switch_endianness(ktx_header * kt)
906 {
907 	#define REV(x) kt->x = u32_byterev(kt->x)
908 	REV(endianness);
909 	REV(gl_type);
910 	REV(gl_type_size);
911 	REV(gl_format);
912 	REV(gl_internal_format);
913 	REV(gl_base_internal_format);
914 	REV(pixel_width);
915 	REV(pixel_height);
916 	REV(pixel_depth);
917 	REV(number_of_array_elements);
918 	REV(number_of_faces);
919 	REV(number_of_mipmap_levels);
920 	REV(bytes_of_key_value_data);
921 	#undef REV
922 }
923 
924 /**
925  * @brief Load an uncompressed KTX image using the local custom loader.
926  *
927  * @param      filename          The name of the file to load.
928  * @param      y_flip            Should the image be vertically flipped?
929  * @param[out] is_hdr            Is this an HDR image load?
930  * @param[out] component_count   The number of components in the data.
931  *
932  * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
933  */
load_ktx_uncompressed_image(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)934 static astcenc_image* load_ktx_uncompressed_image(
935 	const char* filename,
936 	bool y_flip,
937 	bool& is_hdr,
938 	unsigned int& component_count
939 ) {
940 	FILE *f = fopen(filename, "rb");
941 	if (!f)
942 	{
943 		printf("Failed to open file %s\n", filename);
944 		return nullptr;
945 	}
946 
947 	ktx_header hdr;
948 	size_t header_bytes_read = fread(&hdr, 1, sizeof(hdr), f);
949 
950 	if (header_bytes_read != sizeof(hdr))
951 	{
952 		printf("Failed to read header of KTX file %s\n", filename);
953 		fclose(f);
954 		return nullptr;
955 	}
956 
957 	if (memcmp(hdr.magic, ktx_magic, 12) != 0 || (hdr.endianness != 0x04030201 && hdr.endianness != 0x01020304))
958 	{
959 		printf("File %s does not have a valid KTX header\n", filename);
960 		fclose(f);
961 		return nullptr;
962 	}
963 
964 	int switch_endianness = 0;
965 	if (hdr.endianness == 0x01020304)
966 	{
967 		ktx_header_switch_endianness(&hdr);
968 		switch_endianness = 1;
969 	}
970 
971 	if (hdr.gl_type == 0 || hdr.gl_format == 0)
972 	{
973 		printf("File %s appears to be compressed, not supported as input\n", filename);
974 		fclose(f);
975 		return nullptr;
976 	}
977 
978 	// the formats we support are:
979 
980 	// Cartesian product of gl_type=(UNSIGNED_BYTE, UNSIGNED_SHORT, HALF_FLOAT, FLOAT) x gl_format=(RED, RG, RGB, RGBA, BGR, BGRA)
981 
982 	int components;
983 	switch (hdr.gl_format)
984 	{
985 	case GL_RED:
986 		components = 1;
987 		break;
988 	case GL_RG:
989 		components = 2;
990 		break;
991 	case GL_RGB:
992 		components = 3;
993 		break;
994 	case GL_RGBA:
995 		components = 4;
996 		break;
997 	case GL_BGR:
998 		components = 3;
999 		break;
1000 	case GL_BGRA:
1001 		components = 4;
1002 		break;
1003 	case GL_LUMINANCE:
1004 		components = 1;
1005 		break;
1006 	case GL_LUMINANCE_ALPHA:
1007 		components = 2;
1008 		break;
1009 	default:
1010 		printf("KTX file %s has unsupported GL type\n", filename);
1011 		fclose(f);
1012 		return nullptr;
1013 	}
1014 
1015 	// Although these are set up later, use default initializer to remove warnings
1016 	int bitness = 8;              // Internal precision after conversion
1017 	int bytes_per_component = 1;  // Bytes per component in the KTX file
1018 	scanline_transfer copy_method = R8_TO_RGBA8;
1019 
1020 	switch (hdr.gl_type)
1021 	{
1022 	case GL_UNSIGNED_BYTE:
1023 		{
1024 			bitness = 8;
1025 			bytes_per_component = 1;
1026 			switch (hdr.gl_format)
1027 			{
1028 			case GL_RED:
1029 				copy_method = R8_TO_RGBA8;
1030 				break;
1031 			case GL_RG:
1032 				copy_method = RG8_TO_RGBA8;
1033 				break;
1034 			case GL_RGB:
1035 				copy_method = RGB8_TO_RGBA8;
1036 				break;
1037 			case GL_RGBA:
1038 				copy_method = RGBA8_TO_RGBA8;
1039 				break;
1040 			case GL_BGR:
1041 				copy_method = BGR8_TO_RGBA8;
1042 				break;
1043 			case GL_BGRA:
1044 				copy_method = BGRA8_TO_RGBA8;
1045 				break;
1046 			case GL_LUMINANCE:
1047 				copy_method = L8_TO_RGBA8;
1048 				break;
1049 			case GL_LUMINANCE_ALPHA:
1050 				copy_method = LA8_TO_RGBA8;
1051 				break;
1052 			}
1053 			break;
1054 		}
1055 	case GL_UNSIGNED_SHORT:
1056 		{
1057 			bitness = 16;
1058 			bytes_per_component = 2;
1059 			switch (hdr.gl_format)
1060 			{
1061 			case GL_RED:
1062 				copy_method = R16_TO_RGBA16F;
1063 				break;
1064 			case GL_RG:
1065 				copy_method = RG16_TO_RGBA16F;
1066 				break;
1067 			case GL_RGB:
1068 				copy_method = RGB16_TO_RGBA16F;
1069 				break;
1070 			case GL_RGBA:
1071 				copy_method = RGBA16_TO_RGBA16F;
1072 				break;
1073 			case GL_BGR:
1074 				copy_method = BGR16_TO_RGBA16F;
1075 				break;
1076 			case GL_BGRA:
1077 				copy_method = BGRA16_TO_RGBA16F;
1078 				break;
1079 			case GL_LUMINANCE:
1080 				copy_method = L16_TO_RGBA16F;
1081 				break;
1082 			case GL_LUMINANCE_ALPHA:
1083 				copy_method = LA16_TO_RGBA16F;
1084 				break;
1085 			}
1086 			break;
1087 		}
1088 	case GL_HALF_FLOAT:
1089 		{
1090 			bitness = 16;
1091 			bytes_per_component = 2;
1092 			switch (hdr.gl_format)
1093 			{
1094 			case GL_RED:
1095 				copy_method = R16F_TO_RGBA16F;
1096 				break;
1097 			case GL_RG:
1098 				copy_method = RG16F_TO_RGBA16F;
1099 				break;
1100 			case GL_RGB:
1101 				copy_method = RGB16F_TO_RGBA16F;
1102 				break;
1103 			case GL_RGBA:
1104 				copy_method = RGBA16F_TO_RGBA16F;
1105 				break;
1106 			case GL_BGR:
1107 				copy_method = BGR16F_TO_RGBA16F;
1108 				break;
1109 			case GL_BGRA:
1110 				copy_method = BGRA16F_TO_RGBA16F;
1111 				break;
1112 			case GL_LUMINANCE:
1113 				copy_method = L16F_TO_RGBA16F;
1114 				break;
1115 			case GL_LUMINANCE_ALPHA:
1116 				copy_method = LA16F_TO_RGBA16F;
1117 				break;
1118 			}
1119 			break;
1120 		}
1121 	case GL_FLOAT:
1122 		{
1123 			bitness = 16;
1124 			bytes_per_component = 4;
1125 			switch (hdr.gl_format)
1126 			{
1127 			case GL_RED:
1128 				copy_method = R32F_TO_RGBA16F;
1129 				break;
1130 			case GL_RG:
1131 				copy_method = RG32F_TO_RGBA16F;
1132 				break;
1133 			case GL_RGB:
1134 				copy_method = RGB32F_TO_RGBA16F;
1135 				break;
1136 			case GL_RGBA:
1137 				copy_method = RGBA32F_TO_RGBA16F;
1138 				break;
1139 			case GL_BGR:
1140 				copy_method = BGR32F_TO_RGBA16F;
1141 				break;
1142 			case GL_BGRA:
1143 				copy_method = BGRA32F_TO_RGBA16F;
1144 				break;
1145 			case GL_LUMINANCE:
1146 				copy_method = L32F_TO_RGBA16F;
1147 				break;
1148 			case GL_LUMINANCE_ALPHA:
1149 				copy_method = LA32F_TO_RGBA16F;
1150 				break;
1151 			}
1152 			break;
1153 		}
1154 	default:
1155 		printf("KTX file %s has unsupported GL format\n", filename);
1156 		fclose(f);
1157 		return nullptr;
1158 	}
1159 
1160 	if (hdr.number_of_mipmap_levels > 1)
1161 	{
1162 		printf("WARNING: KTX file %s has %d mipmap levels; only the first one will be encoded.\n", filename, hdr.number_of_mipmap_levels);
1163 	}
1164 
1165 	if (hdr.number_of_array_elements > 1)
1166 	{
1167 		printf("WARNING: KTX file %s contains a texture array with %d layers; only the first one will be encoded.\n", filename, hdr.number_of_array_elements);
1168 	}
1169 
1170 	if (hdr.number_of_faces > 1)
1171 	{
1172 		printf("WARNING: KTX file %s contains a cubemap with 6 faces; only the first one will be encoded.\n", filename);
1173 	}
1174 
1175 
1176 	unsigned int dim_x = hdr.pixel_width;
1177 	unsigned int dim_y = astc::max(hdr.pixel_height, 1u);
1178 	unsigned int dim_z = astc::max(hdr.pixel_depth, 1u);
1179 
1180 	// ignore the key/value data
1181 	fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR);
1182 
1183 	uint32_t specified_bytes_of_surface = 0;
1184 	size_t sb_read = fread(&specified_bytes_of_surface, 1, 4, f);
1185 	if (sb_read != 4)
1186 	{
1187 		printf("Failed to read header of KTX file %s\n", filename);
1188 		fclose(f);
1189 		return nullptr;
1190 	}
1191 
1192 	if (switch_endianness)
1193 	{
1194 		specified_bytes_of_surface = u32_byterev(specified_bytes_of_surface);
1195 	}
1196 
1197 	// read the surface
1198 	uint32_t xstride = bytes_per_component * components * dim_x;
1199 	uint32_t ystride = xstride * dim_y;
1200 	uint32_t computed_bytes_of_surface = dim_z * ystride;
1201 	if (computed_bytes_of_surface != specified_bytes_of_surface)
1202 	{
1203 		fclose(f);
1204 		printf("%s: KTX file inconsistency: computed surface size is %d bytes, but specified size is %d bytes\n", filename, computed_bytes_of_surface, specified_bytes_of_surface);
1205 		return nullptr;
1206 	}
1207 
1208 	uint8_t *buf = new uint8_t[specified_bytes_of_surface];
1209 	size_t bytes_read = fread(buf, 1, specified_bytes_of_surface, f);
1210 	fclose(f);
1211 	if (bytes_read != specified_bytes_of_surface)
1212 	{
1213 		delete[] buf;
1214 		printf("Failed to read file %s\n", filename);
1215 		return nullptr;
1216 	}
1217 
1218 	// perform an endianness swap on the surface if needed.
1219 	if (switch_endianness)
1220 	{
1221 		if (hdr.gl_type_size == 2)
1222 		{
1223 			switch_endianness2(buf, specified_bytes_of_surface);
1224 		}
1225 
1226 		if (hdr.gl_type_size == 4)
1227 		{
1228 			switch_endianness4(buf, specified_bytes_of_surface);
1229 		}
1230 	}
1231 
1232 	// Transfer data from the surface to our own image data structure
1233 	astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z);
1234 
1235 	for (unsigned int z = 0; z < dim_z; z++)
1236 	{
1237 		for (unsigned int y = 0; y < dim_y; y++)
1238 		{
1239 			unsigned int ymod = y_flip ? dim_y - y - 1 : y;
1240 			unsigned int ydst = ymod;
1241 			void *dst;
1242 
1243 			if (astc_img->data_type == ASTCENC_TYPE_U8)
1244 			{
1245 				uint8_t* data8 = static_cast<uint8_t*>(astc_img->data[z]);
1246 				dst = static_cast<void*>(&data8[4 * dim_x * ydst]);
1247 			}
1248 			else // if (astc_img->data_type == ASTCENC_TYPE_F16)
1249 			{
1250 				assert(astc_img->data_type == ASTCENC_TYPE_F16);
1251 				uint16_t* data16 = static_cast<uint16_t*>(astc_img->data[z]);
1252 				dst = static_cast<void*>(&data16[4 * dim_x * ydst]);
1253 			}
1254 
1255 			uint8_t *src = buf + (z * ystride) + (y * xstride);
1256 			copy_scanline(dst, src, dim_x, copy_method);
1257 		}
1258 	}
1259 
1260 	delete[] buf;
1261 	is_hdr = bitness >= 16;
1262 	component_count = components;
1263 	return astc_img;
1264 }
1265 
1266 /**
1267  * @brief Load a KTX compressed image using the local custom loader.
1268  *
1269  * @param      filename          The name of the file to load.
1270  * @param[out] is_srgb           @c true if this is an sRGB image, @c false otherwise.
1271  * @param[out] img               The output image to populate.
1272  *
1273  * @return @c true on error, @c false otherwise.
1274  */
load_ktx_compressed_image(const char * filename,bool & is_srgb,astc_compressed_image & img)1275 bool load_ktx_compressed_image(
1276 	const char* filename,
1277 	bool& is_srgb,
1278 	astc_compressed_image& img
1279 ) {
1280 	FILE *f = fopen(filename, "rb");
1281 	if (!f)
1282 	{
1283 		printf("Failed to open file %s\n", filename);
1284 		return true;
1285 	}
1286 
1287 	ktx_header hdr;
1288 	size_t actual = fread(&hdr, 1, sizeof(hdr), f);
1289 	if (actual != sizeof(hdr))
1290 	{
1291 		printf("Failed to read header from %s\n", filename);
1292 		fclose(f);
1293 		return true;
1294 	}
1295 
1296 	if (memcmp(hdr.magic, ktx_magic, 12) != 0 ||
1297 	    (hdr.endianness != 0x04030201 && hdr.endianness != 0x01020304))
1298 	{
1299 		printf("File %s does not have a valid KTX header\n", filename);
1300 		fclose(f);
1301 		return true;
1302 	}
1303 
1304 	bool switch_endianness = false;
1305 	if (hdr.endianness == 0x01020304)
1306 	{
1307 		switch_endianness = true;
1308 		ktx_header_switch_endianness(&hdr);
1309 	}
1310 
1311 	if (hdr.gl_type != 0 || hdr.gl_format != 0 || hdr.gl_type_size != 1 ||
1312 	    hdr.gl_base_internal_format != GL_RGBA)
1313 	{
1314 		printf("File %s is not a compressed ASTC file\n", filename);
1315 		fclose(f);
1316 		return true;
1317 	}
1318 
1319 	const format_entry* fmt = get_format(hdr.gl_internal_format);
1320 	if (!fmt)
1321 	{
1322 		printf("File %s is not a compressed ASTC file\n", filename);
1323 		fclose(f);
1324 		return true;
1325 	}
1326 
1327 	// Skip over any key-value pairs
1328 	int seekerr;
1329 	seekerr = fseek(f, hdr.bytes_of_key_value_data, SEEK_CUR);
1330 	if (seekerr)
1331 	{
1332 		printf("Failed to skip key-value pairs in %s\n", filename);
1333 		fclose(f);
1334 		return true;
1335 	}
1336 
1337 	// Read the length of the data and endianess convert
1338 	unsigned int data_len;
1339 	actual = fread(&data_len, 1, sizeof(data_len), f);
1340 	if (actual != sizeof(data_len))
1341 	{
1342 		printf("Failed to read mip 0 size from %s\n", filename);
1343 		fclose(f);
1344 		return true;
1345 	}
1346 
1347 	if (switch_endianness)
1348 	{
1349 		data_len = u32_byterev(data_len);
1350 	}
1351 
1352 	// Read the data
1353 	unsigned char* data = new unsigned char[data_len];
1354 	actual = fread(data, 1, data_len, f);
1355 	if (actual != data_len)
1356 	{
1357 		printf("Failed to read mip 0 data from %s\n", filename);
1358 		fclose(f);
1359 		delete[] data;
1360 		return true;
1361 	}
1362 
1363 	img.block_x = fmt->x;
1364 	img.block_y = fmt->y;
1365 	img.block_z = fmt->z == 0 ? 1 : fmt->z;
1366 
1367 	img.dim_x = hdr.pixel_width;
1368 	img.dim_y = hdr.pixel_height;
1369 	img.dim_z = hdr.pixel_depth == 0 ? 1 : hdr.pixel_depth;
1370 
1371 	img.data_len = data_len;
1372 	img.data = data;
1373 
1374 	is_srgb = fmt->is_srgb;
1375 
1376 	fclose(f);
1377 	return false;
1378 }
1379 
1380 /**
1381  * @brief Store a KTX compressed image using a local store routine.
1382  *
1383  * @param img        The image data to store.
1384  * @param filename   The name of the file to save.
1385  * @param is_srgb    @c true if this is an sRGB image, @c false if linear.
1386  *
1387  * @return @c true on error, @c false otherwise.
1388  */
store_ktx_compressed_image(const astc_compressed_image & img,const char * filename,bool is_srgb)1389 bool store_ktx_compressed_image(
1390 	const astc_compressed_image& img,
1391 	const char* filename,
1392 	bool is_srgb
1393 ) {
1394 	unsigned int fmt = get_format(img.block_x, img.block_y, img.block_z, is_srgb);
1395 
1396 	ktx_header hdr;
1397 	memcpy(hdr.magic, ktx_magic, 12);
1398 	hdr.endianness = 0x04030201;
1399 	hdr.gl_type = 0;
1400 	hdr.gl_type_size = 1;
1401 	hdr.gl_format = 0;
1402 	hdr.gl_internal_format = fmt;
1403 	hdr.gl_base_internal_format = GL_RGBA;
1404 	hdr.pixel_width = img.dim_x;
1405 	hdr.pixel_height = img.dim_y;
1406 	hdr.pixel_depth = (img.dim_z == 1) ? 0 : img.dim_z;
1407 	hdr.number_of_array_elements = 0;
1408 	hdr.number_of_faces = 1;
1409 	hdr.number_of_mipmap_levels = 1;
1410 	hdr.bytes_of_key_value_data = 0;
1411 
1412 	size_t expected = sizeof(ktx_header) + 4 + img.data_len;
1413 	size_t actual = 0;
1414 
1415 	FILE *wf = fopen(filename, "wb");
1416 	if (!wf)
1417 	{
1418 		return true;
1419 	}
1420 
1421 	actual += fwrite(&hdr, 1, sizeof(ktx_header), wf);
1422 	actual += fwrite(&img.data_len, 1, 4, wf);
1423 	actual += fwrite(img.data, 1, img.data_len, wf);
1424 	fclose(wf);
1425 
1426 	if (actual != expected)
1427 	{
1428 		return true;
1429 	}
1430 
1431 	return false;
1432 }
1433 
1434 /**
1435  * @brief Save a KTX uncompressed image using a local store routine.
1436  *
1437  * @param img        The source data for the image.
1438  * @param filename   The name of the file to save.
1439  * @param y_flip     Should the image be vertically flipped?
1440  *
1441  * @return @c true if the image saved OK, @c false on error.
1442  */
store_ktx_uncompressed_image(const astcenc_image * img,const char * filename,int y_flip)1443 static bool store_ktx_uncompressed_image(
1444 	const astcenc_image* img,
1445 	const char* filename,
1446 	int y_flip
1447 ) {
1448 	unsigned int dim_x = img->dim_x;
1449 	unsigned int dim_y = img->dim_y;
1450 	unsigned int dim_z = img->dim_z;
1451 
1452 	int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16;
1453 	int image_components = determine_image_components(img);
1454 
1455 	ktx_header hdr;
1456 
1457 	static const int gl_format_of_components[4] {
1458 		GL_RED, GL_RG, GL_RGB, GL_RGBA
1459 	};
1460 
1461 	static const int gl_sized_format_of_components_ldr[4] {
1462 		GL_R8, GL_RG8, GL_RGB8, GL_RGBA8
1463 	};
1464 
1465 	static const int gl_sized_format_of_components_hdr[4] {
1466 		GL_R16F, GL_RG16F, GL_RGB16F, GL_RGBA16F
1467 	};
1468 
1469 	memcpy(hdr.magic, ktx_magic, 12);
1470 	hdr.endianness = 0x04030201;
1471 	hdr.gl_type = (bitness == 16) ? GL_HALF_FLOAT : GL_UNSIGNED_BYTE;
1472 	hdr.gl_type_size = bitness / 8;
1473 	hdr.gl_format = gl_format_of_components[image_components - 1];
1474 	if (bitness == 16)
1475 	{
1476 		hdr.gl_internal_format = gl_sized_format_of_components_hdr[image_components - 1];
1477 	}
1478 	else
1479 	{
1480 		hdr.gl_internal_format = gl_sized_format_of_components_ldr[image_components - 1];
1481 	}
1482 	hdr.gl_base_internal_format = hdr.gl_format;
1483 	hdr.pixel_width = dim_x;
1484 	hdr.pixel_height = dim_y;
1485 	hdr.pixel_depth = (dim_z == 1) ? 0 : dim_z;
1486 	hdr.number_of_array_elements = 0;
1487 	hdr.number_of_faces = 1;
1488 	hdr.number_of_mipmap_levels = 1;
1489 	hdr.bytes_of_key_value_data = 0;
1490 
1491 	// Collect image data to write
1492 	uint8_t ***row_pointers8 = nullptr;
1493 	uint16_t ***row_pointers16 = nullptr;
1494 	if (bitness == 8)
1495 	{
1496 		row_pointers8 = new uint8_t **[dim_z];
1497 		row_pointers8[0] = new uint8_t *[dim_y * dim_z];
1498 		row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components + 3];
1499 
1500 		for (unsigned int z = 1; z < dim_z; z++)
1501 		{
1502 			row_pointers8[z] = row_pointers8[0] + dim_y * z;
1503 			row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_x * image_components * z;
1504 		}
1505 
1506 		for (unsigned int z = 0; z < dim_z; z++)
1507 		{
1508 			for (unsigned int y = 1; y < dim_y; y++)
1509 			{
1510 				row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y;
1511 			}
1512 		}
1513 
1514 		for (unsigned int z = 0; z < dim_z; z++)
1515 		{
1516 			uint8_t* data8 = static_cast<uint8_t*>(img->data[z]);
1517 			for (unsigned int y = 0; y < dim_y; y++)
1518 			{
1519 				int ym = y_flip ? dim_y - y - 1 : y;
1520 				switch (image_components)
1521 				{
1522 				case 1:		// single-component, treated as Luminance
1523 					for (unsigned int x = 0; x < dim_x; x++)
1524 					{
1525 						row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x    )];
1526 					}
1527 					break;
1528 				case 2:		// two-component, treated as Luminance-Alpha
1529 					for (unsigned int x = 0; x < dim_x; x++)
1530 					{
1531 						row_pointers8[z][y][2 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1532 						row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)];
1533 					}
1534 					break;
1535 				case 3:		// three-component, treated a
1536 					for (unsigned int x = 0; x < dim_x; x++)
1537 					{
1538 						row_pointers8[z][y][3 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1539 						row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
1540 						row_pointers8[z][y][3 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
1541 					}
1542 					break;
1543 				case 4:		// four-component, treated as RGBA
1544 					for (unsigned int x = 0; x < dim_x; x++)
1545 					{
1546 						row_pointers8[z][y][4 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
1547 						row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
1548 						row_pointers8[z][y][4 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
1549 						row_pointers8[z][y][4 * x + 3] = data8[(4 * dim_x * ym) + (4 * x + 3)];
1550 					}
1551 					break;
1552 				}
1553 			}
1554 		}
1555 	}
1556 	else						// if bitness == 16
1557 	{
1558 		row_pointers16 = new uint16_t **[dim_z];
1559 		row_pointers16[0] = new uint16_t *[dim_y * dim_z];
1560 		row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components + 1];
1561 
1562 		for (unsigned int z = 1; z < dim_z; z++)
1563 		{
1564 			row_pointers16[z] = row_pointers16[0] + dim_y * z;
1565 			row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z;
1566 		}
1567 
1568 		for (unsigned int z = 0; z < dim_z; z++)
1569 		{
1570 			for (unsigned int y = 1; y < dim_y; y++)
1571 			{
1572 				row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y;
1573 			}
1574 		}
1575 
1576 		for (unsigned int z = 0; z < dim_z; z++)
1577 		{
1578 			uint16_t* data16 = static_cast<uint16_t*>(img->data[z]);
1579 			for (unsigned int y = 0; y < dim_y; y++)
1580 			{
1581 				int ym = y_flip ? dim_y - y - 1 : y;
1582 				switch (image_components)
1583 				{
1584 				case 1:		// single-component, treated as Luminance
1585 					for (unsigned int x = 0; x < dim_x; x++)
1586 					{
1587 						row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x    )];
1588 					}
1589 					break;
1590 				case 2:		// two-component, treated as Luminance-Alpha
1591 					for (unsigned int x = 0; x < dim_x; x++)
1592 					{
1593 						row_pointers16[z][y][2 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1594 						row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)];
1595 					}
1596 					break;
1597 				case 3:		// three-component, treated as RGB
1598 					for (unsigned int x = 0; x < dim_x; x++)
1599 					{
1600 						row_pointers16[z][y][3 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1601 						row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
1602 						row_pointers16[z][y][3 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
1603 					}
1604 					break;
1605 				case 4:		// four-component, treated as RGBA
1606 					for (unsigned int x = 0; x < dim_x; x++)
1607 					{
1608 						row_pointers16[z][y][4 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
1609 						row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
1610 						row_pointers16[z][y][4 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
1611 						row_pointers16[z][y][4 * x + 3] = data16[(4 * dim_x * ym) + (4 * x + 3)];
1612 					}
1613 					break;
1614 				}
1615 			}
1616 		}
1617 	}
1618 
1619 	bool retval { true };
1620 	uint32_t image_bytes = dim_x * dim_y * dim_z * image_components * (bitness / 8);
1621 	uint32_t image_write_bytes = (image_bytes + 3) & ~3;
1622 
1623 	FILE *wf = fopen(filename, "wb");
1624 	if (wf)
1625 	{
1626 		void* dataptr = (bitness == 16) ?
1627 			reinterpret_cast<void*>(row_pointers16[0][0]) :
1628 			reinterpret_cast<void*>(row_pointers8[0][0]);
1629 
1630 		size_t expected_bytes_written = sizeof(ktx_header) + image_write_bytes + 4;
1631 		size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(ktx_header), wf);
1632 		size_t bytecount_bytes_written = fwrite(&image_bytes, 1, 4, wf);
1633 		size_t data_bytes_written = fwrite(dataptr, 1, image_write_bytes, wf);
1634 		fclose(wf);
1635 		if (hdr_bytes_written + bytecount_bytes_written + data_bytes_written != expected_bytes_written)
1636 		{
1637 			retval = false;
1638 		}
1639 	}
1640 	else
1641 	{
1642 		retval = false;
1643 	}
1644 
1645 	if (row_pointers8)
1646 	{
1647 		delete[] row_pointers8[0][0];
1648 		delete[] row_pointers8[0];
1649 		delete[] row_pointers8;
1650 	}
1651 
1652 	if (row_pointers16)
1653 	{
1654 		delete[] row_pointers16[0][0];
1655 		delete[] row_pointers16[0];
1656 		delete[] row_pointers16;
1657 	}
1658 
1659 	return retval;
1660 }
1661 
1662 /*
1663 	Loader for DDS files.
1664 
1665 	Note that after the header, data are densely packed with no padding;
1666 	in the case of multiple surfaces, they appear one after another in
1667 	the file, again with no padding.
1668 
1669 	This code is NOT endian-neutral.
1670 */
/**
 * @brief The pixel format description embedded in a DDS header.
 *
 * This structure is read from disk as raw bytes as part of the DDS header, so the field order,
 * field types, and overall size must not change.
 */
struct dds_pixelformat
{
	uint32_t size;				// structure size, set to 32.
	/*
	   flags bits are a combination of the following:
		0x1 : Texture contains alpha data
		0x2 : ---- (older files: texture contains alpha data, for Alpha-only texture)
		0x4 : The fourcc field is valid, indicating a compressed or DX10 texture format
		0x40 : texture contains uncompressed RGB data
		0x200 : ---- (YUV in older files)
		0x20000 : Texture contains Luminance data (can be combined with 0x1 for Lum-Alpha)
	*/
	uint32_t flags;
	uint32_t fourcc;			// "DX10" to indicate a DX10 format, "DXTn" for the DXT formats
	uint32_t rgbbitcount;		// number of bits per texel; up to 32 for non-DX10 formats.
	uint32_t rbitmask;			// bitmap indicating position of red/luminance color component
	uint32_t gbitmask;			// bitmap indicating position of green color component
	uint32_t bbitmask;			// bitmap indicating position of blue color component
	uint32_t abitmask;			// bitmap indicating position of alpha color component
};

// The in-file structure is 32 bytes (see the size field); fail the build if the in-memory layout
// ever differs, because the header is loaded with a raw fread.
static_assert(sizeof(dds_pixelformat) == 32, "dds_pixelformat must be exactly 32 bytes with no padding");
1686 
1687 struct dds_header
1688 {
1689 	uint32_t size;				// header size; must be exactly 124.
1690 	/*
1691 	   flag field is an OR or the following bits, that indicate fields containing valid data:
1692 		1: caps/caps2/caps3/caps4 (set in all DDS files, ignore on read)
1693 		2: height (set in all DDS files, ignore on read)
1694 		4: width (set in all DDS files, ignore on read)
1695 		8: pitch (for uncompressed texture)
1696 		0x1000: the pixel format field (set in all DDS files, ignore on read)
1697 		0x20000: mipmap count (for mipmapped textures with >1 level)
1698 		0x80000: pitch (for compressed texture)
1699 		0x800000: depth (for 3d textures)
1700 	*/
1701 	uint32_t flags;
1702 	uint32_t height;
1703 	uint32_t width;
1704 	uint32_t pitch_or_linear_size;	// scanline pitch for uncompressed; total size in bytes for compressed
1705 	uint32_t depth;
1706 	uint32_t mipmapcount;
1707 	// unused, set to 0
1708 	uint32_t reserved1[11];
1709 	dds_pixelformat ddspf;
1710 	/*
1711 	   caps field is an OR of the following values:
1712 		8 : should be set for a file that contains more than 1 surface (ignore on read)
1713 		0x400000 : should be set for a mipmapped texture
1714 		0x1000 : should be set if the surface is a texture at all (all DDS files, ignore on read)
1715 	*/
1716 	uint32_t caps;
1717 	/*
1718 	   caps2 field is an OR of the following values:
1719 		0x200 : texture is cubemap
1720 		0x400 : +X face of cubemap is present
1721 		0x800 : -X face of cubemap is present
1722 		0x1000 : +Y face of cubemap is present
1723 		0x2000 : -Y face of cubemap is present
1724 		0x4000 : +Z face of cubemap is present
1725 		0x8000 : -Z face of cubemap is present
1726 		0x200000 : texture is a 3d texture.
1727 	*/
1728 	uint32_t caps2;
1729 	// unused, set to 0
1730 	uint32_t caps3;
1731 	// unused, set to 0
1732 	uint32_t caps4;
1733 	// unused, set to 0
1734 	uint32_t reserved2;
1735 };
1736 
/**
 * @brief The extended DDS header that follows the main header in DX10-format files.
 *
 * This structure is read from disk as raw bytes, so the field order, field types, and overall
 * size must not change.
 */
struct dds_header_dx10
{
	uint32_t dxgi_format;
	uint32_t resource_dimension;	// 2=1d-texture, 3=2d-texture or cubemap, 4=3d-texture
	uint32_t misc_flag;			// 4 if cubemap, else 0
	uint32_t array_size;		// size of array in case of a texture array; set to 1 for a non-array
	uint32_t reserved;			// set to 0.
};

// Five packed 32-bit fields; fail the build if the in-memory layout ever differs, because the
// header is loaded with a raw fread.
static_assert(sizeof(dds_header_dx10) == 20, "dds_header_dx10 must be exactly 20 bytes with no padding");
1745 
// File magic: the first four bytes of a DDS file ("DDS " in ASCII) assembled as a little-endian
// 32-bit value
#define DDS_MAGIC 0x20534444
// Pixel format fourcc value ("DX10" in ASCII, little-endian) indicating that a DX10 extended
// header follows the main header
#define DX10_MAGIC 0x30315844
1748 
1749 /**
1750  * @brief Load an uncompressed DDS image using the local custom loader.
1751  *
1752  * @param      filename          The name of the file to load.
1753  * @param      y_flip            Should the image be vertically flipped?
1754  * @param[out] is_hdr            Is this an HDR image load?
1755  * @param[out] component_count   The number of components in the data.
1756  *
1757  * @return The loaded image data in a canonical 4 channel format, or @c nullptr on error.
1758  */
load_dds_uncompressed_image(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)1759 static astcenc_image* load_dds_uncompressed_image(
1760 	const char* filename,
1761 	bool y_flip,
1762 	bool& is_hdr,
1763 	unsigned int& component_count
1764 ) {
1765 	FILE *f = fopen(filename, "rb");
1766 	if (!f)
1767 	{
1768 		printf("Failed to open file %s\n", filename);
1769 		return nullptr;
1770 	}
1771 
1772 	uint8_t magic[4];
1773 
1774 	dds_header hdr;
1775 	size_t magic_bytes_read = fread(magic, 1, 4, f);
1776 	size_t header_bytes_read = fread(&hdr, 1, sizeof(hdr), f);
1777 	if (magic_bytes_read != 4 || header_bytes_read != sizeof(hdr))
1778 	{
1779 		printf("Failed to read header of DDS file %s\n", filename);
1780 		fclose(f);
1781 		return nullptr;
1782 	}
1783 
1784 	uint32_t magicx = magic[0] | (magic[1] << 8) | (magic[2] << 16) | (magic[3] << 24);
1785 
1786 	if (magicx != DDS_MAGIC || hdr.size != 124)
1787 	{
1788 		printf("File %s does not have a valid DDS header\n", filename);
1789 		fclose(f);
1790 		return nullptr;
1791 	}
1792 
1793 	int use_dx10_header = 0;
1794 	if (hdr.ddspf.flags & 4)
1795 	{
1796 		if (hdr.ddspf.fourcc == DX10_MAGIC)
1797 		{
1798 			use_dx10_header = 1;
1799 		}
1800 		else
1801 		{
1802 			printf("DDS file %s is compressed, not supported\n", filename);
1803 			fclose(f);
1804 			return nullptr;
1805 		}
1806 	}
1807 
1808 	dds_header_dx10 dx10_header;
1809 	if (use_dx10_header)
1810 	{
1811 		size_t dx10_header_bytes_read = fread(&dx10_header, 1, sizeof(dx10_header), f);
1812 		if (dx10_header_bytes_read != sizeof(dx10_header))
1813 		{
1814 			printf("Failed to read header of DDS file %s\n", filename);
1815 			fclose(f);
1816 			return nullptr;
1817 		}
1818 	}
1819 
1820 	unsigned int dim_x = hdr.width;
1821 	unsigned int dim_y = hdr.height;
1822 	unsigned int dim_z = (hdr.flags & 0x800000) ? hdr.depth : 1;
1823 
1824 	// The bitcount that we will use internally in the codec
1825 	int bitness = 0;
1826 
1827 	// The bytes per component in the DDS file itself
1828 	int bytes_per_component = 0;
1829 	int components = 0;
1830 	scanline_transfer copy_method = R8_TO_RGBA8;
1831 
1832 	// figure out the format actually used in the DDS file.
1833 	if (use_dx10_header)
1834 	{
1835 		// DX10 header present; use the DXGI format.
1836 		#define DXGI_FORMAT_R32G32B32A32_FLOAT   2
1837 		#define DXGI_FORMAT_R32G32B32_FLOAT      6
1838 		#define DXGI_FORMAT_R16G16B16A16_FLOAT  10
1839 		#define DXGI_FORMAT_R16G16B16A16_UNORM  11
1840 		#define DXGI_FORMAT_R32G32_FLOAT        16
1841 		#define DXGI_FORMAT_R8G8B8A8_UNORM      28
1842 		#define DXGI_FORMAT_R16G16_FLOAT    34
1843 		#define DXGI_FORMAT_R16G16_UNORM    35
1844 		#define DXGI_FORMAT_R32_FLOAT       41
1845 		#define DXGI_FORMAT_R8G8_UNORM      49
1846 		#define DXGI_FORMAT_R16_FLOAT       54
1847 		#define DXGI_FORMAT_R16_UNORM       56
1848 		#define DXGI_FORMAT_R8_UNORM        61
1849 		#define DXGI_FORMAT_B8G8R8A8_UNORM  86
1850 		#define DXGI_FORMAT_B8G8R8X8_UNORM  87
1851 
1852 		struct dxgi_params
1853 		{
1854 			int bitness;
1855 			int bytes_per_component;
1856 			int components;
1857 			scanline_transfer copy_method;
1858 			uint32_t dxgi_format_number;
1859 		};
1860 
1861 		static const dxgi_params format_params[] {
1862 			{16, 4, 4, RGBA32F_TO_RGBA16F, DXGI_FORMAT_R32G32B32A32_FLOAT},
1863 			{16, 4, 3, RGB32F_TO_RGBA16F, DXGI_FORMAT_R32G32B32_FLOAT},
1864 			{16, 2, 4, RGBA16F_TO_RGBA16F, DXGI_FORMAT_R16G16B16A16_FLOAT},
1865 			{16, 2, 4, RGBA16_TO_RGBA16F, DXGI_FORMAT_R16G16B16A16_UNORM},
1866 			{16, 4, 2, RG32F_TO_RGBA16F, DXGI_FORMAT_R32G32_FLOAT},
1867 			{8, 1, 4, RGBA8_TO_RGBA8, DXGI_FORMAT_R8G8B8A8_UNORM},
1868 			{16, 2, 2, RG16F_TO_RGBA16F, DXGI_FORMAT_R16G16_FLOAT},
1869 			{16, 2, 2, RG16_TO_RGBA16F, DXGI_FORMAT_R16G16_UNORM},
1870 			{16, 4, 1, R32F_TO_RGBA16F, DXGI_FORMAT_R32_FLOAT},
1871 			{8, 1, 2, RG8_TO_RGBA8, DXGI_FORMAT_R8G8_UNORM},
1872 			{16, 2, 1, R16F_TO_RGBA16F, DXGI_FORMAT_R16_FLOAT},
1873 			{16, 2, 1, R16_TO_RGBA16F, DXGI_FORMAT_R16_UNORM},
1874 			{8, 1, 1, R8_TO_RGBA8, DXGI_FORMAT_R8_UNORM},
1875 			{8, 1, 4, BGRA8_TO_RGBA8, DXGI_FORMAT_B8G8R8A8_UNORM},
1876 			{8, 1, 4, BGRX8_TO_RGBA8, DXGI_FORMAT_B8G8R8X8_UNORM},
1877 		};
1878 
1879 		int dxgi_modes_supported = sizeof(format_params) / sizeof(format_params[0]);
1880 		int did_select_format = 0;
1881 		for (int i = 0; i < dxgi_modes_supported; i++)
1882 		{
1883 			if (dx10_header.dxgi_format == format_params[i].dxgi_format_number)
1884 			{
1885 				bitness = format_params[i].bitness;
1886 				bytes_per_component = format_params[i].bytes_per_component;
1887 				components = format_params[i].components;
1888 				copy_method = format_params[i].copy_method;
1889 				did_select_format = 1;
1890 				break;
1891 			}
1892 		}
1893 
1894 		if (!did_select_format)
1895 		{
1896 			printf("DDS file %s: DXGI format not supported by codec\n", filename);
1897 			fclose(f);
1898 			return nullptr;
1899 		}
1900 	}
1901 	else
1902 	{
1903 		// No DX10 header present. Then try to match the bitcount and bitmask against
1904 		// a set of prepared patterns.
1905 		uint32_t flags = hdr.ddspf.flags;
1906 		uint32_t bitcount = hdr.ddspf.rgbbitcount;
1907 		uint32_t rmask = hdr.ddspf.rbitmask;
1908 		uint32_t gmask = hdr.ddspf.gbitmask;
1909 		uint32_t bmask = hdr.ddspf.bbitmask;
1910 		uint32_t amask = hdr.ddspf.abitmask;
1911 
1912 		// RGBA-unorm8
1913 		if ((flags & 0x41) == 0x41 && bitcount == 32 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000 && amask == 0xFF000000)
1914 		{
1915 			bytes_per_component = 1;
1916 			components = 4;
1917 			copy_method = RGBA8_TO_RGBA8;
1918 		}
1919 		// BGRA-unorm8
1920 		else if ((flags & 0x41) == 0x41 && bitcount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF && amask == 0xFF000000)
1921 		{
1922 			bytes_per_component = 1;
1923 			components = 4;
1924 			copy_method = BGRA8_TO_RGBA8;
1925 		}
1926 		// RGBX-unorm8
1927 		else if ((flags & 0x40) && bitcount == 32 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000)
1928 		{
1929 			bytes_per_component = 1;
1930 			components = 4;
1931 			copy_method = RGBX8_TO_RGBA8;
1932 		}
1933 		// BGRX-unorm8
1934 		else if ((flags & 0x40) && bitcount == 32 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF)
1935 		{
1936 			bytes_per_component = 1;
1937 			components = 4;
1938 			copy_method = BGRX8_TO_RGBA8;
1939 		}
1940 		// RGB-unorm8
1941 		else if ((flags & 0x40) && bitcount == 24 && rmask == 0xFF && gmask == 0xFF00 && bmask == 0xFF0000)
1942 		{
1943 			bytes_per_component = 1;
1944 			components = 3;
1945 			copy_method = RGB8_TO_RGBA8;
1946 		}
1947 		// BGR-unorm8
1948 		else if ((flags & 0x40) && bitcount == 24 && rmask == 0xFF0000 && gmask == 0xFF00 && bmask == 0xFF)
1949 		{
1950 			bytes_per_component = 1;
1951 			components = 3;
1952 			copy_method = BGR8_TO_RGBA8;
1953 		}
1954 		// RG-unorm16
1955 		else if ((flags & 0x40) && bitcount == 16 && rmask == 0xFFFF && gmask == 0xFFFF0000)
1956 		{
1957 			bytes_per_component = 2;
1958 			components = 2;
1959 			copy_method = RG16_TO_RGBA16F;
1960 		}
1961 		// A8L8
1962 		else if ((flags & 0x20001) == 0x20001 && bitcount == 16 && rmask == 0xFF && amask == 0xFF00)
1963 		{
1964 			bytes_per_component = 1;
1965 			components = 2;
1966 			copy_method = LA8_TO_RGBA8;
1967 		}
1968 		// L8
1969 		else if ((flags & 0x20000) && bitcount == 8 && rmask == 0xFF)
1970 		{
1971 			bytes_per_component = 1;
1972 			components = 1;
1973 			copy_method = L8_TO_RGBA8;
1974 		}
1975 		// L16
1976 		else if ((flags & 0x20000) && bitcount == 16 && rmask == 0xFFFF)
1977 		{
1978 			bytes_per_component = 2;
1979 			components = 1;
1980 			copy_method = L16_TO_RGBA16F;
1981 		}
1982 		else
1983 		{
1984 			printf("DDS file %s: Non-DXGI format not supported by codec\n", filename);
1985 			fclose(f);
1986 			return nullptr;
1987 		}
1988 
1989 		bitness = bytes_per_component * 8;
1990 	}
1991 
1992 	// then, load the actual file.
1993 	uint32_t xstride = bytes_per_component * components * dim_x;
1994 	uint32_t ystride = xstride * dim_y;
1995 	uint32_t bytes_of_surface = ystride * dim_z;
1996 
1997 	uint8_t *buf = new uint8_t[bytes_of_surface];
1998 	size_t bytes_read = fread(buf, 1, bytes_of_surface, f);
1999 	fclose(f);
2000 	if (bytes_read != bytes_of_surface)
2001 	{
2002 		delete[] buf;
2003 		printf("Failed to read file %s\n", filename);
2004 		return nullptr;
2005 	}
2006 
2007 	// then transfer data from the surface to our own image-data-structure.
2008 	astcenc_image *astc_img = alloc_image(bitness, dim_x, dim_y, dim_z);
2009 
2010 	for (unsigned int z = 0; z < dim_z; z++)
2011 	{
2012 		for (unsigned int y = 0; y < dim_y; y++)
2013 		{
2014 			unsigned int ymod = y_flip ? dim_y - y - 1 : y;
2015 			unsigned int ydst = ymod;
2016 			void* dst;
2017 
2018 			if (astc_img->data_type == ASTCENC_TYPE_U8)
2019 			{
2020 				uint8_t* data8 = static_cast<uint8_t*>(astc_img->data[z]);
2021 				dst = static_cast<void*>(&data8[4 * dim_x * ydst]);
2022 			}
2023 			else // if (astc_img->data_type == ASTCENC_TYPE_F16)
2024 			{
2025 				assert(astc_img->data_type == ASTCENC_TYPE_F16);
2026 				uint16_t* data16 = static_cast<uint16_t*>(astc_img->data[z]);
2027 				dst = static_cast<void*>(&data16[4 * dim_x * ydst]);
2028 			}
2029 
2030 			uint8_t *src = buf + (z * ystride) + (y * xstride);
2031 			copy_scanline(dst, src, dim_x, copy_method);
2032 		}
2033 	}
2034 
2035 	delete[] buf;
2036 	is_hdr = bitness >= 16;
2037 	component_count = components;
2038 	return astc_img;
2039 }
2040 
2041 /**
2042  * @brief Save a DDS uncompressed image using a local store routine.
2043  *
2044  * @param img        The source data for the image.
2045  * @param filename   The name of the file to save.
2046  * @param y_flip     Should the image be vertically flipped?
2047  *
2048  * @return @c true if the image saved OK, @c false on error.
2049  */
store_dds_uncompressed_image(const astcenc_image * img,const char * filename,int y_flip)2050 static bool store_dds_uncompressed_image(
2051 	const astcenc_image* img,
2052 	const char* filename,
2053 	int y_flip
2054 ) {
2055 	unsigned int dim_x = img->dim_x;
2056 	unsigned int dim_y = img->dim_y;
2057 	unsigned int dim_z = img->dim_z;
2058 
2059 	int bitness = img->data_type == ASTCENC_TYPE_U8 ? 8 : 16;
2060 	int image_components = (bitness == 16) ? 4 : determine_image_components(img);
2061 
2062 	// DDS-pixel-format structures to use when storing LDR image with 1,2,3 or 4 components.
2063 	static const dds_pixelformat format_of_image_components[4] =
2064 	{
2065 		{32, 0x20000, 0, 8, 0xFF, 0, 0, 0},	// luminance
2066 		{32, 0x20001, 0, 16, 0xFF, 0, 0, 0xFF00},	// L8A8
2067 		{32, 0x40, 0, 24, 0xFF, 0xFF00, 0xFF0000, 0},	// RGB8
2068 		{32, 0x41, 0, 32, 0xFF, 0xFF00, 0xFF0000, 0xFF000000}	// RGBA8
2069 	};
2070 
2071 	// DDS-pixel-format structures to use when storing HDR image.
2072 	static const dds_pixelformat dxt10_diverter =
2073 	{
2074 		32, 4, DX10_MAGIC, 0, 0, 0, 0, 0
2075 	};
2076 
2077 	// Header handling; will write:
2078 	// * DDS magic value
2079 	// * DDS header
2080 	// * DDS DX10 header, if the file is floating-point
2081 	// * pixel data
2082 
2083 	// Main header data
2084 	dds_header hdr;
2085 	hdr.size = 124;
2086 	hdr.flags = 0x100F | (dim_z > 1 ? 0x800000 : 0);
2087 	hdr.height = dim_y;
2088 	hdr.width = dim_x;
2089 	hdr.pitch_or_linear_size = image_components * (bitness / 8) * dim_x;
2090 	hdr.depth = dim_z;
2091 	hdr.mipmapcount = 1;
2092 	for (unsigned int i = 0; i < 11; i++)
2093 	{
2094 		hdr.reserved1[i] = 0;
2095 	}
2096 	hdr.caps = 0x1000;
2097 	hdr.caps2 = (dim_z > 1) ? 0x200000 : 0;
2098 	hdr.caps3 = 0;
2099 	hdr.caps4 = 0;
2100 
2101 	// Pixel-format data
2102 	if (bitness == 8)
2103 	{
2104 		hdr.ddspf = format_of_image_components[image_components - 1];
2105 	}
2106 	else
2107 	{
2108 		hdr.ddspf = dxt10_diverter;
2109 	}
2110 
2111 	// DX10 data
2112 	dds_header_dx10 dx10;
2113 	dx10.dxgi_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
2114 	dx10.resource_dimension = (dim_z > 1) ? 4 : 3;
2115 	dx10.misc_flag = 0;
2116 	dx10.array_size = 1;
2117 	dx10.reserved = 0;
2118 
2119 	// Collect image data to write
2120 	uint8_t ***row_pointers8 = nullptr;
2121 	uint16_t ***row_pointers16 = nullptr;
2122 
2123 	if (bitness == 8)
2124 	{
2125 		row_pointers8 = new uint8_t **[dim_z];
2126 		row_pointers8[0] = new uint8_t *[dim_y * dim_z];
2127 		row_pointers8[0][0] = new uint8_t[dim_x * dim_y * dim_z * image_components];
2128 
2129 		for (unsigned int z = 1; z < dim_z; z++)
2130 		{
2131 			row_pointers8[z] = row_pointers8[0] + dim_y * z;
2132 			row_pointers8[z][0] = row_pointers8[0][0] + dim_y * dim_z * image_components * z;
2133 		}
2134 
2135 		for (unsigned int z = 0; z < dim_z; z++)
2136 		{
2137 			for (unsigned int y = 1; y < dim_y; y++)
2138 			{
2139 				row_pointers8[z][y] = row_pointers8[z][0] + dim_x * image_components * y;
2140 			}
2141 		}
2142 
2143 		for (unsigned int z = 0; z < dim_z; z++)
2144 		{
2145 			uint8_t* data8 = static_cast<uint8_t*>(img->data[z]);
2146 
2147 			for (unsigned int y = 0; y < dim_y; y++)
2148 			{
2149 				int ym = y_flip ? dim_y - y - 1 : y;
2150 				switch (image_components)
2151 				{
2152 				case 1:		// single-component, treated as Luminance
2153 					for (unsigned int x = 0; x < dim_x; x++)
2154 					{
2155 						row_pointers8[z][y][x] = data8[(4 * dim_x * ym) + (4 * x    )];
2156 					}
2157 					break;
2158 				case 2:		// two-component, treated as Luminance-Alpha
2159 					for (unsigned int x = 0; x < dim_x; x++)
2160 					{
2161 						row_pointers8[z][y][2 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2162 						row_pointers8[z][y][2 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 3)];
2163 					}
2164 					break;
2165 				case 3:		// three-component, treated as RGB
2166 					for (unsigned int x = 0; x < dim_x; x++)
2167 					{
2168 						row_pointers8[z][y][3 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2169 						row_pointers8[z][y][3 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
2170 						row_pointers8[z][y][3 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
2171 					}
2172 					break;
2173 				case 4:		// four-component, treated as RGBA
2174 					for (unsigned int x = 0; x < dim_x; x++)
2175 					{
2176 						row_pointers8[z][y][4 * x    ] = data8[(4 * dim_x * ym) + (4 * x    )];
2177 						row_pointers8[z][y][4 * x + 1] = data8[(4 * dim_x * ym) + (4 * x + 1)];
2178 						row_pointers8[z][y][4 * x + 2] = data8[(4 * dim_x * ym) + (4 * x + 2)];
2179 						row_pointers8[z][y][4 * x + 3] = data8[(4 * dim_x * ym) + (4 * x + 3)];
2180 					}
2181 					break;
2182 				}
2183 			}
2184 		}
2185 	}
2186 	else						// if bitness == 16
2187 	{
2188 		row_pointers16 = new uint16_t **[dim_z];
2189 		row_pointers16[0] = new uint16_t *[dim_y * dim_z];
2190 		row_pointers16[0][0] = new uint16_t[dim_x * dim_y * dim_z * image_components];
2191 
2192 		for (unsigned int z = 1; z < dim_z; z++)
2193 		{
2194 			row_pointers16[z] = row_pointers16[0] + dim_y * z;
2195 			row_pointers16[z][0] = row_pointers16[0][0] + dim_y * dim_x * image_components * z;
2196 		}
2197 
2198 		for (unsigned int z = 0; z < dim_z; z++)
2199 		{
2200 			for (unsigned int y = 1; y < dim_y; y++)
2201 			{
2202 				row_pointers16[z][y] = row_pointers16[z][0] + dim_x * image_components * y;
2203 			}
2204 		}
2205 
2206 		for (unsigned int z = 0; z < dim_z; z++)
2207 		{
2208 			uint16_t* data16 = static_cast<uint16_t*>(img->data[z]);
2209 
2210 			for (unsigned int y = 0; y < dim_y; y++)
2211 			{
2212 				int ym = y_flip ? dim_y - y - 1: y;
2213 				switch (image_components)
2214 				{
2215 				case 1:		// single-component, treated as Luminance
2216 					for (unsigned int x = 0; x < dim_x; x++)
2217 					{
2218 						row_pointers16[z][y][x] = data16[(4 * dim_x * ym) + (4 * x    )];
2219 					}
2220 					break;
2221 				case 2:		// two-component, treated as Luminance-Alpha
2222 					for (unsigned int x = 0; x < dim_x; x++)
2223 					{
2224 						row_pointers16[z][y][2 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2225 						row_pointers16[z][y][2 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 3)];
2226 					}
2227 					break;
2228 				case 3:		// three-component, treated as RGB
2229 					for (unsigned int x = 0; x < dim_x; x++)
2230 					{
2231 						row_pointers16[z][y][3 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2232 						row_pointers16[z][y][3 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
2233 						row_pointers16[z][y][3 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
2234 					}
2235 					break;
2236 				case 4:		// four-component, treated as RGBA
2237 					for (unsigned int x = 0; x < dim_x; x++)
2238 					{
2239 						row_pointers16[z][y][4 * x    ] = data16[(4 * dim_x * ym) + (4 * x    )];
2240 						row_pointers16[z][y][4 * x + 1] = data16[(4 * dim_x * ym) + (4 * x + 1)];
2241 						row_pointers16[z][y][4 * x + 2] = data16[(4 * dim_x * ym) + (4 * x + 2)];
2242 						row_pointers16[z][y][4 * x + 3] = data16[(4 * dim_x * ym) + (4 * x + 3)];
2243 					}
2244 					break;
2245 				}
2246 			}
2247 		}
2248 	}
2249 
2250 	bool retval { true };
2251 	uint32_t image_bytes = dim_x * dim_y * dim_z * image_components * (bitness / 8);
2252 
2253 	uint32_t dds_magic = DDS_MAGIC;
2254 
2255 	FILE *wf = fopen(filename, "wb");
2256 	if (wf)
2257 	{
2258 		void *dataptr = (bitness == 16) ?
2259 			reinterpret_cast<void*>(row_pointers16[0][0]) :
2260 			reinterpret_cast<void*>(row_pointers8[0][0]);
2261 
2262 		size_t expected_bytes_written = 4 + sizeof(dds_header) + (bitness > 8 ? sizeof(dds_header_dx10) : 0) + image_bytes;
2263 
2264 		size_t magic_bytes_written = fwrite(&dds_magic, 1, 4, wf);
2265 		size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(dds_header), wf);
2266 
2267 		size_t dx10_bytes_written;
2268 		if (bitness > 8)
2269 		{
2270 			dx10_bytes_written = fwrite(&dx10, 1, sizeof(dx10), wf);
2271 		}
2272 		else
2273 		{
2274 			dx10_bytes_written = 0;
2275 		}
2276 
2277 		size_t data_bytes_written = fwrite(dataptr, 1, image_bytes, wf);
2278 
2279 		fclose(wf);
2280 		if (magic_bytes_written + hdr_bytes_written + dx10_bytes_written + data_bytes_written != expected_bytes_written)
2281 		{
2282 			retval = false;
2283 		}
2284 	}
2285 	else
2286 	{
2287 		retval = false;
2288 	}
2289 
2290 	if (row_pointers8)
2291 	{
2292 		delete[] row_pointers8[0][0];
2293 		delete[] row_pointers8[0];
2294 		delete[] row_pointers8;
2295 	}
2296 
2297 	if (row_pointers16)
2298 	{
2299 		delete[] row_pointers16[0][0];
2300 		delete[] row_pointers16[0];
2301 		delete[] row_pointers16;
2302 	}
2303 
2304 	return retval;
2305 }
2306 
2307 /**
2308  * @brief Supported uncompressed image load functions, and their associated file extensions.
2309  */
2310 static const struct
2311 {
2312 	const char* ending1;
2313 	const char* ending2;
2314 	astcenc_image* (*loader_func)(const char*, bool, bool&, unsigned int&);
2315 } loader_descs[] {
2316 	// LDR formats
2317 	{".png",   ".PNG",  load_png_with_wuffs},
2318 	// HDR formats
2319 	{".exr",   ".EXR",  load_image_with_tinyexr },
2320 	// Container formats
2321 	{".ktx",   ".KTX",  load_ktx_uncompressed_image },
2322 	{".dds",   ".DDS",  load_dds_uncompressed_image },
2323 	// Generic catch all; this one must be last in the list
2324 	{ nullptr, nullptr, load_image_with_stb }
2325 };
2326 
2327 static const int loader_descr_count = sizeof(loader_descs) / sizeof(loader_descs[0]);
2328 
2329 /**
2330  * @brief Supported uncompressed image store functions, and their associated file extensions.
2331  */
2332 static const struct
2333 {
2334 	const char *ending1;
2335 	const char *ending2;
2336 	int enforced_bitness;
2337 	bool (*storer_func)(const astcenc_image *output_image, const char *filename, int y_flip);
2338 } storer_descs[] {
2339 	// LDR formats
2340 	{".bmp", ".BMP",  8, store_bmp_image_with_stb},
2341 	{".png", ".PNG",  8, store_png_image_with_stb},
2342 	{".tga", ".TGA",  8, store_tga_image_with_stb},
2343 	// HDR formats
2344 	{".exr", ".EXR", 16, store_exr_image_with_tinyexr},
2345 	{".hdr", ".HDR", 16, store_hdr_image_with_stb},
2346 	// Container formats
2347 	{".dds", ".DDS",  0, store_dds_uncompressed_image},
2348 	{".ktx", ".KTX",  0, store_ktx_uncompressed_image}
2349 };
2350 
2351 static const int storer_descr_count = sizeof(storer_descs) / sizeof(storer_descs[0]);
2352 
2353 /* See header for documentation. */
get_output_filename_enforced_bitness(const char * filename)2354 int get_output_filename_enforced_bitness(
2355 	const char* filename
2356 ) {
2357 	const char *eptr = strrchr(filename, '.');
2358 	if (!eptr)
2359 	{
2360 		return 0;
2361 	}
2362 
2363 	for (int i = 0; i < storer_descr_count; i++)
2364 	{
2365 		if (strcmp(eptr, storer_descs[i].ending1) == 0
2366 		 || strcmp(eptr, storer_descs[i].ending2) == 0)
2367 		{
2368 			return storer_descs[i].enforced_bitness;
2369 		}
2370 	}
2371 
2372 	return -1;
2373 }
2374 
2375 /* See header for documentation. */
load_ncimage(const char * filename,bool y_flip,bool & is_hdr,unsigned int & component_count)2376 astcenc_image* load_ncimage(
2377 	const char* filename,
2378 	bool y_flip,
2379 	bool& is_hdr,
2380 	unsigned int& component_count
2381 ) {
2382 	// Get the file extension
2383 	const char* eptr = strrchr(filename, '.');
2384 	if (!eptr)
2385 	{
2386 		eptr = filename;
2387 	}
2388 
2389 	// Scan through descriptors until a matching loader is found
2390 	for (unsigned int i = 0; i < loader_descr_count; i++)
2391 	{
2392 		if (loader_descs[i].ending1 == nullptr
2393 			|| strcmp(eptr, loader_descs[i].ending1) == 0
2394 			|| strcmp(eptr, loader_descs[i].ending2) == 0)
2395 		{
2396 			return loader_descs[i].loader_func(filename, y_flip, is_hdr, component_count);
2397 		}
2398 	}
2399 
2400 	// Should never reach here - stb_image provides a generic handler
2401 	return nullptr;
2402 }
2403 
2404 /* See header for documentation. */
store_ncimage(const astcenc_image * output_image,const char * filename,int y_flip)2405 bool store_ncimage(
2406 	const astcenc_image* output_image,
2407 	const char* filename,
2408 	int y_flip
2409 ) {
2410 	const char* eptr = strrchr(filename, '.');
2411 	if (!eptr)
2412 	{
2413 		eptr = ".ktx"; // use KTX file format if we don't have an ending.
2414 	}
2415 
2416 	for (int i = 0; i < storer_descr_count; i++)
2417 	{
2418 		if (strcmp(eptr, storer_descs[i].ending1) == 0
2419 		 || strcmp(eptr, storer_descs[i].ending2) == 0)
2420 		{
2421 			return storer_descs[i].storer_func(output_image, filename, y_flip);
2422 		}
2423 	}
2424 
2425 	// Should never reach here - get_output_filename_enforced_bitness should
2426 	// have acted as a preflight check
2427 	return false;
2428 }
2429 
2430 /* ============================================================================
2431 	ASTC compressed file loading
2432 ============================================================================ */
/**
 * @brief The header found at the start of a .astc compressed image file.
 *
 * Every field is a byte or byte array, and the structure is read and written as raw bytes.
 * Image sizes are in texels and stored least significant byte first as 24-bit values, i.e.
 * dim = dim[0] + (dim[1] << 8) + (dim[2] << 16). The number of blocks is not stored; it is
 * inferred from the image size and the block size.
 */
struct astc_header
{
	uint8_t magic[4];   // File identifier, see ASTC_MAGIC_ID
	uint8_t block_x;    // Block size
	uint8_t block_y;
	uint8_t block_z;
	uint8_t dim_x[3];   // Image size in texels
	uint8_t dim_y[3];
	uint8_t dim_z[3];
};

/** @brief The identifier stored, least significant byte first, in astc_header::magic. */
static const uint32_t ASTC_MAGIC_ID { 0x5CA1AB13 };
2445 
/**
 * @brief Assemble four bytes into one unsigned value, least significant byte first.
 *
 * @param a   Bits 0-7 of the result.
 * @param b   Bits 8-15 of the result.
 * @param c   Bits 16-23 of the result.
 * @param d   Bits 24-31 of the result.
 *
 * @return The assembled value.
 */
static unsigned int unpack_bytes(
	uint8_t a,
	uint8_t b,
	uint8_t c,
	uint8_t d
) {
	const uint8_t bytes[4] { a, b, c, d };

	// Fold from the most significant byte down so each step shifts by one byte
	unsigned int packed = 0;
	for (int i = 3; i >= 0; i--)
	{
		packed = (packed << 8) | static_cast<unsigned int>(bytes[i]);
	}

	return packed;
}
2457 
2458 /* See header for documentation. */
load_cimage(const char * filename,astc_compressed_image & img)2459 int load_cimage(
2460 	const char* filename,
2461 	astc_compressed_image& img
2462 ) {
2463 	std::ifstream file(filename, std::ios::in | std::ios::binary);
2464 	if (!file)
2465 	{
2466 		print_error("ERROR: File open failed '%s'\n", filename);
2467 		return 1;
2468 	}
2469 
2470 	astc_header hdr;
2471 	file.read(reinterpret_cast<char*>(&hdr), sizeof(astc_header));
2472 	if (file.fail())
2473 	{
2474 		print_error("ERROR: File read failed '%s'\n", filename);
2475 		return 1;
2476 	}
2477 
2478 	unsigned int magicval = unpack_bytes(hdr.magic[0], hdr.magic[1], hdr.magic[2], hdr.magic[3]);
2479 	if (magicval != ASTC_MAGIC_ID)
2480 	{
2481 		print_error("ERROR: File not recognized '%s'\n", filename);
2482 		return 1;
2483 	}
2484 
2485 	// Ensure these are not zero to avoid div by zero
2486 	unsigned int block_x = astc::max(static_cast<unsigned int>(hdr.block_x), 1u);
2487 	unsigned int block_y = astc::max(static_cast<unsigned int>(hdr.block_y), 1u);
2488 	unsigned int block_z = astc::max(static_cast<unsigned int>(hdr.block_z), 1u);
2489 
2490 	unsigned int dim_x = unpack_bytes(hdr.dim_x[0], hdr.dim_x[1], hdr.dim_x[2], 0);
2491 	unsigned int dim_y = unpack_bytes(hdr.dim_y[0], hdr.dim_y[1], hdr.dim_y[2], 0);
2492 	unsigned int dim_z = unpack_bytes(hdr.dim_z[0], hdr.dim_z[1], hdr.dim_z[2], 0);
2493 
2494 	if (dim_x == 0 || dim_y == 0 || dim_z == 0)
2495 	{
2496 		print_error("ERROR: Image header corrupt '%s'\n", filename);
2497 		return 1;
2498 	}
2499 
2500 	unsigned int xblocks = (dim_x + block_x - 1) / block_x;
2501 	unsigned int yblocks = (dim_y + block_y - 1) / block_y;
2502 	unsigned int zblocks = (dim_z + block_z - 1) / block_z;
2503 
2504 	size_t data_size = xblocks * yblocks * zblocks * 16;
2505 	uint8_t *buffer = new uint8_t[data_size];
2506 
2507 	file.read(reinterpret_cast<char*>(buffer), data_size);
2508 	if (file.fail())
2509 	{
2510 		print_error("ERROR: Image data size exceeded file size '%s'\n", filename);
2511 		delete[] buffer;
2512 		return 1;
2513 	}
2514 
2515 	img.data = buffer;
2516 	img.data_len = data_size;
2517 	img.block_x = block_x;
2518 	img.block_y = block_y;
2519 	img.block_z = block_z;
2520 	img.dim_x = dim_x;
2521 	img.dim_y = dim_y;
2522 	img.dim_z = dim_z;
2523 	return 0;
2524 }
2525 
2526 /* See header for documentation. */
store_cimage(const astc_compressed_image & img,const char * filename)2527 int store_cimage(
2528 	const astc_compressed_image& img,
2529 	const char* filename
2530 ) {
2531 	astc_header hdr;
2532 	hdr.magic[0] =  ASTC_MAGIC_ID        & 0xFF;
2533 	hdr.magic[1] = (ASTC_MAGIC_ID >>  8) & 0xFF;
2534 	hdr.magic[2] = (ASTC_MAGIC_ID >> 16) & 0xFF;
2535 	hdr.magic[3] = (ASTC_MAGIC_ID >> 24) & 0xFF;
2536 
2537 	hdr.block_x = static_cast<uint8_t>(img.block_x);
2538 	hdr.block_y = static_cast<uint8_t>(img.block_y);
2539 	hdr.block_z = static_cast<uint8_t>(img.block_z);
2540 
2541 	hdr.dim_x[0] =  img.dim_x        & 0xFF;
2542 	hdr.dim_x[1] = (img.dim_x >>  8) & 0xFF;
2543 	hdr.dim_x[2] = (img.dim_x >> 16) & 0xFF;
2544 
2545 	hdr.dim_y[0] =  img.dim_y       & 0xFF;
2546 	hdr.dim_y[1] = (img.dim_y >>  8) & 0xFF;
2547 	hdr.dim_y[2] = (img.dim_y >> 16) & 0xFF;
2548 
2549 	hdr.dim_z[0] =  img.dim_z        & 0xFF;
2550 	hdr.dim_z[1] = (img.dim_z >>  8) & 0xFF;
2551 	hdr.dim_z[2] = (img.dim_z >> 16) & 0xFF;
2552 
2553 	std::ofstream file(filename, std::ios::out | std::ios::binary);
2554 	if (!file)
2555 	{
2556 		print_error("ERROR: File open failed '%s'\n", filename);
2557 		return 1;
2558 	}
2559 
2560 	file.write(reinterpret_cast<char*>(&hdr), sizeof(astc_header));
2561 	file.write(reinterpret_cast<char*>(img.data), img.data_len);
2562 	return 0;
2563 }
2564