• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for creating in-memory ASTC image structures.
20  */
21 
22 #include <cassert>
23 #include <cstring>
24 
25 #include "astcenc_internal.h"
26 
27 /**
28  * @brief Loader pipeline function type for data fetch from memory.
29  */
30 using pixel_loader = vfloat4(*)(const void*, int);
31 
32 /**
33  * @brief Loader pipeline function type for swizzling data in a vector.
34  */
35 using pixel_swizzler = vfloat4(*)(vfloat4, const astcenc_swizzle&);
36 
37 /**
38  * @brief Loader pipeline function type for converting data in a vector to LNS.
39  */
40 using pixel_converter = vfloat4(*)(vfloat4, vmask4);
41 
42 /**
43  * @brief Load a 8-bit UNORM texel from a data array.
44  *
45  * @param data          The data pointer.
46  * @param base_offset   The index offset to the start of the pixel.
47  */
load_texel_u8(const void * data,int base_offset)48 static vfloat4 load_texel_u8(
49 	const void* data,
50 	int base_offset
51 ) {
52 	const uint8_t* data8 = static_cast<const uint8_t*>(data);
53 	return int_to_float(vint4(data8 + base_offset)) / 255.0f;
54 }
55 
56 /**
57  * @brief Load a 16-bit fp16 texel from a data array.
58  *
59  * @param data          The data pointer.
60  * @param base_offset   The index offset to the start of the pixel.
61  */
load_texel_f16(const void * data,int base_offset)62 static vfloat4 load_texel_f16(
63 	const void* data,
64 	int base_offset
65 ) {
66 	const uint16_t* data16 = static_cast<const uint16_t*>(data);
67 	int r = data16[base_offset    ];
68 	int g = data16[base_offset + 1];
69 	int b = data16[base_offset + 2];
70 	int a = data16[base_offset + 3];
71 	return float16_to_float(vint4(r, g, b, a));
72 }
73 
74 /**
75  * @brief Load a 32-bit float texel from a data array.
76  *
77  * @param data          The data pointer.
78  * @param base_offset   The index offset to the start of the pixel.
79  */
load_texel_f32(const void * data,int base_offset)80 static vfloat4 load_texel_f32(
81 	const void* data,
82 	int base_offset
83 ) {
84 	const float* data32 = static_cast<const float*>(data);
85 	return vfloat4(data32 + base_offset);
86 }
87 
88 /**
89  * @brief Dummy no-op swizzle function.
90  *
91  * @param data   The source RGBA vector to swizzle.
92  * @param swz    The swizzle to use.
93  */
swz_texel_skip(vfloat4 data,const astcenc_swizzle & swz)94 static vfloat4 swz_texel_skip(
95 	vfloat4 data,
96 	const astcenc_swizzle& swz
97 ) {
98 	(void)swz;
99 	return data;
100 }
101 
102 /**
103  * @brief Swizzle a texel into a new arrangement.
104  *
105  * @param data   The source RGBA vector to swizzle.
106  * @param swz    The swizzle to use.
107  */
swz_texel(vfloat4 data,const astcenc_swizzle & swz)108 static vfloat4 swz_texel(
109 	vfloat4 data,
110 	const astcenc_swizzle& swz
111 ) {
112 	alignas(16) float datas[6];
113 
114 	storea(data, datas);
115 	datas[ASTCENC_SWZ_0] = 0.0f;
116 	datas[ASTCENC_SWZ_1] = 1.0f;
117 
118 	return vfloat4(datas[swz.r], datas[swz.g], datas[swz.b], datas[swz.a]);
119 }
120 
121 /**
122  * @brief Encode a texel that is entirely LDR linear.
123  *
124  * @param data       The RGBA data to encode.
125  * @param lns_mask   The mask for the HDR channels than need LNS encoding.
126  */
encode_texel_unorm(vfloat4 data,vmask4 lns_mask)127 static vfloat4 encode_texel_unorm(
128 	vfloat4 data,
129 	vmask4 lns_mask
130 ) {
131 	(void)lns_mask;
132 	return data * 65535.0f;
133 }
134 
135 /**
136  * @brief Encode a texel that includes at least some HDR LNS texels.
137  *
138  * @param data       The RGBA data to encode.
139  * @param lns_mask   The mask for the HDR channels than need LNS encoding.
140  */
encode_texel_lns(vfloat4 data,vmask4 lns_mask)141 static vfloat4 encode_texel_lns(
142 	vfloat4 data,
143 	vmask4 lns_mask
144 ) {
145 	vfloat4 datav_unorm = data * 65535.0f;
146 	vfloat4 datav_lns   = float_to_lns(data);
147 	return select(datav_unorm, datav_lns, lns_mask);
148 }
149 
150 /* See header for documentation. */
fetch_image_block(astcenc_profile decode_mode,const astcenc_image & img,image_block & blk,const block_size_descriptor & bsd,unsigned int xpos,unsigned int ypos,unsigned int zpos,const astcenc_swizzle & swz)151 void fetch_image_block(
152 	astcenc_profile decode_mode,
153 	const astcenc_image& img,
154 	image_block& blk,
155 	const block_size_descriptor& bsd,
156 	unsigned int xpos,
157 	unsigned int ypos,
158 	unsigned int zpos,
159 	const astcenc_swizzle& swz
160 ) {
161 	unsigned int xsize = img.dim_x;
162 	unsigned int ysize = img.dim_y;
163 	unsigned int zsize = img.dim_z;
164 
165 	blk.xpos = xpos;
166 	blk.ypos = ypos;
167 	blk.zpos = zpos;
168 
169 	// True if any non-identity swizzle
170 	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
171 	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);
172 
173 	int idx = 0;
174 
175 	vfloat4 data_min(1e38f);
176 	vfloat4 data_mean(0.0f);
177 	vfloat4 data_mean_scale(1.0f / static_cast<float>(bsd.texel_count));
178 	vfloat4 data_max(-1e38f);
179 	vmask4 grayscalev(true);
180 
181 	// This works because we impose the same choice everywhere during encode
182 	uint8_t rgb_lns = (decode_mode == ASTCENC_PRF_HDR) ||
183 	                  (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A) ? 1 : 0;
184 	uint8_t a_lns = decode_mode == ASTCENC_PRF_HDR ? 1 : 0;
185 	vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
186 	vmask4 lns_mask = use_lns != vint4::zero();
187 
188 	// Set up the function pointers for loading pipeline as needed
189 	pixel_loader loader = load_texel_u8;
190 	if (img.data_type == ASTCENC_TYPE_F16)
191 	{
192 		loader = load_texel_f16;
193 	}
194 	else if  (img.data_type == ASTCENC_TYPE_F32)
195 	{
196 		loader = load_texel_f32;
197 	}
198 
199 	pixel_swizzler swizzler = swz_texel_skip;
200 	if (needs_swz)
201 	{
202 		swizzler = swz_texel;
203 	}
204 
205 	pixel_converter converter = encode_texel_unorm;
206 	if (any(lns_mask))
207 	{
208 		converter = encode_texel_lns;
209 	}
210 
211 	for (unsigned int z = 0; z < bsd.zdim; z++)
212 	{
213 		unsigned int zi = astc::min(zpos + z, zsize - 1);
214 		void* plane = img.data[zi];
215 
216 		for (unsigned int y = 0; y < bsd.ydim; y++)
217 		{
218 			unsigned int yi = astc::min(ypos + y, ysize - 1);
219 
220 			for (unsigned int x = 0; x < bsd.xdim; x++)
221 			{
222 				unsigned int xi = astc::min(xpos + x, xsize - 1);
223 
224 				vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi));
225 				datav = swizzler(datav, swz);
226 				datav = converter(datav, lns_mask);
227 
228 				// Compute block metadata
229 				data_min = min(data_min, datav);
230 				data_mean += datav * data_mean_scale;
231 				data_max = max(data_max, datav);
232 
233 				grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());
234 
235 				blk.data_r[idx] = datav.lane<0>();
236 				blk.data_g[idx] = datav.lane<1>();
237 				blk.data_b[idx] = datav.lane<2>();
238 				blk.data_a[idx] = datav.lane<3>();
239 
240 				blk.rgb_lns[idx] = rgb_lns;
241 				blk.alpha_lns[idx] = a_lns;
242 
243 				idx++;
244 			}
245 		}
246 	}
247 
248 	// Reverse the encoding so we store origin block in the original format
249 	vfloat4 data_enc = blk.texel(0);
250 	vfloat4 data_enc_unorm = data_enc / 65535.0f;
251 	vfloat4 data_enc_lns = vfloat4::zero();
252 
253 	if (rgb_lns || a_lns)
254 	{
255 		data_enc_lns = float16_to_float(lns_to_sf16(float_to_int(data_enc)));
256 	}
257 
258 	blk.origin_texel = select(data_enc_unorm, data_enc_lns, lns_mask);
259 
260 	// Store block metadata
261 	blk.data_min = data_min;
262 	blk.data_mean = data_mean;
263 	blk.data_max = data_max;
264 	blk.grayscale = all(grayscalev);
265 }
266 
267 /* See header for documentation. */
fetch_image_block_fast_ldr(astcenc_profile decode_mode,const astcenc_image & img,image_block & blk,const block_size_descriptor & bsd,unsigned int xpos,unsigned int ypos,unsigned int zpos,const astcenc_swizzle & swz)268 void fetch_image_block_fast_ldr(
269 	astcenc_profile decode_mode,
270 	const astcenc_image& img,
271 	image_block& blk,
272 	const block_size_descriptor& bsd,
273 	unsigned int xpos,
274 	unsigned int ypos,
275 	unsigned int zpos,
276 	const astcenc_swizzle& swz
277 ) {
278 	(void)swz;
279 	(void)decode_mode;
280 
281 	unsigned int xsize = img.dim_x;
282 	unsigned int ysize = img.dim_y;
283 	unsigned int stride = img.dim_stride;
284 	blk.xpos = xpos;
285 	blk.ypos = ypos;
286 	blk.zpos = zpos;
287 
288 	vfloat4 data_min(1e38f);
289 	vfloat4 data_mean = vfloat4::zero();
290 	vfloat4 data_max(-1e38f);
291 	vmask4 grayscalev(true);
292 	int idx = 0;
293 
294 	const uint8_t* plane = static_cast<const uint8_t*>(img.data[0]);
295 	for (unsigned int y = ypos; y < ypos + bsd.ydim; y++)
296 	{
297 		unsigned int yi = astc::min(y, ysize - 1);
298 
299 		for (unsigned int x = xpos; x < xpos + bsd.xdim; x++)
300 		{
301 			unsigned int xi = astc::min(x, xsize - 1);
302 
303 			vint4 datavi = vint4(plane + (4 * stride * yi) + (4 * xi));
304 			vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f);
305 
306 			// Compute block metadata
307 			data_min = min(data_min, datav);
308 			data_mean += datav;
309 			data_max = max(data_max, datav);
310 
311 			grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());
312 
313 			blk.data_r[idx] = datav.lane<0>();
314 			blk.data_g[idx] = datav.lane<1>();
315 			blk.data_b[idx] = datav.lane<2>();
316 			blk.data_a[idx] = datav.lane<3>();
317 
318 			idx++;
319 		}
320 	}
321 
322 	// Reverse the encoding so we store origin block in the original format
323 	blk.origin_texel = blk.texel(0) / 65535.0f;
324 
325 	// Store block metadata
326 	blk.rgb_lns[0] = 0;
327 	blk.alpha_lns[0] = 0;
328 	blk.data_min = data_min;
329 	blk.data_mean = data_mean / static_cast<float>(bsd.texel_count);
330 	blk.data_max = data_max;
331 	blk.grayscale = all(grayscalev);
332 }
333 
334 /* See header for documentation. */
write_image_block(astcenc_image & img,const image_block & blk,const block_size_descriptor & bsd,unsigned int xpos,unsigned int ypos,unsigned int zpos,const astcenc_swizzle & swz)335 void write_image_block(
336 	astcenc_image& img,
337 	const image_block& blk,
338 	const block_size_descriptor& bsd,
339 	unsigned int xpos,
340 	unsigned int ypos,
341 	unsigned int zpos,
342 	const astcenc_swizzle& swz
343 ) {
344 	unsigned int xsize = img.dim_x;
345 	unsigned int ysize = img.dim_y;
346 	unsigned int zsize = img.dim_z;
347 
348 	unsigned int x_start = xpos;
349 	unsigned int x_end = std::min(xsize, xpos + bsd.xdim);
350 	unsigned int x_nudge = bsd.xdim - (x_end - x_start);
351 
352 	unsigned int y_start = ypos;
353 	unsigned int y_end = std::min(ysize, ypos + bsd.ydim);
354 	unsigned int y_nudge = (bsd.ydim - (y_end - y_start)) * bsd.xdim;
355 
356 	unsigned int z_start = zpos;
357 	unsigned int z_end = std::min(zsize, zpos + bsd.zdim);
358 
359 	float data[7];
360 	data[ASTCENC_SWZ_0] = 0.0f;
361 	data[ASTCENC_SWZ_1] = 1.0f;
362 
363 	// True if any non-identity swizzle
364 	bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
365 	                 (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);
366 
367 	// True if any swizzle uses Z reconstruct
368 	bool needs_z = (swz.r == ASTCENC_SWZ_Z) || (swz.g == ASTCENC_SWZ_Z) ||
369 	               (swz.b == ASTCENC_SWZ_Z) || (swz.a == ASTCENC_SWZ_Z);
370 
371 	int idx = 0;
372 	if (img.data_type == ASTCENC_TYPE_U8)
373 	{
374 		for (unsigned int z = z_start; z < z_end; z++)
375 		{
376 			// Fetch the image plane
377 			uint8_t* data8 = static_cast<uint8_t*>(img.data[z]);
378 
379 			for (unsigned int y = y_start; y < y_end; y++)
380 			{
381 				for (unsigned int x = x_start; x < x_end; x++)
382 				{
383 					vint4 colori = vint4::zero();
384 
385 					// Errors are NaN encoded - convert to magenta error color
386 					if (blk.data_r[idx] != blk.data_r[idx])
387 					{
388 						colori = vint4(0xFF, 0x00, 0xFF, 0xFF);
389 					}
390 					else if (needs_swz)
391 					{
392 						data[ASTCENC_SWZ_R] = blk.data_r[idx];
393 						data[ASTCENC_SWZ_G] = blk.data_g[idx];
394 						data[ASTCENC_SWZ_B] = blk.data_b[idx];
395 						data[ASTCENC_SWZ_A] = blk.data_a[idx];
396 
397 						if (needs_z)
398 						{
399 							float xcoord = (data[0] * 2.0f) - 1.0f;
400 							float ycoord = (data[3] * 2.0f) - 1.0f;
401 							float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord;
402 							if (zcoord < 0.0f)
403 							{
404 								zcoord = 0.0f;
405 							}
406 							data[ASTCENC_SWZ_Z] = (astc::sqrt(zcoord) * 0.5f) + 0.5f;
407 						}
408 
409 						vfloat4 color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
410 						colori = float_to_int_rtn(min(color, 1.0f) * 255.0f);
411 					}
412 					else
413 					{
414 						vfloat4 color = blk.texel(idx);
415 						colori = float_to_int_rtn(min(color, 1.0f) * 255.0f);
416 					}
417 
418 					colori = pack_low_bytes(colori);
419 					store_nbytes(colori, data8 + (4 * xsize * y) + (4 * x    ));
420 
421 					idx++;
422 				}
423 				idx += x_nudge;
424 			}
425 			idx += y_nudge;
426 		}
427 	}
428 	else if (img.data_type == ASTCENC_TYPE_F16)
429 	{
430 		for (unsigned int z = z_start; z < z_end; z++)
431 		{
432 			// Fetch the image plane
433 			uint16_t* data16 = static_cast<uint16_t*>(img.data[z]);
434 
435 			for (unsigned int y = y_start; y < y_end; y++)
436 			{
437 				for (unsigned int x = x_start; x < x_end; x++)
438 				{
439 					vint4 color;
440 
441 					// NaNs are handled inline - no need to special case
442 					if (needs_swz)
443 					{
444 						data[ASTCENC_SWZ_R] = blk.data_r[idx];
445 						data[ASTCENC_SWZ_G] = blk.data_g[idx];
446 						data[ASTCENC_SWZ_B] = blk.data_b[idx];
447 						data[ASTCENC_SWZ_A] = blk.data_a[idx];
448 
449 						if (needs_z)
450 						{
451 							float xN = (data[0] * 2.0f) - 1.0f;
452 							float yN = (data[3] * 2.0f) - 1.0f;
453 							float zN = 1.0f - xN * xN - yN * yN;
454 							if (zN < 0.0f)
455 							{
456 								zN = 0.0f;
457 							}
458 							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
459 						}
460 
461 						vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
462 						color = float_to_float16(colorf);
463 					}
464 					else
465 					{
466 						vfloat4 colorf = blk.texel(idx);
467 						color = float_to_float16(colorf);
468 					}
469 
470 					data16[(4 * xsize * y) + (4 * x    )] = static_cast<uint16_t>(color.lane<0>());
471 					data16[(4 * xsize * y) + (4 * x + 1)] = static_cast<uint16_t>(color.lane<1>());
472 					data16[(4 * xsize * y) + (4 * x + 2)] = static_cast<uint16_t>(color.lane<2>());
473 					data16[(4 * xsize * y) + (4 * x + 3)] = static_cast<uint16_t>(color.lane<3>());
474 
475 					idx++;
476 				}
477 				idx += x_nudge;
478 			}
479 			idx += y_nudge;
480 		}
481 	}
482 	else // if (img.data_type == ASTCENC_TYPE_F32)
483 	{
484 		assert(img.data_type == ASTCENC_TYPE_F32);
485 
486 		for (unsigned int z = z_start; z < z_end; z++)
487 		{
488 			// Fetch the image plane
489 			float* data32 = static_cast<float*>(img.data[z]);
490 
491 			for (unsigned int y = y_start; y < y_end; y++)
492 			{
493 				for (unsigned int x = x_start; x < x_end; x++)
494 				{
495 					vfloat4 color = blk.texel(idx);
496 
497 					// NaNs are handled inline - no need to special case
498 					if (needs_swz)
499 					{
500 						data[ASTCENC_SWZ_R] = color.lane<0>();
501 						data[ASTCENC_SWZ_G] = color.lane<1>();
502 						data[ASTCENC_SWZ_B] = color.lane<2>();
503 						data[ASTCENC_SWZ_A] = color.lane<3>();
504 
505 						if (needs_z)
506 						{
507 							float xN = (data[0] * 2.0f) - 1.0f;
508 							float yN = (data[3] * 2.0f) - 1.0f;
509 							float zN = 1.0f - xN * xN - yN * yN;
510 							if (zN < 0.0f)
511 							{
512 								zN = 0.0f;
513 							}
514 							data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
515 						}
516 
517 						color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
518 					}
519 
520 					store(color, data32 + (4 * xsize * y) + (4 * x    ));
521 
522 					idx++;
523 				}
524 				idx += x_nudge;
525 			}
526 			idx += y_nudge;
527 		}
528 	}
529 }
530