// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2022 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------

/**
 * @brief Functions for creating in-memory ASTC image structures.
 */

#include <cassert>
#include <cstring>

#include "astcenc_internal.h"

/**
 * @brief Loader pipeline function type for data fetch from memory.
 */
using pixel_loader = vfloat4(*)(const void*, int);

/**
 * @brief Loader pipeline function type for swizzling data in a vector.
 */
using pixel_swizzler = vfloat4(*)(vfloat4, const astcenc_swizzle&);

/**
 * @brief Loader pipeline function type for converting data in a vector to LNS.
 */
using pixel_converter = vfloat4(*)(vfloat4, vmask4);
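
// The three function types above form a small per-texel pipeline. A minimal
// sketch of how fetch_image_block() below composes them (illustrative only):
//
//     vfloat4 texel = loader(plane, offset);   // Fetch and widen to float
//     texel = swizzler(texel, swz);            // Optionally reorder channels
//     texel = converter(texel, lns_mask);      // Scale to UNORM16 or LNS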

/**
 * @brief Load an 8-bit UNORM texel from a data array.
 *
 * @param data          The data pointer.
 * @param base_offset   The index offset to the start of the pixel.
 */
static vfloat4 load_texel_u8(
    const void* data,
    int base_offset
) {
    const uint8_t* data8 = static_cast<const uint8_t*>(data);
    return int_to_float(vint4(data8 + base_offset)) / 255.0f;
}

/**
 * @brief Load a 16-bit fp16 texel from a data array.
 *
 * @param data          The data pointer.
 * @param base_offset   The index offset to the start of the pixel.
 */
static vfloat4 load_texel_f16(
    const void* data,
    int base_offset
) {
    const uint16_t* data16 = static_cast<const uint16_t*>(data);
    int r = data16[base_offset    ];
    int g = data16[base_offset + 1];
    int b = data16[base_offset + 2];
    int a = data16[base_offset + 3];
    return float16_to_float(vint4(r, g, b, a));
}

/**
 * @brief Load a 32-bit float texel from a data array.
 *
 * @param data          The data pointer.
 * @param base_offset   The index offset to the start of the pixel.
 */
static vfloat4 load_texel_f32(
    const void* data,
    int base_offset
) {
    const float* data32 = static_cast<const float*>(data);
    return vfloat4(data32 + base_offset);
}

/**
 * @brief No-op swizzle function, used when no swizzle is required.
 *
 * @param data   The source RGBA vector to swizzle.
 * @param swz    The swizzle to use.
 */
static vfloat4 swz_texel_skip(
    vfloat4 data,
    const astcenc_swizzle& swz
) {
    (void)swz;
    return data;
}

/**
 * @brief Swizzle a texel into a new arrangement.
 *
 * @param data   The source RGBA vector to swizzle.
 * @param swz    The swizzle to use.
 */
static vfloat4 swz_texel(
    vfloat4 data,
    const astcenc_swizzle& swz
) {
    // Six slots: four for the RGBA lanes, plus fixed 0.0 and 1.0 values
    // selectable via ASTCENC_SWZ_0 and ASTCENC_SWZ_1
    alignas(16) float datas[6];

    storea(data, datas);
    datas[ASTCENC_SWZ_0] = 0.0f;
    datas[ASTCENC_SWZ_1] = 1.0f;

    return vfloat4(datas[swz.r], datas[swz.g], datas[swz.b], datas[swz.a]);
}
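
// For example, swz_texel() above with swz = { ASTCENC_SWZ_R, ASTCENC_SWZ_R,
// ASTCENC_SWZ_R, ASTCENC_SWZ_G } rewrites an input texel (r, g, b, a) as
// (r, r, r, g), while ASTCENC_SWZ_0 / ASTCENC_SWZ_1 select the constant
// 0.0 / 1.0 slots.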

/**
 * @brief Encode a texel that is entirely LDR linear.
 *
 * @param data       The RGBA data to encode.
 * @param lns_mask   The mask for the HDR channels that need LNS encoding.
 */
static vfloat4 encode_texel_unorm(
    vfloat4 data,
    vmask4 lns_mask
) {
    (void)lns_mask;
    return data * 65535.0f;
}

/**
 * @brief Encode a texel that includes at least some HDR LNS texels.
 *
 * @param data       The RGBA data to encode.
 * @param lns_mask   The mask for the HDR channels that need LNS encoding.
 */
static vfloat4 encode_texel_lns(
    vfloat4 data,
    vmask4 lns_mask
) {
    vfloat4 datav_unorm = data * 65535.0f;
    vfloat4 datav_lns = float_to_lns(data);
    return select(datav_unorm, datav_lns, lns_mask);
}
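
// Note: both converters produce values in the block's internal working scale.
// LDR channels are scaled into the UNORM16 range [0, 65535]; HDR channels are
// replaced by their LNS encoding via float_to_lns(), selected per channel by
// lns_mask. For example, an LDR input of 1.0f is stored as 65535.0f.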

/* See header for documentation. */
void fetch_image_block(
    astcenc_profile decode_mode,
    const astcenc_image& img,
    image_block& blk,
    const block_size_descriptor& bsd,
    unsigned int xpos,
    unsigned int ypos,
    unsigned int zpos,
    const astcenc_swizzle& swz
) {
    unsigned int xsize = img.dim_x;
    unsigned int ysize = img.dim_y;
    unsigned int zsize = img.dim_z;

    blk.xpos = xpos;
    blk.ypos = ypos;
    blk.zpos = zpos;

    // True if any non-identity swizzle
    bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
                     (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);

    int idx = 0;

    vfloat4 data_min(1e38f);
    vfloat4 data_mean(0.0f);
    vfloat4 data_mean_scale(1.0f / static_cast<float>(bsd.texel_count));
    vfloat4 data_max(-1e38f);
    vmask4 grayscalev(true);

    // This works because we impose the same choice everywhere during encode
    uint8_t rgb_lns = (decode_mode == ASTCENC_PRF_HDR) ||
                      (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A) ? 1 : 0;
    uint8_t a_lns = decode_mode == ASTCENC_PRF_HDR ? 1 : 0;
    vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
    vmask4 lns_mask = use_lns != vint4::zero();

    // Set up the function pointers for the loading pipeline as needed
    pixel_loader loader = load_texel_u8;
    if (img.data_type == ASTCENC_TYPE_F16)
    {
        loader = load_texel_f16;
    }
    else if (img.data_type == ASTCENC_TYPE_F32)
    {
        loader = load_texel_f32;
    }

    pixel_swizzler swizzler = swz_texel_skip;
    if (needs_swz)
    {
        swizzler = swz_texel;
    }

    pixel_converter converter = encode_texel_unorm;
    if (any(lns_mask))
    {
        converter = encode_texel_lns;
    }

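    // The pipeline function pointers are resolved once per block, so the
    // per-texel loop below avoids branching on data type, swizzle, or
    // profile. Texels outside the image are clamped to the nearest edge
    // texel by the astc::min() coordinate clamps.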
    for (unsigned int z = 0; z < bsd.zdim; z++)
    {
        unsigned int zi = astc::min(zpos + z, zsize - 1);
        void* plane = img.data[zi];

        for (unsigned int y = 0; y < bsd.ydim; y++)
        {
            unsigned int yi = astc::min(ypos + y, ysize - 1);

            for (unsigned int x = 0; x < bsd.xdim; x++)
            {
                unsigned int xi = astc::min(xpos + x, xsize - 1);

                vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi));
                datav = swizzler(datav, swz);
                datav = converter(datav, lns_mask);

                // Compute block metadata
                data_min = min(data_min, datav);
                data_mean += datav * data_mean_scale;
                data_max = max(data_max, datav);

                grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());

                blk.data_r[idx] = datav.lane<0>();
                blk.data_g[idx] = datav.lane<1>();
                blk.data_b[idx] = datav.lane<2>();
                blk.data_a[idx] = datav.lane<3>();

                blk.rgb_lns[idx] = rgb_lns;
                blk.alpha_lns[idx] = a_lns;

                idx++;
            }
        }
    }

    // Reverse the encoding so that the origin texel is stored in its original format
    vfloat4 data_enc = blk.texel(0);
    vfloat4 data_enc_unorm = data_enc / 65535.0f;
    vfloat4 data_enc_lns = vfloat4::zero();

    if (rgb_lns || a_lns)
    {
        data_enc_lns = float16_to_float(lns_to_sf16(float_to_int(data_enc)));
    }

    blk.origin_texel = select(data_enc_unorm, data_enc_lns, lns_mask);

    // Store block metadata
    blk.data_min = data_min;
    blk.data_mean = data_mean;
    blk.data_max = data_max;
    blk.grayscale = all(grayscalev);
}
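
// A minimal sketch of how a caller might walk an image with this function.
// The loop structure is illustrative only; the fetch_image_block() call and
// its parameters are the only parts taken from this file:
//
//     for (unsigned int z = 0; z < img.dim_z; z += bsd.zdim)
//         for (unsigned int y = 0; y < img.dim_y; y += bsd.ydim)
//             for (unsigned int x = 0; x < img.dim_x; x += bsd.xdim)
//                 fetch_image_block(profile, img, blk, bsd, x, y, z, swz);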

/* See header for documentation. */
void fetch_image_block_fast_ldr(
    astcenc_profile decode_mode,
    const astcenc_image& img,
    image_block& blk,
    const block_size_descriptor& bsd,
    unsigned int xpos,
    unsigned int ypos,
    unsigned int zpos,
    const astcenc_swizzle& swz
) {
    // This fast path assumes 2D LDR 8-bit UNORM input with an identity
    // swizzle, so the decode_mode and swz parameters are unused here
    (void)swz;
    (void)decode_mode;

    unsigned int xsize = img.dim_x;
    unsigned int ysize = img.dim_y;
    unsigned int stride = img.dim_stride;
    blk.xpos = xpos;
    blk.ypos = ypos;
    blk.zpos = zpos;

    vfloat4 data_min(1e38f);
    vfloat4 data_mean = vfloat4::zero();
    vfloat4 data_max(-1e38f);
    vmask4 grayscalev(true);
    int idx = 0;

    const uint8_t* plane = static_cast<const uint8_t*>(img.data[0]);
    for (unsigned int y = ypos; y < ypos + bsd.ydim; y++)
    {
        unsigned int yi = astc::min(y, ysize - 1);

        for (unsigned int x = xpos; x < xpos + bsd.xdim; x++)
        {
            unsigned int xi = astc::min(x, xsize - 1);

            // Scale the 8-bit UNORM data directly into the internal
            // [0, 65535] range in a single multiply
            vint4 datavi = vint4(plane + (4 * stride * yi) + (4 * xi));
            vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f);

            // Compute block metadata
            data_min = min(data_min, datav);
            data_mean += datav;
            data_max = max(data_max, datav);

            grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());

            blk.data_r[idx] = datav.lane<0>();
            blk.data_g[idx] = datav.lane<1>();
            blk.data_b[idx] = datav.lane<2>();
            blk.data_a[idx] = datav.lane<3>();

            idx++;
        }
    }

    // Reverse the encoding so that the origin texel is stored in its original format
    blk.origin_texel = blk.texel(0) / 65535.0f;

    // Store block metadata
    blk.rgb_lns[0] = 0;
    blk.alpha_lns[0] = 0;
    blk.data_min = data_min;
    blk.data_mean = data_mean / static_cast<float>(bsd.texel_count);
    blk.data_max = data_max;
    blk.grayscale = all(grayscalev);
}

/* See header for documentation. */
void write_image_block(
    astcenc_image& img,
    const image_block& blk,
    const block_size_descriptor& bsd,
    unsigned int xpos,
    unsigned int ypos,
    unsigned int zpos,
    const astcenc_swizzle& swz
) {
    unsigned int xsize = img.dim_x;
    unsigned int ysize = img.dim_y;
    unsigned int zsize = img.dim_z;

    // For blocks overlapping the image edge only the valid region is written;
    // the nudges skip the block texels that fall outside the image
    unsigned int x_start = xpos;
    unsigned int x_end = std::min(xsize, xpos + bsd.xdim);
    unsigned int x_nudge = bsd.xdim - (x_end - x_start);

    unsigned int y_start = ypos;
    unsigned int y_end = std::min(ysize, ypos + bsd.ydim);
    unsigned int y_nudge = (bsd.ydim - (y_end - y_start)) * bsd.xdim;

    unsigned int z_start = zpos;
    unsigned int z_end = std::min(zsize, zpos + bsd.zdim);

    // Scratch data indexable by swizzle selector; slots 0-3 hold RGBA, slots
    // 4-6 hold the constant 0.0, the constant 1.0, and the reconstructed Z
    float data[7];
    data[ASTCENC_SWZ_0] = 0.0f;
    data[ASTCENC_SWZ_1] = 1.0f;

    // True if any non-identity swizzle
    bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
                     (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);

    // True if any swizzle uses Z reconstruct
    bool needs_z = (swz.r == ASTCENC_SWZ_Z) || (swz.g == ASTCENC_SWZ_Z) ||
                   (swz.b == ASTCENC_SWZ_Z) || (swz.a == ASTCENC_SWZ_Z);
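
    // ASTCENC_SWZ_Z reconstructs the Z component of a unit-length normal
    // from the X and Y stored in the red and alpha lanes. With x and y
    // remapped from [0, 1] to [-1, 1]:
    //
    //     z = sqrt(max(1 - x^2 - y^2, 0))
    //
    // ... which is then remapped back into [0, 1] for storage.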

    int idx = 0;
    if (img.data_type == ASTCENC_TYPE_U8)
    {
        for (unsigned int z = z_start; z < z_end; z++)
        {
            // Fetch the image plane
            uint8_t* data8 = static_cast<uint8_t*>(img.data[z]);

            for (unsigned int y = y_start; y < y_end; y++)
            {
                for (unsigned int x = x_start; x < x_end; x++)
                {
                    vint4 colori = vint4::zero();

                    // Errors are NaN encoded - convert to magenta error color
                    if (blk.data_r[idx] != blk.data_r[idx])
                    {
                        colori = vint4(0xFF, 0x00, 0xFF, 0xFF);
                    }
                    else if (needs_swz)
                    {
                        data[ASTCENC_SWZ_R] = blk.data_r[idx];
                        data[ASTCENC_SWZ_G] = blk.data_g[idx];
                        data[ASTCENC_SWZ_B] = blk.data_b[idx];
                        data[ASTCENC_SWZ_A] = blk.data_a[idx];

                        if (needs_z)
                        {
                            float xcoord = (data[0] * 2.0f) - 1.0f;
                            float ycoord = (data[3] * 2.0f) - 1.0f;
                            float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord;
                            if (zcoord < 0.0f)
                            {
                                zcoord = 0.0f;
                            }
                            data[ASTCENC_SWZ_Z] = (astc::sqrt(zcoord) * 0.5f) + 0.5f;
                        }

                        vfloat4 color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
                        colori = float_to_int_rtn(min(color, 1.0f) * 255.0f);
                    }
                    else
                    {
                        vfloat4 color = blk.texel(idx);
                        colori = float_to_int_rtn(min(color, 1.0f) * 255.0f);
                    }

                    colori = pack_low_bytes(colori);
                    store_nbytes(colori, data8 + (4 * xsize * y) + (4 * x));

                    idx++;
                }
                idx += x_nudge;
            }
            idx += y_nudge;
        }
    }
    else if (img.data_type == ASTCENC_TYPE_F16)
    {
        for (unsigned int z = z_start; z < z_end; z++)
        {
            // Fetch the image plane
            uint16_t* data16 = static_cast<uint16_t*>(img.data[z]);

            for (unsigned int y = y_start; y < y_end; y++)
            {
                for (unsigned int x = x_start; x < x_end; x++)
                {
                    vint4 color;

                    // NaNs are handled inline - no need to special case
                    if (needs_swz)
                    {
                        data[ASTCENC_SWZ_R] = blk.data_r[idx];
                        data[ASTCENC_SWZ_G] = blk.data_g[idx];
                        data[ASTCENC_SWZ_B] = blk.data_b[idx];
                        data[ASTCENC_SWZ_A] = blk.data_a[idx];

                        if (needs_z)
                        {
                            float xN = (data[0] * 2.0f) - 1.0f;
                            float yN = (data[3] * 2.0f) - 1.0f;
                            float zN = 1.0f - xN * xN - yN * yN;
                            if (zN < 0.0f)
                            {
                                zN = 0.0f;
                            }
                            data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
                        }

                        vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
                        color = float_to_float16(colorf);
                    }
                    else
                    {
                        vfloat4 colorf = blk.texel(idx);
                        color = float_to_float16(colorf);
                    }

                    data16[(4 * xsize * y) + (4 * x    )] = static_cast<uint16_t>(color.lane<0>());
                    data16[(4 * xsize * y) + (4 * x + 1)] = static_cast<uint16_t>(color.lane<1>());
                    data16[(4 * xsize * y) + (4 * x + 2)] = static_cast<uint16_t>(color.lane<2>());
                    data16[(4 * xsize * y) + (4 * x + 3)] = static_cast<uint16_t>(color.lane<3>());

                    idx++;
                }
                idx += x_nudge;
            }
            idx += y_nudge;
        }
    }
    else // if (img.data_type == ASTCENC_TYPE_F32)
    {
        assert(img.data_type == ASTCENC_TYPE_F32);

        for (unsigned int z = z_start; z < z_end; z++)
        {
            // Fetch the image plane
            float* data32 = static_cast<float*>(img.data[z]);

            for (unsigned int y = y_start; y < y_end; y++)
            {
                for (unsigned int x = x_start; x < x_end; x++)
                {
                    vfloat4 color = blk.texel(idx);

                    // NaNs are handled inline - no need to special case
                    if (needs_swz)
                    {
                        data[ASTCENC_SWZ_R] = color.lane<0>();
                        data[ASTCENC_SWZ_G] = color.lane<1>();
                        data[ASTCENC_SWZ_B] = color.lane<2>();
                        data[ASTCENC_SWZ_A] = color.lane<3>();

                        if (needs_z)
                        {
                            float xN = (data[0] * 2.0f) - 1.0f;
                            float yN = (data[3] * 2.0f) - 1.0f;
                            float zN = 1.0f - xN * xN - yN * yN;
                            if (zN < 0.0f)
                            {
                                zN = 0.0f;
                            }
                            data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
                        }

                        color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
                    }

                    store(color, data32 + (4 * xsize * y) + (4 * x));

                    idx++;
                }
                idx += x_nudge;
            }
            idx += y_nudge;
        }
    }
}