Lines Matching +full:1 +full:- +full:8
1 // SPDX-License-Identifier: LGPL-2.1+
6 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
8 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
14 #include "codec-fwht.h"
20 * be guaranteed that the magic 8 byte sequence (see below) can
27 #define IBLOCK 1
33 1, 8,
56 s16 block[8 * 8]; in rlc()
66 for (y = 0; y < 8; y++) { in rlc()
67 for (x = 0; x < 8; x++) { in rlc()
68 *wp = in[x + y * 8]; in rlc()
74 for (i = 63; i >= 0 && !block[zigzag[i]]; i--) in rlc()
80 to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0); in rlc()
92 cnt--; in rlc()
110 * This function will worst-case increase rlc_in by 65*2 bytes:
111 * one s16 value for the header and 8 * 8 coefficients of type s16.
120 s16 block[8 * 8 + 16]; in derlc()
129 * Now de-compress, it expands one byte to up to 15 bytes in derlc()
133 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to in derlc()
136 while (dec_count < 8 * 8) { in derlc()
149 for (i = 0; i < 64 - dec_count; i++) in derlc()
157 dec_count += length + 1; in derlc()
164 int y = pos / 8; in derlc()
165 int x = pos % 8; in derlc()
167 dwht_out[x + y * 8] = *wp++; in derlc()
181 2, 2, 3, 6, 6, 6, 6, 8,
200 for (j = 0; j < 8; j++) { in quantize_intra()
201 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { in quantize_intra()
203 if (*coeff >= -qp && *coeff <= qp) in quantize_intra()
216 for (j = 0; j < 8; j++) in dequantize_intra()
217 for (i = 0; i < 8; i++, quant++, coeff++) in dequantize_intra()
226 for (j = 0; j < 8; j++) { in quantize_inter()
227 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) { in quantize_inter()
229 if (*coeff >= -qp && *coeff <= qp) in quantize_inter()
242 for (j = 0; j < 8; j++) in dequantize_inter()
243 for (i = 0; i < 8; i++, quant++, coeff++) in dequantize_inter()
251 /* we'll need more than 8 bits for the transformed coefficients */ in fwht()
252 s32 workspace1[8], workspace2[8]; in fwht()
258 /* stage 1 */ in fwht()
259 for (i = 0; i < 8; i++, tmp += stride, out += 8) { in fwht()
261 case 1: in fwht()
262 workspace1[0] = tmp[0] + tmp[1] - add; in fwht()
263 workspace1[1] = tmp[0] - tmp[1]; in fwht()
265 workspace1[2] = tmp[2] + tmp[3] - add; in fwht()
266 workspace1[3] = tmp[2] - tmp[3]; in fwht()
268 workspace1[4] = tmp[4] + tmp[5] - add; in fwht()
269 workspace1[5] = tmp[4] - tmp[5]; in fwht()
271 workspace1[6] = tmp[6] + tmp[7] - add; in fwht()
272 workspace1[7] = tmp[6] - tmp[7]; in fwht()
275 workspace1[0] = tmp[0] + tmp[2] - add; in fwht()
276 workspace1[1] = tmp[0] - tmp[2]; in fwht()
278 workspace1[2] = tmp[4] + tmp[6] - add; in fwht()
279 workspace1[3] = tmp[4] - tmp[6]; in fwht()
281 workspace1[4] = tmp[8] + tmp[10] - add; in fwht()
282 workspace1[5] = tmp[8] - tmp[10]; in fwht()
284 workspace1[6] = tmp[12] + tmp[14] - add; in fwht()
285 workspace1[7] = tmp[12] - tmp[14]; in fwht()
288 workspace1[0] = tmp[0] + tmp[3] - add; in fwht()
289 workspace1[1] = tmp[0] - tmp[3]; in fwht()
291 workspace1[2] = tmp[6] + tmp[9] - add; in fwht()
292 workspace1[3] = tmp[6] - tmp[9]; in fwht()
294 workspace1[4] = tmp[12] + tmp[15] - add; in fwht()
295 workspace1[5] = tmp[12] - tmp[15]; in fwht()
297 workspace1[6] = tmp[18] + tmp[21] - add; in fwht()
298 workspace1[7] = tmp[18] - tmp[21]; in fwht()
301 workspace1[0] = tmp[0] + tmp[4] - add; in fwht()
302 workspace1[1] = tmp[0] - tmp[4]; in fwht()
304 workspace1[2] = tmp[8] + tmp[12] - add; in fwht()
305 workspace1[3] = tmp[8] - tmp[12]; in fwht()
307 workspace1[4] = tmp[16] + tmp[20] - add; in fwht()
308 workspace1[5] = tmp[16] - tmp[20]; in fwht()
310 workspace1[6] = tmp[24] + tmp[28] - add; in fwht()
311 workspace1[7] = tmp[24] - tmp[28]; in fwht()
317 workspace2[1] = workspace1[0] - workspace1[2]; in fwht()
318 workspace2[2] = workspace1[1] - workspace1[3]; in fwht()
319 workspace2[3] = workspace1[1] + workspace1[3]; in fwht()
322 workspace2[5] = workspace1[4] - workspace1[6]; in fwht()
323 workspace2[6] = workspace1[5] - workspace1[7]; in fwht()
328 out[1] = workspace2[0] - workspace2[4]; in fwht()
329 out[2] = workspace2[1] - workspace2[5]; in fwht()
330 out[3] = workspace2[1] + workspace2[5]; in fwht()
332 out[5] = workspace2[2] - workspace2[6]; in fwht()
333 out[6] = workspace2[3] - workspace2[7]; in fwht()
339 for (i = 0; i < 8; i++, out++) { in fwht()
340 /* stage 1 */ in fwht()
341 workspace1[0] = out[0] + out[1 * 8]; in fwht()
342 workspace1[1] = out[0] - out[1 * 8]; in fwht()
344 workspace1[2] = out[2 * 8] + out[3 * 8]; in fwht()
345 workspace1[3] = out[2 * 8] - out[3 * 8]; in fwht()
347 workspace1[4] = out[4 * 8] + out[5 * 8]; in fwht()
348 workspace1[5] = out[4 * 8] - out[5 * 8]; in fwht()
350 workspace1[6] = out[6 * 8] + out[7 * 8]; in fwht()
351 workspace1[7] = out[6 * 8] - out[7 * 8]; in fwht()
355 workspace2[1] = workspace1[0] - workspace1[2]; in fwht()
356 workspace2[2] = workspace1[1] - workspace1[3]; in fwht()
357 workspace2[3] = workspace1[1] + workspace1[3]; in fwht()
360 workspace2[5] = workspace1[4] - workspace1[6]; in fwht()
361 workspace2[6] = workspace1[5] - workspace1[7]; in fwht()
364 out[0 * 8] = workspace2[0] + workspace2[4]; in fwht()
365 out[1 * 8] = workspace2[0] - workspace2[4]; in fwht()
366 out[2 * 8] = workspace2[1] - workspace2[5]; in fwht()
367 out[3 * 8] = workspace2[1] + workspace2[5]; in fwht()
368 out[4 * 8] = workspace2[2] + workspace2[6]; in fwht()
369 out[5 * 8] = workspace2[2] - workspace2[6]; in fwht()
370 out[6 * 8] = workspace2[3] - workspace2[7]; in fwht()
371 out[7 * 8] = workspace2[3] + workspace2[7]; in fwht()
376 * Not the nicest way of doing it, but P-blocks get twice the range of
377 * that of the I-blocks. Therefore we need a type bigger than 8 bits.
384 /* we'll need more than 8 bits for the transformed coefficients */ in fwht16()
385 s32 workspace1[8], workspace2[8]; in fwht16()
390 for (i = 0; i < 8; i++, tmp += stride, out += 8) { in fwht16()
391 /* stage 1 */ in fwht16()
392 workspace1[0] = tmp[0] + tmp[1]; in fwht16()
393 workspace1[1] = tmp[0] - tmp[1]; in fwht16()
396 workspace1[3] = tmp[2] - tmp[3]; in fwht16()
399 workspace1[5] = tmp[4] - tmp[5]; in fwht16()
402 workspace1[7] = tmp[6] - tmp[7]; in fwht16()
406 workspace2[1] = workspace1[0] - workspace1[2]; in fwht16()
407 workspace2[2] = workspace1[1] - workspace1[3]; in fwht16()
408 workspace2[3] = workspace1[1] + workspace1[3]; in fwht16()
411 workspace2[5] = workspace1[4] - workspace1[6]; in fwht16()
412 workspace2[6] = workspace1[5] - workspace1[7]; in fwht16()
417 out[1] = workspace2[0] - workspace2[4]; in fwht16()
418 out[2] = workspace2[1] - workspace2[5]; in fwht16()
419 out[3] = workspace2[1] + workspace2[5]; in fwht16()
421 out[5] = workspace2[2] - workspace2[6]; in fwht16()
422 out[6] = workspace2[3] - workspace2[7]; in fwht16()
428 for (i = 0; i < 8; i++, out++) { in fwht16()
429 /* stage 1 */ in fwht16()
430 workspace1[0] = out[0] + out[1*8]; in fwht16()
431 workspace1[1] = out[0] - out[1*8]; in fwht16()
433 workspace1[2] = out[2*8] + out[3*8]; in fwht16()
434 workspace1[3] = out[2*8] - out[3*8]; in fwht16()
436 workspace1[4] = out[4*8] + out[5*8]; in fwht16()
437 workspace1[5] = out[4*8] - out[5*8]; in fwht16()
439 workspace1[6] = out[6*8] + out[7*8]; in fwht16()
440 workspace1[7] = out[6*8] - out[7*8]; in fwht16()
444 workspace2[1] = workspace1[0] - workspace1[2]; in fwht16()
445 workspace2[2] = workspace1[1] - workspace1[3]; in fwht16()
446 workspace2[3] = workspace1[1] + workspace1[3]; in fwht16()
449 workspace2[5] = workspace1[4] - workspace1[6]; in fwht16()
450 workspace2[6] = workspace1[5] - workspace1[7]; in fwht16()
454 out[0*8] = workspace2[0] + workspace2[4]; in fwht16()
455 out[1*8] = workspace2[0] - workspace2[4]; in fwht16()
456 out[2*8] = workspace2[1] - workspace2[5]; in fwht16()
457 out[3*8] = workspace2[1] + workspace2[5]; in fwht16()
458 out[4*8] = workspace2[2] + workspace2[6]; in fwht16()
459 out[5*8] = workspace2[2] - workspace2[6]; in fwht16()
460 out[6*8] = workspace2[3] - workspace2[7]; in fwht16()
461 out[7*8] = workspace2[3] + workspace2[7]; in fwht16()
469 * we'll need more than 8 bits for the transformed coefficients in ifwht()
472 int workspace1[8], workspace2[8]; in ifwht()
473 int inter = intra ? 0 : 1; in ifwht()
478 for (i = 0; i < 8; i++, tmp += 8, out += 8) { in ifwht()
479 /* stage 1 */ in ifwht()
480 workspace1[0] = tmp[0] + tmp[1]; in ifwht()
481 workspace1[1] = tmp[0] - tmp[1]; in ifwht()
484 workspace1[3] = tmp[2] - tmp[3]; in ifwht()
487 workspace1[5] = tmp[4] - tmp[5]; in ifwht()
490 workspace1[7] = tmp[6] - tmp[7]; in ifwht()
494 workspace2[1] = workspace1[0] - workspace1[2]; in ifwht()
495 workspace2[2] = workspace1[1] - workspace1[3]; in ifwht()
496 workspace2[3] = workspace1[1] + workspace1[3]; in ifwht()
499 workspace2[5] = workspace1[4] - workspace1[6]; in ifwht()
500 workspace2[6] = workspace1[5] - workspace1[7]; in ifwht()
505 out[1] = workspace2[0] - workspace2[4]; in ifwht()
506 out[2] = workspace2[1] - workspace2[5]; in ifwht()
507 out[3] = workspace2[1] + workspace2[5]; in ifwht()
509 out[5] = workspace2[2] - workspace2[6]; in ifwht()
510 out[6] = workspace2[3] - workspace2[7]; in ifwht()
516 for (i = 0; i < 8; i++, out++) { in ifwht()
517 /* stage 1 */ in ifwht()
518 workspace1[0] = out[0] + out[1 * 8]; in ifwht()
519 workspace1[1] = out[0] - out[1 * 8]; in ifwht()
521 workspace1[2] = out[2 * 8] + out[3 * 8]; in ifwht()
522 workspace1[3] = out[2 * 8] - out[3 * 8]; in ifwht()
524 workspace1[4] = out[4 * 8] + out[5 * 8]; in ifwht()
525 workspace1[5] = out[4 * 8] - out[5 * 8]; in ifwht()
527 workspace1[6] = out[6 * 8] + out[7 * 8]; in ifwht()
528 workspace1[7] = out[6 * 8] - out[7 * 8]; in ifwht()
532 workspace2[1] = workspace1[0] - workspace1[2]; in ifwht()
533 workspace2[2] = workspace1[1] - workspace1[3]; in ifwht()
534 workspace2[3] = workspace1[1] + workspace1[3]; in ifwht()
537 workspace2[5] = workspace1[4] - workspace1[6]; in ifwht()
538 workspace2[6] = workspace1[5] - workspace1[7]; in ifwht()
545 out[0 * 8] = workspace2[0] + workspace2[4]; in ifwht()
546 out[1 * 8] = workspace2[0] - workspace2[4]; in ifwht()
547 out[2 * 8] = workspace2[1] - workspace2[5]; in ifwht()
548 out[3 * 8] = workspace2[1] + workspace2[5]; in ifwht()
549 out[4 * 8] = workspace2[2] + workspace2[6]; in ifwht()
550 out[5 * 8] = workspace2[2] - workspace2[6]; in ifwht()
551 out[6 * 8] = workspace2[3] - workspace2[7]; in ifwht()
552 out[7 * 8] = workspace2[3] + workspace2[7]; in ifwht()
554 for (d = 0; d < 8; d++) in ifwht()
555 out[8 * d] >>= 6; in ifwht()
559 out[0 * 8] = workspace2[0] + workspace2[4]; in ifwht()
560 out[1 * 8] = workspace2[0] - workspace2[4]; in ifwht()
561 out[2 * 8] = workspace2[1] - workspace2[5]; in ifwht()
562 out[3 * 8] = workspace2[1] + workspace2[5]; in ifwht()
563 out[4 * 8] = workspace2[2] + workspace2[6]; in ifwht()
564 out[5 * 8] = workspace2[2] - workspace2[6]; in ifwht()
565 out[6 * 8] = workspace2[3] - workspace2[7]; in ifwht()
566 out[7 * 8] = workspace2[3] + workspace2[7]; in ifwht()
568 for (d = 0; d < 8; d++) { in ifwht()
569 out[8 * d] >>= 6; in ifwht()
570 out[8 * d] += 128; in ifwht()
581 for (i = 0; i < 8; i++) { in fill_encoder_block()
582 for (j = 0; j < 8; j++, input += input_step) in fill_encoder_block()
584 input += stride - 8 * input_step; in fill_encoder_block()
595 for (i = 0; i < 8 * 8; i++, tmp++) in var_intra()
599 for (i = 0; i < 8 * 8; i++, tmp++) in var_intra()
600 ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean); in var_intra()
609 for (i = 0; i < 8 * 8; i++, old++, new++) in var_inter()
610 ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new); in var_inter()
626 fill_encoder_block(reference, old, 8, 1); in decide_blocktype()
629 for (k = 0; k < 8; k++) { in decide_blocktype()
630 for (l = 0; l < 8; l++) { in decide_blocktype()
631 *deltablock = *work - *reference; in decide_blocktype()
637 deltablock -= 64; in decide_blocktype()
647 for (i = 0; i < 8; i++) { in fill_decoder_block()
648 for (j = 0; j < 8; j++, input++, dst += dst_step) { in fill_decoder_block()
656 dst += stride - (8 * dst_step); in fill_decoder_block()
665 for (k = 0; k < 8; k++) { in add_deltas()
666 for (l = 0; l < 8; l++) { in add_deltas()
679 ref += stride - (8 * ref_step); in add_deltas()
696 width = round_up(width, 8); in encode_plane()
697 height = round_up(height, 8); in encode_plane()
699 for (j = 0; j < height / 8; j++) { in encode_plane()
700 input = input_start + j * 8 * stride; in encode_plane()
701 for (i = 0; i < width / 8; i++) { in encode_plane()
710 fwht(input, cf->coeffs, stride, input_step, 1); in encode_plane()
711 quantize_intra(cf->coeffs, cf->de_coeffs, in encode_plane()
712 cf->i_frame_qp); in encode_plane()
716 fwht16(deltablock, cf->coeffs, 8, 0); in encode_plane()
717 quantize_inter(cf->coeffs, cf->de_coeffs, in encode_plane()
718 cf->p_frame_qp); in encode_plane()
721 ifwht(cf->de_coeffs, cf->de_fwht, blocktype); in encode_plane()
724 add_deltas(cf->de_fwht, refp, 8, 1); in encode_plane()
725 fill_decoder_block(refp, cf->de_fwht, 8, 1); in encode_plane()
728 input += 8 * input_step; in encode_plane()
729 refp += 8 * 8; in encode_plane()
731 size = rlc(cf->coeffs, *rlco, blocktype); in encode_plane()
733 !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) { in encode_plane()
734 __be16 *last_rlco = *rlco - size; in encode_plane()
784 __be16 *rlco = cf->rlc_data; in fwht_encode_frame()
788 rlco_max = rlco + size / 2 - 256; in fwht_encode_frame()
789 encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf, in fwht_encode_frame()
791 frm->luma_alpha_step, is_intra, next_is_intra); in fwht_encode_frame()
796 if (frm->components_num >= 3) { in fwht_encode_frame()
797 u32 chroma_h = height / frm->height_div; in fwht_encode_frame()
798 u32 chroma_w = width / frm->width_div; in fwht_encode_frame()
801 rlco_max = rlco + chroma_size / 2 - 256; in fwht_encode_frame()
802 encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, in fwht_encode_frame()
804 chroma_stride, frm->chroma_step, in fwht_encode_frame()
809 rlco_max = rlco + chroma_size / 2 - 256; in fwht_encode_frame()
810 encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, in fwht_encode_frame()
812 chroma_stride, frm->chroma_step, in fwht_encode_frame()
819 if (frm->components_num == 4) { in fwht_encode_frame()
820 rlco_max = rlco + size / 2 - 256; in fwht_encode_frame()
821 encoding |= encode_plane(frm->alpha, ref_frm->alpha, &rlco, in fwht_encode_frame()
823 stride, frm->luma_alpha_step, in fwht_encode_frame()
830 cf->size = (rlco - cf->rlc_data) * sizeof(*rlco); in fwht_encode_frame()
841 s16 copy[8 * 8]; in decode_plane()
846 width = round_up(width, 8); in decode_plane()
847 height = round_up(height, 8); in decode_plane()
852 if (end_of_rlco_buf + 1 < *rlco + width * height / 2) in decode_plane()
864 * by 65 * 2 bytes worst-case. in decode_plane()
868 for (j = 0; j < height / 8; j++) { in decode_plane()
869 for (i = 0; i < width / 8; i++) { in decode_plane()
870 const u8 *refp = ref + j * 8 * ref_stride + in decode_plane()
871 i * 8 * ref_step; in decode_plane()
872 u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; in decode_plane()
875 memcpy(cf->de_fwht, copy, sizeof(copy)); in decode_plane()
877 add_deltas(cf->de_fwht, refp, in decode_plane()
879 fill_decoder_block(dstp, cf->de_fwht, in decode_plane()
881 copies--; in decode_plane()
885 stat = derlc(rlco, cf->coeffs, end_of_rlco_buf); in decode_plane()
889 dequantize_inter(cf->coeffs); in decode_plane()
891 dequantize_intra(cf->coeffs); in decode_plane()
893 ifwht(cf->coeffs, cf->de_fwht, in decode_plane()
894 ((stat & PFRAME_BIT) && !is_intra) ? 0 : 1); in decode_plane()
896 copies = (stat & DUPS_MASK) >> 1; in decode_plane()
898 memcpy(copy, cf->de_fwht, sizeof(copy)); in decode_plane()
900 add_deltas(cf->de_fwht, refp, in decode_plane()
902 fill_decoder_block(dstp, cf->de_fwht, dst_stride, in decode_plane()
916 const __be16 *rlco = cf->rlc_data; in fwht_decode_frame()
917 const __be16 *end_of_rlco_buf = cf->rlc_data + in fwht_decode_frame()
918 (cf->size / sizeof(*rlco)) - 1; in fwht_decode_frame()
920 if (!decode_plane(cf, &rlco, height, width, ref->luma, ref_stride, in fwht_decode_frame()
921 ref->luma_alpha_step, dst->luma, dst_stride, in fwht_decode_frame()
922 dst->luma_alpha_step, in fwht_decode_frame()
936 if (!decode_plane(cf, &rlco, h, w, ref->cb, ref_chroma_stride, in fwht_decode_frame()
937 ref->chroma_step, dst->cb, dst_chroma_stride, in fwht_decode_frame()
938 dst->chroma_step, in fwht_decode_frame()
942 if (!decode_plane(cf, &rlco, h, w, ref->cr, ref_chroma_stride, in fwht_decode_frame()
943 ref->chroma_step, dst->cr, dst_chroma_stride, in fwht_decode_frame()
944 dst->chroma_step, in fwht_decode_frame()
951 if (!decode_plane(cf, &rlco, height, width, ref->alpha, ref_stride, in fwht_decode_frame()
952 ref->luma_alpha_step, dst->alpha, dst_stride, in fwht_decode_frame()
953 dst->luma_alpha_step, in fwht_decode_frame()