1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12 #include <assert.h>
13
14 #include "./vp9_rtcd.h"
15 #include "./vpx_config.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/arm/neon/vp9_iht_neon.h"
18 #include "vpx_dsp/arm/idct_neon.h"
19 #include "vpx_dsp/arm/mem_neon.h"
20 #include "vpx_dsp/arm/transpose_neon.h"
21
vp9_iht8x8_64_add_neon(const tran_low_t * input,uint8_t * dest,int stride,int tx_type)22 void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
23 int tx_type) {
24 const int16x8_t cospis = vld1q_s16(kCospi);
25 const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
26 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
27 int16x8_t a[8];
28
29 a[0] = load_tran_low_to_s16q(input + 0 * 8);
30 a[1] = load_tran_low_to_s16q(input + 1 * 8);
31 a[2] = load_tran_low_to_s16q(input + 2 * 8);
32 a[3] = load_tran_low_to_s16q(input + 3 * 8);
33 a[4] = load_tran_low_to_s16q(input + 4 * 8);
34 a[5] = load_tran_low_to_s16q(input + 5 * 8);
35 a[6] = load_tran_low_to_s16q(input + 6 * 8);
36 a[7] = load_tran_low_to_s16q(input + 7 * 8);
37
38 transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
39
40 switch (tx_type) {
41 case DCT_DCT:
42 idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
43 transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
44 idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
45 break;
46
47 case ADST_DCT:
48 idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
49 transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
50 iadst8(a);
51 break;
52
53 case DCT_ADST:
54 iadst8(a);
55 transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
56 idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
57 break;
58
59 default:
60 assert(tx_type == ADST_ADST);
61 iadst8(a);
62 transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
63 iadst8(a);
64 break;
65 }
66
67 idct8x8_add8x8_neon(a, dest, stride);
68 }
69