• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include <arm_neon.h>
12 #include <assert.h>
13 
14 #include "./vp9_rtcd.h"
15 #include "./vpx_config.h"
16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/arm/neon/vp9_iht_neon.h"
18 #include "vpx_dsp/arm/idct_neon.h"
19 #include "vpx_dsp/arm/mem_neon.h"
20 #include "vpx_dsp/arm/transpose_neon.h"
21 
// Runs a two-pass 8x8 inverse hybrid transform on |input| and hands the
// result to idct8x8_add8x8_neon() together with |dest| and |stride|.
//
// input    64 coefficients, read as eight consecutive groups of eight
//          (input + 0 * 8 ... input + 7 * 8).
// dest     Forwarded unchanged to idct8x8_add8x8_neon().
//          NOTE(review): presumably the reconstruction block the residual is
//          added into -- confirm against that helper.
// stride   Forwarded unchanged to idct8x8_add8x8_neon().
//          NOTE(review): presumably the row pitch of |dest| -- confirm.
// tx_type  Selects the 1-D kernel used in each pass:
//            DCT_DCT   - idct, then idct
//            ADST_DCT  - idct, then iadst8
//            DCT_ADST  - iadst8, then idct
//            ADST_ADST - iadst8, then iadst8
//          Any other value trips the assert when asserts are enabled; when
//          they are compiled out (NDEBUG) it is processed as ADST_ADST.
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
                            int tx_type) {
  const int16x8_t cospis = vld1q_s16(kCospi);
  const int16x4_t cospis0 = vget_low_s16(cospis);   // cospi 0, 8, 16, 24
  const int16x4_t cospis1 = vget_high_s16(cospis);  // cospi 4, 12, 20, 28
  int16x8_t a[8];

  // Pull the 8x8 block in as eight vectors of eight 16-bit lanes.
  a[0] = load_tran_low_to_s16q(input + 0 * 8);
  a[1] = load_tran_low_to_s16q(input + 1 * 8);
  a[2] = load_tran_low_to_s16q(input + 2 * 8);
  a[3] = load_tran_low_to_s16q(input + 3 * 8);
  a[4] = load_tran_low_to_s16q(input + 4 * 8);
  a[5] = load_tran_low_to_s16q(input + 5 * 8);
  a[6] = load_tran_low_to_s16q(input + 6 * 8);
  a[7] = load_tran_low_to_s16q(input + 7 * 8);

  // Transpose once before the first 1-D pass; each case below transposes
  // again between its two passes.
  transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);

  switch (tx_type) {
    case DCT_DCT:
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      break;

    case ADST_DCT:
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      iadst8(a);
      break;

    case DCT_ADST:
      iadst8(a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
      break;

    default:
      // Only ADST_ADST is expected here; checked only when asserts are
      // enabled.
      assert(tx_type == ADST_ADST);
      iadst8(a);
      transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
      iadst8(a);
      break;
  }

  idct8x8_add8x8_neon(a, dest, stride);
}
69