• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
10
11
12    EXPORT  |vp8_dequant_idct_add_neon|
13    ARM
14    REQUIRE8
15    PRESERVE8
16
17    AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; void vp8_dequant_idct_add_neon(short *input, short *dq,
;                                unsigned char *pred, unsigned char *dest,
;                                int pitch, int stride)
;
; Multiplies the 16 coefficients at input by the 16 factors at dq, runs
; a two-pass 4x4 inverse transform on the products, adds the result to a
; 4x4 block of bytes read from pred, saturates to unsigned 8 bit and
; writes four 4-byte rows to dest.  The 32 bytes at input are set to 0.
;
; In:    r0        short *input          (read, then cleared)
;        r1        short *dq
;        r2        unsigned char *pred   (4 bytes per row, rows pitch apart)
;        r3        unsigned char *dest   (4 bytes per row, rows stride apart)
;        [sp]      int pitch
;        [sp, #4]  int stride
; Out:   nothing in registers
; Clobb: r1, r2, r3, r12, q0-q3, q8-q12, q14, q15
;
; Only caller-saved NEON registers are used.  d8-d15 (q4-q7) have to be
; preserved across a call under the AAPCS, so the routine stays away
; from them instead of spilling them to the stack.
;-----------------------------------------------------------------------

|vp8_dequant_idct_add_neon| PROC
    vld1.16         {q8, q9}, [r0]          ; q8:q9   = input[0..15]
    vld1.16         {q10, q11}, [r1]        ; q10:q11 = dq[0..15]

    ldr             r1, [sp]                ; r1 = pitch
    vld1.32         {d24[0]}, [r2], r1      ; q12 = pred rows 0..3, 4 bytes each
    vld1.32         {d24[1]}, [r2], r1
    vld1.32         {d25[0]}, [r2], r1
    vld1.32         {d25[1]}, [r2]

    ldr             r1, [sp, #4]            ; r1 = stride (pitch is done with)

    adr             r12, _CONSTANTS_

    vmul.i16        q1, q8, q10             ; q1:q2 = input * dq, fed to the idct
    vmul.i16        q2, q9, q11

; ---- first transform pass --------------------------------------------
    vld1.16         {d0}, [r12]             ; d0[0] = cospi8sqrt2minus1
                                            ; d0[2] = sinpi8sqrt2
    vswp            d3, d4                  ; q2(vp[4] vp[12])

    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vshr.s16        q3, q3, #1              ; undo the doubling done by vqdmulh
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    vtrn.32         d2, d4                  ; 4x4 transpose of d2..d5
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

; memset(input, 0, 32) -- 32 bytes; zeros are built in q14:q15 while the
; second pass runs and stored once both registers are ready
    vmov.i16        q14, #0

; ---- second transform pass -------------------------------------------
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vmov            q15, q14

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    vst1.16         {q14, q15}, [r0]        ; clear input[0..15]

    vrshr.s16       d2, d2, #3              ; rounding shift: (x + 4) >> 3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    vtrn.32         d2, d4                  ; transpose back
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

; ---- add prediction, saturate, store ---------------------------------
    vaddw.u8        q1, q1, d24             ; widen pred bytes and add
    vaddw.u8        q2, q2, d25

    vqmovun.s16     d0, q1                  ; saturate to unsigned 8 bit
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r3], r1       ; one 4-byte row per store
    vst1.32         {d0[1]}, [r3], r1
    vst1.32         {d1[0]}, [r3], r1
    vst1.32         {d1[1]}, [r3]

    bx              lr

    ENDP            ; |vp8_dequant_idct_add_neon|
124
; Constant pool read by vp8_dequant_idct_add_neon.  The routine takes the
; address of _CONSTANTS_ with ADR and loads both words into one D
; register, then uses halfword lanes 0 and 2 as multipliers.  Each word
; therefore carries its 16-bit value twice.
_CONSTANTS_
cospi8sqrt2minus1   DCD     0x4e7b4e7b      ; 0x4e7b in both halfwords
sinpi8sqrt2         DCD     0x8a8c8a8c      ; 0x8a8c in both halfwords

    END
131