• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
18@/**
19@*******************************************************************************
20@* @file
21@*  ihevc_intra_pred_luma_mode_18_34_neon.s
22@*
23@* @brief
24@*  contains function definitions for luma intra prediction modes 18 and 34.
25@* functions are coded using neon assembly instructions and can be compiled using
26
27@* rvct
28@*
29@* @author
30@*  yogeswaran rs
31@*
32@* @par list of functions:
33@*
34@*
35@* @remarks
36@*  none
37@*
38@*******************************************************************************
39@*/
40@/**
41@*******************************************************************************
42@*
43@* @brief
44@*    luma intra prediction for modes 18 and 34
45@*
46@* @par description:
47@*
48@* @param[in] pu1_ref
49@*  uword8 pointer to the source
50@*
51@* @param[out] pu1_dst
52@*  uword8 pointer to the destination
53@*
54@* @param[in] src_strd
55@*  integer source stride
56@*
57@* @param[in] dst_strd
58@*  integer destination stride
59@*
60@* @param[in] pi1_coeff
61@*  word8 pointer to the planar coefficients
62@*
63@* @param[in] nt
64@*  size of transform block
65@*
66@* @param[in] mode
67@*  prediction mode; 34 (0x22) is tested explicitly, any other value takes the mode 18 path
68@*
69@* @returns
70@*
71@* @remarks
72@*  none
73@*
74@*******************************************************************************
75@*/
76
77@void ihevc_intra_pred_luma_mode_18_34(uword8 *pu1_ref,
78@                                      word32 src_strd,
79@                                      uword8 *pu1_dst,
80@                                      word32 dst_strd,
81@                                      word32 nt,
82@                                      word32 mode)
83@
84@**************variables vs registers*****************************************
85@r0 => *pu1_ref
86@r1 => src_strd
87@r2 => *pu1_dst
88@r3 => dst_strd
89
90@stack contents from #40
91@   nt
92@   mode
93@   pi1_coeff
94
95.equ    nt_offset,      40              @byte offset of nt from sp once the 10-register push in the prologue is done
96.equ    mode_offset,    44              @byte offset of mode from sp (the word after nt)
97
98.text
99.align 4
100
101
102
103
104.globl ihevc_intra_pred_luma_mode_18_34_a9q
105
106.type ihevc_intra_pred_luma_mode_18_34_a9q, %function
107
@-----------------------------------------------------------------------------
@ ihevc_intra_pred_luma_mode_18_34_a9q
@
@ fills the destination block by copying 8-byte runs out of the reference
@ array. every destination row is read from an address one byte after
@ (mode == 0x22, i.e. 34) or one byte before (any other mode value) the
@ address used for the row above it.
@
@ source address used for row 0:
@   mode == 0x22 : pu1_ref + 2*nt + 2
@   otherwise    : pu1_ref + 2*nt
@
@ nt == 4 is handled by the straight-line code at mode2_4. every other nt
@ goes through the tile loop, which moves 8 rows x 8 bytes per step.
@ NOTE(review): the tile loop presumably expects nt to be a multiple of 8 -
@ confirm against the callers.
@
@ the tile loop is software pipelined: the prologue loads the first tile into
@ d0-d7, each kernel iteration stores the tile held in d0-d7 while loading the
@ next one, and the epilogue stores the last tile.
@
@ register usage in the tile loop:
@   r0    - source address the next tile's loads start from
@   r1    - src_strd on entry (never used as a stride); overwritten with the
@           loop counter (nt >> 3) * nt, which drops by 8 per tile
@   r2    - destination address of the first row of the tile being loaded
@   r3    - dst_strd, added to r10 after every row store
@   r4    - nt
@   r5    - mode
@   r6    - source step per row: +1 when mode == 0x22, -1 otherwise
@   r8    - running source pointer
@   r10   - running destination pointer
@   r11   - counter: nt, minus 8 before the loop and minus 8 per tile,
@           reloaded with nt on reaching 0; selects how r0 is advanced
@   r12   - counter: nt, minus 8 per tile, reloaded with nt on reaching 0;
@           reaching 0 moves r2 down to the next group of 8 rows
@   r14   - nt - 8
@   d0-d7 - the eight rows of the tile in flight
@
@ the conditional instructions depend on flags set several instructions
@ earlier; the vld1/vst1 and plain mov instructions in between leave the
@ flags untouched, so the instruction order must be kept.
@-----------------------------------------------------------------------------
108ihevc_intra_pred_luma_mode_18_34_a9q:
109
110    stmfd       sp!, {r4-r12, r14}          @save r4-r12 and lr: 10 words = 40 bytes, hence nt_offset = 40
111
112
113    ldr         r4,[sp,#nt_offset]          @r4 = nt
114    ldr         r5,[sp,#mode_offset]        @r5 = mode
115
116    cmp         r4,#4
117    beq         mode2_4                     @nt == 4 takes the short path
118
119    mov         r11,r4                      @r11 = nt
120    mov         r12,r4                      @r12 = nt
121    sub         r14,r4,#8                   @r14 = nt - 8
122
123    add         r0,r0,r4,lsl #1             @r0 = pu1_ref + 2*nt
124
125    cmp         r5,#0x22                    @flags used by subne/moveq/movne below (0x22 = 34)
126    mov         r10,r2                      @r10 = pu1_dst
127
128    add         r0,r0,#2                    @r0 = pu1_ref + 2*nt + 2
129    subne       r0,r0,#2                    @mode != 0x22: back to pu1_ref + 2*nt
130    moveq       r6,#1                       @mode == 0x22: step forward one byte per row
131    movne       r6,#-1                      @otherwise: step back one byte per row
132    mov         r8,r0                       @r8 = running source pointer
133
134prologue_cpy_32:
135
@ load the first tile: eight 8-byte rows, r8 stepped by r6 after each load.
@ the tile counter set-up is interleaved with the loads.
136    vld1.8      {d0},[r8],r6
137    lsr         r1, r4, #3                  @r1 = nt >> 3 (src_strd is overwritten here)
138    vld1.8      {d1},[r8],r6
139    mul         r1, r4, r1                  @r1 = nt * (nt >> 3)
140    vld1.8      {d2},[r8],r6
141    vld1.8      {d3},[r8],r6
142    subs        r1,r1,#8                    @first tile accounted for; z flag is consumed by the beq below
143    vld1.8      {d4},[r8],r6
144    vld1.8      {d5},[r8],r6
145    vld1.8      {d6},[r8],r6
146
147    vld1.8      {d7},[r8],r6
148
149
150    beq         epilogue_mode2              @r1 reached 0: the tile just loaded is the only one
151    sub         r11,r11,#8
152
153    cmp         r5,#0x22
154    addne       r0,r0,#8                    @mode != 0x22: next tile's loads start 8 bytes further on
155    movne       r8,r0
156    bne         kernel_mode18
@ mode == 0x22 falls through with r8 left where the eight loads above put it (r0 + 8)
157    @add        r8,r0,#8
158
159kernel_mode2:
@ store the tile held in d0-d7 while loading the next tile into d0-d7
160    vst1.8      {d0},[r10],r3
161    vst1.8      {d1},[r10],r3
162    subs        r12,r12,#8                  @flags used by addne r2, subeq r2 and moveq r12 below
163    vst1.8      {d2},[r10],r3
164    addne       r2,r2,#8                    @r12 != 0: next tile is 8 bytes to the right in the same rows
165    vst1.8      {d3},[r10],r3
166
167    vld1.8      {d0},[r8],r6                @d0 was stored above, so it can be reloaded already
168    vst1.8      {d4},[r10],r3
169
170    vst1.8      {d5},[r10],r3
171    vld1.8      {d1},[r8],r6
172    vst1.8      {d6},[r10],r3
173    vld1.8      {d2},[r8],r6
174    vst1.8      {d7},[r10],r3
175
176    vld1.8      {d3},[r8],r6
177    subeq       r2,r10,r14                  @r12 == 0: r10 is now 8 rows below the stored tile; r2 = r10 - (nt - 8)
178    vld1.8      {d4},[r8],r6
179    mov         r10,r2                      @r10 = destination of the tile being loaded
180    vld1.8      {d5},[r8],r6
181    moveq       r12,r4                      @r12 == 0: reload it with nt
182    vld1.8      {d6},[r8],r6
183    subs        r11,r11,#8                  @flags used by the three eq instructions below
184
185    vld1.8      {d7},[r8],r6
186
187    addeq       r0,r0,#8                    @r11 == 0: move the source base on by 8 bytes
188    moveq       r11,r4                      @and reload r11 with nt
189    moveq       r8,r0                       @and restart r8 from the new base (otherwise r8 keeps running on)
190
191    subs        r1, r1, #8                  @one more tile loaded
192
193    bne         kernel_mode2
194
195    b           epilogue_mode2              @last tile is in d0-d7: store it
196
197kernel_mode18:
@ same store/load interleave as kernel_mode2; only the source pointer
@ bookkeeping at the end differs
198    vst1.8      {d0},[r10],r3
199    vst1.8      {d1},[r10],r3
200    subs        r12,r12,#8                  @flags used by addne r2, subeq r2 and moveq r12 below
201    vst1.8      {d2},[r10],r3
202    addne       r2,r2,#8                    @r12 != 0: next tile is 8 bytes to the right in the same rows
203    vst1.8      {d3},[r10],r3
204
205    vld1.8      {d0},[r8],r6
206    vst1.8      {d4},[r10],r3
207
208    vst1.8      {d5},[r10],r3
209    vld1.8      {d1},[r8],r6
210
211    vst1.8      {d6},[r10],r3
212    vld1.8      {d2},[r8],r6
213    vst1.8      {d7},[r10],r3
214
215    vld1.8      {d3},[r8],r6
216    subeq       r2,r10,r14                  @r12 == 0: r10 is now 8 rows below the stored tile; r2 = r10 - (nt - 8)
217    vld1.8      {d4},[r8],r6
218    mov         r10,r2                      @r10 = destination of the tile being loaded
219    vld1.8      {d5},[r8],r6
220    moveq       r12,r4                      @r12 == 0: reload it with nt
221    vld1.8      {d6},[r8],r6
222    subs        r11,r11,#8                  @flags used by addne r0, moveq r11 and subeq r0 below
223    vld1.8      {d7},[r8],r6
224
225    addne       r0,r0,#8                    @r11 != 0: move the source base on by 8 bytes
226    moveq       r11,r4                      @r11 == 0: reload it with nt
227    subeq       r0,r8,r14                   @r11 == 0: source base = r8 - (nt - 8), r8 being 8 steps past the last tile start
228    subs        r1, r1, #8                  @one more tile loaded; flags survive the mov and feed the bne
229    mov         r8,r0                       @restart r8 from the updated base
230
231    bne         kernel_mode18
232
233
@ kernel_mode18 falls through to here once r1 reaches 0
234epilogue_mode2:
235
@ store the last tile held in d0-d7 (shared by both kernels and by the single-tile case)
236    vst1.8      {d0},[r10],r3
237    vst1.8      {d1},[r10],r3
238    vst1.8      {d2},[r10],r3
239    vst1.8      {d3},[r10],r3
240    vst1.8      {d4},[r10],r3
241    vst1.8      {d5},[r10],r3
242    vst1.8      {d6},[r10],r3
243    vst1.8      {d7},[r10],r3
244
245    b           end_func
246
247mode2_4:
248
@ nt == 4: four rows of four bytes, no loop
249    add         r0,r0,#10                   @r0 = pu1_ref + 10, i.e. 2*nt + 2 with nt == 4
250    cmp         r5,#0x22
251    subne       r0,r0,#2                    @mode != 0x22: r0 = pu1_ref + 8, i.e. 2*nt
252
253    moveq       r8,#1                       @source step per row: +1 for mode 0x22
254    movne       r8,#-1                      @-1 otherwise
255
@ each row loads 8 bytes into d0 but stores only its low 32 bits (d0[0])
256    vld1.8      {d0},[r0],r8
257    vst1.32     {d0[0]},[r2],r3
258
259    vld1.8      {d0},[r0],r8
260    vst1.32     {d0[0]},[r2],r3
261
262    vld1.8      {d0},[r0],r8
263    vst1.32     {d0[0]},[r2],r3
264
265    vld1.8      {d0},[r0],r8
266    vst1.32     {d0[0]},[r2],r3
267
268end_func:
269    ldmfd       sp!,{r4-r12,r15}            @restore r4-r12 and pop the saved lr into pc to return
270
271
272
273
274
275
276
277