@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@/**
@ *******************************************************************************
@ * @file
@ *  ih264_padding_neon.s
@ *
@ * @brief
@ *  Contains function definitions for padding
@ *
@ * @author
@ *  Ittiam
@ *
@ * @par List of Functions:
@ *  - ih264_pad_top_a9q()
@ *  - ih264_pad_left_luma_a9q()
@ *  - ih264_pad_left_chroma_a9q()
@ *  - ih264_pad_right_luma_a9q()
@ *  - ih264_pad_right_chroma_a9q()
@ *
@ * @remarks
@ *  None
@ *
@ *******************************************************************************
@*/


@/**
@*******************************************************************************
@*
@* @brief
@*  Pad at the top of a 2d array
@*
@* @par Description:
@*  The row pointed to by pu1_src is copied into the pad_size rows directly
@*  above it (pu1_src - src_strd, pu1_src - 2 * src_strd, ...).
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the first byte of the row to replicate
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] wd
@*  integer width of the row in bytes
@*
@* @param[in] pad_size
@*  integer number of rows to write above the source row
@*
@* @returns none
@*
@* @remarks
@*  - The row is processed in 16-byte chunks. A width that is not a multiple
@*    of 16 is rounded up to the next multiple of 16, so up to 15 bytes past
@*    wd are read from the source row and written to every pad row.
@*  - The function returns without touching memory when wd <= 0 or
@*    pad_size <= 0.
@*
@*******************************************************************************
@*/
@void ih264_pad_top(UWORD8 *pu1_src,
@                   WORD32 src_strd,
@                   WORD32 wd,
@                   WORD32 pad_size)
@**************Variables Vs Registers*************************
@   r0 => *pu1_src   (advances by 16 per column chunk)
@   r1 => src_strd
@   r2 => wd         (bytes still to process)
@   r3 => pad_size
@   r4 => store pointer inside the current 16-byte column
@   r5 => start of the next 16-byte column in the first pad row
@   r6 => -src_strd  (step from one pad row to the row above it)
@   r7 => pad rows still to write for the current column

.text
.p2align 2

    .global ih264_pad_top_a9q

ih264_pad_top_a9q:

    stmfd         sp!, {r4-r7, lr}      @ save the callee-saved registers used below and the return address

    cmp           r2, #0                @ wd <= 0: nothing to copy
    ble           end_func_pad_top
    cmp           r3, #0                @ pad_size <= 0: no rows to write
    ble           end_func_pad_top

    sub           r5, r0, r1            @ r5 = pu1_src - src_strd (first pad row)
    rsb           r6, r1, #0            @ r6 = -src_strd

loop_neon_memcpy_mul_16:
    vld1.8        {d0, d1}, [r0]!       @ load 16 source bytes, advance the source pointer
    mov           r4, r5                @ start storing at the first pad row of this column
    mov           r7, r3                @ reload the row counter
    add           r5, r5, #16           @ next column chunk in the first pad row

loop_neon_pad_top:
    vst1.8        {d0, d1}, [r4], r6    @ write 16 bytes, then step one row up
    subs          r7, r7, #1
    bne           loop_neon_pad_top     @ r7 starts > 0, so it reaches exactly 0

    subs          r2, r2, #16
    bgt           loop_neon_memcpy_mul_16   @ bgt (not bne) so a width that is not a multiple of 16 still terminates

end_func_pad_top:
    ldmfd         sp!, {r4-r7, pc}      @ restore the saved registers and return




@/**
@*******************************************************************************
@*
@* @brief
@*  Padding (luma block) at the left of a 2d array
@*
@* @par Description:
@*  For every row, the byte at pu1_src[0] is replicated into the bytes
@*  immediately to its left, starting at pu1_src - pad_size.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the first (left-most) pixel of the first row
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer number of rows to pad
@*
@* @param[in] pad_size
@*  integer padding size in bytes
@*
@* @returns none
@*
@* @remarks
@*  - Exactly 16 bytes per row are written when pad_size == 16; for any other
@*    pad_size exactly 32 bytes per row are written.
@*  - Rows are handled eight at a time and the loops stop only when the row
@*    counter reaches exactly zero, so ht has to be a positive multiple of 8.
@*
@*******************************************************************************
@*/
@void ih264_pad_left_luma(UWORD8 *pu1_src,
@                        WORD32 src_strd,
@                        WORD32 ht,
@                        WORD32 pad_size)
@**************Variables Vs Registers*************************
@   r0      => *pu1_src (source column, steps one row per load)
@   r1      => src_strd
@   r2      => ht       (rows still to pad)
@   r3      => pad_size
@   r4      => destination pointer, starts at pu1_src - pad_size
@   r6      => src_strd - 16 (row step after the first 16-byte store of a 32-byte row)
@   r8-r11  => pixel values of four consecutive rows
@   q0-q3   => those pixel values replicated across 16 lanes



    .global ih264_pad_left_luma_a9q

ih264_pad_left_luma_a9q:

    push          {r4-r11, lr}          @ save callee-saved registers and the return address

    sub           r6, r1, #16           @ r6 = src_strd - 16
    sub           r4, r0, r3            @ r4 = pu1_src - pad_size
    cmp           r3, #16
    bne           loop_32               @ anything other than 16 takes the 32-byte path

loop_16:                                @ 16 pad bytes per row, 8 rows per iteration
    ldrb          r8, [r0], r1          @ row 0 pixel
    ldrb          r9, [r0], r1          @ row 1 pixel
    ldrb          r10, [r0], r1         @ row 2 pixel
    vdup.8        q0, r8
    vdup.8        q1, r9
    vst1.8        {d0, d1}, [r4], r1    @ row 0
    vst1.8        {d2, d3}, [r4], r1    @ row 1
    ldrb          r11, [r0], r1         @ row 3 pixel
    vdup.8        q2, r10
    vst1.8        {d4, d5}, [r4], r1    @ row 2
    ldrb          r8, [r0], r1          @ row 4 pixel
    vdup.8        q3, r11
    vst1.8        {d6, d7}, [r4], r1    @ row 3
    ldrb          r9, [r0], r1          @ row 5 pixel
    ldrb          r10, [r0], r1         @ row 6 pixel
    vdup.8        q0, r8
    vst1.8        {d0, d1}, [r4], r1    @ row 4
    ldrb          r11, [r0], r1         @ row 7 pixel
    vdup.8        q1, r9
    vst1.8        {d2, d3}, [r4], r1    @ row 5
    vdup.8        q2, r10
    vst1.8        {d4, d5}, [r4], r1    @ row 6
    vdup.8        q3, r11
    vst1.8        {d6, d7}, [r4], r1    @ row 7
    subs          r2, r2, #8
    bne           loop_16
    b             end_func

loop_32:                                @ 32 pad bytes per row (two 16-byte stores), 8 rows per iteration
    ldrb          r8, [r0], r1          @ row 0 pixel
    ldrb          r9, [r0], r1          @ row 1 pixel
    ldrb          r10, [r0], r1         @ row 2 pixel
    vdup.8        q0, r8
    vst1.8        {d0, d1}, [r4]!       @ row 0, bytes 0-15
    vst1.8        {d0, d1}, [r4], r6    @ row 0, bytes 16-31, then move to the next row
    vdup.8        q1, r9
    vst1.8        {d2, d3}, [r4]!       @ row 1
    vst1.8        {d2, d3}, [r4], r6
    ldrb          r11, [r0], r1         @ row 3 pixel
    vdup.8        q2, r10
    vst1.8        {d4, d5}, [r4]!       @ row 2
    vst1.8        {d4, d5}, [r4], r6
    ldrb          r8, [r0], r1          @ row 4 pixel
    vdup.8        q3, r11
    vst1.8        {d6, d7}, [r4]!       @ row 3
    ldrb          r9, [r0], r1          @ row 5 pixel
    vst1.8        {d6, d7}, [r4], r6
    ldrb          r10, [r0], r1         @ row 6 pixel
    vdup.8        q0, r8
    vst1.8        {d0, d1}, [r4]!       @ row 4
    vst1.8        {d0, d1}, [r4], r6
    ldrb          r11, [r0], r1         @ row 7 pixel
    vdup.8        q1, r9
    vst1.8        {d2, d3}, [r4]!       @ row 5
    vst1.8        {d2, d3}, [r4], r6
    vdup.8        q2, r10
    vst1.8        {d4, d5}, [r4]!       @ row 6
    vst1.8        {d4, d5}, [r4], r6
    vdup.8        q3, r11
    vst1.8        {d6, d7}, [r4]!       @ row 7
    vst1.8        {d6, d7}, [r4], r6
    subs          r2, r2, #8
    bne           loop_32

end_func:
    pop           {r4-r11, pc}          @ restore the saved registers and return





@/**
@*******************************************************************************
@*
@* @brief
@*  Padding (chroma block) at the left of a 2d array
@*
@* @par Description:
@*  For every row, the 16-bit value at pu1_src[0..1] is replicated into the
@*  bytes immediately to its left, starting at pu1_src - pad_size.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the first (left-most) byte pair of the first row
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer number of rows to pad
@*
@* @param[in] pad_size
@*  integer padding size in bytes
@*
@* @returns none
@*
@* @remarks
@*  - The store width is hard coded: 32 bytes (16 replicated byte pairs) are
@*    written per row whatever pad_size is; pad_size only positions the
@*    destination pointer.
@*  - Rows are handled four at a time and the loop stops only when the row
@*    counter reaches exactly zero, so ht has to be a positive multiple of 4.
@*
@*******************************************************************************
@*/
@void ih264_pad_left_chroma(UWORD8 *pu1_src,
@                            WORD32 src_strd,
@                            WORD32 ht,
@                            WORD32 pad_size)
@**************Variables Vs Registers*************************
@   r0      => *pu1_src (source column, steps one row per load)
@   r1      => src_strd
@   r2      => ht       (rows still to pad)
@   r3      => pad_size
@   r4      => destination pointer, starts at pu1_src - pad_size
@   r6      => src_strd - 16 (row step after the first 16-byte store of a row)
@   r8-r11  => 16-bit values of four consecutive rows
@   q0-q3   => those values replicated across eight 16-bit lanes



    .global ih264_pad_left_chroma_a9q

ih264_pad_left_chroma_a9q:

    stmfd         sp!, {r4-r11, lr}     @ save callee-saved registers and the return address

    sub           r4, r0, r3            @ r4 = pu1_src - pad_size
    sub           r6, r1, #16           @ r6 = src_strd - 16


loop_32_l_c:                            @ 32 pad bytes per row, 4 + 4 rows per iteration
    ldrh          r8, [r0], r1
    ldrh          r9, [r0], r1
    vdup.u16      q0, r8
    ldrh          r10, [r0], r1
    vst1.8        {q0}, [r4]!           @ row 0, bytes 0-15
    vdup.u16      q1, r9
    vst1.8        {q0}, [r4], r6        @ row 0, bytes 16-31, then move to the next row
    ldrh          r11, [r0], r1
    vst1.8        {q1}, [r4]!           @ row 1
    vdup.u16      q2, r10
    vst1.8        {q1}, [r4], r6
    vdup.u16      q3, r11
    vst1.8        {q2}, [r4]!           @ row 2
    vst1.8        {q2}, [r4], r6
    subs          r2, r2, #4            @ flags survive the NEON stores below
    vst1.8        {q3}, [r4]!           @ row 3
    vst1.8        {q3}, [r4], r6

    beq           end_func_l_c          @ done when the remaining height was exactly 4

    ldrh          r8, [r0], r1
    ldrh          r9, [r0], r1
    vdup.u16      q0, r8
    ldrh          r10, [r0], r1
    vst1.8        {q0}, [r4]!           @ row 4
    vdup.u16      q1, r9
    vst1.8        {q0}, [r4], r6
    ldrh          r11, [r0], r1
    vst1.8        {q1}, [r4]!           @ row 5
    vdup.u16      q2, r10
    vst1.8        {q1}, [r4], r6
    vdup.u16      q3, r11
    vst1.8        {q2}, [r4]!           @ row 6
    vst1.8        {q2}, [r4], r6
    subs          r2, r2, #4
    vst1.8        {q3}, [r4]!           @ row 7
    vst1.8        {q3}, [r4], r6

    bne           loop_32_l_c           @ more rows left; otherwise fall through to the exit

end_func_l_c:
    ldmfd         sp!, {r4-r11, pc}     @ restore the saved registers and return





@/**
@*******************************************************************************
@*
@* @brief
@*  Padding (luma block) at the right of a 2d array
@*
@* @par Description:
@*  For every row, the byte at pu1_src[-1] is replicated into the bytes
@*  starting at pu1_src[0].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the first pad byte of the first row (one past the
@*  right-most pixel)
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer number of rows to pad
@*
@* @param[in] pad_size
@*  integer padding size in bytes
@*
@* @returns none
@*
@* @remarks
@*  - Exactly 16 bytes per row are written when pad_size == 16; for any other
@*    pad_size exactly 32 bytes per row are written.
@*  - Rows are handled eight at a time and the loops stop only when the row
@*    counter reaches exactly zero, so ht has to be a positive multiple of 8.
@*
@*******************************************************************************
@*/
@void ih264_pad_right_luma(UWORD8 *pu1_src,
@                        WORD32 src_strd,
@                        WORD32 ht,
@                        WORD32 pad_size)
@{
@    WORD32 row;
@
@    for(row = 0; row < ht; row++)
@    {
@        memset(pu1_src, *(pu1_src -1), pad_size);
@
@        pu1_src += src_strd;
@    }
@}
@
@**************Variables Vs Registers*************************
@   r0      => pu1_src - 1 (source column, steps one row per load)
@   r1      => src_strd
@   r2      => ht          (rows still to pad)
@   r3      => pad_size
@   r4      => destination pointer, starts at pu1_src
@   r6      => src_strd - 16 (row step after the first 16-byte store of a 32-byte row)
@   r8-r11  => pixel values of four consecutive rows
@   q0-q3   => those pixel values replicated across 16 lanes



    .global ih264_pad_right_luma_a9q

ih264_pad_right_luma_a9q:

    stmfd         sp!, {r4-r11, lr}     @ save callee-saved registers and the return address

    mov           r4, r0                @ r4 = pu1_src (first pad byte)
    sub           r6, r1, #16           @ r6 = src_strd - 16
    sub           r0, r0, #1            @ r0 = address of the last real pixel in the row
    cmp           r3, #16
    bne           loop_32_r             @ stay inside this function for the 32-byte path
loop_16_r:                              @ 16 pad bytes per row, 8 rows per iteration
    ldrb          r8, [r0], r1
    ldrb          r9, [r0], r1
    vdup.u8       q0, r8
    ldrb          r10, [r0], r1
    vst1.8        {q0}, [r4], r1        @ row 0
    vdup.u8       q1, r9
    vst1.8        {q1}, [r4], r1        @ row 1
    ldrb          r11, [r0], r1
    vdup.u8       q2, r10
    vdup.u8       q3, r11
    vst1.8        {q2}, [r4], r1        @ row 2
    ldrb          r8, [r0], r1
    vst1.8        {q3}, [r4], r1        @ row 3
    ldrb          r9, [r0], r1
    vdup.u8       q0, r8
    ldrb          r10, [r0], r1
    vst1.8        {q0}, [r4], r1        @ row 4
    vdup.u8       q1, r9
    ldrb          r11, [r0], r1
    vst1.8        {q1}, [r4], r1        @ row 5
    vdup.u8       q2, r10
    vdup.u8       q3, r11
    subs          r2, r2, #8            @ flags survive the NEON stores below
    vst1.8        {q2}, [r4], r1        @ row 6
    vst1.8        {q3}, [r4], r1        @ row 7
    bne           loop_16_r
    b             end_func_r

loop_32_r:                              @ 32 pad bytes per row (two 16-byte stores), 8 rows per iteration
    ldrb          r8, [r0], r1
    ldrb          r9, [r0], r1
    vdup.u8       q0, r8
    ldrb          r10, [r0], r1
    vst1.8        {q0}, [r4]!           @ row 0, bytes 0-15
    vdup.u8       q1, r9
    vst1.8        {q0}, [r4], r6        @ row 0, bytes 16-31, then move to the next row
    vst1.8        {q1}, [r4]!           @ row 1
    vdup.u8       q2, r10
    vst1.8        {q1}, [r4], r6
    ldrb          r11, [r0], r1
    vst1.8        {q2}, [r4]!           @ row 2
    vdup.u8       q3, r11
    vst1.8        {q2}, [r4], r6
    ldrb          r8, [r0], r1
    vst1.8        {q3}, [r4]!           @ row 3
    ldrb          r9, [r0], r1
    vdup.u8       q0, r8
    vst1.8        {q3}, [r4], r6
    ldrb          r10, [r0], r1
    vst1.8        {q0}, [r4]!           @ row 4
    vdup.u8       q1, r9
    vst1.8        {q0}, [r4], r6
    ldrb          r11, [r0], r1
    vst1.8        {q1}, [r4]!           @ row 5
    vdup.u8       q2, r10
    vst1.8        {q1}, [r4], r6
    vst1.8        {q2}, [r4]!           @ row 6
    vdup.u8       q3, r11
    vst1.8        {q2}, [r4], r6
    subs          r2, r2, #8
    vst1.8        {q3}, [r4]!           @ row 7
    vst1.8        {q3}, [r4], r6
    bne           loop_32_r



end_func_r:
    ldmfd         sp!, {r4-r11, pc}     @ restore the saved registers and return





@/**
@*******************************************************************************
@*
@* @brief
@*  Padding (chroma block) at the right of a 2d array
@*
@* @par Description:
@*  For every row, the 16-bit value at pu1_src[-2..-1] is replicated into the
@*  bytes starting at pu1_src[0].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the first pad byte of the first row (one past the
@*  right-most byte pair)
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer number of rows to pad
@*
@* @param[in] pad_size
@*  integer padding size in bytes
@*
@* @returns none
@*
@* @remarks
@*  - The store width is hard coded: 32 bytes (16 replicated byte pairs) are
@*    written per row; pad_size (r3) is never read by this routine.
@*  - Rows are handled four at a time and the loop stops only when the row
@*    counter reaches exactly zero, so ht has to be a positive multiple of 4.
@*
@*******************************************************************************
@*/
@void ih264_pad_right_chroma(UWORD8 *pu1_src,
@                        WORD32 src_strd,
@                        WORD32 ht,
@                        WORD32 pad_size)
@**************Variables Vs Registers*************************
@   r0      => pu1_src - 2 (source byte pair, steps one row per load)
@   r1      => src_strd
@   r2      => ht          (rows still to pad)
@   r3      => pad_size    (unused)
@   r4      => destination pointer, starts at pu1_src
@   r6      => src_strd - 16 (row step after the first 16-byte store of a row)
@   r8-r11  => 16-bit values of four consecutive rows
@   q0-q3   => those values replicated across eight 16-bit lanes



    .global ih264_pad_right_chroma_a9q

ih264_pad_right_chroma_a9q:

    stmfd         sp!, {r4-r11, lr}     @ save callee-saved registers and the return address

    mov           r4, r0                @ r4 = pu1_src (first pad byte)
    sub           r6, r1, #16           @ r6 = src_strd - 16
    sub           r0, r0, #2            @ r0 = address of the last real byte pair in the row
loop_32_r_c:                            @ 32 pad bytes per row, 4 + 4 rows per iteration
    ldrh          r8, [r0], r1
    ldrh          r9, [r0], r1
    vdup.u16      q0, r8
    ldrh          r10, [r0], r1
    vst1.8        {q0}, [r4]!           @ row 0, bytes 0-15
    vdup.u16      q1, r9
    vst1.8        {q0}, [r4], r6        @ row 0, bytes 16-31, then move to the next row
    vst1.8        {q1}, [r4]!           @ row 1
    vdup.u16      q2, r10
    vst1.8        {q1}, [r4], r6
    subs          r2, r2, #4            @ flags survive the loads and NEON stores below
    ldrh          r11, [r0], r1
    vst1.8        {q2}, [r4]!           @ row 2
    vdup.u16      q3, r11
    vst1.8        {q2}, [r4], r6
    vst1.8        {q3}, [r4]!           @ row 3
    vst1.8        {q3}, [r4], r6

    beq           end_func_r_c          @ done when the remaining height was exactly 4

    ldrh          r8, [r0], r1
    vdup.u16      q0, r8
    ldrh          r9, [r0], r1
    ldrh          r10, [r0], r1
    vst1.8        {q0}, [r4]!           @ row 4
    vdup.u16      q1, r9
    vst1.8        {q0}, [r4], r6
    ldrh          r11, [r0], r1
    vst1.8        {q1}, [r4]!           @ row 5
    vdup.u16      q2, r10
    vst1.8        {q1}, [r4], r6
    vst1.8        {q2}, [r4]!           @ row 6
    vdup.u16      q3, r11
    vst1.8        {q2}, [r4], r6
    subs          r2, r2, #4
    vst1.8        {q3}, [r4]!           @ row 7
    vst1.8        {q3}, [r4], r6

    bne           loop_32_r_c           @ more rows left; otherwise fall through to the exit

end_func_r_c:
    ldmfd         sp!, {r4-r11, pc}     @ restore the saved registers and return





