• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//******************************************************************************
2//*
3//* Copyright (C) 2015 The Android Open Source Project
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************
18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19//*/
20///**
21//******************************************************************************
22//* @file
23//*  ih264_intra_pred_luma_4x4_av8.s
24//*
25//* @brief
26//*  Contains function definitions for intra 4x4 luma prediction.
27//*
28//* @author
29//*  Ittiam
30//*
31//* @par List of Functions:
32//*
33//*  -ih264_intra_pred_luma_4x4_mode_vert_av8
34//*  -ih264_intra_pred_luma_4x4_mode_horz_av8
35//*  -ih264_intra_pred_luma_4x4_mode_dc_av8
36//*  -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
37//*  -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
38//*  -ih264_intra_pred_luma_4x4_mode_vert_r_av8
39//*  -ih264_intra_pred_luma_4x4_mode_horz_d_av8
40//*  -ih264_intra_pred_luma_4x4_mode_vert_l_av8
41//*  -ih264_intra_pred_luma_4x4_mode_horz_u_av8
42//*
43//* @remarks
44//*  None
45//*
46//*******************************************************************************
47//*/
48
49///* All the functions here are replicated from ih264_intra_pred_filters.c
50//
51
52///**
53///**
54///**
55//
56
57.text
58.p2align 2
59.include "ih264_neon_macros.s"
60
61
62
63
64///**
65//*******************************************************************************
66//*
67//*ih264_intra_pred_luma_4x4_mode_vert
68//*
69//* @brief
70//*  Perform Intra prediction for  luma_4x4 mode:vertical
71//*
72//* @par Description:
73//* Perform Intra prediction for  luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
74//*
75//* @param[in] pu1_src
76//*  UWORD8 pointer to the source
77//*
78//* @param[out] pu1_dst
79//*  UWORD8 pointer to the destination
80//*
81//* @param[in] src_strd
82//*  integer source stride
83//*
84//* @param[in] dst_strd
85//*  integer destination stride
86//*
87//* @param[in] ui_neighboravailability
88//* availability of neighbouring pixels(Not used in this function)
89//*
90//* @returns
91//*
92//* @remarks
93//*  None
94//*
95//*******************************************************************************
96//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
97//                                        UWORD8 *pu1_dst,
98//                                        WORD32 src_strd,
99//                                        WORD32 dst_strd,
100//                                        WORD32 ui_neighboravailability)
101
102//**************Variables Vs Registers*****************************************
103//    x0 => *pu1_src
104//    x1 => *pu1_dst
105//    w2 =>  src_strd
106//    w3 =>  dst_strd
107//    w4 =>  ui_neighboravailability
108
109    .global ih264_intra_pred_luma_4x4_mode_vert_av8
110
// ih264_intra_pred_luma_4x4_mode_vert_av8
//
// Vertical prediction: the four bytes at pu1_src[5..8] are copied into each
// of the four destination rows.
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x5, x6, v16; x1 is advanced by 4 * dst_strd
ih264_intra_pred_luma_4x4_mode_vert_av8:

    push_v_regs

    add       x5, x0, #5                // x5 = &pu1_src[5]
    sxtw      x6, w3                    // widen the 32-bit stride for post-indexing
    ld1       {v16.s}[0], [x5]          // one 32-bit lane holds the 4 source pixels

    st1       {v16.s}[0], [x1], x6      // row 0
    st1       {v16.s}[0], [x1], x6      // row 1
    st1       {v16.s}[0], [x1], x6      // row 2
    st1       {v16.s}[0], [x1], x6      // row 3

    pop_v_regs
    ret





126
127
128
129
130
131///******************************************************************************
132
133
134///**
135//*******************************************************************************
136//*
137//*ih264_intra_pred_luma_4x4_mode_horz
138//*
139//* @brief
140//*  Perform Intra prediction for  luma_4x4 mode:horizontal
141//*
142//* @par Description:
143//*  Perform Intra prediction for  luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
144//*
145//* @param[in] pu1_src
146//*  UWORD8 pointer to the source
147//*
148//* @param[out] pu1_dst
149//*  UWORD8 pointer to the destination
150//*
151//* @param[in] src_strd
152//*  integer source stride
153//*
154//* @param[in] dst_strd
155//*  integer destination stride
156//*
157//* @param[in] ui_neighboravailability
158//* availability of neighbouring pixels(Not used in this function)
159//*
160//* @returns
161//*
162//* @remarks
163//*  None
164//*
165//*******************************************************************************
166//*/
167//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
168//                                         UWORD8 *pu1_dst,
169//                                         WORD32 src_strd,
170//                                         WORD32 dst_strd,
171//                                         WORD32 ui_neighboravailability)
172//**************Variables Vs Registers*****************************************
173//    x0 => *pu1_src
174//    x1 => *pu1_dst
175//    w2 =>  src_strd
176//    w3 =>  dst_strd
177//    w4 =>  ui_neighboravailability
178
179
180
181    .global ih264_intra_pred_luma_4x4_mode_horz_av8
182
// ih264_intra_pred_luma_4x4_mode_horz_av8
//
// Horizontal prediction: destination row r is filled with the single byte
// pu1_src[3 - r], i.e. row 0 uses pu1_src[3] and row 3 uses pu1_src[0].
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x5, v16-v20; x1 is advanced by 4 * dst_strd
ih264_intra_pred_luma_4x4_mode_horz_av8:

    push_v_regs
    sxtw      x5, w3                    // widen the 32-bit stride for post-indexing

    ld1       {v16.s}[0], [x0]          // lanes b[0..3] = pu1_src[0..3]

    // Broadcast each source byte across a vector, one vector per row.
    dup       v17.8b, v16.b[3]          // row 0 value
    dup       v18.8b, v16.b[2]          // row 1 value
    dup       v19.8b, v16.b[1]          // row 2 value
    dup       v20.8b, v16.b[0]          // row 3 value

    st1       {v17.s}[0], [x1], x5
    st1       {v18.s}[0], [x1], x5
    st1       {v19.s}[0], [x1], x5
    st1       {v20.s}[0], [x1], x5

    pop_v_regs
    ret

200
201
202
203
204
205
206
207///******************************************************************************
208
209
210///**
211//*******************************************************************************
212//*
213//*ih264_intra_pred_luma_4x4_mode_dc
214//*
215//* @brief
216//*  Perform Intra prediction for  luma_4x4 mode:DC
217//*
218//* @par Description:
219//*  Perform Intra prediction for  luma_4x4 mode:DC ,described in sec 8.3.1.2.3
220//*
221//* @param[in] pu1_src
222//*  UWORD8 pointer to the source
223//*
224//* @param[out] pu1_dst
225//*  UWORD8 pointer to the destination
226//*
227//* @param[in] src_strd
228//*  integer source stride
229//*
230//* @param[in] dst_strd
231//*  integer destination stride
232//*
233//* @param[in] ui_neighboravailability
234//*  availability of neighbouring pixels
235//*
236//* @returns
237//*
238//* @remarks
239//*  None
240//*
241//*******************************************************************************/
242//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
243//                                       UWORD8 *pu1_dst,
244//                                       WORD32 src_strd,
245//                                       WORD32 dst_strd,
246//                                       WORD32 ui_neighboravailability)
247
248//**************Variables Vs Registers*****************************************
249//    x0 => *pu1_src
250//    x1 => *pu1_dst
251//    w2 =>  src_strd
252//    w3 =>  dst_strd
253//    w4 =>  ui_neighboravailability
254
255
256
257    .global ih264_intra_pred_luma_4x4_mode_dc_av8
258
// ih264_intra_pred_luma_4x4_mode_dc_av8
//
// Fills the 4x4 destination block with a single DC value selected by
// ui_neighboravailability:
//   bit 0 (0x01) set   -> left neighbours pu1_src[0..3] take part
//   bit 2 (0x04) set   -> top  neighbours pu1_src[5..8] take part
//   both set           -> (sum of the 8 pixels + 4) >> 3
//   exactly one set    -> (sum of the 4 pixels + 2) >> 2
//   neither set        -> 128
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd,
//        w4 = ui_neighboravailability   (w2 = src_strd is not read)
// Uses:  w5-w9, v0; x1 is advanced by 4 * dst_strd
//
// No general-purpose callee-saved register is touched, so nothing besides
// the push_v_regs / pop_v_regs pair is placed on the stack.
// NOTE(review): only v0 is used here; push_v_regs / pop_v_regs are defined in
// ih264_neon_macros.s (not visible from this file) and are presumably
// unnecessary for this function - confirm before removing.
ih264_intra_pred_luma_4x4_mode_dc_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen for addressing

    tbz       w4, #0, .Lluma_4x4_dc_no_left     // bit 0 clear: left not available

    // Left available: w5 = pu1_src[0] + pu1_src[1] + pu1_src[2] + pu1_src[3]
    ldrb      w5, [x0, #3]
    ldrb      w6, [x0, #2]
    ldrb      w7, [x0, #1]
    ldrb      w8, [x0]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w7
    tbz       w4, #2, .Lluma_4x4_dc_avg4        // bit 2 clear: only left available

    // Left and top available: add pu1_src[5..8] and average all 8 pixels
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w6, w6, w7
    add       w8, w8, w9
    add       w5, w5, w6
    add       w5, w5, w8
    add       w5, w5, #4                // rounding term for the divide by 8
    lsr       w5, w5, #3
    b         .Lluma_4x4_dc_store

.Lluma_4x4_dc_no_left:
    tbz       w4, #2, .Lluma_4x4_dc_none        // neither left nor top available

    // Only top available: w5 = pu1_src[5] + pu1_src[6] + pu1_src[7] + pu1_src[8]
    ldrb      w5, [x0, #5]
    ldrb      w6, [x0, #6]
    ldrb      w7, [x0, #7]
    ldrb      w8, [x0, #8]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w7

.Lluma_4x4_dc_avg4:                     // w5 = sum of 4 pixels (left-only or top-only)
    add       w5, w5, #2                // rounding term for the divide by 4
    lsr       w5, w5, #2
    b         .Lluma_4x4_dc_store

.Lluma_4x4_dc_none:
    mov       w5, #128                  // fixed value used when no neighbour is available

.Lluma_4x4_dc_store:                    // w5 = DC value; replicate it over the 4x4 block
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3

    pop_v_regs
    ret

347
348
349
350
351
352
353
354///**
355//*******************************************************************************
356//*
357//*ih264_intra_pred_luma_4x4_mode_diag_dl
358//*
359//* @brief
360//*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left
361//*
362//* @par Description:
363//*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
364//*
365//* @param[in] pu1_src
366//*  UWORD8 pointer to the source
367//*
368//* @param[out] pu1_dst
369//*  UWORD8 pointer to the destination
370//*
371//* @param[in] src_strd
372//*  integer source stride
373//*
374//* @param[in] dst_strd
375//*  integer destination stride
376//*
377//* @param[in] ui_neighboravailability
378//*  availability of neighbouring pixels
379//*
380//* @returns
381//*
382//* @remarks
383//*  None
384//*
385//*******************************************************************************/
386//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
387//                                            UWORD8 *pu1_dst,
388//                                            WORD32 src_strd,
389//                                              WORD32 dst_strd,
390//                                              WORD32 ui_neighboravailability)
391
392//**************Variables Vs Registers*****************************************
393//    x0 => *pu1_src
394//    x1 => *pu1_dst
395//    w2 =>  src_strd
396//    w3 =>  dst_strd
397//    w4 =>  ui_neighboravailability
398
399
400    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8
401
// ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//
// Diagonal-down-left prediction from the eight bytes t[0..7] = pu1_src[5..12].
// With F[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2 for i = 0..5 and
// F[6] = (t[6] + 3*t[7] + 2) >> 2, destination row r receives F[r .. r+3].
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x5, x6, v0-v4, v20, v22, v24; x0 and x1 are modified
//
// No general-purpose callee-saved register is touched, so x19/x20 are not
// spilled to the stack.
ih264_intra_pred_luma_4x4_mode_diag_dl_av8:

    push_v_regs
    sxtw      x3, w3                    // widen the 32-bit stride for addressing

    add       x0, x0, #5                // x0 = &t[0]
    sub       x5, x3, #2                // stride left over after a 2-byte store
    add       x6, x0, #7                // x6 = &t[7]
    ld1       {v0.8b}, [x0]             // v0[i] = t[i]
    ext       v1.8b, v0.8b , v0.8b , #1 // v1[i] = t[i+1]   (lane 7 wraps, unused)
    ext       v2.8b, v0.8b , v0.8b , #2 // v2[i] = t[i+2]   (lanes 6,7 wrap)
    ld1       {v2.b}[6], [x6]           // patch lane 6 with t[7] so F[6] uses t[7] twice
    uaddl     v20.8h, v0.8b, v1.8b      // t[i]   + t[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // t[i+1] + t[i+2]
    add       v24.8h, v20.8h , v22.8h   // t[i] + 2*t[i+1] + t[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3[i] = F[i]: rounded >> 2, narrowed to 8 bits
    st1       {v3.s}[0], [x1], x3       // row 0: F0 F1 F2 F3
    ext       v4.8b, v3.8b , v3.8b , #1 // v4[i] = F[i+1]
    st1       {v4.s}[0], [x1], x3       // row 1: F1 F2 F3 F4
    st1       {v3.h}[1], [x1], #2       // row 2: F2 F3
    st1       {v3.h}[2], [x1], x5       //        F4 F5
    st1       {v4.h}[1], [x1], #2       // row 3: F3 F4
    st1       {v4.h}[2], [x1]           //        F5 F6

    pop_v_regs
    ret

433
434
435
436
437
438
439
440
441
442///**
443//*******************************************************************************
444//*
445//*ih264_intra_pred_luma_4x4_mode_diag_dr
446//*
447//* @brief
448//* Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right
449//*
450//* @par Description:
451//*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
452//*
453//* @param[in] pu1_src
454//*  UWORD8 pointer to the source
455//*
456//* @param[out] pu1_dst
457//*  UWORD8 pointer to the destination
458//*
459//* @param[in] src_strd
460//*  integer source stride
461//*
462//* @param[in] dst_strd
463//*  integer destination stride
464//*
465//* @param[in] ui_neighboravailability
466//*  availability of neighbouring pixels
467//*
468//* @returns
469//*
470//* @remarks
471//*  None
472//*
473//*******************************************************************************/
474//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
475//                                            UWORD8 *pu1_dst,
476//                                            WORD32 src_strd,
477//                                              WORD32 dst_strd,
478//                                              WORD32 ui_neighboravailability)
479
480//**************Variables Vs Registers*****************************************
481//    x0 => *pu1_src
482//    x1 => *pu1_dst
483//    w2 =>  src_strd
484//    w3 =>  dst_strd
485//    w4 =>  ui_neighboravailability
486
487
488    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8
489
// ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//
// Diagonal-down-right prediction from the nine bytes s[0..8] = pu1_src[0..8].
// With F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2 for i = 0..6, destination
// row r receives F[3-r .. 6-r].
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x5, v0-v4, v20, v22, v24; x0 and x1 are modified
//
// No general-purpose callee-saved register is touched, so x19/x20 are not
// spilled to the stack.
ih264_intra_pred_luma_4x4_mode_diag_dr_av8:

    push_v_regs
    sxtw      x3, w3                    // widen the 32-bit stride for addressing

    ld1       {v0.8b}, [x0]             // v0[i] = s[i]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1[i] = s[i+1]
    ext       v2.8b, v1.8b , v1.8b , #1 // v2[i] = s[i+2]   (lane 7 wraps, unused)
    uaddl     v20.8h, v0.8b, v1.8b      // s[i]   + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h , v22.8h   // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3[i] = F[i]: rounded >> 2, narrowed to 8 bits

    ext       v4.8b, v3.8b , v3.8b , #1 // v4[i] = F[i+1]
    sub       x5, x3, #2                // stride left over after a 2-byte store
    st1       {v4.h}[1], [x1], #2       // row 0: F3 F4
    st1       {v4.h}[2], [x1], x5       //        F5 F6
    st1       {v3.h}[1], [x1], #2       // row 1: F2 F3
    st1       {v3.h}[2], [x1], x5       //        F4 F5
    st1       {v4.s}[0], [x1], x3       // row 2: F1 F2 F3 F4
    st1       {v3.s}[0], [x1], x3       // row 3: F0 F1 F2 F3

    pop_v_regs
    ret

519
520
521
522
523
524
525
526///**
527//*******************************************************************************
528//*
529//*ih264_intra_pred_luma_4x4_mode_vert_r
530//*
531//* @brief
532//* Perform Intra prediction for  luma_4x4 mode:Vertical_Right
533//*
534//* @par Description:
535//*   Perform Intra prediction for  luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
536//*
537//* @param[in] pu1_src
538//*  UWORD8 pointer to the source
539//*
540//* @param[out] pu1_dst
541//*  UWORD8 pointer to the destination
542//*
543//* @param[in] src_strd
544//*  integer source stride
545//*
546//* @param[in] dst_strd
547//*  integer destination stride
548//*
549//* @param[in] ui_neighboravailability
550//*  availability of neighbouring pixels
551//*
552//* @returns
553//*
554//* @remarks
555//*  None
556//*
557//*******************************************************************************/
558//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
559//                                            UWORD8 *pu1_dst,
560//                                            WORD32 src_strd,
561//                                              WORD32 dst_strd,
562//                                              WORD32 ui_neighboravailability)
563
564//**************Variables Vs Registers*****************************************
565//    x0 => *pu1_src
566//    x1 => *pu1_dst
567//    w2 =>  src_strd
568//    w3 =>  dst_strd
569//    w4 =>  ui_neighboravailability
570
571
572    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8
573
// ih264_intra_pred_luma_4x4_mode_vert_r_av8
//
// Vertical-right prediction from the nine bytes s[0..8] = pu1_src[0..8].
//   A[i] = (s[i] + s[i+1] + 1) >> 1                 (i = 0..7)
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (i = 0..6)
// Output rows:
//   row 0: A4 A5 A6 A7
//   row 1: F3 F4 F5 F6
//   row 2: F2 A4 A5 A6
//   row 3: F1 F3 F4 F5
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x8, v0-v5, v20, v22, v24; x0 and x1 are modified
//
// No general-purpose callee-saved register is touched, so x19/x20 are not
// spilled to the stack.
ih264_intra_pred_luma_4x4_mode_vert_r_av8:

    push_v_regs
    sxtw      x3, w3                    // widen the 32-bit stride for addressing

    ld1       {v0.8b}, [x0]             // v0[i] = s[i]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1[i] = s[i+1]
    ext       v2.8b, v1.8b , v1.8b , #1 // v2[i] = s[i+2]   (lane 7 wraps, unused)
    uaddl     v20.8h, v0.8b, v1.8b      // s[i]   + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h , v22.8h   // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4[i] = A[i]
    sqrshrun  v3.8b, v24.8h, #2         // v3[i] = F[i]
    ext       v5.8b, v3.8b , v3.8b , #3 // v5[i] = F[i+3]
    st1       {v4.s}[1], [x1], x3       // row 0: A4 A5 A6 A7
    st1       {v5.s}[0], [x1], x3       // row 1: F3 F4 F5 F6
    sub       x8, x3, #3                // stride left over after 1 + 2 bytes stored
    st1       {v3.b}[2], [x1], #1       // row 2: F2
    st1       {v4.h}[2], [x1], #2       //        A4 A5
    st1       {v4.b}[6], [x1], x8       //        A6
    st1       {v3.b}[1], [x1], #1       // row 3: F1
    st1       {v5.h}[0], [x1], #2       //        F3 F4
    st1       {v5.b}[2], [x1]           //        F5

    pop_v_regs
    ret

607
608
609
610
611
612///**
613//*******************************************************************************
614//*
615//*ih264_intra_pred_luma_4x4_mode_horz_d
616//*
617//* @brief
618//* Perform Intra prediction for  luma_4x4 mode:Horizontal_Down
619//*
620//* @par Description:
621//*   Perform Intra prediction for  luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
622//*
623//* @param[in] pu1_src
624//*  UWORD8 pointer to the source
625//*
626//* @param[out] pu1_dst
627//*  UWORD8 pointer to the destination
628//*
629//* @param[in] src_strd
630//*  integer source stride
631//*
632//* @param[in] dst_strd
633//*  integer destination stride
634//*
635//* @param[in] ui_neighboravailability
636//*  availability of neighbouring pixels
637//*
638//* @returns
639//*
640//* @remarks
641//*  None
642//*
643//*******************************************************************************/
644//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
645//                                            UWORD8 *pu1_dst,
646//                                            WORD32 src_strd,
647//                                              WORD32 dst_strd,
648//                                              WORD32 ui_neighboravailability)
649
650//**************Variables Vs Registers*****************************************
651//    x0 => *pu1_src
652//    x1 => *pu1_dst
653//    w2 =>  src_strd
654//    w3 =>  dst_strd
655//    w4 =>  ui_neighboravailability
656
657
658    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8
659
// ih264_intra_pred_luma_4x4_mode_horz_d_av8
//
// Horizontal-down prediction from the nine bytes s[0..8] = pu1_src[0..8].
//   A[i] = (s[i] + s[i+1] + 1) >> 1                 (i = 0..7)
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (i = 0..6)
// Output rows:
//   row 0: A3 F3 F4 F5
//   row 1: A2 F2 A3 F3
//   row 2: A1 F1 A2 F2
//   row 3: A0 F0 A1 F1
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x5, v0-v2, v4-v6, v10, v20, v22, v24; x0 and x1 are modified
//
// No general-purpose callee-saved register is touched, so x19/x20 are not
// spilled to the stack.
ih264_intra_pred_luma_4x4_mode_horz_d_av8:

    push_v_regs
    sxtw      x3, w3                    // widen the 32-bit stride for addressing

    ld1       {v0.8b}, [x0]             // v0[i] = s[i]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1[i] = s[i+1]
    ext       v2.8b, v1.8b , v0.8b , #1 // v2[i] = s[i+2]   (lane 7 = s[0], unused)
    uaddl     v20.8h, v0.8b, v1.8b      // s[i]   + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h , v22.8h   // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4[i] = A[i]
    sqrshrun  v5.8b, v24.8h, #2         // v5[i] = F[i]
    sub       x5, x3, #2                // stride left over after a 2-byte store
    mov       v6.8b, v5.8b              // keep plain F: v5 is overwritten by trn2 below
    trn1      v10.8b, v4.8b, v5.8b      // A0 F0 A2 F2 A4 F4 A6 F6
    trn2      v5.8b, v4.8b, v5.8b       // A1 F1 A3 F3 A5 F5 A7 F7
    mov       v4.8b, v10.8b             // v4 = even-index pairs
    st1       {v5.h}[1], [x1], #2       // row 0: A3 F3
    st1       {v6.h}[2], [x1], x5       //        F4 F5
    st1       {v4.h}[1], [x1], #2       // row 1: A2 F2
    st1       {v5.h}[1], [x1], x5       //        A3 F3
    st1       {v5.h}[0], [x1], #2       // row 2: A1 F1
    st1       {v4.h}[1], [x1], x5       //        A2 F2
    st1       {v4.h}[0], [x1], #2       // row 3: A0 F0
    st1       {v5.h}[0], [x1], x5       //        A1 F1

    pop_v_regs
    ret

693
694
695
696
697
698
699
700///**
701//*******************************************************************************
702//*
703//*ih264_intra_pred_luma_4x4_mode_vert_l
704//*
705//* @brief
706//*  Perform Intra prediction for  luma_4x4 mode:Vertical_Left
707//*
708//* @par Description:
709//*   Perform Intra prediction for  luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
710//*
711//* @param[in] pu1_src
712//*  UWORD8 pointer to the source
713//*
714//* @param[out] pu1_dst
715//*  UWORD8 pointer to the destination
716//*
717//* @param[in] src_strd
718//*  integer source stride
719//*
720//* @param[in] dst_strd
721//*  integer destination stride
722//*
723//* @param[in] ui_neighboravailability
724//*  availability of neighbouring pixels
725//*
726//* @returns
727//*
728//* @remarks
729//*  None
730//*
731//*******************************************************************************/
732//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
733//                                            UWORD8 *pu1_dst,
734//                                            WORD32 src_strd,
735//                                              WORD32 dst_strd,
736//                                              WORD32 ui_neighboravailability)
737
738//**************Variables Vs Registers*****************************************
739//    x0 => *pu1_src
740//    x1 => *pu1_dst
741//    w2 =>  src_strd
742//    w3 =>  dst_strd
743//    w4 =>  ui_neighboravailability
744
745
746    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8
747
// ih264_intra_pred_luma_4x4_mode_vert_l_av8
//
// Vertical-left prediction from the nine bytes s[0..8] = pu1_src[4..12].
//   A[i] = (s[i] + s[i+1] + 1) >> 1                 (i = 0..7)
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (i = 0..6)
// Output rows:
//   row 0: A1 A2 A3 A4
//   row 1: F1 F2 F3 F4
//   row 2: A2 A3 A4 A5
//   row 3: F2 F3 F4 F5
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  v0-v2, v4-v9, v20, v22, v24; x0 and x1 are modified
//
// No general-purpose callee-saved register is touched, so x19/x20 are not
// spilled to the stack.
ih264_intra_pred_luma_4x4_mode_vert_l_av8:

    push_v_regs
    sxtw      x3, w3                    // widen the 32-bit stride for addressing
    add       x0, x0, #4                // x0 = &s[0] = &pu1_src[4]
    ld1       {v0.8b}, [x0]             // v0[i] = s[i]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1[i] = s[i+1]
    ext       v2.8b, v1.8b , v0.8b , #1 // v2[i] = s[i+2]   (lane 7 = s[0], unused)
    uaddl     v20.8h, v0.8b, v1.8b      // s[i]   + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]
    add       v24.8h, v20.8h , v22.8h   // s[i] + 2*s[i+1] + s[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4[i] = A[i]
    sqrshrun  v5.8b, v24.8h, #2         // v5[i] = F[i]
    ext       v6.8b, v4.8b , v4.8b , #1 // v6[i] = A[i+1]
    ext       v7.8b, v5.8b , v5.8b , #1 // v7[i] = F[i+1]
    st1       {v6.s}[0], [x1], x3       // row 0: A1 A2 A3 A4
    ext       v8.8b, v4.8b , v4.8b , #2 // v8[i] = A[i+2]
    ext       v9.8b, v5.8b , v5.8b , #2 // v9[i] = F[i+2]
    st1       {v7.s}[0], [x1], x3       // row 1: F1 F2 F3 F4
    st1       {v8.s}[0], [x1], x3       // row 2: A2 A3 A4 A5
    st1       {v9.s}[0], [x1], x3       // row 3: F2 F3 F4 F5

    pop_v_regs
    ret

776
777
778
779
780
781
782
783///**
784//*******************************************************************************
785//*
786//*ih264_intra_pred_luma_4x4_mode_horz_u
787//*
788//* @brief
789//*     Perform Intra prediction for  luma_4x4 mode:Horizontal_Up
790//*
791//* @par Description:
792//*      Perform Intra prediction for  luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
793//*
794//* @param[in] pu1_src
795//*  UWORD8 pointer to the source
796//*
797//* @param[out] pu1_dst
798//*  UWORD8 pointer to the destination
799//*
800//* @param[in] src_strd
801//*  integer source stride
802//*
803//* @param[in] dst_strd
804//*  integer destination stride
805//*
806//* @param[in] ui_neighboravailability
807//*  availability of neighbouring pixels
808//*
809//* @returns
810//*
811//* @remarks
812//*  None
813//*
814//*******************************************************************************/
815//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
816//                                           UWORD8 *pu1_dst,
817//                                           WORD32 src_strd,
818//                                             WORD32 dst_strd,
819//                                             WORD32 ui_neighboravailability)
820
821//**************Variables Vs Registers*****************************************
822//    x0 => *pu1_src
823//    x1 => *pu1_dst
824//    w2 =>  src_strd
825//    w3 =>  dst_strd
826//    w4 =>  ui_neighboravailability
827
828
829    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8
830
// ih264_intra_pred_luma_4x4_mode_horz_u_av8
//
// Horizontal-up prediction. Only s[0..3] = pu1_src[0..3] reach the output,
// although eight bytes are loaded from pu1_src.
//   A[i] = (s[i] + s[i+1] + 1) >> 1                 (i = 0..2 used)
//   F[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (i = 0..1 used)
//   F7   = (s[1] + 3*s[0] + 2) >> 2                 (built in lane 7)
// Output rows:
//   row 0: A2 F1 A1 F0
//   row 1: A1 F0 A0 F7
//   row 2: A0 F7 s0 s0
//   row 3: s0 s0 s0 s0
//
// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd
//        (w2 = src_strd and w4 = ui_neighboravailability are not read)
// Uses:  x5, w9, v0-v2, v4-v7, v10, v20, v22, v24; x1 is modified
//
// No general-purpose callee-saved register is touched, so x19/x20 are not
// spilled to the stack.
ih264_intra_pred_luma_4x4_mode_horz_u_av8:

    push_v_regs
    sxtw      x3, w3                    // widen the 32-bit stride for addressing

    ld1       {v0.8b}, [x0]             // v0[i] = s[i]
    ldrb      w9, [x0]                  // w9 = s[0], broadcast later for the tail pixels
    ext       v1.8b, v0.8b , v0.8b , #1 // v1[i] = s[i+1], lane 7 wraps to s[0]
    ld1       {v0.b}[7], [x0]           // v0[7] = s[0] so lane 7 yields F7 below
    ext       v2.8b, v1.8b , v1.8b , #1 // v2[i] = s[i+2], lane 7 = s[1]
    uaddl     v20.8h, v0.8b, v1.8b      // s[i]   + s[i+1]      (lane 7: 2*s[0])
    uaddl     v22.8h, v1.8b, v2.8b      // s[i+1] + s[i+2]      (lane 7: s[0] + s[1])
    add       v24.8h, v20.8h , v22.8h   // lane 7: 3*s[0] + s[1]
    sqrshrun  v4.8b, v20.8h, #1         // v4[i] = A[i]
    sqrshrun  v5.8b, v24.8h, #2         // v5[i] = F[i], v5[7] = F7
    ext       v6.8b, v5.8b , v4.8b , #1 // v6 = F1 F2 F3 F4 F5 F6 F7 A0
    st1       {v4.b}[2], [x1], #1       // row 0: A2
    st1       {v6.b}[0], [x1], #1       //        F1
    trn1      v10.8b, v6.8b, v5.8b      // F1 F0 F3 F2 F5 F4 F7 F6
    trn2      v5.8b, v6.8b, v5.8b       // F2 F1 F4 F3 F6 F5 A0 F7
    mov       v6.8b , v10.8b
    sub       x5, x3, #2                // stride left over after a 2-byte store
    trn2      v6.8b, v4.8b, v6.8b       // A1 F0 A3 F2 A5 F4 A7 F6
    dup       v7.8b, w9                 // every lane = s[0]
    st1       {v6.h}[0], [x1], x5       //        A1 F0   (end of row 0)
    st1       {v6.h}[0], [x1], #2       // row 1: A1 F0
    st1       {v5.h}[3], [x1], x5       //        A0 F7
    st1       {v5.h}[3], [x1], #2       // row 2: A0 F7
    st1       {v7.h}[3], [x1], x5       //        s0 s0
    st1       {v7.s}[0], [x1], x3       // row 3: s0 s0 s0 s0

    pop_v_regs
    ret

870
871
872
873