• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//******************************************************************************
2//*
3//* Copyright (C) 2015 The Android Open Source Project
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************
18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19//*/
20///**
21//******************************************************************************
22//* @file
23//*  ih264_intra_pred_luma_8x8_av8.s
24//*
25//* @brief
26//*  Contains function definitions for intra 8x8 Luma prediction .
27//*
28//* @author
29//*  Ittiam
30//*
31//* @par List of Functions:
32//*
33//*  -ih264_intra_pred_luma_8x8_mode_vert_av8
34//*  -ih264_intra_pred_luma_8x8_mode_horz_av8
35//*  -ih264_intra_pred_luma_8x8_mode_dc_av8
36//*  -ih264_intra_pred_luma_8x8_mode_diag_dl_av8
37//*  -ih264_intra_pred_luma_8x8_mode_diag_dr_av8
38//*  -ih264_intra_pred_luma_8x8_mode_vert_r_av8
39//*  -ih264_intra_pred_luma_8x8_mode_horz_d_av8
40//*  -ih264_intra_pred_luma_8x8_mode_vert_l_av8
41//*  -ih264_intra_pred_luma_8x8_mode_horz_u_av8
42//*
43//* @remarks
44//*  None
45//*
46//*******************************************************************************
47//*/
48
49///* All the functions here are replicated from ih264_intra_pred_filters.c
50//
51
52///**
53///**
54///**
55
56.text
57.p2align 2
58.include "ih264_neon_macros.s"
59
60.extern ih264_gai1_intrapred_luma_8x8_horz_u
61
62
63
64///**
65//*******************************************************************************
66//*
67//*ih264_intra_pred_luma_8x8_mode_vert
68//*
69//* @brief
70//*   Perform Intra prediction for  luma_8x8 mode:vertical
71//*
72//* @par Description:
73//* Perform Intra prediction for  luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
74//*
75//* @param[in] pu1_src
76//*  UWORD8 pointer to the source
77//*
78//* @param[out] pu1_dst
79//*  UWORD8 pointer to the destination
80//*
81//* @param[in] src_strd
82//*  integer source stride
83//*
84//* @param[in] dst_strd
85//*  integer destination stride
86//*
87//* @param[in] ui_neighboravailability
88//* availability of neighbouring pixels(Not used in this function)
89//*
90//* @returns
91//*
92//* @remarks
93//*  None
94//*
95//*******************************************************************************
96//void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
97//                                        UWORD8 *pu1_dst,
98//                                        WORD32 src_strd,
99//                                        WORD32 dst_strd,
100//                                        WORD32 ui_neighboravailability)
101
102//**************Variables Vs Registers*****************************************
103//    x0 => *pu1_src
104//    x1 => *pu1_dst
105//    x2 =>  src_strd
106//    x3 =>  dst_strd
107//   x4 =>  ui_neighboravailability
108
109
110    .global ih264_intra_pred_luma_8x8_mode_vert_av8
111
112ih264_intra_pred_luma_8x8_mode_vert_av8:
113
114    // STMFD sp!, {x4-x12, x14}          //store register values to stack
115    push_v_regs
116    //stp x19, x20,[sp,#-16]!
117
118    add       x0, x0, #9
119    ld1       {v0.8b}, [x0]
120
121    st1       {v0.8b}, [x1], x3
122    st1       {v0.8b}, [x1], x3
123    st1       {v0.8b}, [x1], x3
124    st1       {v0.8b}, [x1], x3
125    st1       {v0.8b}, [x1], x3
126    st1       {v0.8b}, [x1], x3
127    st1       {v0.8b}, [x1], x3
128    st1       {v0.8b}, [x1], x3
129
130    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
131    //ldp x19, x20,[sp],#16
132    pop_v_regs
133    ret
134
135
136
137
138
139///******************************************************************************
140
141
142///**
143//*******************************************************************************
144//*
145//*ih264_intra_pred_luma_8x8_mode_horz
146//*
147//* @brief
148//*  Perform Intra prediction for  luma_8x8 mode:horizontal
149//*
150//* @par Description:
151//*  Perform Intra prediction for  luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
152//*
153//* @param[in] pu1_src
154//*  UWORD8 pointer to the source
155//*
156//* @param[out] pu1_dst
157//*  UWORD8 pointer to the destination
158//*
159//* @param[in] src_strd
160//*  integer source stride
161//*
162//* @param[in] dst_strd
163//*  integer destination stride
164//*
165//* @param[in] ui_neighboravailability
166//* availability of neighbouring pixels(Not used in this function)
167//*
168//* @returns
169//*
170//* @remarks
171//*  None
172//*
173//*******************************************************************************
174//*/
175//void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
176//                                         UWORD8 *pu1_dst,
177//                                         WORD32 src_strd,
178//                                         WORD32 dst_strd,
179//                                         WORD32 ui_neighboravailability)
180//**************Variables Vs Registers*****************************************
181//    x0 => *pu1_src
182//    x1 => *pu1_dst
183//    x2 =>  src_strd
184//    x3 =>  dst_strd
185//   x4 =>  ui_neighboravailability
186
187
188    .global ih264_intra_pred_luma_8x8_mode_horz_av8
189
190ih264_intra_pred_luma_8x8_mode_horz_av8:
191
192
193
194    // STMFD sp!, {x4-x12, x14}          //store register values to stack
195    push_v_regs
196    stp       x19, x20, [sp, #-16]!
197    add       x0, x0, #7
198    mov       x2 , #-1
199
200    ldrb      w5, [x0], #-1
201    sxtw      x5, w5
202    ldrb      w6, [x0], #-1
203    sxtw      x6, w6
204    dup       v0.8b, w5
205    st1       {v0.8b}, [x1], x3
206    ldrb      w7, [x0], #-1
207    sxtw      x7, w7
208    dup       v1.8b, w6
209    st1       {v1.8b}, [x1], x3
210    dup       v2.8b, w7
211    ldrb      w8, [x0], #-1
212    sxtw      x8, w8
213    dup       v3.8b, w8
214    st1       {v2.8b}, [x1], x3
215    ldrb      w5, [x0], #-1
216    sxtw      x5, w5
217    st1       {v3.8b}, [x1], x3
218    dup       v0.8b, w5
219    ldrb      w6, [x0], #-1
220    sxtw      x6, w6
221    st1       {v0.8b}, [x1], x3
222    ldrb      w7, [x0], #-1
223    sxtw      x7, w7
224    dup       v1.8b, w6
225    dup       v2.8b, w7
226    st1       {v1.8b}, [x1], x3
227    ldrb      w8, [x0], #-1
228    sxtw      x8, w8
229    dup       v3.8b, w8
230    st1       {v2.8b}, [x1], x3
231    st1       {v3.8b}, [x1], x3
232
233    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
234    ldp       x19, x20, [sp], #16
235    pop_v_regs
236    ret
237
238
239
240
241
242
243
244///******************************************************************************
245
246
247///**
248//*******************************************************************************
249//*
250//*ih264_intra_pred_luma_8x8_mode_dc
251//*
252//* @brief
253//*  Perform Intra prediction for  luma_8x8 mode:DC
254//*
255//* @par Description:
256//*  Perform Intra prediction for  luma_8x8 mode:DC ,described in sec 8.3.2.2.3
257//*
258//* @param[in] pu1_src
259//*  UWORD8 pointer to the source
260//*
261//* @param[out] pu1_dst
262//*  UWORD8 pointer to the destination
263//*
264//* @param[in] src_strd
265//*  integer source stride
266//*
267//* @param[in] dst_strd
268//*  integer destination stride
269//*
270//* @param[in] ui_neighboravailability
271//*  availability of neighbouring pixels
272//*
273//* @returns
274//*
275//* @remarks
276//*  None
277//*
278//*******************************************************************************/
279//void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
280//                                       UWORD8 *pu1_dst,
281//                                       WORD32 src_strd,
282//                                       WORD32 dst_strd,
283//                                       WORD32 ui_neighboravailability)
284
285//**************Variables Vs Registers*****************************************
286//    x0 => *pu1_src
287//    x1 => *pu1_dst
288//    x2 =>  src_strd
289//    x3 =>  dst_strd
290//   x4 =>  ui_neighboravailability
291
292
293    .global ih264_intra_pred_luma_8x8_mode_dc_av8
294
295ih264_intra_pred_luma_8x8_mode_dc_av8:
296
297
298
299    // STMFD sp!, {x4-x12, x14}          //store register values to stack
300    push_v_regs
301    stp       x19, x20, [sp, #-16]!
302
303    ands      x6, x4, #0x01
304    beq       top_available             //LEFT NOT AVAILABLE
305
306    add       x10, x0, #7
307    mov       x2, #-1
308    ldrb      w5, [x10], -1
309    sxtw      x5, w5
310    ldrb      w6, [x10], -1
311    sxtw      x6, w6
312    ldrb      w7, [x10], -1
313    sxtw      x7, w7
314    add       x5, x5, x6
315    ldrb      w8, [x10], -1
316    sxtw      x8, w8
317    add       x5, x5, x7
318    ldrb      w6, [x10], -1
319    sxtw      x6, w6
320    add       x5, x5, x8
321    ldrb      w7, [x10], -1
322    sxtw      x7, w7
323    add       x5, x5, x6
324    ldrb      w8, [x10], -1
325    sxtw      x8, w8
326    add       x5, x5, x7
327    ands      x11, x4, #0x04            // CHECKING IF TOP_AVAILABLE  ELSE BRANCHING TO ONLY LEFT AVAILABLE
328    add       x5, x5, x8
329    ldrb      w6, [x10], -1
330    sxtw      x6, w6
331    add       x5, x5, x6
332    beq       left_available
333    add       x10, x0, #9
334    //    BOTH LEFT AND TOP AVAILABLE
335    ld1       {v0.8b}, [x10]
336    uaddlp    v1.4h, v0.8b
337    uaddlp    v3.2s, v1.4h
338    uaddlp    v2.1d, v3.2s
339    dup       v10.8h, w5
340    dup       v8.8h, v2.h[0]
341    add       v12.8h, v8.8h , v10.8h
342    sqrshrun  v31.8b, v12.8h, #4
343    st1       {v31.8b}, [x1], x3
344    st1       {v31.8b}, [x1], x3
345    st1       {v31.8b}, [x1], x3
346    st1       {v31.8b}, [x1], x3
347    st1       {v31.8b}, [x1], x3
348    st1       {v31.8b}, [x1], x3
349    st1       {v31.8b}, [x1], x3
350    st1       {v31.8b}, [x1], x3
351    b         end_func
352
353top_available: // ONLT TOP AVAILABLE
354    ands      x11, x4, #0x04            // CHECKING TOP AVAILABILTY  OR ELSE BRANCH TO NONE AVAILABLE
355    beq       none_available
356
357    add       x10, x0, #9
358    ld1       {v10.8b}, [x10]
359    uaddlp    v14.4h, v10.8b
360    uaddlp    v13.2s, v14.4h
361    uaddlp    v12.1d, v13.2s
362    rshrn     v4.8b, v12.8h, #3
363    dup       v31.8b, v4.b[0]
364    st1       {v31.8b}, [x1], x3
365    st1       {v31.8b}, [x1], x3
366    st1       {v31.8b}, [x1], x3
367    st1       {v31.8b}, [x1], x3
368    st1       {v31.8b}, [x1], x3
369    st1       {v31.8b}, [x1], x3
370    st1       {v31.8b}, [x1], x3
371    st1       {v31.8b}, [x1], x3
372    b         end_func
373
374
375left_available: //ONLY LEFT AVAILABLE
376    add       x5, x5, #4
377    lsr       x5, x5, #3
378    dup       v0.8b, w5
379    st1       {v0.8b}, [x1], x3
380    st1       {v0.8b}, [x1], x3
381    st1       {v0.8b}, [x1], x3
382    st1       {v0.8b}, [x1], x3
383    st1       {v0.8b}, [x1], x3
384    st1       {v0.8b}, [x1], x3
385    st1       {v0.8b}, [x1], x3
386    st1       {v0.8b}, [x1], x3
387    b         end_func
388
389none_available:                         //NONE AVAILABLE
390    mov       x9, #128
391    dup       v0.8b, w9
392    st1       {v0.8b}, [x1], x3
393    st1       {v0.8b}, [x1], x3
394    st1       {v0.8b}, [x1], x3
395    st1       {v0.8b}, [x1], x3
396    st1       {v0.8b}, [x1], x3
397    st1       {v0.8b}, [x1], x3
398    st1       {v0.8b}, [x1], x3
399    st1       {v0.8b}, [x1], x3
400
401
402end_func:
403
404    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
405    ldp       x19, x20, [sp], #16
406    pop_v_regs
407    ret
408
409
410
411
412
413
414///**
415//*******************************************************************************
416//*
417//*ih264_intra_pred_luma_8x8_mode_diag_dl
418//*
419//* @brief
420//*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Left
421//*
422//* @par Description:
423//*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
424//*
425//* @param[in] pu1_src
426//*  UWORD8 pointer to the source
427//*
428//* @param[out] pu1_dst
429//*  UWORD8 pointer to the destination
430//*
431//* @param[in] src_strd
432//*  integer source stride
433//*
434//* @param[in] dst_strd
435//*  integer destination stride
436//*
437//* @param[in] ui_neighboravailability
438//*  availability of neighbouring pixels
439//*
440//* @returns
441//*
442//* @remarks
443//*  None
444//*
445//*******************************************************************************/
446//void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
447//                                            UWORD8 *pu1_dst,
448//                                            WORD32 src_strd,
449//                                              WORD32 dst_strd,
450//                                              WORD32 ui_neighboravailability)
451
452//**************Variables Vs Registers*****************************************
453//    x0 => *pu1_src
454//    x1 => *pu1_dst
455//    x2 =>  src_strd
456//    x3 =>  dst_strd
457//   x4 =>  ui_neighboravailability
458
459    .global ih264_intra_pred_luma_8x8_mode_diag_dl_av8
460
461ih264_intra_pred_luma_8x8_mode_diag_dl_av8:
462
463    // STMFD sp!, {x4-x12, x14}          //store register values to stack
464    push_v_regs
465    stp       x19, x20, [sp, #-16]!
466
467    add       x0, x0, #9
468    sub       x5, x3, #4
469    add       x6, x0, #15
470    ld1       { v0.16b}, [x0]
471    mov       v1.d[0], v0.d[1]
472    ext       v4.16b, v0.16b , v0.16b , #2
473    mov       v5.d[0], v4.d[1]
474    ext       v2.16b, v0.16b , v0.16b , #1
475    mov       v3.d[0], v2.d[1]
476    ld1       {v5.b}[6], [x6]
477    // q1 = q0 shifted to left once
478    // q2 = q1 shifted to left once
479    uaddl     v20.8h, v0.8b, v2.8b      //Adding for FILT121
480    uaddl     v22.8h, v1.8b, v3.8b
481    uaddl     v24.8h, v2.8b, v4.8b
482    uaddl     v26.8h, v3.8b, v5.8b
483    add       v24.8h, v20.8h , v24.8h
484    add       v26.8h, v22.8h , v26.8h
485
486    sqrshrun  v4.8b, v24.8h, #2
487    sqrshrun  v5.8b, v26.8h, #2
488    mov       v4.d[1], v5.d[0]
489    //Q2 has all FILT121 values
490    st1       {v4.8b}, [x1], x3
491    ext       v18.16b, v4.16b , v4.16b , #1
492    ext       v16.16b, v18.16b , v18.16b , #1
493    st1       {v18.8b}, [x1], x3
494    ext       v14.16b, v16.16b , v16.16b , #1
495    st1       {v16.8b}, [x1], x3
496    st1       {v14.8b}, [x1], x3
497    st1       {v4.s}[1], [x1], #4
498    st1       {v5.s}[0], [x1], x5
499    st1       {v18.s}[1], [x1], #4
500    st1       {v18.s}[2], [x1], x5
501    st1       {v16.s}[1], [x1], #4
502    st1       {v16.s}[2], [x1], x5
503    st1       {v14.s}[1], [x1], #4
504    st1       {v14.s}[2], [x1], x5
505
506
507end_func_diag_dl:
508    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
509    ldp       x19, x20, [sp], #16
510    pop_v_regs
511    ret
512
513
514
515
516///**
517//*******************************************************************************
518//*
519//*ih264_intra_pred_luma_8x8_mode_diag_dr
520//*
521//* @brief
522//* Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Right
523//*
524//* @par Description:
525//*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
526//*
527//* @param[in] pu1_src
528//*  UWORD8 pointer to the source
529//*
530//* @param[out] pu1_dst
531//*  UWORD8 pointer to the destination
532//*
533//* @param[in] src_strd
534//*  integer source stride
535//*
536//* @param[in] dst_strd
537//*  integer destination stride
538//*
539//* @param[in] ui_neighboravailability
540//*  availability of neighbouring pixels
541//*
542//* @returns
543//*
544//* @remarks
545//*  None
546//*
547//*******************************************************************************/
548//void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
549//                                            UWORD8 *pu1_dst,
550//                                            WORD32 src_strd,
551//                                              WORD32 dst_strd,
552//                                              WORD32 ui_neighboravailability)
553
554//**************Variables Vs Registers*****************************************
555//    x0 => *pu1_src
556//    x1 => *pu1_dst
557//    x2 =>  src_strd
558//    x3 =>  dst_strd
559//   x4 =>  ui_neighboravailability
560
561
562    .global ih264_intra_pred_luma_8x8_mode_diag_dr_av8
563
564ih264_intra_pred_luma_8x8_mode_diag_dr_av8:
565
566    // STMFD sp!, {x4-x12, x14}          //store register values to stack
567    push_v_regs
568    stp       x19, x20, [sp, #-16]!
569
570
571    ld1       { v0.16b}, [x0]
572    mov       v1.d[0], v0.d[1]
573    add       x0, x0, #1
574    ld1       { v2.16b}, [x0]
575    mov       v3.d[0], v2.d[1]
576    ext       v4.16b, v2.16b , v2.16b , #1
577    mov       v5.d[0], v4.d[1]
578    // q1 = q0 shifted to left once
579    // q2 = q1 shifted to left once
580    uaddl     v20.8h, v0.8b, v2.8b      //Adding for FILT121
581    uaddl     v22.8h, v1.8b, v3.8b
582    uaddl     v24.8h, v2.8b, v4.8b
583    uaddl     v26.8h, v3.8b, v5.8b
584    add       v24.8h, v20.8h , v24.8h
585    add       v26.8h, v22.8h , v26.8h
586    sqrshrun  v4.8b, v24.8h, #2
587    sqrshrun  v5.8b, v26.8h, #2
588    mov       v4.d[1], v5.d[0]
589    //Q2 has all FILT121 values
590    sub       x5, x3, #4
591    ext       v18.16b, v4.16b , v4.16b , #15
592    st1       {v18.d}[1], [x1], x3
593    ext       v16.16b, v18.16b , v18.16b , #15
594    st1       {v16.d}[1], [x1], x3
595    ext       v14.16b, v16.16b , v16.16b , #15
596    st1       {v14.d}[1], [x1], x3
597    st1       {v4.s}[1], [x1], #4
598    st1       {v5.s}[0], [x1], x5
599    st1       {v18.s}[1], [x1], #4
600    st1       {v18.s}[2], [x1], x5
601    st1       {v16.s}[1], [x1], #4
602    st1       {v16.s}[2], [x1], x5
603    st1       {v14.s}[1], [x1], #4
604    st1       {v14.s}[2], [x1], x5
605    st1       {v4.8b}, [x1], x3
606
607end_func_diag_dr:
608    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
609    ldp       x19, x20, [sp], #16
610    pop_v_regs
611    ret
612
613
614
615
616///**
617//*******************************************************************************
618//*
619//*ih264_intra_pred_luma_8x8_mode_vert_r
620//*
621//* @brief
622//* Perform Intra prediction for  luma_8x8 mode:Vertical_Right
623//*
624//* @par Description:
625//*   Perform Intra prediction for  luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
626//*
627//* @param[in] pu1_src
628//*  UWORD8 pointer to the source
629//*
630//* @param[out] pu1_dst
631//*  UWORD8 pointer to the destination
632//*
633//* @param[in] src_strd
634//*  integer source stride
635//*
636//* @param[in] dst_strd
637//*  integer destination stride
638//*
639//* @param[in] ui_neighboravailability
640//*  availability of neighbouring pixels
641//*
642//* @returns
643//*
644//* @remarks
645//*  None
646//*
647//*******************************************************************************/
648//void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
649//                                            UWORD8 *pu1_dst,
650//                                            WORD32 src_strd,
651//                                              WORD32 dst_strd,
652//                                              WORD32 ui_neighboravailability)
653
654//**************Variables Vs Registers*****************************************
655//    x0 => *pu1_src
656//    x1 => *pu1_dst
657//    x2 =>  src_strd
658//    x3 =>  dst_strd
659//   x4 =>  ui_neighboravailability
660
661
662    .global ih264_intra_pred_luma_8x8_mode_vert_r_av8
663
664ih264_intra_pred_luma_8x8_mode_vert_r_av8:
665
666    // STMFD sp!, {x4-x12, x14}          //store register values to stack
667    push_v_regs
668    stp       x19, x20, [sp, #-16]!
669
670    ld1       { v0.16b}, [x0]
671    mov       v1.d[0], v0.d[1]
672    add       x0, x0, #1
673    ld1       { v2.16b}, [x0]
674    mov       v3.d[0], v2.d[1]
675    ext       v4.16b, v2.16b , v2.16b , #1
676    mov       v5.d[0], v4.d[1]
677    // q1 = q0 shifted to left once
678    // q2 = q1 shifted to left once
679    uaddl     v20.8h, v0.8b, v2.8b
680    uaddl     v22.8h, v1.8b, v3.8b
681    uaddl     v24.8h, v2.8b, v4.8b
682    uaddl     v26.8h, v3.8b, v5.8b
683    add       v24.8h, v20.8h , v24.8h
684    add       v26.8h, v22.8h , v26.8h
685
686    sqrshrun  v4.8b, v20.8h, #1
687    sqrshrun  v5.8b, v22.8h, #1
688    mov       v4.d[1], v5.d[0]
689    sqrshrun  v6.8b, v24.8h, #2
690    sqrshrun  v7.8b, v26.8h, #2
691    mov       v6.d[1], v7.d[0]
692    //Q2 has all FILT11 values
693    //Q3 has all FILT121 values
694    sub       x5, x3, #6
695    sub       x6, x3, #4
696    st1       {v5.8b}, [x1], x3         // row 0
697    ext       v18.16b, v6.16b , v6.16b , #15
698    mov       v22.16b , v18.16b
699    ext       v16.16b, v4.16b , v4.16b , #1
700    st1       {v18.d}[1], [x1], x3      //row 1
701    mov       v14.16b , v16.16b
702    ext       v20.16b, v4.16b , v4.16b , #15
703    uzp1      v17.16b, v16.16b, v18.16b
704    uzp2      v18.16b, v16.16b, v18.16b
705    mov       v16.16b , v17.16b
706    //row 2
707    ext       v12.16b, v16.16b , v16.16b , #1
708    st1       {v20.d}[1], [x1]
709    st1       {v6.b}[6], [x1], x3
710    //row 3
711
712    st1       {v12.h}[5], [x1], #2
713    st1       {v6.s}[2], [x1], #4
714    st1       {v6.h}[6], [x1], x5
715    //row 4
716    st1       {v18.h}[5], [x1], #2
717    st1       {v4.s}[2], [x1], #4
718    st1       {v4.h}[6], [x1], x5
719    //row 5
720    ext       v26.16b, v18.16b , v18.16b , #1
721    st1       {v16.h}[5], [x1], #2
722    st1       {v22.s}[2], [x1], #4
723    st1       {v22.h}[6], [x1], x5
724    //row 6
725    st1       {v26.h}[4], [x1], #2
726    st1       {v26.b}[10], [x1], #1
727    st1       {v4.b}[8], [x1], #1
728    st1       {v14.s}[2], [x1], x6
729    //row 7
730    st1       {v12.s}[2], [x1], #4
731    st1       {v6.s}[2], [x1], #4
732
733end_func_vert_r:
734    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
735    ldp       x19, x20, [sp], #16
736    pop_v_regs
737    ret
738
739
740
741
742///**
743//*******************************************************************************
744//*
745//*ih264_intra_pred_luma_8x8_mode_horz_d
746//*
747//* @brief
748//* Perform Intra prediction for  luma_8x8 mode:Horizontal_Down
749//*
750//* @par Description:
751//*   Perform Intra prediction for  luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
752//*
753//* @param[in] pu1_src
754//*  UWORD8 pointer to the source
755//*
756//* @param[out] pu1_dst
757//*  UWORD8 pointer to the destination
758//*
759//* @param[in] src_strd
760//*  integer source stride
761//*
762//* @param[in] dst_strd
763//*  integer destination stride
764//*
765//* @param[in] ui_neighboravailability
766//*  availability of neighbouring pixels
767//*
768//* @returns
769//*
770//* @remarks
771//*  None
772//*
773//*******************************************************************************/
774//void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
775//                                            UWORD8 *pu1_dst,
776//                                            WORD32 src_strd,
777//                                              WORD32 dst_strd,
778//                                              WORD32 ui_neighboravailability)
779
780//**************Variables Vs Registers*****************************************
781//    x0 => *pu1_src
782//    x1 => *pu1_dst
783//    x2 =>  src_strd
784//    x3 =>  dst_strd
785//   x4 =>  ui_neighboravailability
786
787    .global ih264_intra_pred_luma_8x8_mode_horz_d_av8
788
789ih264_intra_pred_luma_8x8_mode_horz_d_av8:
790
791    // STMFD sp!, {x4-x12, x14}          //store register values to stack
792    push_v_regs
793    stp       x19, x20, [sp, #-16]!
794
795    ld1       { v0.16b}, [x0]
796    mov       v1.d[0], v0.d[1]
797    add       x0, x0, #1
798    ld1       { v2.16b}, [x0]
799    mov       v3.d[0], v2.d[1]
800    ext       v4.16b, v2.16b , v2.16b , #1
801    mov       v5.d[0], v4.d[1]
802    // q1 = q0 shifted to left once
803    // q2 = q1 shifted to left once
804    uaddl     v20.8h, v0.8b, v2.8b
805    uaddl     v22.8h, v1.8b, v3.8b
806    uaddl     v24.8h, v2.8b, v4.8b
807    uaddl     v26.8h, v3.8b, v5.8b
808    add       v24.8h, v20.8h , v24.8h
809    add       v26.8h, v22.8h , v26.8h
810
811    sqrshrun  v4.8b, v20.8h, #1
812    sqrshrun  v5.8b, v22.8h, #1
813    mov       v4.d[1], v5.d[0]
814    sqrshrun  v6.8b, v24.8h, #2
815    sqrshrun  v7.8b, v26.8h, #2
816    mov       v6.d[1], v7.d[0]
817    //Q2 has all FILT11 values
818    //Q3 has all FILT121 values
819    mov       v8.16b, v4.16b
820    mov       v10.16b, v6.16b
821    sub       x6, x3, #6
822    trn1      v9.16b, v8.16b, v10.16b
823    trn2      v10.16b, v8.16b, v10.16b  //
824    mov       v8.16b, v9.16b
825    mov       v12.16b, v8.16b
826    mov       v14.16b, v10.16b
827    sub       x5, x3, #4
828    trn1      v13.8h, v12.8h, v14.8h
829    trn2      v14.8h, v12.8h, v14.8h
830    mov       v12.16b, v13.16b
831    ext       v16.16b, v6.16b , v6.16b , #14
832    //ROW 0
833    st1       {v16.d}[1], [x1]
834    st1       {v10.h}[3], [x1], x3
835
836    //ROW 1
837    st1       {v14.s}[1], [x1], #4
838    st1       {v6.s}[2], [x1], x5
839    //ROW 2
840    st1       {v10.h}[2], [x1], #2
841    st1       {v14.s}[1], [x1], #4
842    st1       {v7.h}[0], [x1], x6
843    //ROW 3
844    st1       {v12.s}[1], [x1], #4
845    st1       {v14.s}[1], [x1], x5
846    //ROW 4
847    st1       {v14.h}[1], [x1], #2
848    st1       {v12.s}[1], [x1], #4
849    st1       {v14.h}[2], [x1], x6
850    //ROW 5
851    st1       {v14.s}[0], [x1], #4
852    st1       {v12.s}[1], [x1], x5
853    //ROW 6
854    st1       {v10.h}[0], [x1], #2
855    st1       {v8.h}[1], [x1], #2
856    st1       {v14.h}[1], [x1], #2
857    st1       {v12.h}[2], [x1], x6
858    //ROW 7
859    st1       {v12.s}[0], [x1], #4
860    st1       {v14.s}[0], [x1], x5
861
862end_func_horz_d:
863    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
864    ldp       x19, x20, [sp], #16
865    pop_v_regs
866    ret
867
868
869
870
871
872///**
873//*******************************************************************************
874//*
875//*ih264_intra_pred_luma_8x8_mode_vert_l
876//*
877//* @brief
878//*  Perform Intra prediction for  luma_8x8 mode:Vertical_Left
879//*
880//* @par Description:
881//*   Perform Intra prediction for  luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
882//*
883//* @param[in] pu1_src
884//*  UWORD8 pointer to the source
885//*
886//* @param[out] pu1_dst
887//*  UWORD8 pointer to the destination
888//*
889//* @param[in] src_strd
890//*  integer source stride
891//*
892//* @param[in] dst_strd
893//*  integer destination stride
894//*
895//* @param[in] ui_neighboravailability
896//*  availability of neighbouring pixels
897//*
898//* @returns
899//*
900//* @remarks
901//*  None
902//*
903//*******************************************************************************/
904//void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
905//                                            UWORD8 *pu1_dst,
906//                                            WORD32 src_strd,
907//                                              WORD32 dst_strd,
908//                                              WORD32 ui_neighboravailability)
909
910//**************Variables Vs Registers*****************************************
911//    x0 => *pu1_src
912//    x1 => *pu1_dst
913//    x2 =>  src_strd
914//    x3 =>  dst_strd
915//   x4 =>  ui_neighboravailability
916
917
918    .global ih264_intra_pred_luma_8x8_mode_vert_l_av8
919
920ih264_intra_pred_luma_8x8_mode_vert_l_av8:
921
922    // STMFD sp!, {x4-x12, x14}         //Restoring registers from stack
923    push_v_regs
924    stp       x19, x20, [sp, #-16]!
925    add       x0, x0, #9
926    ld1       { v0.16b}, [x0]
927    mov       v1.d[0], v0.d[1]
928    add       x0, x0, #1
929    ld1       { v2.16b}, [x0]
930    mov       v3.d[0], v2.d[1]
931    ext       v4.16b, v2.16b , v2.16b , #1
932    mov       v5.d[0], v4.d[1]
933    uaddl     v20.8h, v0.8b, v2.8b
934    uaddl     v22.8h, v1.8b, v3.8b
935    uaddl     v24.8h, v2.8b, v4.8b
936    uaddl     v26.8h, v3.8b, v5.8b
937    add       v24.8h, v20.8h , v24.8h
938    add       v26.8h, v22.8h , v26.8h
939
940    sqrshrun  v4.8b, v20.8h, #1
941    sqrshrun  v5.8b, v22.8h, #1
942    mov       v4.d[1], v5.d[0]
943    sqrshrun  v6.8b, v24.8h, #2
944    ext       v8.16b, v4.16b , v4.16b , #1
945    sqrshrun  v7.8b, v26.8h, #2
946    mov       v6.d[1], v7.d[0]
947    //Q2 has all FILT11 values
948    //Q3 has all FILT121 values
949
950    ext       v10.16b, v6.16b , v6.16b , #1
951    //ROW 0,1
952    st1       {v4.8b}, [x1], x3
953    st1       {v6.8b}, [x1], x3
954
955    ext       v12.16b, v8.16b , v8.16b , #1
956    ext       v14.16b, v10.16b , v10.16b , #1
957    //ROW 2,3
958    st1       {v8.8b}, [x1], x3
959    st1       {v10.8b}, [x1], x3
960
961    ext       v16.16b, v12.16b , v12.16b , #1
962    ext       v18.16b, v14.16b , v14.16b , #1
963    //ROW 4,5
964    st1       {v12.8b}, [x1], x3
965    st1       {v14.8b}, [x1], x3
966    //ROW 6,7
967    st1       {v16.8b}, [x1], x3
968    st1       {v18.8b}, [x1], x3
969
970end_func_vert_l:
971    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
972    ldp       x19, x20, [sp], #16
973    pop_v_regs
974    ret
975
976
977
978
979
980///**
981//*******************************************************************************
982//*
983//*ih264_intra_pred_luma_8x8_mode_horz_u
984//*
985//* @brief
986//*     Perform Intra prediction for  luma_8x8 mode:Horizontal_Up
987//*
988//* @par Description:
989//*      Perform Intra prediction for  luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
990//*
991//* @param[in] pu1_src
992//*  UWORD8 pointer to the source
993//*
994//* @param[out] pu1_dst
995//*  UWORD8 pointer to the destination
996//*
997//* @param[in] src_strd
998//*  integer source stride
999//*
1000//* @param[in] dst_strd
1001//*  integer destination stride
1002//*
1003//* @param[in] ui_neighboravailability
1004//*  availability of neighbouring pixels
1005//*
1006//* @returns
1007//*
1008//* @remarks
1009//*  None
1010//*
1011//*******************************************************************************/
1012//void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
1013//                                           UWORD8 *pu1_dst,
1014//                                           WORD32 src_strd,
1015//                                             WORD32 dst_strd,
1016//                                             WORD32 ui_neighboravailability)
1017
1018//**************Variables Vs Registers*****************************************
1019//    x0 => *pu1_src
1020//    x1 => *pu1_dst
1021//    x2 =>  src_strd
1022//    x3 =>  dst_strd
1023//   x4 =>  ui_neighboravailability
1024
1025    .global ih264_intra_pred_luma_8x8_mode_horz_u_av8
1026
1027ih264_intra_pred_luma_8x8_mode_horz_u_av8:
1028
1029    // STMFD sp!, {x4-x12, x14}          //store register values to stack
1030    push_v_regs
1031    stp       x19, x20, [sp, #-16]!
1032
1033    ld1       {v0.8b}, [x0]
1034    ld1       {v1.b}[7], [x0]
1035    mov       v0.d[1], v1.d[0]
1036    ext       v2.16b, v0.16b , v0.16b , #1
1037    mov       v3.d[0], v2.d[1]
1038    ext       v4.16b, v2.16b , v2.16b , #1
1039    mov       v5.d[0], v4.d[1]
1040
1041    adrp      x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
1042    ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
1043    uaddl     v20.8h, v0.8b, v2.8b
1044    uaddl     v22.8h, v1.8b, v3.8b
1045    uaddl     v24.8h, v2.8b, v4.8b
1046    uaddl     v26.8h, v3.8b, v5.8b
1047    add       v24.8h, v20.8h , v24.8h
1048    add       v26.8h, v22.8h , v26.8h
1049    ld1       { v10.16b}, [x12]
1050    mov       v11.d[0], v10.d[1]
1051    sqrshrun  v4.8b, v20.8h, #1
1052    sqrshrun  v5.8b, v22.8h, #1
1053    mov       v4.d[1], v5.d[0]
1054    sqrshrun  v6.8b, v24.8h, #2
1055    sqrshrun  v7.8b, v26.8h, #2
1056    mov       v6.d[1], v7.d[0]
1057    //Q2 has all FILT11 values
1058    //Q3 has all FILT121 values
1059    mov       v30.16b, v4.16b
1060    mov       v31.16b, v6.16b
1061    tbl       v12.8b, {v30.16b, v31.16b}, v10.8b
1062    dup       v14.16b, v5.b[7]          //
1063    tbl       v13.8b, {v30.16b, v31.16b}, v11.8b
1064    mov       v12.d[1], v13.d[0]
1065    ext       v16.16b, v12.16b , v14.16b , #2
1066    ext       v18.16b, v16.16b , v14.16b , #2
1067    st1       {v12.8b}, [x1], x3        //0
1068    ext       v20.16b, v18.16b , v14.16b , #2
1069    st1       {v16.8b}, [x1], x3        //1
1070    st1       {v18.8b}, [x1], x3        //2
1071    st1       {v20.8b}, [x1], x3        //3
1072    st1       {v13.8b}, [x1], x3        //4
1073    st1       {v16.d}[1], [x1], x3      //5
1074    st1       {v18.d}[1], [x1], x3      //6
1075    st1       {v20.d}[1], [x1], x3      //7
1076
1077
1078end_func_horz_u:
1079    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
1080    ldp       x19, x20, [sp], #16
1081    pop_v_regs
1082    ret
1083
1084
1085