///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///*******************************************************************************
//* @file
//*  ihevc_deblk_luma_horz.s
//*  NOTE(review): the header names the luma file, but the only function
//*  defined here is the chroma horizontal filter - confirm the file name.
//*
//* @brief
//*  contains the function definition for the chroma deblocking filter applied
//*  across a horizontal edge (the edge lies between the row at
//*  pu1_src - src_strd and the row at pu1_src).
//*  the function is written in aarch64 neon assembly (gnu assembler syntax).
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*  - ihevc_deblk_chroma_horz_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************/
//
//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
//                             WORD32 src_strd,
//                             WORD32 quant_param_p,
//                             WORD32 quant_param_q,
//                             WORD32 qp_offset_u,
//                             WORD32 qp_offset_v,
//                             WORD32 tc_offset_div2,
//                             WORD32 filter_flag_p,
//                             WORD32 filter_flag_q)
//
// Register usage on entry (AAPCS64):
//   x0   = pu1_src          w1 = src_strd        w2 = quant_param_p
//   w3   = quant_param_q    w4 = qp_offset_u     w5 = qp_offset_v
//   w6   = tc_offset_div2   w7 = filter_flag_p   [sp] = filter_flag_q
//
// What the code does:
//   - loads 8 bytes from each of four rows: src-2*strd (p1), src-strd (p0),
//     src (q0) and src+strd (q1)
//   - derives two tc values (one from qp_offset_u, one from qp_offset_v) via
//     gai4_ihevc_qp_table and gai4_ihevc_tc_table
//   - delta = clip(-tc, tc, (((q0 - p0) << 2) + p1 - q1 + 4) >> 3), with the
//     first tc applied to even lanes and the second to odd lanes
//   - stores saturate_u8(p0 + delta) to src-strd when filter_flag_p != 0
//   - stores saturate_u8(q0 - delta) to src      when filter_flag_q != 0
//
// Clobbers: x1-x10, x12, v0, v2, v4, v6, v16, v18, v28-v31, flags.
// x19/x20 are saved and restored here; push_v_regs/pop_v_regs come from
// ihevc_neon_macros.s (presumably they save/restore the callee-saved vector
// registers - confirm against the macro file).

.text
.align 4
.include "ihevc_neon_macros.s"

.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table

    .globl ihevc_deblk_chroma_horz_av8

.type ihevc_deblk_chroma_horz_av8, %function

ihevc_deblk_chroma_horz_av8:
    // All WORD32 arguments are consumed through 64-bit registers below.
    // AAPCS64 leaves the upper 32 bits of such argument registers
    // unspecified, so every one of them is sign-extended before use.
    sxtw        x1,w1                       // src_strd
    sxtw        x2,w2                       // quant_param_p
    sxtw        x3,w3                       // quant_param_q
    sxtw        x4,w4                       // qp_offset_u
    sxtw        x5,w5                       // qp_offset_v
    sxtw        x6,w6                       // tc_offset_div2
    sxtw        x7,w7                       // filter_flag_p
    ldr         w9, [sp]                    // filter_flag_q: 9th argument, read
    sxtw        x9,w9                       //   before sp is moved below
    push_v_regs
    stp         x19, x20,[sp,#-16]!         // x20 is used as scratch below

    mov         x10, x4                     // x10 = qp_offset_u
    mov         x8, x7                      // x8  = filter_flag_p
    mov         x7, x5                      // x7  = qp_offset_v
    mov         x4, x6                      // x4  = tc_offset_div2

    sub         x12,x0,x1                   // x12 = src - strd      (p0 row)
    ld1         {v0.8b},[x0]                // v0  = q0 row
    sub         x5,x12,x1                   // x5  = src - 2*strd    (p1 row)
    add         x6,x0,x1                    // x6  = src + strd      (q1 row)
    add         x1,x2,x3                    // quant_param_p + quant_param_q
    uxtl        v0.8h, v0.8b                // widen q0 to 16 bit
    ld1         {v2.8b},[x12]               // v2  = p0 row
    add         x2,x1,#1                    // qp_p + qp_q + 1
    ld1         {v4.8b},[x5]                // v4  = p1 row
    ld1         {v16.8b},[x6]               // v16 = q1 row

    // ---- first qp: index = qp_offset_u + ((qp_p + qp_q + 1) >> 1) ----
    adds        x1,x10,x2,asr #1
    uxtl        v2.8h, v2.8b                // widen p0 to 16 bit
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]
    bmi         l1.3312                     // negative index is used as is
    cmp         x1,#0x39                    // 0x39 = 57
    bgt         lbl78                       // index > 57: no table lookup
    ldr         w1, [x3,x1,lsl #2]          // qp = gai4_ihevc_qp_table[index]
lbl78:
    sub         x20,x1,#6
    csel        x1, x20, x1,gt              // index > 57: qp = index - 6
l1.3312:

    // ---- second qp: index = qp_offset_v + ((qp_p + qp_q + 1) >> 1) ----
    adds        x2,x7,x2,asr #1
    uxtl        v4.8h, v4.8b                // widen p1 to 16 bit
    bmi         l1.3332                     // negative index is used as is
    cmp         x2,#0x39                    // 0x39 = 57
    bgt         lbl85
    ldr         w2, [x3,x2,lsl #2]
lbl85:
    sub         x20,x2,#6
    csel        x2, x20, x2,gt
l1.3332:

    // ---- first tc index = clip(qp + 2 + 2*tc_offset_div2, 0, 53) ----
    add         x1,x1,x4,lsl #1
    sub         v6.8h, v0.8h , v2.8h        // q0 - p0
    add         x3,x1,#2
    cmp         x3,#0x35                    // 0x35 = 53
    mov         x20,#0x35
    csel        x1, x20, x1,gt              // above range: clamp to 53
    shl         v6.8h, v6.8h,#2             // (q0 - p0) << 2
    uxtl        v16.8h, v16.8b              // widen q1 to 16 bit
    bgt         l1.3368                     // flags still from the cmp above
    adds        x3,x1,#2
    add         x20,x1,#2
    csel        x1, x20, x1,pl              // non-negative: keep value + 2
    mov         x20,#0
    csel        x1, x20, x1,mi              // negative: clamp to 0
l1.3368:

    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    add         v4.8h, v6.8h , v4.8h        // ((q0 - p0) << 2) + p1

    // ---- second tc index, same clamp as above ----
    add         x2,x2,x4,lsl #1
    sub         v6.8h, v4.8h , v16.8h       // ((q0 - p0) << 2) + p1 - q1
    add         x4,x2,#2
    ldr         w1, [x3,x1,lsl #2]          // w1 = first tc
    cmp         x4,#0x35                    // 0x35 = 53
    mov         x20,#0x35
    csel        x2, x20, x2,gt
    bgt         l1.3412
    adds        x4,x2,#2
    add         x20,x2,#2
    csel        x2, x20, x2,pl
    mov         x20,#0
    csel        x2, x20, x2,mi
l1.3412:
    ldr         w2, [x3,x2,lsl #2]          // w2 = second tc

    // Flags set here are consumed by the "beq l1.3528" further down; none
    // of the instructions in between modify the condition flags.
    cmp         x8,#0                       // filter_flag_p == 0 ?

    dup         v31.8h,w2                   // second tc in every lane
    dup         v30.8h,w1                   // first tc in every lane
    neg         x1, x1                      // x1 = -(first tc)
    srshr       v6.8h, v6.8h,#3             // rounding shift: (x + 4) >> 3
    dup         v28.8h,w1
    neg         x1, x2                      // x1 = -(second tc)
    zip1        v4.8h, v30.8h, v31.8h       // +tc: first in even, second in odd lanes
    dup         v29.8h,w1
    zip1        v18.8h, v28.8h, v29.8h      // -tc, interleaved the same way
    smin        v16.8h, v6.8h , v4.8h       // min(delta, tc)
    smax        v4.8h, v18.8h , v16.8h      // delta clipped to [-tc, tc]
    add         v2.8h, v2.8h , v4.8h        // p0 + delta
    sub         v0.8h, v0.8h , v4.8h        // q0 - delta
    sqxtun      v2.8b, v2.8h                // saturate to unsigned 8 bit
    sqxtun      v0.8b, v0.8h

    beq         l1.3528                     // filter_flag_p == 0: skip p0 store
    st1         {v2.8b},[x12]
l1.3528:
    cmp         x9,#0                       // filter_flag_q == 0 ?
    beq         l1.3540                     //   then skip q0 store
    st1         {v0.8b},[x0]
l1.3540:
    ldp         x19, x20,[sp],#16
    pop_v_regs
    ret