///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_horz.s
//*
//* @brief
//*  contains the function definition for deblocking a horizontal chroma edge.
//*  the function is hand-written AArch64 NEON assembly in GNU assembler
//*  syntax (it relies on .include and :got: relocation operators).
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*  - ihevc_deblk_chroma_horz_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************/
//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
//                             WORD32 src_strd,
//                             WORD32 quant_param_p,
//                             WORD32 quant_param_q,
//                             WORD32 qp_offset_u,
//                             WORD32 qp_offset_v,
//                             WORD32 tc_offset_div2,
//                             WORD32 filter_flag_p,
//                             WORD32 filter_flag_q)
//
// ABI: AAPCS64.
//
// Arguments on entry:
//   x0   = pu1_src          (first row below the edge, "q0" row)
//   w1   = src_strd
//   w2   = quant_param_p
//   w3   = quant_param_q
//   w4   = qp_offset_u
//   w5   = qp_offset_v
//   w6   = tc_offset_div2
//   w7   = filter_flag_p
//   [sp] = filter_flag_q
//
// Rows touched (8 bytes each):
//   pu1_src - 2*src_strd  (p1, read only)
//   pu1_src -   src_strd  (p0, read; written when filter_flag_p != 0)
//   pu1_src               (q0, read; written when filter_flag_q != 0)
//   pu1_src +   src_strd  (q1, read only)
//
// Per 16-bit lane:
//   delta = clip(((q0 - p0) * 4 + p1 - q1 + 4) >> 3, -tc, +tc)
//   p0'   = sat_u8(p0 + delta)
//   q0'   = sat_u8(q0 - delta)
// Even lanes use the tc derived from qp_offset_u and odd lanes the tc derived
// from qp_offset_v (presumably the samples are interleaved U/V - confirm
// against the caller).
//
// Registers written: x0-x12 (not x11), x20, v0, v2, v4, v6, v16, v18, v28-v31,
// NZCV. x19/x20 and whatever push_v_regs covers are saved and restored.

.text
.align 4
.include "ihevc_neon_macros.s"

.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_horz_av8

.type ihevc_deblk_chroma_horz_av8, %function

ihevc_deblk_chroma_horz_av8:
    // AAPCS64 leaves bits [63:32] of a register carrying a 32-bit argument
    // unspecified. Every WORD32 argument is used below in 64-bit arithmetic,
    // addressing or compares, so all of them are sign-extended first.
    sxtw        x1,w1                   // src_strd
    sxtw        x2,w2                   // quant_param_p
    sxtw        x3,w3                   // quant_param_q
    sxtw        x4,w4                   // qp_offset_u
    sxtw        x5,w5                   // qp_offset_v
    sxtw        x6,w6                   // tc_offset_div2
    sxtw        x7,w7                   // filter_flag_p
    ldr         w9, [sp]                // filter_flag_q: read before sp moves
    sxtw        x9,w9

    // NOTE(review): the body only uses v0-v6, v16-v18 and v28-v31 and, of the
    // callee-saved integer registers, only x20. The saves are kept as-is.
    push_v_regs
    stp         x19, x20,[sp,#-16]!

    mov         x10, x4                 // x10 = qp_offset_u
    mov         x8, x7                  // x8  = filter_flag_p
    mov         x7, x5                  // x7  = qp_offset_v
    mov         x4, x6                  // x4  = tc_offset_div2

    // Load the four rows around the edge and widen them to 16 bits, while
    // computing the two chroma QP values in the scalar pipeline.
    sub         x12,x0,x1               // x12 = &p0 row = pu1_src - src_strd
    ld1         {v0.8b},[x0]            // v0  = q0 row
    sub         x5,x12,x1               // x5  = &p1 row = pu1_src - 2*src_strd
    add         x6,x0,x1                // x6  = &q1 row = pu1_src + src_strd
    add         x1,x2,x3                // quant_param_p + quant_param_q
    uxtl        v0.8h, v0.8b
    ld1         {v2.8b},[x12]           // v2  = p0 row
    add         x2,x1,#1                // x2  = qp_p + qp_q + 1
    ld1         {v4.8b},[x5]            // v4  = p1 row
    ld1         {v16.8b},[x6]           // v16 = q1 row
    adds        x1,x10,x2,asr #1        // x1  = qp_offset_u + ((qp_p+qp_q+1)>>1)
    uxtl        v2.8h, v2.8b
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]

    // QP mapping for the U value (x1):
    //   x1 <  0        : left unchanged
    //   0 <= x1 <= 57  : x1 = gai4_ihevc_qp_table[x1]
    //   x1 >  57       : x1 = x1 - 6
    bmi         .Lqp_u_done             // N flag from the adds above
    cmp         x1,#0x39
    bgt         .Lqp_u_high
    ldr         w1, [x3,x1,lsl #2]
.Lqp_u_high:
    sub         x20,x1,#6
    csel        x1, x20, x1,gt          // gt still refers to the cmp above
.Lqp_u_done:

    // Same mapping for the V value (x2).
    adds        x2,x7,x2,asr #1         // x2 = qp_offset_v + ((qp_p+qp_q+1)>>1)
    uxtl        v4.8h, v4.8b
    bmi         .Lqp_v_done
    cmp         x2,#0x39
    bgt         .Lqp_v_high
    ldr         w2, [x3,x2,lsl #2]
.Lqp_v_high:
    sub         x20,x2,#6
    csel        x2, x20, x2,gt
.Lqp_v_done:

    // tc table index for U: clip(qp_u + 2*tc_offset_div2 + 2, 0, 53).
    // The vector instructions interleaved here do not touch NZCV.
    add         x1,x1,x4,lsl #1
    sub         v6.8h, v0.8h , v2.8h    // q0 - p0
    add         x3,x1,#2                // x3 is free: qp table no longer needed
    cmp         x3,#0x35
    mov         x20,#0x35
    csel        x1, x20, x1,gt          // upper clip to 53
    shl         v6.8h, v6.8h,#2         // (q0 - p0) * 4
    uxtl        v16.8h, v16.8b
    bgt         .Ltc_u_idx_done
    adds        x3,x1,#2
    csel        x1, x3, xzr,pl          // index+2 if non-negative, else 0
.Ltc_u_idx_done:

    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    add         v4.8h, v6.8h , v4.8h    // (q0 - p0)*4 + p1

    // tc table index for V, same clipping as above.
    add         x2,x2,x4,lsl #1
    sub         v6.8h, v4.8h , v16.8h   // (q0 - p0)*4 + p1 - q1
    add         x4,x2,#2                // x4 (tc_offset_div2) is dead from here
    ldr         w1, [x3,x1,lsl #2]      // w1 = tc for U
    cmp         x4,#0x35
    mov         x20,#0x35
    csel        x2, x20, x2,gt
    bgt         .Ltc_v_idx_done
    adds        x4,x2,#2
    csel        x2, x4, xzr,pl
.Ltc_v_idx_done:

    ldr         w2, [x3,x2,lsl #2]      // w2 = tc for V

    // Build the per-lane +tc / -tc limits (U in even lanes, V in odd lanes)
    // and finish delta with a rounding shift.
    dup         v31.8h,w2               // +tc_v
    dup         v30.8h,w1               // +tc_u
    neg         x1, x1
    srshr       v6.8h, v6.8h,#3         // (… + 4) >> 3
    dup         v28.8h,w1               // -tc_u
    neg         x1, x2
    zip1        v4.8h, v30.8h, v31.8h   // { tc_u,  tc_v,  tc_u,  tc_v, ...}
    dup         v29.8h,w1               // -tc_v

    zip1        v18.8h, v28.8h, v29.8h  // {-tc_u, -tc_v, -tc_u, -tc_v, ...}

    smin        v16.8h, v6.8h , v4.8h   // delta = min(delta, +tc)
    smax        v4.8h, v18.8h , v16.8h  // delta = max(delta, -tc)
    add         v2.8h, v2.8h , v4.8h    // p0 + delta
    sub         v0.8h, v0.8h , v4.8h    // q0 - delta
    sqxtun      v2.8b, v2.8h            // saturate to unsigned 8 bit
    sqxtun      v0.8b, v0.8h

    // Write back each side only if its filter flag is non-zero.
    cbz         x8, .Lskip_store_p
    st1         {v2.8b},[x12]
.Lskip_store_p:
    cbz         x9, .Lskip_store_q
    st1         {v0.8b},[x0]
.Lskip_store_q:
    ldp         x19, x20,[sp],#16
    pop_v_regs
    ret