@/***************************************************************************** @* @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore @* @* Licensed under the Apache License, Version 2.0 (the "License"); @* you may not use this file except in compliance with the License. @* You may obtain a copy of the License at: @* @* http://www.apache.org/licenses/LICENSE-2.0 @* @* Unless required by applicable law or agreed to in writing, software @* distributed under the License is distributed on an "AS IS" BASIS, @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @* See the License for the specific language governing permissions and @* limitations under the License. @* @*****************************************************************************/ @/** @/******************************************************************************* @* @file @* ihevcd_itrans_recon_dc_chroma.s @* @* @brief @* contains function definitions itrans and recon for dc only case @* @* @author @* ittiam @* @* @par list of functions: @* @* @* @remarks @* none @* @*******************************************************************************/ .text .globl ihevcd_itrans_recon_dc_chroma_a9q .type ihevcd_itrans_recon_dc_chroma_a9q, %function ihevcd_itrans_recon_dc_chroma_a9q: @void ihevcd_itrans_recon_dc_chroma(uword8 *pu1_pred, @ uword8 *pu1_dst, @ word32 pred_strd, @ word32 dst_strd, @ word32 log2_trans_size, @ word16 i2_coeff_value) @r0:pu1_pred @r1:pu1_dest @r2:pred_strd @r3:dst_strd push {r0-r11,lr} vpush {d8-d15} ldr r4,[sp,#0x74] @loads log2_trans_size ldr r5,[sp,#0x78] @ loads i2_coeff_value mov r10,#1 lsl r4,r10,r4 @ trans_size = (1 << log2_trans_size)@ mov r6,#64 @ 1 << (shift1 - 1)@ mov r7,#2048 @ 1<<(shift2-1) add r8,r6,r5,lsl #6 ssat r8,#16,r8,asr #7 add r5,r7,r8,lsl #6 ssat r6,#16,r5,asr #12 mov r9,r4 mov r8,r4 @ r6 has the dc_value @ r4 has the trans_size value @ r8 has the row value @ r9 has the col value vdup.s16 q0,r6 cmp r4,#4 beq row_loop_4chroma row_loop_chroma: mov r9,r4 col_loop_chroma: mov r7,r0 vld2.8 {d2,d3},[r7],r2 vld2.8 {d4,d5},[r7],r2 vld2.8 {d6,d7},[r7],r2 vld2.8 {d8,d9},[r7],r2 vld2.8 {d10,d11},[r7],r2 vld2.8 {d12,d13},[r7],r2 vld2.8 {d14,d15},[r7],r2 vld2.8 {d16,d17},[r7] add r0,r0,#16 vaddw.u8 q15,q0,d2 vaddw.u8 q14,q0,d4 vaddw.u8 q13,q0,d6 vaddw.u8 q12,q0,d8 vaddw.u8 q11,q0,d10 vaddw.u8 q10,q0,d12 vaddw.u8 q9,q0,d14 mov r11,r1 vqmovun.s16 d2,q15 vqmovun.s16 d4,q14 vqmovun.s16 d6,q13 vqmovun.s16 d8,q12 vaddw.u8 q15,q0,d16 vqmovun.s16 d10,q11 vqmovun.s16 d12,q10 vqmovun.s16 d14,q9 vqmovun.s16 d16,q15 vst2.8 {d2,d3},[r11],r3 vst2.8 {d4,d5},[r11],r3 vst2.8 {d6,d7},[r11],r3 vst2.8 {d8,d9},[r11],r3 vst2.8 {d10,d11},[r11],r3 vst2.8 {d12,d13},[r11],r3 vst2.8 {d14,d15},[r11],r3 vst2.8 {d16,d17},[r11] add r1,r1,#16 subs r9,r9,#8 bgt col_loop_chroma subs r8,r8,#8 add r0,r0,r2,lsl #3 add r1,r1,r3,lsl #3 sub r0,r0,r4,lsl #1 sub r1,r1,r4,lsl #1 bgt row_loop_chroma b end_loops_chroma row_loop_4chroma: mov r9,r10 col_loop_4chroma: vld2.8 {d2,d3},[r0],r2 vld2.8 {d4,d5},[r0],r2 vld2.8 {d6,d7},[r0],r2 vld2.8 {d8,d9},[r0] vaddw.u8 q15,q0,d2 vaddw.u8 q14,q0,d4 vaddw.u8 q13,q0,d6 vaddw.u8 q12,q0,d8 vqmovun.s16 d2,q15 vqmovun.s16 d4,q14 vqmovun.s16 d6,q13 vqmovun.s16 d8,q12 vzip.8 d2,d3 vzip.8 d4,d5 vzip.8 d6,d7 vzip.8 d8,d9 vst1.u32 {d2},[r1],r3 vst1.u32 {d4},[r1],r3 vst1.u32 {d6},[r1],r3 vst1.u32 {d8},[r1] end_loops_chroma: vpop {d8-d15} pop {r0-r11,pc}