@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_itrans_recon_dc_luma.s
@*
@* @brief
@*  contains function definitions itrans and recon for dc only case
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  - ihevcd_itrans_recon_dc_luma_a9q
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

.text

@ Offsets of the stack-passed arguments after the prologue below:
@ push {r0-r11,lr} stores 13 words (52 bytes) and vpush {d8-d15} stores
@ 64 bytes, so the first stack argument sits at sp + 116 (0x74).
.equ LOG2_TRANS_SIZE_OFFSET, 0x74
.equ COEFF_VALUE_OFFSET,     0x78

.globl ihevcd_itrans_recon_dc_luma_a9q

.type ihevcd_itrans_recon_dc_luma_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevcd_itrans_recon_dc_luma(uword8 *pu1_pred,
@                                  uword8 *pu1_dst,
@                                  word32 pred_strd,
@                                  word32 dst_strd,
@                                  word32 log2_trans_size,
@                                  word16 i2_coeff_value)
@
@ Derives a single dc value from i2_coeff_value, adds it to every prediction
@ sample of a trans_size x trans_size block and stores the result, saturated
@ to 8 bits, to the destination block.
@
@ In:   r0           = pu1_pred
@       r1           = pu1_dst
@       r2           = pred_strd
@       r3           = dst_strd
@       [sp, #0]     = log2_trans_size   (at function entry)
@       [sp, #4]     = i2_coeff_value    (at function entry)
@ Out:  none
@
@ All core registers r0-r11 and d8-d15 are saved and restored; q0-q3 and
@ q8-q15 (other than d8/d9) and the flags are modified.
@
@ A trans_size of 4 takes the dedicated 4x4 path; every other size is
@ processed in 8x8 tiles.
@-------------------------------------------------------------------------------
ihevcd_itrans_recon_dc_luma_a9q:

    push        {r0-r11,lr}
    vpush       {d8-d15}
    ldr         r4,[sp,#LOG2_TRANS_SIZE_OFFSET] @ r4 = log2_trans_size
    ldr         r5,[sp,#COEFF_VALUE_OFFSET]     @ r5 = i2_coeff_value
    mov         r10,#1
    lsl         r4,r10,r4                   @ trans_size = (1 << log2_trans_size)
    mov         r6,#64                      @ 1 << (shift1 - 1)
    mov         r7,#2048                    @ 1 << (shift2 - 1)

    add         r8,r6,r5,lsl #6             @ r8 = (coeff << 6) + 64
    ssat        r8,#16,r8,asr #7            @ r8 = sat16(r8 >> 7)
    add         r5,r7,r8,lsl #6             @ r5 = (r8 << 6) + 2048
    ssat        r6,#16,r5,asr #12           @ r6 = sat16(r5 >> 12) = dc_value
    mov         r8,r4                       @ r8 = rows remaining

    @ r6 has the dc_value
    @ r4 has the trans_size value
    @ r8 has the row value
    @ r9 has the col value (set at the top of row_loop)
    vdup.16     q0,r6                       @ q0 = dc_value in all 8 lanes
    cmp         r4,#4
    beq         row_loop_4


row_loop:
    mov         r9,r4                       @ r9 = columns remaining in this band


col_loop:
    @ Load an 8x8 tile of prediction samples; r7 walks down the rows so that
    @ r0 keeps pointing at the top of the tile.
    mov         r7,r0
    vld1.8      {d2},[r7],r2
    vld1.8      {d3},[r7],r2
    vld1.8      {d4},[r7],r2
    vld1.8      {d5},[r7],r2

    vld1.8      {d6},[r7],r2
    vld1.8      {d7},[r7],r2
    vld1.8      {d8},[r7],r2
    vld1.8      {d9},[r7]

    add         r0,r0,#8                    @ next tile to the right (pred)

    @ Widen the u8 samples to 16 bits and add the dc value.
    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d3
    vaddw.u8    q13,q0,d4
    vaddw.u8    q12,q0,d5
    vaddw.u8    q11,q0,d6
    vaddw.u8    q10,q0,d7
    vaddw.u8    q9,q0,d8
    vaddw.u8    q8,q0,d9

    @ Narrow back to u8 with unsigned saturation (clips to 0..255).
    mov         r11,r1
    vqmovun.s16 d2,q15
    vqmovun.s16 d3,q14
    vqmovun.s16 d4,q13
    vqmovun.s16 d5,q12
    vqmovun.s16 d6,q11
    vqmovun.s16 d7,q10
    vqmovun.s16 d8,q9
    vqmovun.s16 d9,q8

    @ Store the 8x8 tile; the element size matches the byte loads above.
    vst1.8      {d2},[r11],r3
    vst1.8      {d3},[r11],r3
    vst1.8      {d4},[r11],r3
    vst1.8      {d5},[r11],r3
    vst1.8      {d6},[r11],r3
    vst1.8      {d7},[r11],r3
    vst1.8      {d8},[r11],r3
    vst1.8      {d9},[r11]

    add         r1,r1,#8                    @ next tile to the right (dst)

    subs        r9,r9,#8
    bgt         col_loop

    subs        r8,r8,#8                    @ flags consumed by the bgt below;
                                            @ the adds/subs in between do not
                                            @ update the flags

    @ Step both pointers down 8 rows and back to the first column.
    add         r0,r0,r2,lsl #3
    add         r1,r1,r3,lsl #3
    sub         r0,r0,r4
    sub         r1,r1,r4
    bgt         row_loop
    b           end_loops


row_loop_4:
    @ 4x4 block: load exactly 4 bytes per row, replicated into both halves of
    @ the d register, so nothing beyond the 4-sample row is read. Only the
    @ low 4 result bytes of each row are stored below.
    @ NOTE(review): like the 32-bit lane stores below, these loads rely on
    @ unaligned 32-bit NEON element accesses being permitted - confirm for
    @ the target configuration.
    vld1.32     {d2[]},[r0],r2
    vld1.32     {d3[]},[r0],r2
    vld1.32     {d4[]},[r0],r2
    vld1.32     {d5[]},[r0]

    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d3
    vaddw.u8    q13,q0,d4
    vaddw.u8    q12,q0,d5

    vqmovun.s16 d2,q15
    vqmovun.s16 d3,q14
    vqmovun.s16 d4,q13
    vqmovun.s16 d5,q12

    vst1.32     {d2[0]},[r1],r3
    vst1.32     {d3[0]},[r1],r3
    vst1.32     {d4[0]},[r1],r3
    vst1.32     {d5[0]},[r1]

end_loops:
    vpop        {d8-d15}
    pop         {r0-r11,pc}

.size ihevcd_itrans_recon_dc_luma_a9q, .-ihevcd_itrans_recon_dc_luma_a9q