/*
   C-like prototype :
        void j_rev_dct_arm(DCTBLOCK data)

   With DCTBLOCK being a pointer to an array of 64 'signed shorts'

   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/

#include "libavutil/arm/asm.S"

@ Fixed-point multipliers: the value in each name scaled by 2^13
@ (for instance 0.541196100 * 8192 is about 4433).
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

@ Byte offset of each multiplier inside const_array (4 bytes per entry, same
@ order as the .word list at the bottom of this file).
#define FIX_0_298631336_ID 0
#define FIX_0_541196100_ID 4
#define FIX_0_765366865_ID 8
#define FIX_1_175875602_ID 12
#define FIX_1_501321110_ID 16
#define FIX_2_053119869_ID 20
#define FIX_3_072711026_ID 24
#define FIX_M_0_390180644_ID 28
#define FIX_M_0_899976223_ID 32
#define FIX_M_1_847759065_ID 36
#define FIX_M_1_961570560_ID 40
#define FIX_M_2_562915447_ID 44
#define FIX_0xFFFF_ID 48

@ Scaling parameters used by both passes.
#define CONST_BITS 13
#define PASS1_BITS 2
#define ROW_DESCALE_BITS 11
#define COL_DESCALE_BITS 18

@ ---------------------------------------------------------------------------
@ even_part z1, t12
@
@ Even half of one 1-D transform.
@ In:      r0 = d0, r2 = d2, r4 = d4, r6 = d6, r11 = const_array
@ Out:     r0 = tmp10, r2 = tmp13, r4 = tmp11, register t12 = tmp12
@ Scratch: r3, r5, r6 and register z1 (unless they are named as outputs)
@ The loads are interleaved with the arithmetic on purpose; keep the order.
@ ---------------------------------------------------------------------------
.macro  even_part z1, t12
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     \z1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     \z1, r3, \z1                    @ z1 = (d2 + d6) * 0.541196100
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, \z1                 @ r6 = tmp2 = z1 - d6 * 1.847759065
        add     r5, r0, r4                      @ r5 = tmp0 = d0 + d4 (not yet shifted)
        mla     r2, r3, r2, \z1                 @ r2 = tmp3 = z1 + d2 * 0.765366865
        sub     r3, r0, r4                      @ r3 = tmp1 = d0 - d4 (not yet shifted)

        add     r0, r2, r5, lsl #CONST_BITS     @ r0 = tmp10 = tmp0 + tmp3
        rsb     r2, r2, r5, lsl #CONST_BITS     @ r2 = tmp13 = tmp0 - tmp3
        add     r4, r6, r3, lsl #CONST_BITS     @ r4 = tmp11 = tmp1 + tmp2
        rsb     \t12, r6, r3, lsl #CONST_BITS   @ t12 = tmp12 = tmp1 - tmp2
.endm

@ ---------------------------------------------------------------------------
@ odd_part
@
@ Odd half of one 1-D transform.
@ In:      r1 = d1, r3 = d3, r5 = d5, r7 = d7, r11 = const_array
@ Out:     r1 = tmp3, r3 = tmp2, r5 = tmp1, r7 = tmp0
@ Scratch: r0, r2, r4, r6, r8, r9, r10
@ The loads are interleaved with the arithmetic on purpose; keep the order.
@ ---------------------------------------------------------------------------
.macro  odd_part
        add     r0, r3, r5                      @ r0 = z2 = d3 + d5
        add     r2, r1, r7                      @ r2 = z1 = d1 + d7
        add     r4, r3, r7                      @ r4 = z3 = d3 + d7
        add     r6, r1, r5                      @ r6 = z4 = d1 + d5
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6                      @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8                      @ r8 = z5
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2                     @ r2 = z1 scaled
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0                      @ r0 = z2 scaled
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8                 @ r4 = z3 scaled + z5
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8                  @ r6 = z4 scaled + z5
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2                 @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0                  @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0                 @ r3 = tmp2 + z2
        add     r7, r7, r4                      @ r7 = tmp0
        mla     r1, r9, r1, r2                  @ r1 = tmp3 + z1
        add     r5, r5, r6                      @ r5 = tmp1
        add     r3, r3, r4                      @ r3 = tmp2
        add     r1, r1, r6                      @ r1 = tmp3
.endm

@ ---------------------------------------------------------------------------
@ descale_strh op, lhs, rhs, shift, off
@
@ Halfword store of ((lhs op rhs) + (1 << (shift - 1))) >> shift at lr + off,
@ where op is add or sub and the shift is arithmetic.  Clobbers r8.
@ ---------------------------------------------------------------------------
.macro  descale_strh op, lhs, rhs, shift, off
        \op     r8, \lhs, \rhs
        add     r8, r8, #(1 << (\shift - 1))
        mov     r8, r8, asr #\shift
        strh    r8, [lr, #(\off)]
.endm

@ ---------------------------------------------------------------------------
@ descale_strh_pair reg, shift, off_a, off_b
@
@ Rounds and shifts reg in place as above, then stores the same halfword at
@ lr + off_a and lr + off_b.  reg is overwritten with the descaled value.
@ ---------------------------------------------------------------------------
.macro  descale_strh_pair reg, shift, off_a, off_b
        add     \reg, \reg, #(1 << (\shift - 1))
        mov     \reg, \reg, asr #\shift
        strh    \reg, [lr, #(\off_a)]
        strh    \reg, [lr, #(\off_b)]
.endm

@ ---------------------------------------------------------------------------
@ ff_j_rev_dct_arm
@
@ Two-pass transform performed in place on the block passed in r0 (see the
@ C-like prototype at the top of this file).
@
@ Pass 1 visits 8 rows of 16 bytes.  Inside a row the halfwords at byte
@ offsets 0, 2, 4, 6, 8, 10, 12, 14 are read as d0, d2, d4, d6, d1, d3, d5, d7.
@ Pass 2 visits 8 columns 2 bytes apart; d0..d7 of a column are 16 bytes
@ apart in natural order.
@
@ In:      r0 = pointer to the block
@ Out:     results are written back over the input block
@ Saved:   r4-r11 are pushed on entry and restored on exit
@ Scratch: r0-r3, r12 and the flags
@ Stack:   40 bytes for the entry push, plus 16 bytes while the even-part
@          results are parked around an odd_part expansion
@
@ Register roles inside both loops:
@   lr  = pointer to the current row / column
@   r12 = iterations remaining
@   r11 = base address of const_array
@ ---------------------------------------------------------------------------
function ff_j_rev_dct_arm, export=1
        push    {r0, r4 - r11, lr}              @ r0 is saved too: it is popped back into lr for pass 2

        mov     lr, r0                          @ lr  = first row
        mov     r12, #8                         @ r12 = rows remaining
        movrel  r11, const_array                @ r11 = multiplier table

        @ ------------------------------ pass 1: rows ----------------------
row_loop:
        ldrsh   r0, [lr, #0]                    @ r0 = d0
        ldrsh   r2, [lr, #2]                    @ r2 = d2

        @ Shortcuts for sparse rows.  The row is fetched as four words (this
        @ relies on the rows being 4-byte aligned) and the words are OR-ed.
        ldr     r5, [lr, #0]
        ldr     r6, [lr, #4]
        ldr     r3, [lr, #8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6                      @ r3 = OR of the halfwords at offsets 4..14
        orrs    r5, r3, r5
        beq     end_of_row_loop                 @ whole row is zero: leave it untouched
        orrs    r3, r3, r2
        beq     empty_row                       @ everything but d0 is zero

        ldrsh   r1, [lr, #8]                    @ r1 = d1
        ldrsh   r4, [lr, #4]                    @ r4 = d4
        ldrsh   r6, [lr, #6]                    @ r6 = d6

        even_part r7, r3                        @ r0 = tmp10, r2 = tmp13, r3 = tmp12, r4 = tmp11

        @ Stored lowest register first: tmp10, tmp13, tmp12, tmp11.
        push    {r0, r2, r3, r4}

        ldrsh   r3, [lr, #10]                   @ r3 = d3
        ldrsh   r5, [lr, #12]                   @ r5 = d5
        ldrsh   r7, [lr, #14]                   @ r7 = d7

        odd_part                                @ r1 = tmp3, r3 = tmp2, r5 = tmp1, r7 = tmp0

        @ Reloaded in the same slot order into a different register set:
        @ r0 = tmp10, r2 = tmp13, r4 = tmp12, r6 = tmp11.
        pop     {r0, r2, r4, r6}

        @ DESCALE(x, CONST_BITS - PASS1_BITS) for the eight row outputs.
        descale_strh add, r0, r1, ROW_DESCALE_BITS, 0           @ tmp10 + tmp3
        descale_strh sub, r0, r1, ROW_DESCALE_BITS, 14          @ tmp10 - tmp3
        descale_strh add, r6, r3, ROW_DESCALE_BITS, 2           @ tmp11 + tmp2
        descale_strh sub, r6, r3, ROW_DESCALE_BITS, 12          @ tmp11 - tmp2
        descale_strh add, r4, r5, ROW_DESCALE_BITS, 4           @ tmp12 + tmp1
        descale_strh sub, r4, r5, ROW_DESCALE_BITS, 10          @ tmp12 - tmp1
        descale_strh add, r2, r7, ROW_DESCALE_BITS, 6           @ tmp13 + tmp0
        descale_strh sub, r2, r7, ROW_DESCALE_BITS, 8           @ tmp13 - tmp0

        @ Next row (full-computation path).
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop
        beq     start_column_loop               @ Z is set whenever this is reached, so it always branches

empty_row:
        @ Only d0 is non-zero: all eight outputs of the row are the low 16
        @ bits of d0 << PASS1_BITS.  Duplicate that halfword in both halves
        @ of one word and write the row as four words.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #PASS1_BITS
        and     r0, r0, r1
        add     r0, r0, r0, lsl #16
        str     r0, [lr, #0]
        str     r0, [lr, #4]
        str     r0, [lr, #8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ Next row (shortcut paths).
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

        @ ----------------------------- pass 2: columns --------------------
start_column_loop:
        pop     {lr}                            @ lr = block pointer saved from r0 on entry
        mov     r12, #8                         @ r12 = columns remaining
column_loop:
        ldrsh   r0, [lr, #( 0*8)]               @ r0 = d0
        ldrsh   r2, [lr, #( 4*8)]               @ r2 = d2
        ldrsh   r4, [lr, #( 8*8)]               @ r4 = d4
        ldrsh   r6, [lr, #(12*8)]               @ r6 = d6

        even_part r1, r6                        @ r0 = tmp10, r2 = tmp13, r4 = tmp11, r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]               @ r1 = d1
        ldrsh   r3, [lr, #( 6*8)]               @ r3 = d3
        ldrsh   r5, [lr, #(10*8)]               @ r5 = d5
        ldrsh   r7, [lr, #(14*8)]               @ r7 = d7

        @ Shortcut when d1, d3, d5 and d7 are all zero (the original author
        @ measured this at about 20 to 25 % of the columns).
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        @ Park tmp10, tmp13, tmp11, tmp12 while odd_part reuses r0-r10.
        push    {r0, r2, r4, r6}

        odd_part                                @ r1 = tmp3, r3 = tmp2, r5 = tmp1, r7 = tmp0

        @ Same register list as the push: r0 = tmp10, r2 = tmp13,
        @ r4 = tmp11, r6 = tmp12.
        pop     {r0, r2, r4, r6}

        @ DESCALE(x, CONST_BITS + PASS1_BITS + 3) for the eight column outputs.
        descale_strh add, r0, r1, COL_DESCALE_BITS, 0*8         @ tmp10 + tmp3
        descale_strh sub, r0, r1, COL_DESCALE_BITS, 14*8        @ tmp10 - tmp3
        descale_strh add, r4, r3, COL_DESCALE_BITS, 2*8         @ tmp11 + tmp2
        descale_strh sub, r4, r3, COL_DESCALE_BITS, 12*8        @ tmp11 - tmp2
        descale_strh add, r6, r5, COL_DESCALE_BITS, 4*8         @ tmp12 + tmp1
        descale_strh sub, r6, r5, COL_DESCALE_BITS, 10*8        @ tmp12 - tmp1
        descale_strh add, r2, r7, COL_DESCALE_BITS, 6*8         @ tmp13 + tmp0
        descale_strh sub, r2, r7, COL_DESCALE_BITS, 8*8         @ tmp13 - tmp0

        @ Next column (full-computation path).
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop
        beq     the_end                         @ Z is set whenever this is reached, so it always branches

empty_odd_column:
        @ The odd inputs are all zero, so the sum and the difference of each
        @ output pair are the same value: descale once, store twice.
        descale_strh_pair r0, COL_DESCALE_BITS, 0*8, 14*8       @ tmp10
        descale_strh_pair r4, COL_DESCALE_BITS, 2*8, 12*8       @ tmp11
        descale_strh_pair r6, COL_DESCALE_BITS, 4*8, 10*8       @ tmp12
        descale_strh_pair r2, COL_DESCALE_BITS, 6*8, 8*8        @ tmp13

        @ Next column (shortcut path).
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ Restore the saved registers and return through the saved lr.
        pop     {r4 - r11, pc}
endfunc

@ Multiplier table addressed through r11.  The entry order must stay in step
@ with the *_ID byte offsets defined at the top of this file.
const const_array
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF
endconst