• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1@ This file was created from a .asm file
2@  using the ads2gas.pl script.
3	.equ DO1STROUNDING, 0
4	.syntax unified
5@
6@  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
7@
8@  Use of this source code is governed by a BSD-style license and patent
9@  grant that can be found in the LICENSE file in the root of the source
10@  tree. All contributing project authors may be found in the AUTHORS
11@  file in the root of the source tree.
12@
13
14
15    .global vpx_idct4x4_1_add_neon
16	.type vpx_idct4x4_1_add_neon, function
17   .arm
18   .eabi_attribute 24, 1 @Tag_ABI_align_needed
19   .eabi_attribute 25, 1 @Tag_ABI_align_preserved
20
21.text
22.p2align 2
23
24@void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int stride)
25@
26@ r0  int16_t *input
27@ r1  uint8_t *dest
28@ r2  int stride
29
30_vpx_idct4x4_1_add_neon:
31	vpx_idct4x4_1_add_neon: @ PROC
    @ DC-only 4x4 inverse DCT with add: derives a single value a1 from
    @ input[0] and adds it, with saturation to 0..255, to each byte of a
    @ 4-row x 4-byte block at dest (rows are `stride` bytes apart).
    @
    @ In:       r0 = input (only input[0] is read), r1 = dest, r2 = stride
    @ Out:      the 4x4 block at dest is updated in place; nothing is returned
    @ Writes:   r0, r1, r12, q0, d2, d4, d6, d7, q8, q9
    @           (AAPCS does not require a callee to preserve any of these)
    @ Stack:    unused; leaf routine that returns with bx lr

    @ r0 = (int32_t)input[0]; the pointer in r0 is no longer needed after this
32    ldrsh            r0, [r0]
33
34    @ cospi_16_64 = 11585 (0x2d41); r12 holds the constant for both multiplies
35    movw             r12, #0x2d41
36
37    @ out = dct_const_round_shift(input[0] * cospi_16_64)
38    mul              r0, r0, r12               @ input[0] * cospi_16_64
39    add              r0, r0, #0x2000           @ +(1 << ((DCT_CONST_BITS) - 1))
40    asr              r0, r0, #14               @ >> DCT_CONST_BITS
41
42    @ out = dct_const_round_shift(out * cospi_16_64)
43    mul              r0, r0, r12               @ out * cospi_16_64
    @ The constant in r12 is dead after the multiply above, so r12 is reused
    @ to keep the original dest: the loads below advance r1, and the stores
    @ need to start again from the first row.
44    mov              r12, r1                   @ save dest
45    add              r0, r0, #0x2000           @ +(1 << ((DCT_CONST_BITS) - 1))
46    asr              r0, r0, #14               @ >> DCT_CONST_BITS
47
48    @ a1 = ROUND_POWER_OF_TWO(out, 4)
49    add              r0, r0, #8                @ + (1 <<((4) - 1))
50    asr              r0, r0, #4                @ >> 4
51
    @ Broadcast the low 16 bits of a1 into all eight 16-bit lanes of q0
52    vdup.s16         q0, r0                    @ duplicate a1
53
    @ Load the four 4-byte rows of dest: rows 0-1 into d2, rows 2-3 into d4.
    @ Each load post-increments r1 by stride, except the last one.
54    vld1.32          {d2[0]}, [r1], r2
55    vld1.32          {d2[1]}, [r1], r2
56    vld1.32          {d4[0]}, [r1], r2
57    vld1.32          {d4[1]}, [r1]
58
    @ Widen each dest byte (unsigned) to 16 bits and add a1:
    @ q8 = rows 0-1, q9 = rows 2-3
59    vaddw.u8         q8, q0, d2                @ dest[x] + a1
60    vaddw.u8         q9, q0, d4
61
    @ Saturating narrow of the signed 16-bit sums to unsigned 8-bit,
    @ i.e. clamp each result to 0..255: d6 = rows 0-1, d7 = rows 2-3
62    vqmovun.s16      d6, q8                    @ clip_pixel
63    vqmovun.s16      d7, q9
64
    @ Store the four rows back, walking from the saved dest in r12 with the
    @ same stride pattern as the loads.
65    vst1.32          {d6[0]}, [r12], r2
66    vst1.32          {d6[1]}, [r12], r2
67    vst1.32          {d7[0]}, [r12], r2
68    vst1.32          {d7[1]}, [r12]
69
70    bx               lr
71	.size vpx_idct4x4_1_add_neon, .-vpx_idct4x4_1_add_neon    @ ENDP             @ |vpx_idct4x4_1_add_neon|
72
73	.section	.note.GNU-stack,"",%progbits
74