/*
 * ARM-NEON-optimized IDCT functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * ff_put_pixels_clamped_neon
 *
 * Narrows an 8x8 block of signed 16-bit values to unsigned 8-bit with
 * saturation (values below 0 become 0, values above 255 become 255) and
 * writes the result as eight rows of eight bytes.
 *
 * In:   r0 = source: 64 contiguous 16-bit values (128 bytes).  The loads
 *            carry a :128 alignment qualifier, so the address must be
 *            16-byte aligned.
 *       r1 = destination of the first row.  The stores carry a :64
 *            qualifier, so every row address must be 8-byte aligned.
 *       r2 = byte step added to r1 after each row is stored.
 * Out:  the 64 bytes written through r1; no result register is set up.
 * Clobbers: r0 (advanced by 128), r1 (advanced by 8 * r2), d0-d7, d16-d31.
 *           d8-d15 are not touched, so nothing has to be saved/restored.
 *
 * NOTE(review): the C prototype lives outside this file; presumably
 * (const int16_t *block, uint8_t *pixels, ptrdiff_t line_size) - confirm
 * against the declaring header.
 *
 * Loads, narrowing and stores are interleaved rather than grouped,
 * presumably to hide load/ALU latency; keep the order when editing.
 */
function ff_put_pixels_clamped_neon, export=1
        vld1.16         {d16-d19}, [r0,:128]!   @ q8,  q9  = rows 0, 1
        vqmovun.s16     d0, q8                  @ d0 = row 0, s16 -> u8 saturated
        vld1.16         {d20-d23}, [r0,:128]!   @ q10, q11 = rows 2, 3
        vqmovun.s16     d1, q9                  @ d1 = row 1
        vld1.16         {d24-d27}, [r0,:128]!   @ q12, q13 = rows 4, 5
        vqmovun.s16     d2, q10                 @ d2 = row 2
        vld1.16         {d28-d31}, [r0,:128]!   @ q14, q15 = rows 6, 7
        vqmovun.s16     d3, q11                 @ d3 = row 3
        vst1.8          {d0},      [r1,:64], r2 @ store row 0, then r1 += r2
        vqmovun.s16     d4, q12                 @ d4 = row 4
        vst1.8          {d1},      [r1,:64], r2
        vqmovun.s16     d5, q13                 @ d5 = row 5
        vst1.8          {d2},      [r1,:64], r2
        vqmovun.s16     d6, q14                 @ d6 = row 6
        vst1.8          {d3},      [r1,:64], r2
        vqmovun.s16     d7, q15                 @ d7 = row 7
        vst1.8          {d4},      [r1,:64], r2
        vst1.8          {d5},      [r1,:64], r2
        vst1.8          {d6},      [r1,:64], r2
        vst1.8          {d7},      [r1,:64], r2
        bx              lr
endfunc

/*
 * ff_put_signed_pixels_clamped_neon
 *
 * Narrows an 8x8 block of signed 16-bit values to signed 8-bit with
 * saturation to [-128, 127], adds a bias of 128 to every byte (an 8-bit
 * modular add, which maps -128..127 onto 0..255) and writes the result
 * as eight rows of eight bytes.
 *
 * In:   r0 = source: 64 contiguous 16-bit values (128 bytes).  The loads
 *            carry a :128 alignment qualifier, so the address must be
 *            16-byte aligned.
 *       r1 = destination of the first row.  The stores carry a :64
 *            qualifier, so every row address must be 8-byte aligned.
 *       r2 = byte step added to r1 after each row is stored.
 * Out:  the 64 bytes written through r1; no result register is set up.
 * Clobbers: r0 (advanced by 128), r1 (advanced by 8 * r2), d0-d7,
 *           d16-d27, d31.  d8-d15 are not touched.
 *
 * NOTE(review): the C prototype lives outside this file; presumably
 * (const int16_t *block, uint8_t *pixels, ptrdiff_t line_size) - confirm
 * against the declaring header.
 *
 * q8 and q9 are reused for rows 2 and 3; each reload is placed after the
 * narrowing instruction that consumed the previous contents.  The overall
 * interleaving is presumably for latency hiding; keep the order when
 * editing.
 */
function ff_put_signed_pixels_clamped_neon, export=1
        vmov.u8         d31, #128               @ d31 = bias, 128 in every byte
        vld1.16         {d16-d17}, [r0,:128]!   @ q8  = row 0
        vqmovn.s16      d0, q8                  @ d0  = row 0, s16 -> s8 saturated
        vld1.16         {d18-d19}, [r0,:128]!   @ q9  = row 1
        vqmovn.s16      d1, q9                  @ d1  = row 1
        vld1.16         {d16-d17}, [r0,:128]!   @ q8  = row 2 (row 0 already narrowed)
        vqmovn.s16      d2, q8                  @ d2  = row 2
        vld1.16         {d18-d19}, [r0,:128]!   @ q9  = row 3 (row 1 already narrowed)
        vadd.u8         d0, d0, d31             @ row 0 += 128
        vld1.16         {d20-d21}, [r0,:128]!   @ q10 = row 4
        vadd.u8         d1, d1, d31             @ row 1 += 128
        vld1.16         {d22-d23}, [r0,:128]!   @ q11 = row 5
        vadd.u8         d2, d2, d31             @ row 2 += 128
        vst1.8          {d0},      [r1,:64], r2 @ store row 0, then r1 += r2
        vqmovn.s16      d3, q9                  @ d3  = row 3
        vst1.8          {d1},      [r1,:64], r2
        vqmovn.s16      d4, q10                 @ d4  = row 4
        vst1.8          {d2},      [r1,:64], r2
        vqmovn.s16      d5, q11                 @ d5  = row 5
        vld1.16         {d24-d25}, [r0,:128]!   @ q12 = row 6
        vadd.u8         d3, d3, d31             @ row 3 += 128
        vld1.16         {d26-d27}, [r0,:128]!   @ q13 = row 7
        vadd.u8         d4, d4, d31             @ row 4 += 128
        vadd.u8         d5, d5, d31             @ row 5 += 128
        vst1.8          {d3},      [r1,:64], r2
        vqmovn.s16      d6, q12                 @ d6  = row 6
        vst1.8          {d4},      [r1,:64], r2
        vqmovn.s16      d7, q13                 @ d7  = row 7
        vst1.8          {d5},      [r1,:64], r2
        vadd.u8         d6, d6, d31             @ row 6 += 128
        vadd.u8         d7, d7, d31             @ row 7 += 128
        vst1.8          {d6},      [r1,:64], r2
        vst1.8          {d7},      [r1,:64], r2
        bx              lr
endfunc

/*
 * ff_add_pixels_clamped_neon
 *
 * For each of eight rows: loads eight existing bytes from the destination,
 * zero-extends them and adds them to eight signed 16-bit values from the
 * source, narrows the sums to unsigned 8-bit with saturation to [0, 255]
 * and writes them back to the same destination row.
 *
 * In:   r0 = source: 64 contiguous 16-bit values (128 bytes).  The loads
 *            carry a :128 alignment qualifier, so the address must be
 *            16-byte aligned.
 *       r1 = destination of the first row (read, then overwritten).  All
 *            accesses carry a :64 qualifier, so every row address must be
 *            8-byte aligned.
 *       r2 = byte step between destination rows.
 * Out:  the 64 bytes written through the destination; no result register
 *       is set up.
 * Clobbers: r0 (advanced by 128), r1 and r3 (each advanced by 8 * r2),
 *           d0-d7, d16-d19.  d8-d15 are not touched.
 *
 * NOTE(review): the C prototype lives outside this file; presumably
 * (const int16_t *block, uint8_t *pixels, ptrdiff_t line_size) - confirm
 * against the declaring header.
 *
 * Two destination pointers are used: r1 reads rows ahead while r3 trails
 * behind writing results, and every row is read before it is written.
 * q0-q3 and d16-d19 are reused for rows 4-7; each reload of qN is placed
 * after the store of the result that lived in its low half.
 *
 * vaddw.u8 is a plain (wrapping) 16-bit add; only the final vqmovun.s16
 * saturates.  A source value above 32767 - 255 could therefore wrap before
 * the clamp - presumably callers never pass such values; verify if the
 * input range ever changes.
 */
function ff_add_pixels_clamped_neon, export=1
        mov             r3, r1                  @ r3 = write pointer, r1 = read pointer
        vld1.8          {d16},   [r1,:64], r2   @ d16 = dest row 0
        vld1.16         {d0-d1}, [r0,:128]!     @ q0  = src row 0
        vaddw.u8        q0, q0, d16             @ q0 += zero-extended dest row 0
        vld1.8          {d17},   [r1,:64], r2   @ d17 = dest row 1
        vld1.16         {d2-d3}, [r0,:128]!     @ q1  = src row 1
        vqmovun.s16     d0, q0                  @ d0  = row 0 result, saturated to u8
        vld1.8          {d18},   [r1,:64], r2   @ d18 = dest row 2
        vaddw.u8        q1, q1, d17
        vld1.16         {d4-d5}, [r0,:128]!     @ q2  = src row 2
        vaddw.u8        q2, q2, d18
        vst1.8          {d0},    [r3,:64], r2   @ write row 0, then r3 += r2
        vqmovun.s16     d2, q1                  @ d2  = row 1 result
        vld1.8          {d19},   [r1,:64], r2   @ d19 = dest row 3
        vld1.16         {d6-d7}, [r0,:128]!     @ q3  = src row 3
        vaddw.u8        q3, q3, d19
        vqmovun.s16     d4, q2                  @ d4  = row 2 result
        vst1.8          {d2},    [r3,:64], r2   @ write row 1
        vld1.8          {d16},   [r1,:64], r2   @ d16 = dest row 4
        vqmovun.s16     d6, q3                  @ d6  = row 3 result
        vld1.16         {d0-d1}, [r0,:128]!     @ q0  = src row 4 (row 0 already stored)
        vaddw.u8        q0, q0, d16
        vst1.8          {d4},    [r3,:64], r2   @ write row 2
        vld1.8          {d17},   [r1,:64], r2   @ d17 = dest row 5
        vld1.16         {d2-d3}, [r0,:128]!     @ q1  = src row 5 (row 1 already stored)
        vaddw.u8        q1, q1, d17
        vst1.8          {d6},    [r3,:64], r2   @ write row 3
        vqmovun.s16     d0, q0                  @ d0  = row 4 result
        vld1.8          {d18},   [r1,:64], r2   @ d18 = dest row 6
        vld1.16         {d4-d5}, [r0,:128]!     @ q2  = src row 6 (row 2 already stored)
        vaddw.u8        q2, q2, d18
        vst1.8          {d0},    [r3,:64], r2   @ write row 4
        vqmovun.s16     d2, q1                  @ d2  = row 5 result
        vld1.8          {d19},   [r1,:64], r2   @ d19 = dest row 7
        vqmovun.s16     d4, q2                  @ d4  = row 6 result
        vld1.16         {d6-d7}, [r0,:128]!     @ q3  = src row 7 (row 3 already stored)
        vaddw.u8        q3, q3, d19
        vst1.8          {d2},    [r3,:64], r2   @ write row 5
        vqmovun.s16     d6, q3                  @ d6  = row 7 result
        vst1.8          {d4},    [r3,:64], r2   @ write row 6
        vst1.8          {d6},    [r3,:64], r2   @ write row 7
        bx              lr
endfunc
