• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/arm/asm.S"
22
/*
 * alias name, tgt, set=1
 *
 * With a non-zero "set", bind the register name "name" to "tgt" via .req.
 * With set=0, remove an existing binding of "name" via .unreq ("tgt" is
 * not used in that case).
 */
.macro alias name, tgt, set=1
.if \set == 0
    .unreq  \name
.else
    \name   .req    \tgt
.endif
.endm
30
/*
 * Give every quad register named halves: the single invocation below
 * recurses from q0 up to q15 and defines
 *     qN_l = d(2N)      qN_h = d(2N+1)
 * altmacro mode is switched on only for this stretch, because the
 * recursion depends on %(expr) arguments being evaluated to numbers.
 */
.altmacro

.macro alias_dw_all qw, dw_l, dw_h
    alias   q\qw\()_h, d\dw_h
    alias   q\qw\()_l, d\dw_l
    .if \qw < 15
        alias_dw_all  %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
    .endif
.endm

alias_dw_all    0, 0, 1

.noaltmacro
44
/*
 * alias_qw name, qw, set=1
 *
 * Alias (set != 0) or un-alias (set = 0) a quad register and both of its
 * halves in one go: name -> qw, name_l -> qw_l, name_h -> qw_h.
 * The qw_l / qw_h names must already exist (see alias_dw_all).
 */
.macro alias_qw     name, qw, set=1
    alias   \name\()_l, \qw\()_l,   \set
    alias   \name\()_h, \qw\()_h,   \set
    alias   \name,      \qw,        \set
.endm
50
/*
 * Function entry: push r4-r12 and lr (10 words), then q4-q7, written here
 * as their d-register halves d8-d15 (4 * 16 bytes).  The resulting frame
 * is 104 bytes, which is the offset load_arg builds in.
 */
.macro prologue
    push            {r4-r12, lr}
    vpush           {d8-d15}
.endm
55
/*
 * Function exit, mirroring prologue: restore q4-q7 (d8-d15), then
 * r4-r12.  The saved lr value is popped straight into pc, so this macro
 * also performs the return.
 */
.macro epilogue
    vpop            {d8-d15}
    pop             {r4-r12, pc}
.endm
60
/*
 * load_arg reg, ix
 *
 * Load stack-passed argument number ix (ix = 4 is the first word above
 * the saved registers) into reg.  The constant part of the offset is the
 * frame pushed by prologue: 4 q registers of 16 bytes plus 10 core
 * registers of 4 bytes.  Only meaningful while sp is unchanged since
 * prologue.
 */
.macro  load_arg    reg, ix
    ldr     \reg,   [sp, #((4 * 16 + 10 * 4) + 4 * (\ix - 4))]
.endm
64
65
/* ()_to_()_neon_()(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *                  int width, int height,
 *                  int y_stride, int c_stride, int src_stride,
 *                  int32_t coeff_table[9]);
 */
/*
 * alias_loop_420sp set=1
 *
 * Create (set != 0) or remove (set = 0) the register names used by
 * loop_420sp.  Several names deliberately share one register:
 *   src0 = src, y0 = y, header = width, c_padding = c_stride,
 *   and y1 / coeff_table are both r12.
 */
.macro  alias_loop_420sp set=1
    /* r0-r3 */
    alias   src,            r0,         \set
    alias   src0,           src,        \set
    alias   y,              r1,         \set
    alias   y0,             y,          \set
    alias   chroma,         r2,         \set
    alias   width,          r3,         \set
    alias   header,         width,      \set

    /* r4-r7: targets of the load_arg calls in loop_420sp */
    alias   height,         r4,         \set
    alias   y_stride,       r5,         \set
    alias   c_stride,       r6,         \set
    alias   c_padding,      c_stride,   \set
    alias   src_stride,     r7,         \set

    /* r8-r12: values computed inside loop_420sp */
    alias   y0_end,         r8,         \set
    alias   src_padding,    r9,         \set
    alias   y_padding,      r10,        \set
    alias   src1,           r11,        \set
    alias   y1,             r12,        \set

    /* same register as y1 */
    alias   coeff_table,    r12,        \set
.endm
96
97
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Define and export the function <s_fmt>_to_<d_fmt>_neon_<precision>.
 * The function converts the image two rows per outer iteration; each
 * kernel invocation handles a slice of both rows and advances the
 * src0/src1/y0/y1/chroma pointers itself.
 *
 *   s_fmt, d_fmt  format names, forwarded unchanged to the kernel
 *   init          macro invoked once with the coeff_table register
 *   kernel        macro invoked as
 *                 kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma [, count]
 *   precision     suffix of the exported symbol
 *
 * Register names are the ones created by alias_loop_420sp.
 */
.macro  loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    /* Arguments 4..8 are read from the stack, above the prologue frame. */
    load_arg    height,         4
    load_arg    y_stride,       5
    load_arg    c_stride,       6
    load_arg    src_stride,     7
    load_arg    coeff_table,    8

    /* coeff_table is r12, which is overwritten by y1 further down, so
     * the table pointer is only available to the init macro. */
    \init       coeff_table

    /* Distance from the end of one row to the start of the next, for
     * luma, chroma and source (4 source bytes per pixel).  All of this
     * must happen before header is written: header aliases width, and
     * c_padding replaces c_stride in the same register. */
    sub         y_padding,      y_stride,       width
    sub         c_padding,      c_stride,       width
    sub         src_padding,    src_stride,     width, LSL #2

    add         y0_end,         y0,             width
    /* header = width mod 16; from here on width itself is gone. */
    and         header,         width,          #15

    /* Second row of the pair. */
    add         y1,             y0,             y_stride
    add         src1,           src0,           src_stride

0:
    /* Start of a row pair.  If the width is not a multiple of 16, run
     * the kernel once with count = header before the main loop.
     * NOTE(review): the kernel at 1: always runs at least once; with
     * kernel_420_16x2 every invocation touches 16 pixels, so this
     * presumably relies on callers passing width >= 16 - confirm. */
    cmp         header,     #0
    beq         1f

    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses, so the comparison has to be
     * unsigned (lo).  A signed lt ends the row early when the row
     * straddles 0x80000000. */
    cmp         y0,         y0_end
    blo         1b
2:
    /* Step to the next row pair: y1 and src1 sit at the end of the
     * second row, so adding the padding yields the start of the row
     * after it. */
    add         y0,         y1,         y_padding
    add         y0_end,     y1,         y_stride
    add         chroma,     chroma,     c_padding
    add         src0,       src1,       src_padding

    add         y1,         y0,         y_stride
    add         src1,       src0,       src_stride

    /* Two rows are consumed per pass. */
    subs        height,     height,     #2

    bgt         0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm
153
/*
 * Horizontal 2:1 sum: add each pair of neighbouring 8-bit samples of the
 * B, G and R vectors and write the widened 16-bit sums to b16x8, g16x8
 * and r16x8 (previous contents are overwritten).
 */
.macro downsample
    vpaddl.u8   b16x8,  b8x16
    vpaddl.u8   g16x8,  g8x16
    vpaddl.u8   r16x8,  r8x16
.endm
159
160
/*
 * Accumulate and right shift by 2: add the pairwise sums of the current
 * 8-bit B, G and R vectors onto the existing 16-bit sums, then divide
 * each lane by 4 with rounding.
 */
.macro downsample_ars2
    vpadal.u8   b16x8,  b8x16
    vpadal.u8   g16x8,  g8x16
    vpadal.u8   r16x8,  r8x16

    vrshr.u16   b16x8,  b16x8,  #2
    vrshr.u16   g16x8,  g16x8,  #2
    vrshr.u16   r16x8,  r16x8,  #2
.endm
171
/*
 * store_y8_16x1 dst [, count]
 *
 * Store the 16 bytes of y8x16 at dst.  Without count, dst is advanced
 * by the 16 bytes written; with count (a register), all 16 bytes are
 * still written but dst is advanced by count instead.
 */
.macro store_y8_16x1            dst, count
.ifnc "\count",""
    vstmia      \dst,   {y8x16}
    add         \dst,   \dst,           \count
.else
    vstmia      \dst!,  {y8x16}
.endif
.endm
180
/*
 * store_chroma_nv12_8x1 dst [, count]
 *
 * Store u8x8 and v8x8 interleaved byte by byte, U first, at dst.
 * Without count, dst is post-incremented by the size of the store;
 * with count (a register), dst is post-incremented by count.
 */
.macro store_chroma_nv12_8x1    dst, count
.ifnc "\count",""
    vst2.i8     {u8x8, v8x8},   [\dst], \count
.else
    vst2.i8     {u8x8, v8x8},   [\dst]!
.endif
.endm
188
/*
 * store_chroma_nv21_8x1 dst [, count]
 *
 * Store v8x8 and u8x8 interleaved byte by byte, V first, at dst.
 * Without count, dst is post-incremented by the size of the store;
 * with count (a register), dst is post-incremented by count.
 */
.macro store_chroma_nv21_8x1    dst, count
.ifnc "\count",""
    vst2.i8     {v8x8, u8x8},   [\dst], \count
.else
    vst2.i8     {v8x8, u8x8},   [\dst]!
.endif
.endm
196
/*
 * load_8888_16x1 a, b, c, d, src [, count]
 *
 * Load 16 four-byte pixels (64 bytes) from src and de-interleave them:
 * byte 0 of every pixel goes to <a>8x16, byte 1 to <b>8x16, byte 2 to
 * <c>8x16 and byte 3 to <d>8x16.  Pixels 0-7 fill the _l halves, pixels
 * 8-15 the _h halves.
 *
 * Without count, src ends up 64 bytes further on.  With count (a
 * register holding a pixel count), the same 64 bytes are read but src
 * ends up advanced by count * 4 bytes only.
 */
.macro load_8888_16x1   a, b, c, d, src, count
    /* first eight pixels; src += 32 in either case */
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l},  [\src]!
.ifnc "\count",""
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]
    /* undo the write-back above, then step by count pixels */
    sub         \src,   \src,   #32
    add         \src,   \src,   \count, LSL #2
.else
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]!
.endif
.endm
208
/*
 * load_rgbx_16x1 src [, count]
 * load_8888_16x1 with the byte order R, G, B, X.
 */
.macro load_rgbx_16x1   src, count
    load_8888_16x1      r, g, b, x,     \src, \count
.endm
212
/*
 * load_bgrx_16x1 src [, count]
 * load_8888_16x1 with the byte order B, G, R, X.
 */
.macro load_bgrx_16x1   src, count
    load_8888_16x1      b, g, r, x,     \src, \count
.endm
216
/*
 * alias_src_rgbx set=1
 * Source register names for R, G, B, X byte order: r8x16, g8x16, b8x16
 * and x8x16 are assigned, in that order, by alias_src_8888.
 */
.macro alias_src_rgbx   set=1
    alias_src_8888      r, g, b, x,     \set
.endm
220
/*
 * alias_src_bgrx set=1
 * Source register names for B, G, R, X byte order: b8x16, g8x16, r8x16
 * and x8x16 are assigned, in that order, by alias_src_8888.
 */
.macro alias_src_bgrx   set=1
    alias_src_8888      b, g, r, x,     \set
.endm
224
/*
 * alias_dst_nv12 set=1
 * Chroma output names for NV12: u8x8 is the low half and v8x8 the high
 * half of c8x8x2 (which is defined outside this section).
 */
.macro alias_dst_nv12   set=1
    alias   v8x8,   c8x8x2_h,   \set
    alias   u8x8,   c8x8x2_l,   \set
.endm
229
/*
 * alias_dst_nv21 set=1
 * Chroma output names for NV21: v8x8 is the low half and u8x8 the high
 * half of c8x8x2 (which is defined outside this section).
 */
.macro alias_dst_nv21   set=1
    alias   u8x8,   c8x8x2_h,   \set
    alias   v8x8,   c8x8x2_l,   \set
.endm
234
235
// common aliases

/*
 * d0, d1 and d2 are named CO_R, CO_G and CO_B.  For each of them the
 * s16 lanes 0, 1 and 2 get the scalar names CO_xY, CO_xU and CO_xV.
 */
alias   CO_R,   d0
CO_RY   .dn     d0.s16[0]
CO_RU   .dn     d0.s16[1]
CO_RV   .dn     d0.s16[2]

alias   CO_G,   d1
CO_GY   .dn     d1.s16[0]
CO_GU   .dn     d1.s16[1]
CO_GV   .dn     d1.s16[2]

alias   CO_B,   d2
CO_BY   .dn     d2.s16[0]
CO_BU   .dn     d2.s16[1]
CO_BV   .dn     d2.s16[2]

/* BIAS_U and BIAS_V are two names for the same register, d3. */
alias   BIAS_U, d3
alias   BIAS_V, BIAS_U

/* q2, i.e. d4/d5 */
alias   BIAS_Y, q2
257
258
/*
 * alias_src_8888 a, b, c, d, set
 *
 * q3-q6 hold 16 four-byte source pixels, one byte position per register
 * (e.g. R8G8B8X8 x16).  This names them <a>8x16 = q3, <b>8x16 = q4,
 * <c>8x16 = q5 and <d>8x16 = q6, including the _l / _h halves, or
 * removes those names again when set is 0.
 */
.macro alias_src_8888   a, b, c, d, set
    alias_qw    \d\()8x16,  q6,     \set
    alias_qw    \c\()8x16,  q5,     \set
    alias_qw    \b\()8x16,  q4,     \set
    alias_qw    \a\()8x16,  q3,     \set
.endm
267
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma [, count]
 *
 * Convert a 16-pixel-wide, 2-row-high tile: two rows of 16 luma bytes
 * and one row of 8 interleaved chroma pairs.
 *
 *   rgb_fmt     source format; selects alias_src_<rgb_fmt> and
 *               load_<rgb_fmt>_16x1
 *   yuv_fmt     destination format; selects alias_dst_<yuv_fmt> and
 *               store_chroma_<yuv_fmt>_8x1
 *   rgb0, rgb1  source pointers of the upper and lower row
 *   y0, y1      luma destination pointers of the upper and lower row
 *   chroma      chroma destination pointer
 *   count       optional register; passed to every load and store so
 *               that the pointers advance by count pixels instead of 16
 *
 * compute_y_16x1 and compute_chroma_8x1 are defined outside this
 * section.  The channel sums started by downsample on the upper row are
 * completed by downsample_ars2 on the lower row; this presumably relies
 * on compute_y_16x1 leaving r16x8/g16x8/b16x8 intact - confirm there.
 */
.macro kernel_420_16x2  rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    alias_dst_\yuv_fmt
    alias_src_\rgb_fmt

    /* upper row: luma, and first half of the 2x2 channel sums */
    load_\rgb_fmt\()_16x1   \rgb0, \count
    downsample
    compute_y_16x1
    store_y8_16x1   \y0, \count

    /* lower row: luma, and completion of the 2x2 channel averages */
    load_\rgb_fmt\()_16x1   \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1   \y1, \count

    /* chroma from the averaged channels */
    compute_chroma_8x1  u, U
    compute_chroma_8x1  v, V

    store_chroma_\yuv_fmt\()_8x1 \chroma, \count

    alias_src_\rgb_fmt 0
    alias_dst_\yuv_fmt 0
.endm
292