/*
 * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * alias name, tgt, set=1
 * Bind the register alias `name` to `tgt` (set != 0) or release it again
 * (set == 0). All other alias_* macros in this file are built on this one.
 */
.macro alias name, tgt, set=1
.if \set != 0
    \name   .req    \tgt
.else
    .unreq  \name
.endif
.endm

.altmacro

/*
 * alias_dw_all qw, dw_l, dw_h
 * Recursively define q<N>_l / q<N>_h as aliases of the two D registers
 * d<2N> / d<2N+1>, for N = qw .. 15. Needs .altmacro for the %(expr)
 * argument evaluation.
 */
.macro alias_dw_all qw, dw_l, dw_h
    alias   q\qw\()_l, d\dw_l
    alias   q\qw\()_h, d\dw_h
    .if \qw < 15
        alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
    .endif
.endm

alias_dw_all 0, 0, 1

.noaltmacro

/*
 * alias_qw name, qw, set=1
 * Alias a Q register as `name` and its halves as `name`_l / `name`_h,
 * using the q<N>_l / q<N>_h names created by alias_dw_all.
 */
.macro alias_qw name, qw, set=1
    alias   \name\(), \qw, \set
    alias   \name\()_l, \qw\()_l, \set
    alias   \name\()_h, \qw\()_h, \set
.endm

/* Save r4-r12 and lr (10 words), then q4-q7 (4 * 16 bytes). */
.macro prologue
    push    {r4-r12, lr}
    vpush   {q4-q7}
.endm

/* Undo prologue; popping the saved lr into pc returns to the caller. */
.macro epilogue
    vpop    {q4-q7}
    pop     {r4-r12, pc}
.endm

/*
 * load_arg reg, ix
 * Load argument number `ix` (0-based, ix >= 4, i.e. passed on the stack)
 * into `reg`. Only valid right after prologue: the offset skips the
 * 10 * 4 bytes of core registers and 4 * 16 bytes of NEON registers
 * that prologue pushed.
 */
.macro load_arg reg, ix
    ldr     \reg, [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
.endm


/* <s_fmt>_to_<d_fmt>_neon_<precision>(const uint8_t *src, uint8_t *y,
 *                                     uint8_t *chroma,
 *                                     int width, int height,
 *                                     int y_stride, int c_stride,
 *                                     int src_stride,
 *                                     int32_t coeff_table[9]);
 */

/*
 * alias_loop_420sp set=1
 * Bind (or release, set == 0) the core-register names used by loop_420sp.
 * Several names deliberately share a register:
 *   src/src0 (r0), y/y0 (r1), width/header (r3), c_stride/c_padding (r6),
 *   y1/coeff_table (r12).
 */
.macro alias_loop_420sp set=1
    alias   src,        r0,         \set
    alias   src0,       src,        \set
    alias   y,          r1,         \set
    alias   y0,         y,          \set
    alias   chroma,     r2,         \set
    alias   width,      r3,         \set
    alias   header,     width,      \set

    alias   height,     r4,         \set
    alias   y_stride,   r5,         \set
    alias   c_stride,   r6,         \set
    alias   c_padding,  c_stride,   \set
    alias   src_stride, r7,         \set

    alias   y0_end,     r8,         \set

    alias   src_padding,r9,         \set
    alias   y_padding,  r10,        \set

    alias   src1,       r11,        \set
    alias   y1,         r12,        \set

    alias   coeff_table,r12,        \set
.endm


/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>.
 *
 *   init    macro invoked once with the coeff_table register
 *   kernel  macro invoked as
 *               kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma [, count]
 *           that converts one block of two source rows and advances the
 *           pointers it is given (see kernel_420_16x2).
 *
 * Rows are processed two at a time (src0/src1, y0/y1) with one shared
 * chroma row, until height is exhausted.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    load_arg    height, 4
    load_arg    y_stride, 5
    load_arg    c_stride, 6
    load_arg    src_stride, 7
    load_arg    coeff_table, 8

    /* coeff_table lives in r12, which is reused as y1 further down,
     * so it is only valid up to this point. */
    \init       coeff_table

    /* Distance from the end of one row to the start of the next.
     * The source advances 4 bytes per pixel, hence LSL #2. */
    sub         y_padding, y_stride, width
    sub         c_padding, c_stride, width
    sub         src_padding, src_stride, width, LSL #2

    add         y0_end, y0, width
    /* header shares r3 with width: from here on only width % 16 is kept. */
    and         header, width, #15

    add         y1, y0, y_stride
    add         src1, src0, src_stride

0:
    /* Leftover (width % 16) pixels are handled first: the kernel is run
     * with a count so that the pointers advance by only `header` pixels
     * and the next block overlaps this one. */
    cmp         header, #0
    beq         1f

    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

    /* NOTE(review): the block loop below always runs at least once, so
     * this appears to assume width >= 16 - confirm against callers. */
1:
    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses: compare them unsigned (LO). A signed
     * LT would end the row early if it straddles 0x80000000. */
    cmp         y0, y0_end
    blo         1b
2:
    /* y1/src1 now point at the end of the second row of the pair;
     * step every pointer on to the next pair of rows. */
    add         y0, y1, y_padding
    add         y0_end, y1, y_stride
    add         chroma, chroma, c_padding
    add         src0, src1, src_padding

    add         y1, y0, y_stride
    add         src1, src0, src_stride

    subs        height, height, #2

    bgt         0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm

/*
 * Pairwise-add horizontally adjacent 8-bit samples of the first row into
 * 16-bit sums. The r/g/b 16x8 and 8x16 register names are aliases that
 * must be bound by the user of this macro (the 16x8 names are not
 * defined in this file).
 */
.macro downsample
    vpaddl.u8   r16x8, r8x16
    vpaddl.u8   g16x8, g8x16
    vpaddl.u8   b16x8, b8x16
.endm


/* accumulate and right shift by 2 */
/*
 * Adds the pairwise sums of the second row to those left by `downsample`
 * and divides by 4 with rounding, i.e. averages each 2x2 pixel block.
 */
.macro downsample_ars2
    vpadal.u8   r16x8, r8x16
    vpadal.u8   g16x8, g8x16
    vpadal.u8   b16x8, b8x16

    vrshr.u16   r16x8, r16x8, #2
    vrshr.u16   g16x8, g16x8, #2
    vrshr.u16   b16x8, b16x8, #2
.endm

/*
 * store_y8_16x1 dst [, count]
 * Store the 16 luma bytes in y8x16 at dst. Without count, dst advances by
 * the 16 bytes written; with count, all 16 bytes are still written but
 * dst advances by count bytes only.
 */
.macro store_y8_16x1 dst, count
.ifc "\count",""
    vstmia      \dst!, {y8x16}
.else
    vstmia      \dst, {y8x16}
    add         \dst, \dst, \count
.endif
.endm

/*
 * store_chroma_nv12_8x1 dst [, count]
 * Store 8 interleaved chroma pairs in U,V order. dst is post-incremented
 * by the bytes written, or by the register count when one is given.
 */
.macro store_chroma_nv12_8x1 dst, count
.ifc "\count",""
    vst2.i8     {u8x8, v8x8}, [\dst]!
.else
    vst2.i8     {u8x8, v8x8}, [\dst], \count
.endif
.endm

/* Same as store_chroma_nv12_8x1 but interleaved in V,U order. */
.macro store_chroma_nv21_8x1 dst, count
.ifc "\count",""
    vst2.i8     {v8x8, u8x8}, [\dst]!
.else
    vst2.i8     {v8x8, u8x8}, [\dst], \count
.endif
.endm

/*
 * load_8888_16x1 a, b, c, d, src [, count]
 * De-interleave 16 four-byte pixels (64 bytes) from src into the four
 * registers <a>8x16 .. <d>8x16, one channel per register, in memory order
 * a, b, c, d. Without count, src advances by the 64 bytes read; with
 * count, the same 64 bytes are read but src ends up advanced by
 * count * 4 bytes only.
 */
.macro load_8888_16x1 a, b, c, d, src, count
.ifc "\count",""
    vld4.8  {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.else
    vld4.8  {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
    /* undo the 32-byte writeback of the first load, then step by count pixels */
    sub     \src, \src, #32
    add     \src, \src, \count, LSL #2
.endif
.endm

/* Load 16 pixels stored in R,G,B,X byte order. */
.macro load_rgbx_16x1 src, count
    load_8888_16x1 r, g, b, x, \src, \count
.endm

/* Load 16 pixels stored in B,G,R,X byte order. */
.macro load_bgrx_16x1 src, count
    load_8888_16x1 b, g, r, x, \src, \count
.endm

/* Bind/release the r,g,b,x 8x16 channel aliases for an RGBX source. */
.macro alias_src_rgbx set=1
    alias_src_8888 r, g, b, x, \set
.endm

/* Bind/release the r,g,b,x 8x16 channel aliases for a BGRX source. */
.macro alias_src_bgrx set=1
    alias_src_8888 b, g, r, x, \set
.endm

/*
 * Bind/release u8x8 and v8x8 for NV12 output: U in the low half and V in
 * the high half of c8x8x2 (c8x8x2 itself is not defined in this file).
 */
.macro alias_dst_nv12 set=1
    alias   u8x8, c8x8x2_l, \set
    alias   v8x8, c8x8x2_h, \set
.endm

/* As alias_dst_nv12, with U and V swapped for NV21 output. */
.macro alias_dst_nv21 set=1
    alias   v8x8, c8x8x2_l, \set
    alias   u8x8, c8x8x2_h, \set
.endm


/* common aliases */

/* d0-d2: per-channel coefficients; lanes 0..2 of each are named Y, U, V. */
alias   CO_R, d0
CO_RY   .dn     d0.s16[0]
CO_RU   .dn     d0.s16[1]
CO_RV   .dn     d0.s16[2]

alias   CO_G, d1
CO_GY   .dn     d1.s16[0]
CO_GU   .dn     d1.s16[1]
CO_GV   .dn     d1.s16[2]

alias   CO_B, d2
CO_BY   .dn     d2.s16[0]
CO_BU   .dn     d2.s16[1]
CO_BV   .dn     d2.s16[2]

/* BIAS_U and BIAS_V name the same register (d3). */
alias   BIAS_U, d3
alias   BIAS_V, BIAS_U

alias   BIAS_Y, q2


/* q3-q6 R8G8B8X8 x16 */

/*
 * alias_src_8888 a, b, c, d, set
 * Bind/release <a>8x16 .. <d>8x16 (and their _l/_h halves) to q3 .. q6,
 * in that order.
 */
.macro alias_src_8888 a, b, c, d, set
    alias_qw    \a\()8x16, q3, \set
    alias_qw    \b\()8x16, q4, \set
    alias_qw    \c\()8x16, q5, \set
    alias_qw    \d\()8x16, q6, \set
.endm

/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma [, count]
 * Convert one 16x2 pixel block: 16 luma bytes for each of the two rows
 * and one row of 8 interleaved chroma pairs computed from the 2x2
 * averages of both rows. All five pointers are advanced; when count is
 * given they advance by count pixels instead of a full block (see the
 * load/store macros).
 *
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this file and
 * must be provided wherever this macro is expanded.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    alias_src_\rgb_fmt
    alias_dst_\yuv_fmt

    /* first row: luma, and start the horizontal chroma sums */
    load_\rgb_fmt\()_16x1 \rgb0, \count

    downsample
    compute_y_16x1
    store_y8_16x1 \y0, \count


    /* second row: luma, and finish the 2x2 chroma averages */
    load_\rgb_fmt\()_16x1 \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1 \y1, \count

    compute_chroma_8x1 u, U
    compute_chroma_8x1 v, V

    store_chroma_\yuv_fmt\()_8x1 \chroma, \count

    alias_dst_\yuv_fmt 0
    alias_src_\rgb_fmt 0
.endm