/*
 * ARM-NEON-optimized IDCT functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * ff_put_pixels_clamped_neon
 *
 * Converts an 8x8 block of signed 16-bit values to unsigned 8-bit with
 * saturation (each value is clamped to 0..255) and stores the result as
 * eight rows of eight bytes.
 *
 * In:   r0 = source, 64 consecutive 16-bit values (128 bytes); the :128
 *            qualifier asserts 16-byte alignment
 *       r1 = destination of the first row; the :64 qualifier asserts
 *            8-byte alignment of every row written
 *       r2 = byte distance between consecutive destination rows
 * Out:  nothing
 * Clobbers: r0, r1, d0-d7, d16-d31 (d8-d15 are not touched)
 *
 * NOTE(review): presumably declared in C as
 *   void ff_put_pixels_clamped_neon(const int16_t *block, uint8_t *pixels,
 *                                   ptrdiff_t line_size);
 * the prototype is not visible in this file - confirm against the caller.
 *
 * Loads, narrowing and stores are interleaved (presumably to hide memory
 * latency); keep the instruction order when editing.
 */
function ff_put_pixels_clamped_neon, export=1
        vld1.16         {d16-d19}, [r0,:128]!   @ q8,  q9  = source rows 0-1
        vqmovun.s16     d0, q8                  @ row 0 -> u8, saturated
        vld1.16         {d20-d23}, [r0,:128]!   @ q10, q11 = source rows 2-3
        vqmovun.s16     d1, q9                  @ row 1
        vld1.16         {d24-d27}, [r0,:128]!   @ q12, q13 = source rows 4-5
        vqmovun.s16     d2, q10                 @ row 2
        vld1.16         {d28-d31}, [r0,:128]!   @ q14, q15 = source rows 6-7
        vqmovun.s16     d3, q11                 @ row 3
        vst1.8          {d0},      [r1,:64], r2 @ store row 0, advance one row
        vqmovun.s16     d4, q12                 @ row 4
        vst1.8          {d1},      [r1,:64], r2 @ store row 1
        vqmovun.s16     d5, q13                 @ row 5
        vst1.8          {d2},      [r1,:64], r2 @ store row 2
        vqmovun.s16     d6, q14                 @ row 6
        vst1.8          {d3},      [r1,:64], r2 @ store row 3
        vqmovun.s16     d7, q15                 @ row 7
        vst1.8          {d4},      [r1,:64], r2 @ store row 4
        vst1.8          {d5},      [r1,:64], r2 @ store row 5
        vst1.8          {d6},      [r1,:64], r2 @ store row 6
        vst1.8          {d7},      [r1,:64], r2 @ store row 7
        bx              lr
endfunc

/*
 * ff_put_signed_pixels_clamped_neon
 *
 * Converts an 8x8 block of signed 16-bit values to bytes with a +128 bias:
 * each value is first saturated to the signed 8-bit range (-128..127) and
 * then 128 is added modulo 256, giving a result in 0..255. The block is
 * stored as eight rows of eight bytes.
 *
 * In:   r0 = source, 64 consecutive 16-bit values (128 bytes); the :128
 *            qualifier asserts 16-byte alignment
 *       r1 = destination of the first row; the :64 qualifier asserts
 *            8-byte alignment of every row written
 *       r2 = byte distance between consecutive destination rows
 * Out:  nothing
 * Clobbers: r0, r1, d0-d7, d16-d27, d31 (d8-d15 are not touched)
 *
 * NOTE(review): presumably declared in C as
 *   void ff_put_signed_pixels_clamped_neon(const int16_t *block,
 *                                          uint8_t *pixels,
 *                                          ptrdiff_t line_size);
 * the prototype is not visible in this file - confirm against the caller.
 *
 * q8 and q9 are reloaded for rows 2 and 3 only after rows 0 and 1 have been
 * narrowed out of them; keep the instruction order when editing.
 */
function ff_put_signed_pixels_clamped_neon, export=1
        vmov.u8         d31, #128               @ bias added to every output byte
        vld1.16         {d16-d17}, [r0,:128]!   @ q8  = source row 0
        vqmovn.s16      d0, q8                  @ row 0 -> s8, saturated
        vld1.16         {d18-d19}, [r0,:128]!   @ q9  = source row 1
        vqmovn.s16      d1, q9                  @ row 1
        vld1.16         {d16-d17}, [r0,:128]!   @ q8  = source row 2 (q8 reused)
        vqmovn.s16      d2, q8                  @ row 2
        vld1.16         {d18-d19}, [r0,:128]!   @ q9  = source row 3 (q9 reused)
        vadd.u8         d0, d0, d31             @ row 0 += 128 (wraps to 0..255)
        vld1.16         {d20-d21}, [r0,:128]!   @ q10 = source row 4
        vadd.u8         d1, d1, d31             @ row 1 += 128
        vld1.16         {d22-d23}, [r0,:128]!   @ q11 = source row 5
        vadd.u8         d2, d2, d31             @ row 2 += 128
        vst1.8          {d0},      [r1,:64], r2 @ store row 0, advance one row
        vqmovn.s16      d3, q9                  @ row 3
        vst1.8          {d1},      [r1,:64], r2 @ store row 1
        vqmovn.s16      d4, q10                 @ row 4
        vst1.8          {d2},      [r1,:64], r2 @ store row 2
        vqmovn.s16      d5, q11                 @ row 5
        vld1.16         {d24-d25}, [r0,:128]!   @ q12 = source row 6
        vadd.u8         d3, d3, d31             @ row 3 += 128
        vld1.16         {d26-d27}, [r0,:128]!   @ q13 = source row 7
        vadd.u8         d4, d4, d31             @ row 4 += 128
        vadd.u8         d5, d5, d31             @ row 5 += 128
        vst1.8          {d3},      [r1,:64], r2 @ store row 3
        vqmovn.s16      d6, q12                 @ row 6
        vst1.8          {d4},      [r1,:64], r2 @ store row 4
        vqmovn.s16      d7, q13                 @ row 7
        vst1.8          {d5},      [r1,:64], r2 @ store row 5
        vadd.u8         d6, d6, d31             @ row 6 += 128
        vadd.u8         d7, d7, d31             @ row 7 += 128
        vst1.8          {d6},      [r1,:64], r2 @ store row 6
        vst1.8          {d7},      [r1,:64], r2 @ store row 7
        bx              lr
endfunc

/*
 * ff_add_pixels_clamped_neon
 *
 * Adds an 8x8 block of signed 16-bit values to an 8x8 block of unsigned
 * bytes in place: every byte is widened and added to the matching 16-bit
 * value, the sum is saturated to 0..255 and written back to the location
 * the byte was read from.
 *
 * In:   r0 = 16-bit addends, 64 consecutive values (128 bytes); the :128
 *            qualifier asserts 16-byte alignment
 *       r1 = first row of the byte block (read and overwritten); the :64
 *            qualifier asserts 8-byte alignment of every row
 *       r2 = byte distance between consecutive rows of the byte block
 * Out:  nothing
 * Clobbers: r0, r1, r3, d0-d7, d16-d19 (d8-d15 are not touched)
 *
 * NOTE(review): presumably declared in C as
 *   void ff_add_pixels_clamped_neon(const int16_t *block, uint8_t *pixels,
 *                                   ptrdiff_t line_size);
 * the prototype is not visible in this file - confirm against the caller.
 *
 * r1 walks the rows for reading and r3 trails behind it for writing. The
 * registers q0-q3 and d16-d19 are reused for rows 4-7 only after the results
 * for rows 0-3 held in them have been consumed; keep the instruction order
 * when editing.
 */
function ff_add_pixels_clamped_neon, export=1
        mov             r3, r1                  @ r3 = write pointer, r1 = read pointer
        vld1.8          {d16},   [r1,:64], r2   @ pixels row 0
        vld1.16         {d0-d1}, [r0,:128]!     @ q0 = addends row 0
        vaddw.u8        q0, q0, d16             @ row 0: addend + widened pixel
        vld1.8          {d17},   [r1,:64], r2   @ pixels row 1
        vld1.16         {d2-d3}, [r0,:128]!     @ q1 = addends row 1
        vqmovun.s16     d0, q0                  @ row 0 -> u8, saturated
        vld1.8          {d18},   [r1,:64], r2   @ pixels row 2
        vaddw.u8        q1, q1, d17             @ row 1 sum
        vld1.16         {d4-d5}, [r0,:128]!     @ q2 = addends row 2
        vaddw.u8        q2, q2, d18             @ row 2 sum
        vst1.8          {d0},    [r3,:64], r2   @ write back row 0
        vqmovun.s16     d2, q1                  @ row 1 -> u8
        vld1.8          {d19},   [r1,:64], r2   @ pixels row 3
        vld1.16         {d6-d7}, [r0,:128]!     @ q3 = addends row 3
        vaddw.u8        q3, q3, d19             @ row 3 sum
        vqmovun.s16     d4, q2                  @ row 2 -> u8
        vst1.8          {d2},    [r3,:64], r2   @ write back row 1
        vld1.8          {d16},   [r1,:64], r2   @ pixels row 4 (d16 reused)
        vqmovun.s16     d6, q3                  @ row 3 -> u8
        vld1.16         {d0-d1}, [r0,:128]!     @ q0 = addends row 4 (q0 reused)
        vaddw.u8        q0, q0, d16             @ row 4 sum
        vst1.8          {d4},    [r3,:64], r2   @ write back row 2
        vld1.8          {d17},   [r1,:64], r2   @ pixels row 5
        vld1.16         {d2-d3}, [r0,:128]!     @ q1 = addends row 5
        vaddw.u8        q1, q1, d17             @ row 5 sum
        vst1.8          {d6},    [r3,:64], r2   @ write back row 3
        vqmovun.s16     d0, q0                  @ row 4 -> u8
        vld1.8          {d18},   [r1,:64], r2   @ pixels row 6
        vld1.16         {d4-d5}, [r0,:128]!     @ q2 = addends row 6
        vaddw.u8        q2, q2, d18             @ row 6 sum
        vst1.8          {d0},    [r3,:64], r2   @ write back row 4
        vqmovun.s16     d2, q1                  @ row 5 -> u8
        vld1.8          {d19},   [r1,:64], r2   @ pixels row 7
        vqmovun.s16     d4, q2                  @ row 6 -> u8
        vld1.16         {d6-d7}, [r0,:128]!     @ q3 = addends row 7
        vaddw.u8        q3, q3, d19             @ row 7 sum
        vst1.8          {d2},    [r3,:64], r2   @ write back row 5
        vqmovun.s16     d6, q3                  @ row 7 -> u8
        vst1.8          {d4},    [r3,:64], r2   @ write back row 6
        vst1.8          {d6},    [r3,:64], r2   @ write back row 7
        bx              lr
endfunc