/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names. */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Register usage observed in the code below: a1 = pix1, a2 = pix2,
 * a3 = line_size, a4 = h; a0 (the first argument) is never read and is
 * reused as scratch.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        /* Sum of absolute differences (perr) over 16-byte-wide rows.
           v0 accumulates the total error; a1 = pix1 (loaded with plain
           ldq, so it is assumed 8-byte aligned), a2 = pix2 (may be
           unaligned), a3 = line_size, a4 = h.
           Dispatch on the low three bits of pix2. */
        and     a2, 7, t0               # t0 = pix2 misalignment (0..7)
        clr     v0                      # error accumulator = 0
        beq     t0, $aligned
        .align 4
$unaligned:
        /* Slow path: two rows per iteration.  Each unaligned 16-byte
           row is fetched with three ldq_u and spliced together with
           extql/extqh.
           Registers:
           line 0:
             t0: left_u  -> left lo -> left
             t1: mid
             t2: right_u -> right hi -> right
             t3: ref left
             t4: ref right
           line 1:
             t5: left_u  -> left lo -> left
             t6: mid
             t7: right_u -> right hi -> right
             t8: ref left
             t9: ref right
           temp:
             ta: left hi
             tb: right lo
             tc: error left
             td: error right */

        /* load line 0 */
        ldq_u   t0, 0(a2)               # left_u
        ldq_u   t1, 8(a2)               # mid
        ldq_u   t2, 16(a2)              # right_u
        ldq     t3, 0(a1)               # ref left
        ldq     t4, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        addq    a2, a3, a2              # pix2 += line_size
        /* load line 1 */
        ldq_u   t5, 0(a2)               # left_u
        ldq_u   t6, 8(a2)               # mid
        ldq_u   t7, 16(a2)              # right_u
        ldq     t8, 0(a1)               # ref left
        ldq     t9, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        addq    a2, a3, a2              # pix2 += line_size
        /* calc line 0.
           NOTE(review): a2 has already been advanced by 2*line_size
           here, but extql/extqh only use a2 & 7 — this is correct only
           if line_size is a multiple of 8 so every row shares the same
           misalignment.  Confirm against the callers. */
        extql   t0, a2, t0              # left lo
        extqh   t1, a2, ta              # left hi
        extql   t1, a2, tb              # right lo
        or      t0, ta, t0              # left
        extqh   t2, a2, t2              # right hi
        perr    t3, t0, tc              # error left
        or      t2, tb, t2              # right
        perr    t4, t2, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right
        /* calc line 1 */
        extql   t5, a2, t5              # left lo
        extqh   t6, a2, ta              # left hi
        extql   t6, a2, tb              # right lo
        or      t5, ta, t5              # left
        extqh   t7, a2, t7              # right hi
        perr    t8, t5, tc              # error left
        or      t7, tb, t7              # right
        perr    t9, t7, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right
        /* loop — assumes h is a nonzero multiple of 2 on this path */
        subq    a4, 2, a4               # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
        /* Fast path: pix2 is 8-byte aligned, four rows per iteration
           with plain ldq loads.  te (= a5) and a0 are pressed into
           service as extra scratch registers; a0 holds the unused first
           argument, so clobbering it is harmless. */
        /* load line 0 */
        ldq     t0, 0(a2)               # left
        ldq     t1, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     t2, 0(a1)               # ref left
        ldq     t3, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        /* load line 1 */
        ldq     t4, 0(a2)               # left
        ldq     t5, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     t6, 0(a1)               # ref left
        ldq     t7, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        /* load line 2 */
        ldq     t8, 0(a2)               # left
        ldq     t9, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     ta, 0(a1)               # ref left
        ldq     tb, 8(a1)               # ref right
        addq    a1, a3, a1              # pix1 += line_size
        /* load line 3 */
        ldq     tc, 0(a2)               # left
        ldq     td, 8(a2)               # right
        addq    a2, a3, a2              # pix2 += line_size
        ldq     te, 0(a1)               # ref left
        ldq     a0, 8(a1)               # ref right (clobbers unused arg 0)
        /* calc line 0 */
        perr    t0, t2, t0              # error left
        addq    a1, a3, a1              # pix1 += line_size (finish line 3)
        perr    t1, t3, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 1 */
        perr    t4, t6, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t5, t7, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 2 */
        perr    t8, ta, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    t9, tb, t1              # error right
        addq    v0, t0, v0              # add error left
        /* calc line 3 */
        perr    tc, te, t0              # error left
        addq    v0, t1, v0              # add error right
        perr    td, a0, t1              # error right
        addq    v0, t0, v0              # add error left
        addq    v0, t1, v0              # add error right
        /* loop — assumes h is a nonzero multiple of 4 on this path */
        subq    a4, 4, a4               # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm