1 /*! 2 * \copy 3 * Copyright (c) 2013, Loongson Technology Co.,Ltd. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 * 31 */ 32 33 #ifndef ASMDEFS_MMI_H_ 34 #define ASMDEFS_MMI_H_ 35 36 #define CACHE_LINE_SIZE 32 37 38 #if defined(_ABI64) && _MIPS_SIM == _ABI64 39 # define mips_reg int64_t 40 # define PTRSIZE " 8 " 41 # define PTRLOG " 3 " 42 # define PTR_ADDU "daddu " 43 # define PTR_ADDIU "daddiu " 44 # define PTR_ADDI "daddi " 45 # define PTR_SUBU "dsubu " 46 # define PTR_L "ld " 47 # define PTR_S "sd " 48 # define PTR_SRA "dsra " 49 # define PTR_SRL "dsrl " 50 # define PTR_SLL "dsll " 51 #else 52 # define mips_reg int32_t 53 # define PTRSIZE " 4 " 54 # define PTRLOG " 2 " 55 # define PTR_ADDU "addu " 56 # define PTR_ADDIU "addiu " 57 # define PTR_ADDI "addi " 58 # define PTR_SUBU "subu " 59 # define PTR_L "lw " 60 # define PTR_S "sw " 61 # define PTR_SRA "sra " 62 # define PTR_SRL "srl " 63 # define PTR_SLL "sll " 64 #endif 65 66 #define MMI_XSawp_BH(f0, f2, f4, f6, f8, f10) \ 67 "mov.d "#f8", "#f2" \n\t" \ 68 "punpckhbh "#f2", "#f0", "#f4" \n\t" \ 69 "punpcklbh "#f0", "#f0", "#f4" \n\t" \ 70 "punpckhbh "#f10", "#f8", "#f6" \n\t" \ 71 "punpcklbh "#f8", "#f8", "#f6" \n\t" 72 73 #define MMI_XSawp_HW(f0, f2, f4, f6, f8, f10) \ 74 "mov.d "#f8", "#f2" \n\t" \ 75 "punpckhhw "#f2", "#f0", "#f4" \n\t" \ 76 "punpcklhw "#f0", "#f0", "#f4" \n\t" \ 77 "punpckhhw "#f10", "#f8", "#f6" \n\t" \ 78 "punpcklhw "#f8", "#f8", "#f6" \n\t" 79 80 #define MMI_XSawp_WD(f0, f2, f4, f6, f8, f10) \ 81 "mov.d "#f8", "#f2" \n\t" \ 82 "punpckhwd "#f2", "#f0", "#f4" \n\t" \ 83 "punpcklwd "#f0", "#f0", "#f4" \n\t" \ 84 "punpckhwd "#f10", "#f8", "#f6" \n\t" \ 85 "punpcklwd "#f8", "#f8", "#f6" \n\t" 86 87 #define MMI_XSawp_DQ(f0, f2, f4, f6, f8, f10) \ 88 "mov.d "#f8", "#f2" \n\t" \ 89 "mov.d "#f2", "#f4" \n\t" \ 90 "mov.d "#f10", "#f6" \n\t" 91 92 #define WELS_AbsH(f0, f2, f4, f6, f8, f10) \ 93 "xor "#f8", "#f8", "#f8" \n\t" \ 94 "psubh "#f10", "#f8", "#f6" \n\t" \ 95 "psubh "#f8", "#f8", "#f4" \n\t" \ 96 "pmaxsh "#f0", "#f4", "#f8" \n\t" \ 97 "pmaxsh "#f2", "#f6", "#f10" \n\t" 98 99 #define MMI_SumSub(f0, f2, f4, f6, f8, f10) \ 100 "mov.d "#f8", "#f4" \n\t" \ 101 "mov.d "#f10", "#f6" \n\t" \ 102 "paddh "#f4", "#f4", "#f0" \n\t" \ 103 "paddh "#f6", "#f6", "#f2" \n\t" \ 104 "psubh "#f0", "#f0", "#f8" \n\t" \ 105 "psubh "#f2", "#f2", "#f10" \n\t" 106 107 #define MMI_LoadDiff8P(f0, f2, f4, f6, f8, r0, r1) \ 108 "gsldlc1 "#f0", 0x7("#r0") \n\t" \ 109 "gsldlc1 "#f4", 0x7("#r1") \n\t" \ 110 "gsldrc1 "#f0", 0x0("#r0") \n\t" \ 111 "gsldrc1 "#f4", 0x0("#r1") \n\t" \ 112 "punpckhbh "#f2", "#f0", "#f8" \n\t" \ 113 "punpcklbh "#f0", "#f0", "#f8" \n\t" \ 114 "punpckhbh "#f6", "#f4", "#f8" \n\t" \ 115 "punpcklbh "#f4", "#f4", "#f8" \n\t" \ 116 "psubh "#f0", "#f0", "#f4" \n\t" \ 117 "psubh "#f2", "#f2", "#f6" \n\t" 118 119 #define MMI_TransTwo4x4H(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18) \ 120 MMI_XSawp_HW(f0, f2, f4, f6, f16, f18) \ 121 MMI_XSawp_HW(f8, f10, f12, f14, f4, f6) \ 122 MMI_XSawp_WD(f0, f2, f8, f10, f12, f14) \ 123 MMI_XSawp_WD(f16, f18, f4, f6, f8, f10) \ 124 MMI_XSawp_DQ(f0, f2, f16, f18, f4, f6) \ 125 MMI_XSawp_DQ(f12, f14, f8, f10, f16, f18) 126 127 #define MMI_TransTwo8x8B(f0, f2, f4, f6, f8, f10, f12, f14, f16, f18, f20, f22, f24, f26, f28, f30, r0, r1) \ 128 "dmfc1 "#r0", "#f28" \n\t" \ 129 "dmfc1 "#r1", "#f30" \n\t" \ 130 MMI_XSawp_BH(f0, f2, f4, f6, f28, f30) \ 131 MMI_XSawp_BH(f8, f10, f12, f14, f4, f6) \ 132 MMI_XSawp_BH(f16, f18, f20, f22, f12, f14) \ 133 "dmtc1 "#r0", "#f20" \n\t" \ 134 "dmtc1 "#r1", "#f22" \n\t" \ 135 "dmfc1 "#r0", "#f12" \n\t" \ 136 "dmfc1 "#r1", "#f14" \n\t" \ 137 MMI_XSawp_BH(f24, f26, f20, f22, f12, f14) \ 138 MMI_XSawp_HW(f0, f2, f8, f10, f20, f22) \ 139 MMI_XSawp_HW(f28, f30, f4, f6, f8, f10) \ 140 MMI_XSawp_HW(f16, f18, f24, f26, f4, f6) \ 141 "dmtc1 "#r0", "#f24" \n\t" \ 142 "dmtc1 "#r1", "#f26" \n\t" \ 143 "dmfc1 "#r0", "#f8" \n\t" \ 144 "dmfc1 "#r1", "#f10" \n\t" \ 145 MMI_XSawp_HW(f24, f26, f12, f14, f8, f10) \ 146 MMI_XSawp_WD(f0, f2, f16, f18, f12, f14) \ 147 MMI_XSawp_WD(f20, f22, f4, f6, f16, f18) \ 148 MMI_XSawp_WD(f28, f30, f24, f26, f4, f6) \ 149 "dmtc1 "#r0", "#f24" \n\t" \ 150 "dmtc1 "#r1", "#f26" \n\t" \ 151 "dmfc1 "#r0", "#f16" \n\t" \ 152 "dmfc1 "#r1", "#f18" \n\t" \ 153 MMI_XSawp_WD(f24, f26, f8, f10, f16, f18) \ 154 MMI_XSawp_DQ(f0, f2, f28, f30, f8, f10) \ 155 MMI_XSawp_DQ(f12, f14, f4, f6, f28, f30) \ 156 MMI_XSawp_DQ(f20, f22, f24, f26, f4, f6) \ 157 "dmtc1 "#r0", "#f24" \n\t" \ 158 "dmtc1 "#r1", "#f26" \n\t" \ 159 "dmfc1 "#r0", "#f0" \n\t" \ 160 "dmfc1 "#r1", "#f2" \n\t" \ 161 MMI_XSawp_DQ(f24, f26, f16, f18, f0, f2) \ 162 "dmtc1 "#r0", "#f16" \n\t" \ 163 "dmtc1 "#r1", "#f18" \n\t" 164 165 #define MMI_XSwap_HW_SINGLE(f0, f2, f4) \ 166 "punpckhhw "#f4", "#f0", "#f2" \n\t" \ 167 "punpcklhw "#f0", "#f0", "#f2" \n\t" 168 169 #define MMI_XSwap_WD_SINGLE(f0, f2, f4) \ 170 "punpckhwd "#f4", "#f0", "#f2" \n\t" \ 171 "punpcklwd "#f0", "#f0", "#f2" \n\t" 172 173 #define MMI_Trans4x4H_SINGLE(f0, f2, f4, f6, f8) \ 174 MMI_XSwap_HW_SINGLE(f0, f2, f8) \ 175 MMI_XSwap_HW_SINGLE(f4, f6, f2) \ 176 MMI_XSwap_WD_SINGLE(f0, f4, f6) \ 177 MMI_XSwap_WD_SINGLE(f8, f2, f4) 178 179 #define MMI_SumSub_SINGLE(f0, f2, f4) \ 180 "mov.d "#f4", "#f2" \n\t" \ 181 "psubh "#f2", "#f2", "#f0" \n\t" \ 182 "paddh "#f0", "#f0", "#f4" \n\t" 183 184 #define MMI_SumSubMul2_SINGLE(f0, f2, f4, f6) \ 185 "mov.d "#f4", "#f0" \n\t" \ 186 "psllh "#f0", "#f0", "#f6" \n\t" \ 187 "paddh "#f0", "#f0", "#f2" \n\t" \ 188 "psllh "#f2", "#f2", "#f6" \n\t" \ 189 "psubh "#f4", "#f4", "#f2" \n\t" 190 191 //f4 should be 0x0 192 #define MMI_Copy8Times(f0, f2, f4, r0) \ 193 "dmtc1 "#r0", "#f0" \n\t" \ 194 "pshufh "#f0", "#f0", "#f4" \n\t" \ 195 "mov.d "#f2", "#f0" \n\t" 196 197 //f4 should be 0x0 198 #define MMI_Copy16Times(f0, f2, f4, r0) \ 199 "dmtc1 "#r0", "#f0" \n\t" \ 200 "punpcklbh "#f0", "#f0", "#f0" \n\t" \ 201 "pshufh "#f0", "#f0", "#f4" \n\t" \ 202 "mov.d "#f2", "#f0" \n\t" 203 204 #define MMI_SumSubDiv2_SINGLE(f0, f2, f4, f6) \ 205 "psrah "#f4", "#f2", "#f6" \n\t" \ 206 "paddh "#f4", "#f4", "#f0" \n\t" \ 207 "psrah "#f0", "#f0", "#f6" \n\t" \ 208 "psubh "#f0", "#f0", "#f2" \n\t" 209 210 #define MMI_IDCT_SINGLE(f0, f2, f4, f6, f8, f10, f12) \ 211 MMI_SumSub_SINGLE(f6, f8, f10) \ 212 MMI_SumSubDiv2_SINGLE(f4, f2, f0, f12) \ 213 MMI_SumSub_SINGLE(f0, f6, f10) \ 214 MMI_SumSub_SINGLE(f4, f8, f10) 215 216 #define MMI_StoreDiff4P_SINGLE(f0, f2, f4, f6, r0, r1, f8) \ 217 "gsldlc1 "#f2", 0x7("#r1") \n\t" \ 218 "gsldrc1 "#f2", 0x0("#r1") \n\t" \ 219 "punpcklbh "#f2", "#f2", "#f6" \n\t" \ 220 "paddh "#f0", "#f0", "#f4" \n\t" \ 221 "psrah "#f0", "#f0", "#f8" \n\t" \ 222 "paddsh "#f0", "#f0", "#f2" \n\t" \ 223 "packushb "#f0", "#f0", "#f2" \n\t" \ 224 "gsswlc1 "#f0", 0x3("#r0") \n\t" \ 225 "gsswrc1 "#f0", 0x0("#r0") \n\t" 226 227 #define SUMH_HORIZON(f0, f2, f4, f6, f8) \ 228 "paddh "#f0", "#f0", "#f2" \n\t" \ 229 "punpckhhw "#f2", "#f0", "#f8" \n\t" \ 230 "punpcklhw "#f0", "#f0", "#f8" \n\t" \ 231 "paddw "#f0", "#f0", "#f2" \n\t" \ 232 "punpckhwd "#f2", "#f0", "#f0" \n\t" \ 233 "paddw "#f0", "#f0", "#f2" \n\t" 234 235 #define LOAD_COLUMN(f0, f2, f4, f6, f8, f10, f12, f14, r0, r1, r2) \ 236 "daddu "#r2", "#r0", "#r1" \n\t" \ 237 "gsldlc1 "#f0", 0x7("#r0") \n\t" \ 238 "gsldlc1 "#f4", 0x7("#r2") \n\t" \ 239 "gsldrc1 "#f0", 0x0("#r0") \n\t" \ 240 "gsldrc1 "#f4", 0x0("#r2") \n\t" \ 241 "punpcklbh "#f0", "#f0", "#f4" \n\t" \ 242 "daddu "#r0", "#r2", "#r1" \n\t" \ 243 "daddu "#r2", "#r0", "#r1" \n\t" \ 244 "gsldlc1 "#f8", 0x7("#r0") \n\t" \ 245 "gsldlc1 "#f4", 0x7("#r2") \n\t" \ 246 "gsldrc1 "#f8", 0x0("#r0") \n\t" \ 247 "gsldrc1 "#f4", 0x0("#r2") \n\t" \ 248 "punpcklbh "#f8", "#f8", "#f4" \n\t" \ 249 "punpckhhw "#f2", "#f0", "#f8" \n\t" \ 250 "punpcklhw "#f0", "#f0", "#f8" \n\t" \ 251 "daddu "#r0", "#r2", "#r1" \n\t" \ 252 "daddu "#r2", "#r0", "#r1" \n\t" \ 253 "gsldlc1 "#f12", 0x7("#r0") \n\t" \ 254 "gsldlc1 "#f4", 0x7("#r2") \n\t" \ 255 "gsldrc1 "#f12", 0x0("#r0") \n\t" \ 256 "gsldrc1 "#f4", 0x0("#r2") \n\t" \ 257 "punpcklbh "#f12", "#f12", "#f4" \n\t" \ 258 "daddu "#r0", "#r2", "#r1" \n\t" \ 259 "daddu "#r2", "#r0", "#r1" \n\t" \ 260 "gsldlc1 "#f8", 0x7("#r0") \n\t" \ 261 "gsldlc1 "#f4", 0x7("#r2") \n\t" \ 262 "gsldrc1 "#f8", 0x0("#r0") \n\t" \ 263 "gsldrc1 "#f4", 0x0("#r2") \n\t" \ 264 "punpcklbh "#f8", "#f8", "#f4" \n\t" \ 265 "punpckhhw "#f14", "#f12", "#f8" \n\t" \ 266 "punpcklhw "#f12", "#f12", "#f8" \n\t" \ 267 "daddu "#r0", "#r2", "#r1" \n\t" \ 268 "punpcklwd "#f0", "#f2", "#f14" \n\t" \ 269 "punpckhwd "#f2", "#f2", "#f14" \n\t" 270 271 #define LOAD_COLUMN_C(f0, f2, f4, f6, r0, r1, r2) \ 272 "daddu "#r2", "#r0", "#r1" \n\t" \ 273 "gsldlc1 "#f0", 0x7("#r0") \n\t" \ 274 "gsldlc1 "#f2", 0x7("#r2") \n\t" \ 275 "gsldrc1 "#f0", 0x0("#r0") \n\t" \ 276 "gsldrc1 "#f2", 0x0("#r2") \n\t" \ 277 "punpcklbh "#f0", "#f0", "#f2" \n\t" \ 278 "daddu "#r0", "#r2", "#r1" \n\t" \ 279 "daddu "#r2", "#r0", "#r1" \n\t" \ 280 "gsldlc1 "#f4", 0x7("#r0") \n\t" \ 281 "gsldlc1 "#f2", 0x7("#r2") \n\t" \ 282 "gsldrc1 "#f4", 0x0("#r0") \n\t" \ 283 "gsldrc1 "#f2", 0x0("#r2") \n\t" \ 284 "punpcklbh "#f4", "#f4", "#f2" \n\t" \ 285 "punpckhhw "#f0", "#f0", "#f4" \n\t" \ 286 "daddu "#r0", "#r2", "#r1" \n\t" 287 288 /** 289 * backup register 290 */ 291 #if defined(_ABI64) && _MIPS_SIM == _ABI64 292 #define BACKUP_REG \ 293 double __attribute__((aligned(16))) __back_temp[8]; \ 294 __asm__ volatile ( \ 295 "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ 296 "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \ 297 "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \ 298 "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \ 299 : \ 300 : [temp]"r"(__back_temp) \ 301 : "memory" \ 302 ); 303 #else 304 #define BACKUP_REG \ 305 double __attribute__((aligned(16))) __back_temp[8]; \ 306 __asm__ volatile ( \ 307 "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \ 308 "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \ 309 "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \ 310 : \ 311 : [temp]"r"(__back_temp) \ 312 : "memory" \ 313 ); 314 #endif 315 316 /** 317 * recover register 318 */ 319 #if defined(_ABI64) && _MIPS_SIM == _ABI64 320 #define RECOVER_REG \ 321 __asm__ volatile ( \ 322 "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ 323 "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \ 324 "gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \ 325 "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \ 326 : \ 327 : [temp]"r"(__back_temp) \ 328 : "memory" \ 329 ); 330 #else 331 #define RECOVER_REG \ 332 __asm__ volatile ( \ 333 "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \ 334 "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \ 335 "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \ 336 : \ 337 : [temp]"r"(__back_temp) \ 338 : "memory" \ 339 ); 340 #endif 341 342 # define OK 1 343 # define NOTOK 0 344 345 #endif /* ASMDEFS_MMI_H_ */ 346