/*
 * Loongson SIMD optimized blockdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23
24 #include "blockdsp_mips.h"
25 #include "libavutil/mips/mmiutils.h"
26
ff_fill_block16_mmi(uint8_t * block,uint8_t value,ptrdiff_t line_size,int h)27 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h)
28 {
29 double ftmp[1];
30 DECLARE_VAR_ALL64;
31
32 __asm__ volatile (
33 "mtc1 %[value], %[ftmp0] \n\t"
34 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
35 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
36 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
37 "1: \n\t"
38 MMI_SDC1(%[ftmp0], %[block], 0x00)
39 PTR_ADDI "%[h], %[h], -0x01 \n\t"
40 MMI_SDC1(%[ftmp0], %[block], 0x08)
41 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
42 "bnez %[h], 1b \n\t"
43 : [ftmp0]"=&f"(ftmp[0]),
44 RESTRICT_ASM_ALL64
45 [block]"+&r"(block), [h]"+&r"(h)
46 : [value]"r"(value), [line_size]"r"((mips_reg)line_size)
47 : "memory"
48 );
49 }
50
ff_fill_block8_mmi(uint8_t * block,uint8_t value,ptrdiff_t line_size,int h)51 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h)
52 {
53 double ftmp0;
54 DECLARE_VAR_ALL64;
55
56 __asm__ volatile (
57 "mtc1 %[value], %[ftmp0] \n\t"
58 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
59 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
60 "punpcklbh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
61 "1: \n\t"
62 MMI_SDC1(%[ftmp0], %[block], 0x00)
63 PTR_ADDI "%[h], %[h], -0x01 \n\t"
64 PTR_ADDU "%[block], %[block], %[line_size] \n\t"
65 "bnez %[h], 1b \n\t"
66 : [ftmp0]"=&f"(ftmp0),
67 RESTRICT_ASM_ALL64
68 [block]"+&r"(block), [h]"+&r"(h)
69 : [value]"r"(value), [line_size]"r"((mips_reg)line_size)
70 : "memory"
71 );
72 }
73
ff_clear_block_mmi(int16_t * block)74 void ff_clear_block_mmi(int16_t *block)
75 {
76 double ftmp[2];
77
78 __asm__ volatile (
79 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
80 "xor %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
81 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x00)
82 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x10)
83 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x20)
84 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x30)
85 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x40)
86 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x50)
87 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x60)
88 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x70)
89 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1])
90 : [block]"r"(block)
91 : "memory"
92 );
93 }
94
ff_clear_blocks_mmi(int16_t * block)95 void ff_clear_blocks_mmi(int16_t *block)
96 {
97 double ftmp[2];
98
99 __asm__ volatile (
100 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
101 "xor %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
102 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x00)
103 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x10)
104 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x20)
105 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x30)
106 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x40)
107 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x50)
108 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x60)
109 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x70)
110
111 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x80)
112 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x90)
113 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xa0)
114 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xb0)
115 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xc0)
116 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xd0)
117 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xe0)
118 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xf0)
119
120 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x100)
121 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x110)
122 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x120)
123 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x130)
124 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x140)
125 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x150)
126 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x160)
127 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x170)
128
129 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x180)
130 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x190)
131 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1a0)
132 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1b0)
133 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1c0)
134 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1d0)
135 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1e0)
136 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1f0)
137
138 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x200)
139 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x210)
140 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x220)
141 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x230)
142 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x240)
143 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x250)
144 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x260)
145 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x270)
146
147 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x280)
148 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x290)
149 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2a0)
150 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2b0)
151 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2c0)
152 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2d0)
153 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2e0)
154 MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2f0)
155 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1])
156 : [block]"r"((uint64_t *)block)
157 : "memory"
158 );
159 }
160