• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;*!
2;* \copy
3;*     Copyright (c)  2010-2013, Cisco Systems
4;*     All rights reserved.
5;*
6;*     Redistribution and use in source and binary forms, with or without
7;*     modification, are permitted provided that the following conditions
8;*     are met:
9;*
10;*        * Redistributions of source code must retain the above copyright
11;*          notice, this list of conditions and the following disclaimer.
12;*
13;*        * Redistributions in binary form must reproduce the above copyright
14;*          notice, this list of conditions and the following disclaimer in
15;*          the documentation and/or other materials provided with the
16;*          distribution.
17;*
18;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29;*     POSSIBILITY OF SUCH DAMAGE.
30;*
31;*
32;*  predenoise.asm
33;*
34;*  Abstract
35;*      denoise for SVC2.1
36;*  History
37;*      4/13/2010 Created
38;*      7/30/2010 Modified
39;*
40;*
41;*************************************************************************/
42%include "asm_inc.asm"
43
44;***********************************************************************
45; Constant
46;***********************************************************************
; Constant tables used by the filters below.
; When X86_32_PICASM is defined the tables are emitted into .text,
; otherwise into .rodata; in both cases the section is 16-byte aligned.
; Each table is eight 16-bit words (16 bytes), so both start on a
; 16-byte boundary.
47%ifdef X86_32_PICASM
48SECTION .text align=16
49%else
50SECTION .rodata align=16
51%endif
52
; sse2_32: the value 32 in every word lane (read with movdqa by
;          BilateralLumaFilter8_sse2 in the non-X86_32_PICASM build).
53sse2_32 times 8 dw 32
; sse2_20: the value 20 in every word lane (centre-tap multiplier in
;          WEIGHT_LINE3_UV).
54sse2_20 times 8 dw 20
55
56
57
58;***********************************************************************
59; Code
60;***********************************************************************
61SECTION .text
62
;-----------------------------------------------------------------------
; WEIGHT_LINE  tmp, nbr, bound, wsum, psum, center, zero, diff, src
;
; Adds the contribution of one neighbour position to the bilateral
; accumulators, for eight pixels at once (16-bit lanes).
;
;   arg 1  tmp     xmm scratch (clobbered; holds the weight on exit)
;   arg 2  nbr     xmm scratch (clobbered; receives the neighbour pixels)
;   arg 3  bound   xmm, read only: value the absolute difference is
;                  subtracted from
;   arg 4  wsum    xmm accumulator: running total of the weights
;   arg 5  psum    xmm accumulator: running total of weight * neighbour
;   arg 6  center  xmm, read only: centre pixels widened to words
;   arg 7  zero    xmm, read only: interleaved as the high byte of every
;                  lane (the caller in this file passes an all-zero reg)
;   arg 8  diff    xmm scratch (clobbered)
;   arg 9  src     memory operand supplying 8 neighbour bytes
;
; Per lane:
;   d      = |nbr - center|
;   w      = (max(bound - d, 0) ^ 2) >> 5
;   wsum  += w              (unsigned saturating add)
;   psum  += w * nbr        (low 16 bits of product, saturating add)
;
; The subtractions are byte-wise saturating (psubusb); they behave as
; per-lane operations because the high byte of every lane is zero.
;-----------------------------------------------------------------------
%macro WEIGHT_LINE  9
    %define %%tmp       %1
    %define %%nbr       %2
    %define %%bound     %3
    %define %%wsum      %4
    %define %%psum      %5
    %define %%center    %6
    %define %%zero      %7
    %define %%diff      %8
    %define %%src       %9

    ; Fetch eight neighbour bytes and widen them to words.
    movq        %%nbr, %%src
    punpcklbw   %%nbr, %%zero

    ; Absolute difference: OR of the two one-sided saturating differences.
    movdqa      %%tmp, %%center
    movdqa      %%diff, %%nbr
    psubusb     %%tmp, %%diff               ; max(center - nbr, 0)
    psubusb     %%diff, %%center            ; max(nbr - center, 0)
    por         %%diff, %%tmp               ; abs(nbr - center)

    ; Weight = square of the clamped remainder, scaled down by 32.
    movdqa      %%tmp, %%bound
    psubusb     %%tmp, %%diff
    pmullw      %%tmp, %%tmp
    psrlw       %%tmp, 5

    ; Fold the weight and the weighted pixel into the accumulators.
    paddusw     %%wsum, %%tmp
    pmullw      %%nbr, %%tmp
    paddusw     %%psum, %%nbr
%endmacro
82
;-----------------------------------------------------------------------
; WEIGHT_LINE1_UV_TAP  row, tmp, acc, zero, byte_offset, shift
;
; Private helper for WEIGHT_LINE1_UV: adds one horizontal tap.
; Takes bytes [byte_offset .. byte_offset+7] of row, widens them to
; words, multiplies by 2^shift and adds them to acc (wrapping paddw).
; No shift instruction is emitted when byte_offset or shift is 0.
;-----------------------------------------------------------------------
%macro WEIGHT_LINE1_UV_TAP  6
    movdqa      %2, %1
%if %5 != 0
    psrldq      %2, %5
%endif
    punpcklbw   %2, %4
%if %6 != 0
    psllw       %2, %6
%endif
    paddw       %3, %2
%endmacro

;-----------------------------------------------------------------------
; WEIGHT_LINE1_UV  row, tmp, acc, zero
;
; Horizontal 5-tap pass with weights  1 1 2 1 1  over one pixel row.
;
;   arg 1  row   xmm holding the row bytes (read only)
;   arg 2  tmp   xmm scratch (clobbered)
;   arg 3  acc   xmm accumulator, eight word lanes (updated)
;   arg 4  zero  xmm interleaved as the high byte when widening
;
; For each lane j in 0..7:
;   acc[j] += row[j] + row[j+1] + 2*row[j+2] + row[j+3] + row[j+4]
;-----------------------------------------------------------------------
%macro WEIGHT_LINE1_UV  4
    WEIGHT_LINE1_UV_TAP %1, %2, %3, %4, 0, 0    ; x1
    WEIGHT_LINE1_UV_TAP %1, %2, %3, %4, 1, 0    ; x1
    WEIGHT_LINE1_UV_TAP %1, %2, %3, %4, 2, 1    ; x2
    WEIGHT_LINE1_UV_TAP %1, %2, %3, %4, 3, 0    ; x1
    WEIGHT_LINE1_UV_TAP %1, %2, %3, %4, 4, 0    ; x1
%endmacro
109
;-----------------------------------------------------------------------
; WEIGHT_LINE2_UV_TAP  row, tmp, acc, zero, byte_offset, shift
;
; Private helper for WEIGHT_LINE2_UV: adds one horizontal tap.
; Takes bytes [byte_offset .. byte_offset+7] of row, widens them to
; words, multiplies by 2^shift and adds them to acc (wrapping paddw).
; No shift instruction is emitted when byte_offset or shift is 0.
;-----------------------------------------------------------------------
%macro WEIGHT_LINE2_UV_TAP  6
    movdqa      %2, %1
%if %5 != 0
    psrldq      %2, %5
%endif
    punpcklbw   %2, %4
%if %6 != 0
    psllw       %2, %6
%endif
    paddw       %3, %2
%endmacro

;-----------------------------------------------------------------------
; WEIGHT_LINE2_UV  row, tmp, acc, zero
;
; Horizontal 5-tap pass with weights  1 2 4 2 1  over one pixel row.
;
;   arg 1  row   xmm holding the row bytes (read only)
;   arg 2  tmp   xmm scratch (clobbered)
;   arg 3  acc   xmm accumulator, eight word lanes (updated)
;   arg 4  zero  xmm interleaved as the high byte when widening
;
; For each lane j in 0..7:
;   acc[j] += row[j] + 2*row[j+1] + 4*row[j+2] + 2*row[j+3] + row[j+4]
;-----------------------------------------------------------------------
%macro WEIGHT_LINE2_UV  4
    WEIGHT_LINE2_UV_TAP %1, %2, %3, %4, 0, 0    ; x1
    WEIGHT_LINE2_UV_TAP %1, %2, %3, %4, 1, 1    ; x2
    WEIGHT_LINE2_UV_TAP %1, %2, %3, %4, 2, 2    ; x4
    WEIGHT_LINE2_UV_TAP %1, %2, %3, %4, 3, 1    ; x2
    WEIGHT_LINE2_UV_TAP %1, %2, %3, %4, 4, 0    ; x1
%endmacro
138
;-----------------------------------------------------------------------
; WEIGHT_LINE3_UV_TAP  row, tmp, acc, zero, byte_offset, shift
;
; Private helper for WEIGHT_LINE3_UV: adds one power-of-two tap.
; Takes bytes [byte_offset .. byte_offset+7] of row, widens them to
; words, multiplies by 2^shift and adds them to acc (wrapping paddw).
; No shift instruction is emitted when byte_offset or shift is 0.
;-----------------------------------------------------------------------
%macro WEIGHT_LINE3_UV_TAP  6
    movdqa      %2, %1
%if %5 != 0
    psrldq      %2, %5
%endif
    punpcklbw   %2, %4
%if %6 != 0
    psllw       %2, %6
%endif
    paddw       %3, %2
%endmacro

;-----------------------------------------------------------------------
; WEIGHT_LINE3_UV  row, tmp, acc, zero
;
; Horizontal 5-tap pass with weights  2 4 20 4 2  over one pixel row.
;
;   arg 1  row   xmm holding the row bytes (read only)
;   arg 2  tmp   xmm scratch (clobbered)
;   arg 3  acc   xmm accumulator, eight word lanes (updated)
;   arg 4  zero  xmm interleaved as the high byte when widening
;
; For each lane j in 0..7:
;   acc[j] += 2*row[j] + 4*row[j+1] + 20*row[j+2] + 4*row[j+3] + 2*row[j+4]
;
; The x20 tap is not a power of two, so it multiplies by the sse2_20
; table, addressed through pic().
;-----------------------------------------------------------------------
%macro WEIGHT_LINE3_UV  4
    WEIGHT_LINE3_UV_TAP %1, %2, %3, %4, 0, 1    ; x2
    WEIGHT_LINE3_UV_TAP %1, %2, %3, %4, 1, 2    ; x4

    ; centre tap, x20
    movdqa      %2, %1
    psrldq      %2, 2
    punpcklbw   %2, %4
    pmullw      %2, [pic(sse2_20)]
    paddw       %3, %2

    WEIGHT_LINE3_UV_TAP %1, %2, %3, %4, 3, 2    ; x4
    WEIGHT_LINE3_UV_TAP %1, %2, %3, %4, 4, 1    ; x2
%endmacro
169
170;***********************************************************************
171;  BilateralLumaFilter8_sse2(uint8_t *pixels, int stride);
172;***********************************************************************
173;   1   2   3
174;   4   0   5
175;   6   7   8
176;   0:  the center point
177
; BilateralLumaFilter8_sse2(pixels, stride)
;
; Filters 8 horizontally adjacent pixels in place. For every pixel the
; 8 surrounding neighbours are weighted with WEIGHT_LINE and the centre
; pixel receives whatever remains of a total weight of 256:
;     out = (sum(w_i * nbr_i) + (256 - sum(w_i)) * center) >> 8
;
; Register use in the body:
;   r0    working pointer (pixels - 1, then the rows above and below)
;   r1    stride
;   r3    copy of the original pixel pointer, used for the final store
;   xmm0  WEIGHT_LINE scratch, later the centre weight
;   xmm1, xmm2  WEIGHT_LINE scratch
;   xmm3  32 in every word lane
;   xmm4  nTotWeight, xmm5  nSum
;   xmm6  centre pixels as words, xmm7  zero
;
; Memory touched: reads 8 bytes at offsets -1, 0 and +1 from the pixel
; pointer on the row above, the current row and the row below (the
; current row at offset 0 is the centre load); writes 8 bytes at pixels.
;
; LOAD_2_PARA, PUSH_XMM and POP_XMM are not defined in this file (the
; file includes "asm_inc.asm"); presumably they load the two arguments
; into r0/r1 using push_num and save/restore xmm registers where the
; ABI requires it -- TODO confirm against asm_inc.asm.
; NOTE(review): stride is documented as int but is used as the
; full-width r1 below; confirm LOAD_2_PARA sign-extends it on 64-bit
; targets.
178WELS_EXTERN BilateralLumaFilter8_sse2
179
180    push r3                             ; r3 is overwritten below
; push_num is set to the number of pushes done so far (one)
181    %assign push_num 1
182    LOAD_2_PARA
183    PUSH_XMM 8
184
185    pxor        xmm7,   xmm7            ; zero, for byte -> word widening
186
187    mov         r3,     r0              ; keep the output address
188
189    movq        xmm6,   [r0]            ; 8 centre pixels
190    punpcklbw   xmm6,   xmm7            ; widened to words
; xmm3 = 32 in every word lane: built in registers for X86_32_PICASM,
; loaded from the sse2_32 table otherwise
191%ifdef X86_32_PICASM
192    pcmpeqw     xmm3,   xmm3            ; all ones
193    psrlw       xmm3,   15              ; 1 per word
194    psllw       xmm3,   5               ; 32 per word
195%else
196    movdqa      xmm3,   [sse2_32]
197%endif
198    pxor        xmm4,   xmm4        ; nTotWeight
199    pxor        xmm5,   xmm5        ; nSum
200
201    dec         r0                      ; r0 = pixels - 1 (left neighbour column)
202    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0]           ; pixel 4
203    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0 + 2]       ; pixel 5
204
205    sub         r0, r1                  ; one row up
206    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0]           ; pixel 1
207    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0 + 1]       ; pixel 2
208    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0 + 2]       ; pixel 3
209
210    lea         r0, [r0 + r1 * 2]       ; two rows down: the row below the centre
211    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0]           ; pixel 6
212    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0 + 1]       ; pixel 7
213    WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0,  [r0 + 2]       ; pixel 8
214
; Centre weight = 256 - nTotWeight, then normalise by 256.
215    pcmpeqw     xmm0,   xmm0            ; all ones
216    psrlw       xmm0,   15              ; 1 per word
217    psllw       xmm0,   8               ; 256 per word
218    psubusw     xmm0,   xmm4            ; 256 - nTotWeight (saturating at 0)
219    pmullw      xmm0,   xmm6            ; centre weight * centre pixel
220    paddusw     xmm5,   xmm0            ; nSum += centre contribution
221    psrlw       xmm5,   8               ; divide by 256
222    packuswb    xmm5,   xmm5            ; words -> bytes, unsigned saturation
223    movq        [r3],   xmm5            ; store 8 filtered pixels in place
224
225
226    POP_XMM
227    pop r3
228    %assign push_num 0
229
230    ret
231
232;***********************************************************************
233; void      WaverageChromaFilter8_sse2(uint8_t *pixels, int stride);
234;***********************************************************************
235;5x5 filter:
236;1  1   2   1   1
237;1  2   4   2   1
238;2  4   20  4   2
239;1  2   4   2   1
240;1  1   2   1   1
241
; WaverageChromaFilter8_sse2(pixels, stride)
;
; Applies the fixed 5x5 weighted-average kernel shown above to 8
; horizontally adjacent pixels and writes the result in place.
; The kernel weights sum to 64 (6 + 10 + 32 + 10 + 6), hence the final
; right shift by 6.
;
; Register use in the body:
;   r0    row pointer, starts at pixels - 2 * stride - 2
;   r1    stride
;   r3    2 * stride
;   xmm0  zero (for byte -> word widening)
;   xmm1  current row bytes, xmm2  macro scratch
;   xmm3  accumulator, eight word lanes
;
; Memory touched: each of the five rows is read with a 16-byte movdqu
; starting 2 bytes left of the pixel column; the WEIGHT_LINE*_UV macros
; use bytes 0..11 of each load. 8 bytes are written at pixels.
;
; INIT_X86_32_PIC, DEINIT_X86_32_PIC and LOAD_2_PARA are not defined in
; this file (the file includes "asm_inc.asm"); presumably the PIC pair
; sets up and releases r4 as the base used by pic() inside
; WEIGHT_LINE3_UV, and LOAD_2_PARA loads the arguments into r0/r1 --
; TODO confirm against asm_inc.asm.
; NOTE(review): stride is documented as int but is used as the
; full-width r1 below; confirm LOAD_2_PARA sign-extends it on 64-bit
; targets.
242WELS_EXTERN WaverageChromaFilter8_sse2
243
244    push r3                 ; r3 is overwritten below
245
; push_num is set to the number of pushes done so far (one)
246    %assign push_num 1
247
248    INIT_X86_32_PIC r4
249    LOAD_2_PARA
250
251    mov     r3, r1
252    add     r3, r3          ; r3 = 2 * stride
253    sub     r0, r3          ; pixels - 2 * stride
254    sub     r0, 2           ; and 2 columns left: top-left of the 5x5 window
255
256    pxor    xmm0,   xmm0    ; zero for widening
257    pxor    xmm3,   xmm3    ; clear the accumulator
258
; row -2, weights 1 1 2 1 1
259    movdqu      xmm1,   [r0]
260    WEIGHT_LINE1_UV xmm1,   xmm2,   xmm3,   xmm0
261
; row -1, weights 1 2 4 2 1
262    movdqu      xmm1,   [r0 + r1]
263    WEIGHT_LINE2_UV xmm1,   xmm2,   xmm3,   xmm0
264
; row 0 (centre row), weights 2 4 20 4 2
265    add     r0, r3          ; r0 = pixels - 2
266    movdqu      xmm1,   [r0]
267    WEIGHT_LINE3_UV xmm1,   xmm2,   xmm3,   xmm0
268
; row +1, weights 1 2 4 2 1
269    movdqu      xmm1,   [r0 + r1]
270    WEIGHT_LINE2_UV xmm1,   xmm2,   xmm3,   xmm0
271
; row +2, weights 1 1 2 1 1
272    movdqu      xmm1,   [r0 + r1 * 2]
273    WEIGHT_LINE1_UV xmm1,   xmm2,   xmm3,   xmm0
274
275    psrlw       xmm3,       6               ; divide by 64, the kernel weight total
276    packuswb    xmm3,       xmm3            ; words -> bytes, unsigned saturation
277    movq        [r0 + 2],       xmm3        ; r0 + 2 = pixels: store 8 results in place
278
279
280    DEINIT_X86_32_PIC
281    pop r3
282
283    %assign push_num 0
284    ret
285