• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;*!
2;* \copy
3;*     Copyright (c)  2009-2013, Cisco Systems
4;*     All rights reserved.
5;*
6;*     Redistribution and use in source and binary forms, with or without
7;*     modification, are permitted provided that the following conditions
8;*     are met:
9;*
10;*        * Redistributions of source code must retain the above copyright
11;*          notice, this list of conditions and the following disclaimer.
12;*
13;*        * Redistributions in binary form must reproduce the above copyright
14;*          notice, this list of conditions and the following disclaimer in
15;*          the documentation and/or other materials provided with the
16;*          distribution.
17;*
18;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29;*     POSSIBILITY OF SUCH DAMAGE.
30;*
31;*
32;*  dct.asm
33;*
34;*  History
35;*      8/4/2009 Created
36;*
37;*
38;*************************************************************************/
39
40%include "asm_inc.asm"
41
42SECTION .text
43
44;***********************************************************************
45; SSE2 functions
46;***********************************************************************
47
; SSE2_SumSubD  a(%1), b(%2), tmp(%3)
;   Packed 32-bit add/subtract butterfly on xmm registers.
;   Out:   %2 = original %1 + original %2   (per dword)
;          %1 = original %1 - original %2   (per dword)
;   Clobb: %3 (left holding the original %2)
48%macro SSE2_SumSubD 3
49    movdqa  %3, %2          ; keep a copy of b before it is overwritten
50    paddd   %2, %1          ; b = b + a
51    psubd   %1, %3          ; a = a - original b
52%endmacro
53
; SSE2_SumSubDiv2D  a(%1), b(%2), bias(%3), out(%4)
;   Per 32-bit lane:
;     %1 = (original %1 + %2 + %3) >> 1      (arithmetic shift)
;     %4 = new %1 - %2
;   %2 and %3 are read only. When %3 holds 1 in every dword, the second result
;   equals (original %1 - %2 + 1) >> 1, because subtracting %2 after the shift
;   is the same as subtracting 2 * %2 before it.
54%macro SSE2_SumSubDiv2D 4
55    paddd   %1, %2          ; a += b
56    paddd   %1, %3          ; a += bias
57    psrad   %1,  1          ; a >>= 1, sign-preserving
58    movdqa  %4, %1          ; out = halved sum
59    psubd   %4, %2          ; out -= b
60%endmacro
; SSE2_Load4Col  dst(%1), tmp(%2), tmp(%3), tmp(%4), addr(%5)
;   Reads four 16-bit words at byte offsets 0x00, 0x20, 0x80 and 0xa0 from %5,
;   sign-extends each one, and leaves them as the four dwords of %1
;   (lowest dword = word at +0x00, highest dword = word at +0xa0).
;   Clobb: r2, %2, %3, %4
;   NOTE(review): r2 / r2d are not defined in this chunk; presumably they are
;   the full-width and 32-bit names of the same scratch register -- confirm in
;   asm_inc.asm.
61%macro SSE2_Load4Col    5
62    movsx       r2,     WORD[%5]              ; sign-extended word at +0x00
63    movd        %1,         r2d               ; %1 = [w0, 0, 0, 0]
64    movsx       r2,     WORD[%5 + 0x20]       ; sign-extended word at +0x20
65    movd        %2,         r2d               ; %2 = [w1, 0, 0, 0]
66    punpckldq   %1,         %2                ; %1 = [w0, w1, 0, 0]
67    movsx       r2,     WORD[%5 + 0x80]       ; sign-extended word at +0x80
68    movd        %3,         r2d               ; %3 = [w2, 0, 0, 0]
69    movsx       r2,     WORD[%5 + 0xa0]       ; sign-extended word at +0xa0
70    movd        %4,         r2d               ; %4 = [w3, 0, 0, 0]
71    punpckldq   %3,         %4                ; %3 = [w2, w3, 0, 0]
72    punpcklqdq  %1,         %3                ; %1 = [w0, w1, w2, w3]
73%endmacro
74
75;***********************************************************************
76;void WelsHadamardT4Dc_sse2( int16_t *luma_dc, int16_t *pDct)
;
; Gathers 16 int16 values from pDct (one every 0x20 bytes, byte offsets
; 0x000 .. 0x1e0), sign-extends them to 32 bits, applies two passes of
; add/subtract butterflies with a 4x4 dword transpose after each pass, and
; stores the result as 16 signed-saturated int16 values at luma_dc.
; The last butterfly stage of the second pass also adds xmm6 and shifts right
; by one (a rounded halving, per the inline notes below).
;
; In:    r1 = source base (used for every load), r0 = destination base.
;        NOTE(review): presumably LOAD_2_PARA places luma_dc in r0 and pDct in
;        r1 -- the macro is defined outside this chunk, confirm in asm_inc.asm.
;        The stores use movdqa, so the destination must be 16-byte aligned.
; Clobb: r2 (via SSE2_Load4Col), xmm0-xmm7.
;        NOTE(review): PUSH_XMM 8 / POP_XMM presumably preserve the xmm
;        registers the target ABI treats as callee-saved -- confirm.
77;***********************************************************************
78WELS_EXTERN WelsHadamardT4Dc_sse2
79    %assign push_num 0
80    LOAD_2_PARA
81    PUSH_XMM 8
    ; Each load fills one xmm register with four sign-extended inputs;
    ; xmm5, xmm6 and xmm0 are only scratch here.
82    SSE2_Load4Col       xmm1, xmm5, xmm6, xmm0, r1           ; words at +0x000, +0x020, +0x080, +0x0a0
83    SSE2_Load4Col       xmm2, xmm5, xmm6, xmm0, r1 + 0x40    ; words at +0x040, +0x060, +0x0c0, +0x0e0
84    SSE2_Load4Col       xmm3, xmm5, xmm6, xmm0, r1 + 0x100   ; words at +0x100, +0x120, +0x180, +0x1a0
85    SSE2_Load4Col       xmm4, xmm5, xmm6, xmm0, r1 + 0x140   ; words at +0x140, +0x160, +0x1c0, +0x1e0
86
    ; First pass: two butterfly stages across the four registers (xmm7 = scratch).
    ; With x1..x4 denoting the loaded xmm1..xmm4:
87    SSE2_SumSubD        xmm1, xmm2, xmm7    ; xmm2 = x1+x2,           xmm1 = x1-x2
88    SSE2_SumSubD        xmm3, xmm4, xmm7    ; xmm4 = x3+x4,           xmm3 = x3-x4
89    SSE2_SumSubD        xmm2, xmm4, xmm7    ; xmm4 = (x1+x2)+(x3+x4), xmm2 = (x1+x2)-(x3+x4)
90    SSE2_SumSubD        xmm1, xmm3, xmm7    ; xmm3 = (x1-x2)+(x3-x4), xmm1 = (x1-x2)-(x3-x4)
91
    ; SSE2_Trans4x4D is defined outside this chunk; the register order of its
    ; results is taken from the existing "pOut" note.
92    SSE2_Trans4x4D      xmm4, xmm2, xmm1, xmm3, xmm5    ; pOut: xmm4,xmm3,xmm5,xmm1
93
    ; Second pass, first stage (t4, t3, t5, t1 = the transposed registers):
94    SSE2_SumSubD        xmm4, xmm3, xmm7    ; xmm3 = t4+t3, xmm4 = t4-t3
95    SSE2_SumSubD        xmm5, xmm1, xmm7    ; xmm1 = t5+t1, xmm5 = t5-t1
96
    ; NOTE(review): WELS_DD1 is defined elsewhere; the "+1" in the notes below
    ; implies it sets every dword of xmm6 to 1 -- confirm.
97    WELS_DD1 xmm6
98    SSE2_SumSubDiv2D    xmm3, xmm1, xmm6, xmm0          ; pOut: xmm3 = (xmm3+xmm1+1)/2, xmm0 = (xmm3-xmm1+1)/2
99    SSE2_SumSubDiv2D    xmm4, xmm5, xmm6, xmm1          ; pOut: xmm4 = (xmm4+xmm5+1)/2, xmm1 = (xmm4-xmm5+1)/2
100    SSE2_Trans4x4D      xmm3, xmm0, xmm1, xmm4, xmm2    ; pOut: xmm3,xmm4,xmm2,xmm1
101
    ; Narrow the sixteen dwords to int16 with signed saturation and store them.
102    packssdw    xmm3,   xmm4        ; xmm3 = 4 words from xmm3 (low half), 4 from xmm4 (high half)
103    packssdw    xmm2,   xmm1        ; xmm2 = 4 words from xmm2 (low half), 4 from xmm1 (high half)
104    movdqa  [r0+ 0],   xmm3         ; output words 0..7  (aligned store)
105    movdqa  [r0+16],   xmm2         ; output words 8..15 (aligned store)
106
107    POP_XMM
108    ret
109