;*!
;* \copy
;*     Copyright (c)  2009-2013, Cisco Systems
;*     All rights reserved.
;*
;*     Redistribution and use in source and binary forms, with or without
;*     modification, are permitted provided that the following conditions
;*     are met:
;*
;*        * Redistributions of source code must retain the above copyright
;*          notice, this list of conditions and the following disclaimer.
;*
;*        * Redistributions in binary form must reproduce the above copyright
;*          notice, this list of conditions and the following disclaimer in
;*          the documentation and/or other materials provided with the
;*          distribution.
;*
;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;*     POSSIBILITY OF SUCH DAMAGE.
;*
;*
;*  dct.asm
;*
;*  History
;*      8/4/2009 Created
;*
;*
;*************************************************************************/

; asm_inc.asm is expected to provide the helper macros used below that are
; not defined in this file: WELS_EXTERN, LOAD_2_PARA, PUSH_XMM, POP_XMM,
; WELS_DD1 and SSE2_Trans4x4D, as well as the r0/r1/r2 register aliases.
%include "asm_inc.asm"

SECTION .text

;***********************************************************************
; SSE2 functions
;***********************************************************************

;-----------------------------------------------------------------------
; SSE2_SumSubD  a, b, tmp
;   Butterfly on packed 32-bit lanes.
;   in : %1 = a, %2 = b
;   out: %2 = a + b
;        %1 = a - b
;        %3 = b (scratch; holds the original value of %2 on exit)
;-----------------------------------------------------------------------
%macro SSE2_SumSubD 3
    movdqa  %3, %2          ; save b before it is overwritten by the sum
    paddd   %2, %1          ; %2 = b + a
    psubd   %1, %3          ; %1 = a - b
%endmacro

;-----------------------------------------------------------------------
; SSE2_SumSubDiv2D  a, b, rnd, diff
;   Butterfly with a halving step on packed 32-bit lanes.
;   in : %1 = a, %2 = b, %3 = rnd (added before the shift)
;   out: %1 = (a + b + rnd) >> 1          (arithmetic shift)
;        %4 = ((a + b + rnd) >> 1) - b    (== (a - b + rnd) >> 1)
;   %2 and %3 are left unchanged.
;-----------------------------------------------------------------------
%macro SSE2_SumSubDiv2D 4
    paddd   %1, %2          ; a + b
    paddd   %1, %3          ; + rnd
    psrad   %1, 1           ; signed halve
    movdqa  %4, %1
    psubd   %4, %2          ; subtracting b after the shift yields the halved difference
%endmacro
;-----------------------------------------------------------------------
; SSE2_Load4Col  dst, tmp1, tmp2, tmp3, addr
;   Gathers four signed 16-bit values located at byte offsets
;   0x00, 0x20, 0x80 and 0xa0 from addr, sign-extends each one to
;   32 bits and packs them into dst as lanes 0..3 in that order.
;   Clobbers: r2, %2, %3, %4.
;   NOTE(review): a 0x20-byte step is 16 int16 values, so each load is
;   presumably the DC coefficient of one 4x4 block - confirm against the
;   coefficient layout used by the caller.
;-----------------------------------------------------------------------
%macro SSE2_Load4Col 5
    movsx       r2, WORD[%5]            ; word 0, sign-extended
    movd        %1, r2d
    movsx       r2, WORD[%5 + 0x20]     ; word 1
    movd        %2, r2d
    punpckldq   %1, %2                  ; %1 = { w0, w1, 0, 0 }
    movsx       r2, WORD[%5 + 0x80]     ; word 2
    movd        %3, r2d
    movsx       r2, WORD[%5 + 0xa0]     ; word 3
    movd        %4, r2d
    punpckldq   %3, %4                  ; %3 = { w2, w3, 0, 0 }
    punpcklqdq  %1, %3                  ; %1 = { w0, w1, w2, w3 }
%endmacro

;***********************************************************************
;void WelsHadamardT4Dc_sse2( int16_t *luma_dc, int16_t *pDct)
;
; Gathers 16 int16 values from pDct (one every 0x20 bytes, byte offsets
; 0x000 .. 0x1e0), applies two add/subtract butterfly passes separated by
; a 4x4 transpose, halves the result with +1 rounding, and writes the 16
; results to luma_dc as saturated int16 (32 bytes).
;
; r0 is used as the destination and r1 as the source; LOAD_2_PARA is
; expected to map luma_dc -> r0 and pDct -> r1 (defined in asm_inc.asm).
; The stores use movdqa, so luma_dc must be 16-byte aligned.
; Uses xmm0-xmm7 and r2 as scratch.
;***********************************************************************
WELS_EXTERN WelsHadamardT4Dc_sse2
    %assign push_num 0      ; presumably stack bookkeeping consumed by the asm_inc.asm macros - confirm
    LOAD_2_PARA
    PUSH_XMM 8
    ; Gather the 4x4 matrix of input words, one row of four dwords per register.
    ; xmm5, xmm6 and xmm0 are only temporaries here.
    SSE2_Load4Col       xmm1, xmm5, xmm6, xmm0, r1
    SSE2_Load4Col       xmm2, xmm5, xmm6, xmm0, r1 + 0x40
    SSE2_Load4Col       xmm3, xmm5, xmm6, xmm0, r1 + 0x100
    SSE2_Load4Col       xmm4, xmm5, xmm6, xmm0, r1 + 0x140

    ; First pass: two butterfly stages across the four row registers.
    SSE2_SumSubD        xmm1, xmm2, xmm7    ; xmm2 = r0+r1, xmm1 = r0-r1
    SSE2_SumSubD        xmm3, xmm4, xmm7    ; xmm4 = r2+r3, xmm3 = r2-r3
    SSE2_SumSubD        xmm2, xmm4, xmm7    ; xmm4 = sum of sums, xmm2 = difference of sums
    SSE2_SumSubD        xmm1, xmm3, xmm7    ; xmm3 = sum of diffs, xmm1 = difference of diffs

    SSE2_Trans4x4D      xmm4, xmm2, xmm1, xmm3, xmm5    ; pOut: xmm4,xmm3,xmm5,xmm1

    ; Second pass on the transposed data: first butterfly stage ...
    SSE2_SumSubD        xmm4, xmm3, xmm7
    SSE2_SumSubD        xmm5, xmm1, xmm7

    ; ... and second stage combined with the rounding halve.
    WELS_DD1 xmm6       ; rounding term; presumably 1 in every dword lane (macro from asm_inc.asm)
    SSE2_SumSubDiv2D    xmm3, xmm1, xmm6, xmm0      ; pOut: xmm3 = (xmm3+xmm1+1)/2, xmm0 = (xmm3-xmm1+1)/2
    SSE2_SumSubDiv2D    xmm4, xmm5, xmm6, xmm1      ; pOut: xmm4 = (xmm4+xmm5+1)/2, xmm1 = (xmm4-xmm5+1)/2
    SSE2_Trans4x4D      xmm3, xmm0, xmm1, xmm4, xmm2    ; pOut: xmm3,xmm4,xmm2,xmm1

    ; Narrow the 32-bit results to int16 with signed saturation and store all 16.
    packssdw    xmm3, xmm4
    packssdw    xmm2, xmm1
    movdqa  [r0+ 0], xmm3       ; results 0..7
    movdqa  [r0+16], xmm2       ; results 8..15

    POP_XMM
    ret