• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
33 #include <string.h>
34 
35 #include "decode_mb_aux.h"
36 #include "wels_common_basis.h"
37 
38 namespace WelsDec {
39 
// NOTE: pRs must NOT be modified here; changing it would lead to a mismatch with JSVM.
//       A local array is therefore used to hold the intermediate (IDCT) values.
IdctResAddPred_c(uint8_t * pPred,const int32_t kiStride,int16_t * pRs)42 void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
43   int16_t iSrc[16];
44 
45   uint8_t* pDst           = pPred;
46   const int32_t kiStride2 = kiStride << 1;
47   const int32_t kiStride3 = kiStride + kiStride2;
48   int32_t i;
49 
50   for (i = 0; i < 4; i++) {
51     const int32_t kiY  = i << 2;
52     const int32_t kiT0 = pRs[kiY] + pRs[kiY + 2];
53     const int32_t kiT1 = pRs[kiY] - pRs[kiY + 2];
54     const int32_t kiT2 = (pRs[kiY + 1] >> 1) - pRs[kiY + 3];
55     const int32_t kiT3 = pRs[kiY + 1] + (pRs[kiY + 3] >> 1);
56 
57     iSrc[kiY] = kiT0 + kiT3;
58     iSrc[kiY + 1] = kiT1 + kiT2;
59     iSrc[kiY + 2] = kiT1 - kiT2;
60     iSrc[kiY + 3] = kiT0 - kiT3;
61   }
62 
63   for (i = 0; i < 4; i++) {
64     int32_t kT1 = iSrc[i]     +  iSrc[i + 8];
65     int32_t kT2 = iSrc[i + 4] + (iSrc[i + 12] >> 1);
66     int32_t kT3 = (32 + kT1 + kT2) >> 6;
67     int32_t kT4 = (32 + kT1 - kT2) >> 6;
68 
69     pDst[i] = WelsClip1 (kT3 + pPred[i]);
70     pDst[i + kiStride3] = WelsClip1 (kT4 + pPred[i + kiStride3]);
71 
72     kT1 = iSrc[i] - iSrc[i + 8];
73     kT2 = (iSrc[i + 4] >> 1) - iSrc[i + 12];
74     pDst[i + kiStride] = WelsClip1 (((32 + kT1 + kT2) >> 6) + pDst[i + kiStride]);
75     pDst[i + kiStride2] = WelsClip1 (((32 + kT1 - kT2) >> 6) + pDst[i + kiStride2]);
76   }
77 }
78 
IdctResAddPred8x8_c(uint8_t * pPred,const int32_t kiStride,int16_t * pRs)79 void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
80   // To make the ASM code easy to write, should using one funciton to apply hor and ver together, such as we did on HEVC
81   // Ugly code, just for easy debug, the final version need optimization
82   int16_t p[8], b[8];
83   int16_t a[4];
84 
85   int16_t iTmp[64];
86   int16_t iRes[64];
87 
88   // Horizontal
89   for (int i = 0; i < 8; i++) {
90     for (int j = 0; j < 8; j++) {
91       p[j] = pRs[j + (i << 3)];
92     }
93     a[0] = p[0] + p[4];
94     a[1] = p[0] - p[4];
95     a[2] = p[6] - (p[2] >> 1);
96     a[3] = p[2] + (p[6] >> 1);
97 
98     b[0] =  a[0] + a[3];
99     b[2] =  a[1] - a[2];
100     b[4] =  a[1] + a[2];
101     b[6] =  a[0] - a[3];
102 
103     a[0] = -p[3] + p[5] - p[7] - (p[7] >> 1);
104     a[1] =  p[1] + p[7] - p[3] - (p[3] >> 1);
105     a[2] = -p[1] + p[7] + p[5] + (p[5] >> 1);
106     a[3] =  p[3] + p[5] + p[1] + (p[1] >> 1);
107 
108     b[1] =  a[0] + (a[3] >> 2);
109     b[3] =  a[1] + (a[2] >> 2);
110     b[5] =  a[2] - (a[1] >> 2);
111     b[7] =  a[3] - (a[0] >> 2);
112 
113     iTmp[0 + (i << 3)] = b[0] + b[7];
114     iTmp[1 + (i << 3)] = b[2] - b[5];
115     iTmp[2 + (i << 3)] = b[4] + b[3];
116     iTmp[3 + (i << 3)] = b[6] + b[1];
117     iTmp[4 + (i << 3)] = b[6] - b[1];
118     iTmp[5 + (i << 3)] = b[4] - b[3];
119     iTmp[6 + (i << 3)] = b[2] + b[5];
120     iTmp[7 + (i << 3)] = b[0] - b[7];
121   }
122 
123   //Vertical
124   for (int i = 0; i < 8; i++) {
125     for (int j = 0; j < 8; j++) {
126       p[j] = iTmp[i + (j << 3)];
127     }
128 
129     a[0] =  p[0] + p[4];
130     a[1] =  p[0] - p[4];
131     a[2] =  p[6] - (p[2] >> 1);
132     a[3] =  p[2] + (p[6] >> 1);
133 
134     b[0] = a[0] + a[3];
135     b[2] = a[1] - a[2];
136     b[4] = a[1] + a[2];
137     b[6] = a[0] - a[3];
138 
139     a[0] = -p[3] + p[5] - p[7] - (p[7] >> 1);
140     a[1] =  p[1] + p[7] - p[3] - (p[3] >> 1);
141     a[2] = -p[1] + p[7] + p[5] + (p[5] >> 1);
142     a[3] =  p[3] + p[5] + p[1] + (p[1] >> 1);
143 
144 
145     b[1] =  a[0] + (a[3] >> 2);
146     b[7] =  a[3] - (a[0] >> 2);
147     b[3] =  a[1] + (a[2] >> 2);
148     b[5] =  a[2] - (a[1] >> 2);
149 
150     iRes[ (0 << 3) + i] = b[0] + b[7];
151     iRes[ (1 << 3) + i] = b[2] - b[5];
152     iRes[ (2 << 3) + i] = b[4] + b[3];
153     iRes[ (3 << 3) + i] = b[6] + b[1];
154     iRes[ (4 << 3) + i] = b[6] - b[1];
155     iRes[ (5 << 3) + i] = b[4] - b[3];
156     iRes[ (6 << 3) + i] = b[2] + b[5];
157     iRes[ (7 << 3) + i] = b[0] - b[7];
158   }
159 
160   uint8_t* pDst = pPred;
161   for (int i = 0; i < 8; i++) {
162     for (int j = 0; j < 8; j++) {
163       pDst[i * kiStride + j] = WelsClip1 (((32 + iRes[ (i << 3) + j]) >> 6) + pDst[i * kiStride + j]);
164     }
165   }
166 
167 }
168 
GetI4LumaIChromaAddrTable(int32_t * pBlockOffset,const int32_t kiYStride,const int32_t kiUVStride)169 void GetI4LumaIChromaAddrTable (int32_t* pBlockOffset, const int32_t kiYStride, const int32_t kiUVStride) {
170   int32_t* pOffset = pBlockOffset;
171   int32_t i;
172   const uint8_t kuiScan0 = g_kuiScan8[0];
173 
174   for (i = 0; i < 16; i++) {
175     const uint32_t kuiA = g_kuiScan8[i] - kuiScan0;
176     const uint32_t kuiX = kuiA & 0x07;
177     const uint32_t kuiY = kuiA >> 3;
178 
179     pOffset[i] = (kuiX + kiYStride * kuiY) << 2;
180   }
181 
182   for (i = 0; i < 4; i++) {
183     const uint32_t kuiA = g_kuiScan8[i] - kuiScan0;
184 
185     pOffset[16 + i] =
186       pOffset[20 + i] = ((kuiA & 0x07) + (kiUVStride/*>>1*/) * (kuiA >> 3)) << 2;
187   }
188 }
189 
190 } // namespace WelsDec
191