1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*------------------------------------------------------------------------------
18
19 Table of contents
20
21 1. Include headers
22 2. External compiler flags
23 3. Module defines
24 4. Local function prototypes
25 5. Functions
26 h264bsdWriteMacroblock
27 h264bsdWriteOutputBlocks
28
29 ------------------------------------------------------------------------------*/
30
31 /*------------------------------------------------------------------------------
32 1. Include headers
33 ------------------------------------------------------------------------------*/
34
35 #include "h264bsd_image.h"
36 #include "h264bsd_util.h"
37 #include "h264bsd_neighbour.h"
38
39 /*------------------------------------------------------------------------------
40 2. External compiler flags
41 --------------------------------------------------------------------------------
42
43 --------------------------------------------------------------------------------
44 3. Module defines
45 ------------------------------------------------------------------------------*/
46
47 /* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c */
48 extern const u32 h264bsdBlockX[];
49 extern const u32 h264bsdBlockY[];
50
51 /* clipping table, defined in h264bsd_intra_prediction.c */
52 extern const u8 h264bsdClip[];
53
54 /*------------------------------------------------------------------------------
55 4. Local function prototypes
56 ------------------------------------------------------------------------------*/
57
58
59
60 /*------------------------------------------------------------------------------
61
62 Function: h264bsdWriteMacroblock
63
64 Functional description:
65 Write one macroblock into the image. Both luma and chroma
66 components will be written at the same time.
67
68 Inputs:
69 data pointer to macroblock data to be written, 256 values for
70 luma followed by 64 values for both chroma components
71
72 Outputs:
73 image pointer to the image where the macroblock will be written
74
75 Returns:
76 none
77
78 ------------------------------------------------------------------------------*/
79 #ifndef H264DEC_NEON
h264bsdWriteMacroblock(image_t * image,u8 * data)80 void h264bsdWriteMacroblock(image_t *image, u8 *data)
81 {
82
83 /* Variables */
84
85 u32 i;
86 u32 width;
87 u32 *lum, *cb, *cr;
88 u32 *ptr;
89 u32 tmp1, tmp2;
90
91 /* Code */
92
93 ASSERT(image);
94 ASSERT(data);
95 ASSERT(!((u32)data&0x3));
96
97 width = image->width;
98
99 /*lint -save -e826 lum, cb and cr used to copy 4 bytes at the time, disable
100 * "area too small" info message */
101 lum = (u32*)image->luma;
102 cb = (u32*)image->cb;
103 cr = (u32*)image->cr;
104 ASSERT(!((u32)lum&0x3));
105 ASSERT(!((u32)cb&0x3));
106 ASSERT(!((u32)cr&0x3));
107
108 ptr = (u32*)data;
109
110 width *= 4;
111 for (i = 16; i ; i--)
112 {
113 tmp1 = *ptr++;
114 tmp2 = *ptr++;
115 *lum++ = tmp1;
116 *lum++ = tmp2;
117 tmp1 = *ptr++;
118 tmp2 = *ptr++;
119 *lum++ = tmp1;
120 *lum++ = tmp2;
121 lum += width-4;
122 }
123
124 width >>= 1;
125 for (i = 8; i ; i--)
126 {
127 tmp1 = *ptr++;
128 tmp2 = *ptr++;
129 *cb++ = tmp1;
130 *cb++ = tmp2;
131 cb += width-2;
132 }
133
134 for (i = 8; i ; i--)
135 {
136 tmp1 = *ptr++;
137 tmp2 = *ptr++;
138 *cr++ = tmp1;
139 *cr++ = tmp2;
140 cr += width-2;
141 }
142
143 }
144 #endif
145 #ifndef H264DEC_OMXDL
146 /*------------------------------------------------------------------------------
147
148 Function: h264bsdWriteOutputBlocks
149
150 Functional description:
151 Write one macroblock into the image. Prediction for the macroblock
152 and the residual are given separately and will be combined while
153 writing the data to the image
154
155 Inputs:
156 data pointer to macroblock prediction data, 256 values for
157 luma followed by 64 values for both chroma components
158 mbNum number of the macroblock
159 residual pointer to residual data, 16 16-element arrays for luma
160 followed by 4 16-element arrays for both chroma
161 components
162
163 Outputs:
164 image pointer to the image where the data will be written
165
166 Returns:
167 none
168
169 ------------------------------------------------------------------------------*/
170
h264bsdWriteOutputBlocks(image_t * image,u32 mbNum,u8 * data,i32 residual[][16])171 void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
172 i32 residual[][16])
173 {
174
175 /* Variables */
176
177 u32 i;
178 u32 picWidth, picSize;
179 u8 *lum, *cb, *cr;
180 u8 *imageBlock;
181 u8 *tmp;
182 u32 row, col;
183 u32 block;
184 u32 x, y;
185 i32 *pRes;
186 i32 tmp1, tmp2, tmp3, tmp4;
187 const u8 *clp = h264bsdClip + 512;
188
189 /* Code */
190
191 ASSERT(image);
192 ASSERT(data);
193 ASSERT(mbNum < image->width * image->height);
194 ASSERT(!((u32)data&0x3));
195
196 /* Image size in macroblocks */
197 picWidth = image->width;
198 picSize = picWidth * image->height;
199 row = mbNum / picWidth;
200 col = mbNum % picWidth;
201
202 /* Output macroblock position in output picture */
203 lum = (image->data + row * picWidth * 256 + col * 16);
204 cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8);
205 cr = (cb + picSize * 64);
206
207 picWidth *= 16;
208
209 for (block = 0; block < 16; block++)
210 {
211 x = h264bsdBlockX[block];
212 y = h264bsdBlockY[block];
213
214 pRes = residual[block];
215
216 ASSERT(pRes);
217
218 tmp = data + y*16 + x;
219 imageBlock = lum + y*picWidth + x;
220
221 ASSERT(!((u32)tmp&0x3));
222 ASSERT(!((u32)imageBlock&0x3));
223
224 if (IS_RESIDUAL_EMPTY(pRes))
225 {
226 /*lint -e826 */
227 i32 *in32 = (i32*)tmp;
228 i32 *out32 = (i32*)imageBlock;
229
230 /* Residual is zero => copy prediction block to output */
231 tmp1 = *in32; in32 += 4;
232 tmp2 = *in32; in32 += 4;
233 *out32 = tmp1; out32 += picWidth/4;
234 *out32 = tmp2; out32 += picWidth/4;
235 tmp1 = *in32; in32 += 4;
236 tmp2 = *in32;
237 *out32 = tmp1; out32 += picWidth/4;
238 *out32 = tmp2;
239 }
240 else
241 {
242
243 RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
244
245 /* Calculate image = prediction + residual
246 * Process four pixels in a loop */
247 for (i = 4; i; i--)
248 {
249 tmp1 = tmp[0];
250 tmp2 = *pRes++;
251 tmp3 = tmp[1];
252 tmp1 = clp[tmp1 + tmp2];
253 tmp4 = *pRes++;
254 imageBlock[0] = (u8)tmp1;
255 tmp3 = clp[tmp3 + tmp4];
256 tmp1 = tmp[2];
257 tmp2 = *pRes++;
258 imageBlock[1] = (u8)tmp3;
259 tmp1 = clp[tmp1 + tmp2];
260 tmp3 = tmp[3];
261 tmp4 = *pRes++;
262 imageBlock[2] = (u8)tmp1;
263 tmp3 = clp[tmp3 + tmp4];
264 tmp += 16;
265 imageBlock[3] = (u8)tmp3;
266 imageBlock += picWidth;
267 }
268 }
269
270 }
271
272 picWidth /= 2;
273
274 for (block = 16; block <= 23; block++)
275 {
276 x = h264bsdBlockX[block & 0x3];
277 y = h264bsdBlockY[block & 0x3];
278
279 pRes = residual[block];
280
281 ASSERT(pRes);
282
283 tmp = data + 256;
284 imageBlock = cb;
285
286 if (block >= 20)
287 {
288 imageBlock = cr;
289 tmp += 64;
290 }
291
292 tmp += y*8 + x;
293 imageBlock += y*picWidth + x;
294
295 ASSERT(!((u32)tmp&0x3));
296 ASSERT(!((u32)imageBlock&0x3));
297
298 if (IS_RESIDUAL_EMPTY(pRes))
299 {
300 /*lint -e826 */
301 i32 *in32 = (i32*)tmp;
302 i32 *out32 = (i32*)imageBlock;
303
304 /* Residual is zero => copy prediction block to output */
305 tmp1 = *in32; in32 += 2;
306 tmp2 = *in32; in32 += 2;
307 *out32 = tmp1; out32 += picWidth/4;
308 *out32 = tmp2; out32 += picWidth/4;
309 tmp1 = *in32; in32 += 2;
310 tmp2 = *in32;
311 *out32 = tmp1; out32 += picWidth/4;
312 *out32 = tmp2;
313 }
314 else
315 {
316
317 RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
318
319 for (i = 4; i; i--)
320 {
321 tmp1 = tmp[0];
322 tmp2 = *pRes++;
323 tmp3 = tmp[1];
324 tmp1 = clp[tmp1 + tmp2];
325 tmp4 = *pRes++;
326 imageBlock[0] = (u8)tmp1;
327 tmp3 = clp[tmp3 + tmp4];
328 tmp1 = tmp[2];
329 tmp2 = *pRes++;
330 imageBlock[1] = (u8)tmp3;
331 tmp1 = clp[tmp1 + tmp2];
332 tmp3 = tmp[3];
333 tmp4 = *pRes++;
334 imageBlock[2] = (u8)tmp1;
335 tmp3 = clp[tmp3 + tmp4];
336 tmp += 8;
337 imageBlock[3] = (u8)tmp3;
338 imageBlock += picWidth;
339 }
340 }
341 }
342
343 }
344 #endif /* H264DEC_OMXDL */
345
346