1 // Copyright 2011 Google Inc.
2 //
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
7 //
8 // VP8Iterator: block iterator
9 //
10 // Author: Skal (pascal.massimino@gmail.com)
11
12 #include <stdlib.h>
13 #include <string.h>
14 #include "vp8enci.h"
15
16 #if defined(__cplusplus) || defined(c_plusplus)
17 extern "C" {
18 #endif
19
20 //-----------------------------------------------------------------------------
21 // VP8Iterator
22 //-----------------------------------------------------------------------------
23
// Resets the cached left-border samples (luma, U and V) and the left DC
// non-zero flag. Called when the iterator is reset and each time it wraps
// around to the start of a new macroblock row.
static void InitLeft(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  // The corner sample stored at index -1 is 127 on the first row and 129 on
  // every following row.
  const uint8_t corner = (it->y_ > 0) ? 129 : 127;

  enc->v_left_[-1] = corner;
  enc->u_left_[-1] = corner;
  enc->y_left_[-1] = corner;

  // All the other left samples default to 129.
  memset(enc->y_left_, 129, 16);
  memset(enc->u_left_, 129, 8);
  memset(enc->v_left_, 129, 8);

  it->left_nz_[8] = 0;   // left DC context
}
33
// Resets the cached top-border samples to 127 and clears the per-column
// non-zero contexts (one entry per macroblock column).
static void InitTop(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  const int luma_top_size = 16 * enc->mb_w_;   // 16 luma samples per column

  // NOTE(review): twice the luma width is filled starting at y_top_, so this
  // presumably relies on the chroma top samples being stored right after the
  // luma ones -- confirm against the buffer allocation.
  memset(enc->y_top_, 127, 2 * luma_top_size);
  memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
}
40
// Rewinds the iterator to the first macroblock of the picture attached to
// it->enc_ and re-initializes the border caches and per-pass statistics.
// 'it->enc_' must already be set (see VP8IteratorInit()).
void VP8IteratorReset(VP8EncIterator* const it) {
  VP8Encoder* const enc = it->enc_;

  // Position: top-left macroblock.
  it->x_ = it->y_ = 0;
  it->y_offset_ = it->uv_offset_ = 0;

  // Cursors go back to the beginning of the encoder's per-macroblock arrays.
  it->mb_ = enc->mb_info_;
  it->preds_ = enc->preds_;
  it->nz_ = enc->nz_;
  it->bw_ = enc->parts_;   // first partition

  // Number of macroblocks left to visit.
  it->done_ = enc->mb_w_ * enc->mb_h_;

  // Statistics and options.
  memset(it->bit_count_, 0, sizeof(it->bit_count_));
  it->do_trellis_ = 0;

  // Border caches. InitLeft() depends on it->y_, which is set above.
  InitTop(it);
  InitLeft(it);
}
57
// Binds iterator 'it' to encoder 'enc': records the picture strides, shares
// the encoder's working buffers, then resets the iterator to the first
// macroblock.
void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
  const WebPPicture* const pic = enc->pic_;

  it->enc_ = enc;
  it->y_stride_ = pic->y_stride;
  it->uv_stride_ = pic->uv_stride;

  // The iterator borrows the encoder's scratch buffers.
  // TODO(later): for multithreading, these should be owned by 'it'.
  it->yuv_in_ = enc->yuv_in_;
  it->yuv_out_ = enc->yuv_out_;
  it->yuv_out2_ = enc->yuv_out2_;
  it->yuv_p_ = enc->yuv_p_;
  it->lf_stats_ = enc->lf_stats_;

  VP8IteratorReset(it);
}
70
71 //-----------------------------------------------------------------------------
72 // Import the source samples into the cache. Takes care of replicating
73 // boundary pixels if necessary.
74
VP8IteratorImport(const VP8EncIterator * const it)75 void VP8IteratorImport(const VP8EncIterator* const it) {
76 const VP8Encoder* const enc = it->enc_;
77 const int x = it->x_, y = it->y_;
78 const WebPPicture* const pic = enc->pic_;
79 const uint8_t* ysrc = pic->y + (y * pic->y_stride + x) * 16;
80 const uint8_t* usrc = pic->u + (y * pic->uv_stride + x) * 8;
81 const uint8_t* vsrc = pic->v + (y * pic->uv_stride + x) * 8;
82 uint8_t* ydst = it->yuv_in_ + Y_OFF;
83 uint8_t* udst = it->yuv_in_ + U_OFF;
84 uint8_t* vdst = it->yuv_in_ + V_OFF;
85 int w = (pic->width - x * 16);
86 int h = (pic->height - y * 16);
87 int i;
88
89 if (w > 16) w = 16;
90 if (h > 16) h = 16;
91 // Luma plane
92 for (i = 0; i < h; ++i) {
93 memcpy(ydst, ysrc, w);
94 if (w < 16) memset(ydst + w, ydst[w - 1], 16 - w);
95 ydst += BPS;
96 ysrc += pic->y_stride;
97 }
98 for (i = h; i < 16; ++i) {
99 memcpy(ydst, ydst - BPS, 16);
100 ydst += BPS;
101 }
102 // U/V plane
103 w = (w + 1) / 2;
104 h = (h + 1) / 2;
105 for (i = 0; i < h; ++i) {
106 memcpy(udst, usrc, w);
107 memcpy(vdst, vsrc, w);
108 if (w < 8) {
109 memset(udst + w, udst[w - 1], 8 - w);
110 memset(vdst + w, vdst[w - 1], 8 - w);
111 }
112 udst += BPS;
113 vdst += BPS;
114 usrc += pic->uv_stride;
115 vsrc += pic->uv_stride;
116 }
117 for (i = h; i < 8; ++i) {
118 memcpy(udst, udst - BPS, 8);
119 memcpy(vdst, vdst - BPS, 8);
120 udst += BPS;
121 vdst += BPS;
122 }
123 }
124
125 //-----------------------------------------------------------------------------
126 // Copy back the compressed samples into user space if requested.
127
VP8IteratorExport(const VP8EncIterator * const it)128 void VP8IteratorExport(const VP8EncIterator* const it) {
129 const VP8Encoder* const enc = it->enc_;
130 if (enc->config_->show_compressed) {
131 const int x = it->x_, y = it->y_;
132 const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
133 const uint8_t* const usrc = it->yuv_out_ + U_OFF;
134 const uint8_t* const vsrc = it->yuv_out_ + V_OFF;
135 const WebPPicture* const pic = enc->pic_;
136 uint8_t* ydst = pic->y + (y * pic->y_stride + x) * 16;
137 uint8_t* udst = pic->u + (y * pic->uv_stride + x) * 8;
138 uint8_t* vdst = pic->v + (y * pic->uv_stride + x) * 8;
139 int w = (pic->width - x * 16);
140 int h = (pic->height - y * 16);
141 int i;
142
143 if (w > 16) w = 16;
144 if (h > 16) h = 16;
145
146 // Luma plane
147 for (i = 0; i < h; ++i) {
148 memcpy(ydst + i * pic->y_stride, ysrc + i * BPS, w);
149 }
150 // U/V plane
151 {
152 const int uv_w = (w + 1) / 2;
153 const int uv_h = (h + 1) / 2;
154 for (i = 0; i < uv_h; ++i) {
155 memcpy(udst + i * pic->uv_stride, usrc + i * BPS, uv_w);
156 memcpy(vdst + i * pic->uv_stride, vsrc + i * BPS, uv_w);
157 }
158 }
159 }
160 }
161
162 //-----------------------------------------------------------------------------
163 // Non-zero contexts setup/teardown
164
165 // Nz bits:
166 // 0 1 2 3 Y
167 // 4 5 6 7
168 // 8 9 10 11
169 // 12 13 14 15
170 // 16 17 U
171 // 18 19
172 // 20 21 V
173 // 22 23
174 // 24 DC-intra16
175
176 // Convert packed context to byte array
177 #define BIT(nz, n) (!!((nz) & (1 << (n))))
178
VP8IteratorNzToBytes(VP8EncIterator * const it)179 void VP8IteratorNzToBytes(VP8EncIterator* const it) {
180 const int tnz = it->nz_[0], lnz = it->nz_[-1];
181
182 // Top-Y
183 it->top_nz_[0] = BIT(tnz, 12);
184 it->top_nz_[1] = BIT(tnz, 13);
185 it->top_nz_[2] = BIT(tnz, 14);
186 it->top_nz_[3] = BIT(tnz, 15);
187 // Top-U
188 it->top_nz_[4] = BIT(tnz, 18);
189 it->top_nz_[5] = BIT(tnz, 19);
190 // Top-V
191 it->top_nz_[6] = BIT(tnz, 22);
192 it->top_nz_[7] = BIT(tnz, 23);
193 // DC
194 it->top_nz_[8] = BIT(tnz, 24);
195
196 // left-Y
197 it->left_nz_[0] = BIT(lnz, 3);
198 it->left_nz_[1] = BIT(lnz, 7);
199 it->left_nz_[2] = BIT(lnz, 11);
200 it->left_nz_[3] = BIT(lnz, 15);
201 // left-U
202 it->left_nz_[4] = BIT(lnz, 17);
203 it->left_nz_[5] = BIT(lnz, 19);
204 // left-V
205 it->left_nz_[6] = BIT(lnz, 21);
206 it->left_nz_[7] = BIT(lnz, 23);
207 // left-DC is special, iterated separately
208 }
209
// Packs the iterator's top/left non-zero flags back into a single word and
// stores it in the current context slot (*it->nz_). Bits 15, 19 and 23 are
// taken from top_nz_[3], [5] and [7] only; left_nz_[3], [5], [7] and [8] are
// not read here.
void VP8IteratorBytesToNz(VP8EncIterator* const it) {
  // Destination bit for each of top_nz_[0..8].
  static const uint8_t kTopShift[9] = { 12, 13, 14, 15, 18, 19, 22, 23, 24 };
  uint32_t packed = 0;
  int i;

  // top (index 8: we propagate the _top_ DC bit, esp. for intra4)
  for (i = 0; i < 9; ++i) {
    packed |= it->top_nz_[i] << kTopShift[i];
  }
  // left: Y
  packed |= it->left_nz_[0] << 3;
  packed |= it->left_nz_[1] << 7;
  packed |= it->left_nz_[2] << 11;
  // left: U, V
  packed |= it->left_nz_[4] << 17;
  packed |= it->left_nz_[6] << 21;

  *it->nz_ = packed;
}

#undef BIT
227
228 //-----------------------------------------------------------------------------
// Advance to the next position, doing the bookkeeping.
230
// Moves the iterator to the next macroblock in raster order.
// If 'block_to_save' is non-NULL, its right-most column and bottom row are
// first saved into the encoder's left/top border caches, for use by the
// macroblocks to the right and below.
// Returns 1 while macroblocks remain to be processed, 0 once done.
int VP8IteratorNext(VP8EncIterator* const it,
                    const uint8_t* const block_to_save) {
  VP8Encoder* const enc = it->enc_;

  if (block_to_save != NULL) {
    const int mb_x = it->x_;
    const int mb_y = it->y_;
    const uint8_t* const y_blk = block_to_save + Y_OFF;
    const uint8_t* const uv_blk = block_to_save + U_OFF;   // U, then V at +8
    const int has_right_neighbour = (mb_x < enc->mb_w_ - 1);
    const int has_bottom_neighbour = (mb_y < enc->mb_h_ - 1);

    if (has_right_neighbour) {
      int row;
      // Right-most column of each plane becomes the next 'left' samples.
      for (row = 0; row < 16; ++row) {
        enc->y_left_[row] = y_blk[row * BPS + 15];
      }
      for (row = 0; row < 8; ++row) {
        const uint8_t* const line = uv_blk + row * BPS;
        enc->u_left_[row] = line[7];
        enc->v_left_[row] = line[15];
      }
      // The top-left corner comes from the current top cache: it must be
      // read before that cache is overwritten below.
      enc->y_left_[-1] = enc->y_top_[mb_x * 16 + 15];
      enc->u_left_[-1] = enc->uv_top_[mb_x * 16 + 0 + 7];
      enc->v_left_[-1] = enc->uv_top_[mb_x * 16 + 8 + 7];
    }
    if (has_bottom_neighbour) {
      // Bottom row of each plane becomes the next 'top' samples.
      memcpy(enc->y_top_ + mb_x * 16, y_blk + 15 * BPS, 16);
      memcpy(enc->uv_top_ + mb_x * 16, uv_blk + 7 * BPS, 8 + 8);
    }
  }

  // Step all the per-macroblock cursors.
  ++it->mb_;
  it->preds_ += 4;
  ++it->nz_;
  ++it->x_;

  if (it->x_ == enc->mb_w_) {
    // End of row: wrap to the first column of the next row.
    it->x_ = 0;
    ++it->y_;
    it->bw_ = &enc->parts_[it->y_ & (enc->num_parts_ - 1)];
    it->preds_ = enc->preds_ + it->y_ * 4 * enc->preds_w_;
    it->nz_ = enc->nz_;
    InitLeft(it);
  }

  --it->done_;
  return (it->done_ > 0);
}
272
273 //-----------------------------------------------------------------------------
274 // Helper function to set mode properties
275
VP8SetIntra16Mode(const VP8EncIterator * const it,int mode)276 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
277 int y;
278 uint8_t* preds = it->preds_;
279 for (y = 0; y < 4; ++y) {
280 memset(preds, mode, 4);
281 preds += it->enc_->preds_w_;
282 }
283 it->mb_->type_ = 1;
284 }
285
VP8SetIntra4Mode(const VP8EncIterator * const it,int modes[16])286 void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]) {
287 int x, y;
288 uint8_t* preds = it->preds_;
289 for (y = 0; y < 4; ++y) {
290 for (x = 0; x < 4; ++x) {
291 preds[x] = modes[x + y * 4];
292 }
293 preds += it->enc_->preds_w_;
294 }
295 it->mb_->type_ = 0;
296 }
297
VP8SetIntraUVMode(const VP8EncIterator * const it,int mode)298 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
299 it->mb_->uv_mode_ = mode;
300 }
301
VP8SetSkip(const VP8EncIterator * const it,int skip)302 void VP8SetSkip(const VP8EncIterator* const it, int skip) {
303 it->mb_->skip_ = skip;
304 }
305
VP8SetSegment(const VP8EncIterator * const it,int segment)306 void VP8SetSegment(const VP8EncIterator* const it, int segment) {
307 it->mb_->segment_ = segment;
308 }
309
310 //-----------------------------------------------------------------------------
311 // Intra4x4 sub-blocks iteration
312 //
// We store and update the boundary samples into an array of 37 pixels. They
// are updated as we iterate over and reconstruct each intra4x4 block in turn.
315 // The position of the samples has the following snake pattern:
316 //
317 // 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36 <- Top-right
318 // --+-----------+-----------+-----------+-----------+
319 // 15| 19| 23| 27| 31|
320 // 14| 18| 22| 26| 30|
321 // 13| 17| 21| 25| 29|
322 // 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
323 // --+-----------+-----------+-----------+-----------+
324 // 11| 15| 19| 23| 27|
325 // 10| 14| 18| 22| 26|
326 // 9| 13| 17| 21| 25|
327 // 8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
328 // --+-----------+-----------+-----------+-----------+
329 // 7| 11| 15| 19| 23|
330 // 6| 10| 14| 18| 22|
331 // 5| 9| 13| 17| 21|
332 // 4| 5 6 7 8| 9 10 11 12|13 14 15 16|17 18 19 20|
333 // --+-----------+-----------+-----------+-----------+
334 // 3| 7| 11| 15| 19|
335 // 2| 6| 10| 14| 18|
336 // 1| 5| 9| 13| 17|
337 // 0| 1 2 3 4| 5 6 7 8| 9 10 11 12|13 14 15 16|
338 // --+-----------+-----------+-----------+-----------+
339
// For each of the 16 intra4x4 sub-blocks (raster order), the position in the
// boundary-sample array of the top sample to pass to the prediction functions
// in dsp.c. Each entry is <row base> + 4 * <column>; the row base decreases
// by 4 for every sub-block row going down.
static const uint8_t VP8TopLeftI4[16] = {
  17 + 0, 17 + 4, 17 + 8, 17 + 12,   // sub-blocks  0..3
  13 + 0, 13 + 4, 13 + 8, 13 + 12,   // sub-blocks  4..7
   9 + 0,  9 + 4,  9 + 8,  9 + 12,   // sub-blocks  8..11
   5 + 0,  5 + 4,  5 + 8,  5 + 12    // sub-blocks 12..15
};
348
// Prepares the iterator for intra4x4 processing of the current macroblock:
// selects sub-block #0, fills the 37-sample boundary array from the encoder's
// left/top caches and imports the non-zero context.
void VP8IteratorStartI4(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  uint8_t* const boundary = it->i4_boundary_;
  uint8_t* const top_dst = boundary + 17;                      // [17..36]
  const uint8_t* const top_src = enc->y_top_ + it->x_ * 16;
  int k;

  it->i4_ = 0;    // first 4x4 sub-block
  it->i4_top_ = boundary + VP8TopLeftI4[0];

  // Left samples, stored bottom-to-top in [0..15], followed by the top-left
  // corner y_left_[-1] in [16].
  for (k = 0; k <= 16; ++k) {
    boundary[k] = enc->y_left_[15 - k];
  }
  // The 16 top samples of this macroblock.
  memcpy(top_dst, top_src, 16);
  // Four top-right samples: taken from the next macroblock's top row, except
  // on the far right of the picture where the last valid top sample is
  // replicated instead.
  if (it->x_ < enc->mb_w_ - 1) {
    memcpy(top_dst + 16, top_src + 16, 4);
  } else {
    memset(top_dst + 16, top_dst[15], 4);
  }

  VP8IteratorNzToBytes(it);   // import the non-zero context
}
375
// Stores the border samples of the just-reconstructed 4x4 sub-block (read
// from 'yuv_out') into the boundary array and advances to the next sub-block.
// Returns 0 once all 16 sub-blocks have been visited, 1 otherwise.
int VP8IteratorRotateI4(VP8EncIterator* const it,
                        const uint8_t* const yuv_out) {
  const int cur = it->i4_;
  const uint8_t* const blk = yuv_out + VP8Scan[cur];
  uint8_t* const top = it->i4_top_;
  const int on_right_edge = ((cur & 3) == 3);   // sub-blocks #3, #7, #11, #15

  // Update the cache with 7 fresh samples.
  // The bottom row of the block goes to top[-4..-1]: future top samples.
  memcpy(top - 4, blk + 3 * BPS, 4);
  if (on_right_edge) {
    // Replicate the top-right samples, as says the specs.
    memcpy(top, top + 4, 4);
  } else {
    // The right column (rows 2, 1, 0) goes to top[0..2]: future left samples.
    top[0] = blk[3 + 2 * BPS];
    top[1] = blk[3 + 1 * BPS];
    top[2] = blk[3 + 0 * BPS];
  }

  // Move on to the next sub-block.
  it->i4_ = cur + 1;
  if (it->i4_ == 16) {
    return 0;   // we're done
  }
  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
  return 1;
}
404
405 //-----------------------------------------------------------------------------
406
407 #if defined(__cplusplus) || defined(c_plusplus)
408 } // extern "C"
409 #endif
410