1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // VP8Iterator: block iterator
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13
14 #include <string.h>
15
16 #include "src/dsp/cpu.h"
17 #include "src/enc/vp8i_enc.h"
18
19 //------------------------------------------------------------------------------
20 // VP8Iterator
21 //------------------------------------------------------------------------------
22
// Resets the left-border caches of 'it' to their default values.
// The 16 luma and 8 + 8 chroma left samples are filled with 129. The sample
// at index -1 of each array is set to 129 too, except on the first row
// (it->y_ == 0) where it is 127. left_nz_[8] is cleared and, when
// it->top_derr_ is set, left_derr_ is zeroed as well.
static void InitLeft(VP8EncIterator* const it) {
  const uint8_t corner = (it->y_ > 0) ? 129 : 127;

  // Luma.
  it->y_left_[-1] = corner;
  memset(it->y_left_, 129, 16);
  // Chroma.
  it->u_left_[-1] = corner;
  memset(it->u_left_, 129, 8);
  it->v_left_[-1] = corner;
  memset(it->v_left_, 129, 8);

  it->left_nz_[8] = 0;
  if (it->top_derr_ != NULL) {
    memset(&it->left_derr_, 0, sizeof(it->left_derr_));
  }
}
34
// Resets the top context stored in the encoder attached to 'it':
//  - 2 * (mb_w_ * 16) bytes starting at enc->y_top_ are set to 127
//    (NOTE(review): presumably this also covers the chroma top samples that
//    follow the luma ones in memory -- confirm against the allocation),
//  - mb_w_ entries of enc->nz_ are zeroed,
//  - mb_w_ entries of enc->top_derr_ are zeroed, if that array is present.
static void InitTop(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  const size_t luma_row_size = enc->mb_w_ * 16;

  if (enc->top_derr_ != NULL) {
    memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_));
  }
  memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
  memset(enc->y_top_, 127, 2 * luma_row_size);
}
44
// Positions 'it' at the start (x_ = 0) of row 'y'.
// Selects enc->parts_[y & (num_parts_ - 1)] as it->bw_, rewinds the per-row
// pointers (preds_, mb_) and the top-context pointers (nz_, y_top_, uv_top_),
// then resets the left context.
// NOTE(review): the mask assumes num_parts_ is a power of two -- confirm.
void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
  VP8Encoder* const enc = it->enc_;

  // Position.
  it->y_ = y;
  it->x_ = 0;

  // Row-dependent pointers: each row spans 4 lines of preds_w_ prediction
  // modes and mb_w_ entries of mb_info_.
  it->mb_ = &enc->mb_info_[y * enc->mb_w_];
  it->preds_ = &enc->preds_[y * 4 * enc->preds_w_];
  it->bw_ = enc->parts_ + (y & (enc->num_parts_ - 1));

  // Top context restarts at the beginning of the encoder's buffers.
  it->uv_top_ = enc->uv_top_;
  it->y_top_ = enc->y_top_;
  it->nz_ = enc->nz_;

  InitLeft(it);  // keep last: depends on the freshly set it->y_
}
57
58 // restart a scan
VP8IteratorReset(VP8EncIterator * const it)59 static void VP8IteratorReset(VP8EncIterator* const it) {
60 VP8Encoder* const enc = it->enc_;
61 VP8IteratorSetRow(it, 0);
62 VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_); // default
63 InitTop(it);
64 memset(it->bit_count_, 0, sizeof(it->bit_count_));
65 it->do_trellis_ = 0;
66 }
67
// Sets both the running count-down and its initial value (count_down0_, used
// as the denominator when reporting progress) to 'count_down'.
void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {
  it->count_down0_ = count_down;
  it->count_down_ = count_down;
}
71
VP8IteratorIsDone(const VP8EncIterator * const it)72 int VP8IteratorIsDone(const VP8EncIterator* const it) {
73 return (it->count_down_ <= 0);
74 }
75
// Binds 'it' to 'enc' and prepares it for a new scan.
// Carves four consecutive YUV_SIZE_ENC-sized work areas (yuv_in_, yuv_out_,
// yuv_out2_, yuv_p_) out of it->yuv_mem_, lays out the left-sample arrays
// inside it->yuv_left_mem_, copies the encoder's lf_stats_, percent_ and
// top_derr_ into the iterator, and finally resets the scan.
void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
  uint8_t* const yuv_in = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);
  uint8_t* const yuv_out = yuv_in + YUV_SIZE_ENC;
  uint8_t* const yuv_out2 = yuv_out + YUV_SIZE_ENC;
  // The '+ 1' keeps at least one byte before y_left_, which is accessed as
  // y_left_[-1] elsewhere in this file.
  uint8_t* const y_left = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
  uint8_t* const u_left = y_left + 32;
  uint8_t* const v_left = u_left + 16;

  it->enc_ = enc;
  it->lf_stats_ = enc->lf_stats_;
  it->percent0_ = enc->percent_;
  it->top_derr_ = enc->top_derr_;

  it->yuv_in_ = yuv_in;
  it->yuv_out_ = yuv_out;
  it->yuv_out2_ = yuv_out2;
  it->yuv_p_ = yuv_out2 + YUV_SIZE_ENC;

  it->y_left_ = y_left;
  it->u_left_ = u_left;
  it->v_left_ = v_left;

  VP8IteratorReset(it);  // needs enc_ and top_derr_ to be set
}
90
VP8IteratorProgress(const VP8EncIterator * const it,int delta)91 int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
92 VP8Encoder* const enc = it->enc_;
93 if (delta && enc->pic_->progress_hook != NULL) {
94 const int done = it->count_down0_ - it->count_down_;
95 const int percent = (it->count_down0_ <= 0)
96 ? it->percent0_
97 : it->percent0_ + delta * done / it->count_down0_;
98 return WebPReportProgress(enc->pic_, percent, &enc->percent_);
99 }
100 return 1;
101 }
102
103 //------------------------------------------------------------------------------
104 // Import the source samples into the cache. Takes care of replicating
105 // boundary pixels if necessary.
106
MinSize(int a,int b)107 static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; }
108
ImportBlock(const uint8_t * src,int src_stride,uint8_t * dst,int w,int h,int size)109 static void ImportBlock(const uint8_t* src, int src_stride,
110 uint8_t* dst, int w, int h, int size) {
111 int i;
112 for (i = 0; i < h; ++i) {
113 memcpy(dst, src, w);
114 if (w < size) {
115 memset(dst + w, dst[w - 1], size - w);
116 }
117 dst += BPS;
118 src += src_stride;
119 }
120 for (i = h; i < size; ++i) {
121 memcpy(dst, dst - BPS, size);
122 dst += BPS;
123 }
124 }
125
// Gathers 'len' samples from 'src', stepping by 'src_stride' between samples,
// into the contiguous array 'dst'. Entries from 'len' up to 'total_len' are
// then filled with the last gathered sample, dst[len - 1].
static void ImportLine(const uint8_t* src, int src_stride,
                       uint8_t* dst, int len, int total_len) {
  int pos = 0;
  while (pos < len) {
    dst[pos] = *src;
    src += src_stride;
    ++pos;
  }
  while (pos < total_len) {
    dst[pos] = dst[len - 1];
    ++pos;
  }
}
132
// Imports the source samples of the current macroblock into it->yuv_in_,
// replicating boundary pixels when the macroblock sticks out of the picture.
// If 'tmp_32' is not NULL, the left and top boundary samples are additionally
// taken from the source (uncompressed) picture: the left ones go to
// y_left_/u_left_/v_left_, the top ones into 'tmp_32' (16 luma samples
// followed by 8 U and 8 V samples), and it->y_top_ / it->uv_top_ are
// redirected to that buffer.
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
  const VP8Encoder* const enc = it->enc_;
  const WebPPicture* const pic = enc->pic_;
  const int x = it->x_;
  const int y = it->y_;
  // Top-left sample of the macroblock in each source plane.
  const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
  const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
  const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
  // Part of the macroblock that actually lies inside the picture.
  const int w = MinSize(pic->width - x * 16, 16);
  const int h = MinSize(pic->height - y * 16, 16);
  const int uv_w = (w + 1) >> 1;
  const int uv_h = (h + 1) >> 1;

  ImportBlock(ysrc, pic->y_stride, it->yuv_in_ + Y_OFF_ENC, w, h, 16);
  ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
  ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);

  if (tmp_32 == NULL) return;

  // Left boundary, from the source samples.
  if (x != 0) {
    // Top-left corner: default value on the first row, source sample else.
    if (y != 0) {
      it->y_left_[-1] = ysrc[- 1 - pic->y_stride];
      it->u_left_[-1] = usrc[- 1 - pic->uv_stride];
      it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];
    } else {
      it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
    }
    // Column just left of the macroblock, read vertically.
    ImportLine(ysrc - 1, pic->y_stride, it->y_left_, h, 16);
    ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
    ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
  } else {
    InitLeft(it);  // nothing on the left: use the default values
  }

  // Top boundary, stored in the caller-supplied scratch buffer.
  it->y_top_ = tmp_32 + 0;
  it->uv_top_ = tmp_32 + 16;
  if (y != 0) {
    // Row just above the macroblock, read horizontally.
    ImportLine(ysrc - pic->y_stride, 1, tmp_32, w, 16);
    ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16, uv_w, 8);
    ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8);
  } else {
    memset(tmp_32, 127, 32 * sizeof(*tmp_32));
  }
}
177
178 //------------------------------------------------------------------------------
179 // Copy back the compressed samples into user space if requested.
180
ExportBlock(const uint8_t * src,uint8_t * dst,int dst_stride,int w,int h)181 static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
182 int w, int h) {
183 while (h-- > 0) {
184 memcpy(dst, src, w);
185 dst += dst_stride;
186 src += BPS;
187 }
188 }
189
VP8IteratorExport(const VP8EncIterator * const it)190 void VP8IteratorExport(const VP8EncIterator* const it) {
191 const VP8Encoder* const enc = it->enc_;
192 if (enc->config_->show_compressed) {
193 const int x = it->x_, y = it->y_;
194 const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
195 const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
196 const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
197 const WebPPicture* const pic = enc->pic_;
198 uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
199 uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
200 uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
201 int w = (pic->width - x * 16);
202 int h = (pic->height - y * 16);
203
204 if (w > 16) w = 16;
205 if (h > 16) h = 16;
206
207 // Luma plane
208 ExportBlock(ysrc, ydst, pic->y_stride, w, h);
209
210 { // U/V planes
211 const int uv_w = (w + 1) >> 1;
212 const int uv_h = (h + 1) >> 1;
213 ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);
214 ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);
215 }
216 }
217 }
218
219 //------------------------------------------------------------------------------
220 // Non-zero contexts setup/teardown
221
222 // Nz bits:
223 // 0 1 2 3 Y
224 // 4 5 6 7
225 // 8 9 10 11
226 // 12 13 14 15
227 // 16 17 U
228 // 18 19
229 // 20 21 V
230 // 22 23
231 // 24 DC-intra16
232
233 // Convert packed context to byte array
234 #define BIT(nz, n) (!!((nz) & (1 << (n))))
235
// Unpacks the packed non-zero contexts into the it->top_nz_ / it->left_nz_
// arrays (one 0/1 entry per position).
// The top context is read from it->nz_[0] (bottom row of each plane, plus
// the DC bit #24), the left one from it->nz_[-1] (right column of each
// plane). left_nz_[8] is not touched here.
// NOTE(review): it->nz_[-1] must be addressable even for the first
// macroblock of a row -- confirm against the allocation of enc->nz_.
void VP8IteratorNzToBytes(VP8EncIterator* const it) {
  const int tnz = it->nz_[0], lnz = it->nz_[-1];
  int* const top_nz = it->top_nz_;
  int* const left_nz = it->left_nz_;
  int i;

  // Y: bottom row is bits 12..15, right column is bits 3, 7, 11, 15.
  for (i = 0; i < 4; ++i) {
    top_nz[i] = BIT(tnz, 12 + i);
    left_nz[i] = BIT(lnz, 3 + 4 * i);
  }
  // U / V: bottom rows are bits 18..19 / 22..23, right columns are
  // bits 17, 19 / 21, 23.
  for (i = 0; i < 2; ++i) {
    top_nz[4 + i] = BIT(tnz, 18 + i);
    top_nz[6 + i] = BIT(tnz, 22 + i);
    left_nz[4 + i] = BIT(lnz, 17 + 2 * i);
    left_nz[6 + i] = BIT(lnz, 21 + 2 * i);
  }
  // DC
  top_nz[8] = BIT(tnz, 24);
  // left-DC is special, iterated separately
}
268
// Packs the it->top_nz_ / it->left_nz_ arrays back into a single word and
// stores it in *it->nz_.
// left_nz_[3], left_nz_[5] and left_nz_[7] are not packed from the left
// array: their bit positions (15, 19, 23) are the ones written from
// top_nz_[3], top_nz_[5] and top_nz_[7].
void VP8IteratorBytesToNz(VP8EncIterator* const it) {
  const int* const top_nz = it->top_nz_;
  const int* const left_nz = it->left_nz_;
  uint32_t nz = 0;
  int i;

  // top: Y -> bits 12..15, U -> bits 18..19, V -> bits 22..23
  for (i = 0; i < 4; ++i) {
    nz |= top_nz[i] << (12 + i);
  }
  for (i = 0; i < 2; ++i) {
    nz |= top_nz[4 + i] << (18 + i);
    nz |= top_nz[6 + i] << (22 + i);
  }
  nz |= top_nz[8] << 24;  // we propagate the _top_ bit, esp. for intra4

  // left: Y -> bits 3, 7, 11, U -> bit 17, V -> bit 21
  for (i = 0; i < 3; ++i) {
    nz |= left_nz[i] << (3 + 4 * i);
  }
  nz |= left_nz[4] << 17;
  nz |= left_nz[6] << 21;

  *it->nz_ = nz;
}
286
287 #undef BIT
288
289 //------------------------------------------------------------------------------
290 // Advance to the next position, doing the bookkeeping.
291
// Saves the right column and bottom row of the samples in it->yuv_out_ as
// the left / top context for the macroblocks to come.
// The left context is only updated when this is not the last macroblock of
// the row, the top context only when this is not the last row.
void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
  VP8Encoder* const enc = it->enc_;
  const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
  const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;
  const int has_right_neighbour = (it->x_ < enc->mb_w_ - 1);
  const int has_bottom_neighbour = (it->y_ < enc->mb_h_ - 1);

  if (has_right_neighbour) {
    int row;
    // New top-left corner: last sample of the current top row. It must be
    // fetched before the top row gets overwritten below.
    it->y_left_[-1] = it->y_top_[15];
    it->u_left_[-1] = it->uv_top_[0 + 7];
    it->v_left_[-1] = it->uv_top_[8 + 7];
    // New left samples: right-most column of each plane. U and V share the
    // same rows of the work area: U's last column is #7, V's is #15.
    for (row = 0; row < 8; ++row) {
      it->u_left_[row] = uvsrc[7 + row * BPS];
      it->v_left_[row] = uvsrc[15 + row * BPS];
    }
    for (row = 0; row < 16; ++row) {
      it->y_left_[row] = ysrc[15 + row * BPS];
    }
  }
  if (has_bottom_neighbour) {
    // New top samples: last row of luma (16) and of U + V (8 + 8).
    memcpy(it->y_top_, ysrc + 15 * BPS, 16);
    memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
  }
}
316
// Advances 'it' to the next macroblock, moving to the start of the next row
// when the end of the current one is reached, and decrements the count-down.
// Returns 1 while the count-down is still positive, 0 otherwise.
int VP8IteratorNext(VP8EncIterator* const it) {
  it->x_ += 1;
  if (it->x_ != it->enc_->mb_w_) {
    // Same row: step every per-macroblock pointer by one macroblock.
    it->mb_ += 1;
    it->nz_ += 1;
    it->preds_ += 4;
    it->y_top_ += 16;
    it->uv_top_ += 16;
  } else {
    // End of row: restart at the beginning of the next one.
    it->y_ += 1;
    VP8IteratorSetRow(it, it->y_);
  }
  it->count_down_ -= 1;
  return (it->count_down_ > 0);
}
329
330 //------------------------------------------------------------------------------
331 // Helper function to set mode properties
332
VP8SetIntra16Mode(const VP8EncIterator * const it,int mode)333 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
334 uint8_t* preds = it->preds_;
335 int y;
336 for (y = 0; y < 4; ++y) {
337 memset(preds, mode, 4);
338 preds += it->enc_->preds_w_;
339 }
340 it->mb_->type_ = 1;
341 }
342
VP8SetIntra4Mode(const VP8EncIterator * const it,const uint8_t * modes)343 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
344 uint8_t* preds = it->preds_;
345 int y;
346 for (y = 4; y > 0; --y) {
347 memcpy(preds, modes, 4 * sizeof(*modes));
348 preds += it->enc_->preds_w_;
349 modes += 4;
350 }
351 it->mb_->type_ = 0;
352 }
353
VP8SetIntraUVMode(const VP8EncIterator * const it,int mode)354 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
355 it->mb_->uv_mode_ = mode;
356 }
357
VP8SetSkip(const VP8EncIterator * const it,int skip)358 void VP8SetSkip(const VP8EncIterator* const it, int skip) {
359 it->mb_->skip_ = skip;
360 }
361
VP8SetSegment(const VP8EncIterator * const it,int segment)362 void VP8SetSegment(const VP8EncIterator* const it, int segment) {
363 it->mb_->segment_ = segment;
364 }
365
366 //------------------------------------------------------------------------------
367 // Intra4x4 sub-blocks iteration
368 //
// We store and update the boundary samples into an array of 37 pixels. They
// are updated as we iterate and reconstruct each intra4x4 block in turn.
371 // The position of the samples has the following snake pattern:
372 //
373 // 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36 <- Top-right
374 // --+-----------+-----------+-----------+-----------+
375 // 15| 19| 23| 27| 31|
376 // 14| 18| 22| 26| 30|
377 // 13| 17| 21| 25| 29|
378 // 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
379 // --+-----------+-----------+-----------+-----------+
380 // 11| 15| 19| 23| 27|
381 // 10| 14| 18| 22| 26|
382 // 9| 13| 17| 21| 25|
383 // 8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
384 // --+-----------+-----------+-----------+-----------+
385 // 7| 11| 15| 19| 23|
386 // 6| 10| 14| 18| 22|
387 // 5| 9| 13| 17| 21|
388 // 4| 5 6 7 8| 9 10 11 12|13 14 15 16|17 18 19 20|
389 // --+-----------+-----------+-----------+-----------+
390 // 3| 7| 11| 15| 19|
391 // 2| 6| 10| 14| 18|
392 // 1| 5| 9| 13| 17|
393 // 0| 1 2 3 4| 5 6 7 8| 9 10 11 12|13 14 15 16|
394 // --+-----------+-----------+-----------+-----------+
395
// Offset, within the i4_boundary_[] array, of the first top sample of each
// 4x4 sub-block (in raster order). The resulting pointer is what gets passed
// as 'top' to the prediction functions in dsp.c.
// For sub-block (row, col) the offset is 17 - 4 * row + 4 * col.
static const uint8_t VP8TopLeftI4[16] = {
  17, 21, 25, 29,   // sub-blocks #0  .. #3
  13, 17, 21, 25,   // sub-blocks #4  .. #7
   9, 13, 17, 21,   // sub-blocks #8  .. #11
   5,  9, 13, 17    // sub-blocks #12 .. #15
};
404
// Starts the iteration over the 4x4 sub-blocks of the current macroblock:
// selects sub-block #0, loads the 37 boundary samples into it->i4_boundary_
// (17 left samples stored bottom-to-top and ending with the top-left corner,
// then 16 top samples, then 4 top-right samples) and unpacks the non-zero
// context.
void VP8IteratorStartI4(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  uint8_t* const boundary = it->i4_boundary_;
  uint8_t* const top = boundary + 17;   // first top sample
  int i;

  it->i4_ = 0;    // first 4x4 sub-block
  it->i4_top_ = boundary + VP8TopLeftI4[0];

  // Left samples, reversed: y_left_[15] goes first, y_left_[-1] (the
  // top-left corner) goes last, at position 16.
  for (i = -1; i < 16; ++i) {
    boundary[15 - i] = it->y_left_[i];
  }
  // Top samples.
  memcpy(top, it->y_top_, 16);
  // top-right samples have a special case on the far right of the picture
  if (it->x_ < enc->mb_w_ - 1) {
    memcpy(top + 16, it->y_top_ + 16, 4);
  } else {    // else, replicate the last valid pixel four times
    memset(top + 16, top[15], 4);
  }
#if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)
  // Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top
  // is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used
  // meaningfully, but due to limitations in MemorySanitizer related to
  // modeling of tbl instructions, a warning will be issued. This can be
  // removed if MSan is updated to support the instructions. See
  // https://issues.webmproject.org/372109644.
  memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);
#endif
  VP8IteratorNzToBytes(it);   // import the non-zero context
}
440
// Updates the boundary cache with the samples of the 4x4 sub-block that was
// just reconstructed in 'yuv_out', then moves on to the next sub-block.
// Returns 0 once all 16 sub-blocks have been visited, 1 otherwise (in which
// case it->i4_top_ points to the top samples of the new sub-block).
int VP8IteratorRotateI4(VP8EncIterator* const it,
                        const uint8_t* const yuv_out) {
  const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];
  uint8_t* const top = it->i4_top_;
  const int on_right_edge = ((it->i4_ & 3) == 3);  // sub-blocks #3,#7,#11,#15
  int k;

  // Update the cache with 7 fresh samples.
  // The bottom row of the block becomes the top of the block below it.
  for (k = 0; k < 4; ++k) {
    top[k - 4] = blk[3 * BPS + k];
  }
  if (on_right_edge) {
    // Replicate the top-right samples, as says the specs.
    for (k = 0; k < 4; ++k) {
      top[k] = top[4 + k];
    }
  } else {
    // The right column (read bottom-to-top, last row excluded) becomes the
    // left of the next block.
    for (k = 0; k < 3; ++k) {
      top[k] = blk[(2 - k) * BPS + 3];
    }
  }

  // Move to the next sub-block, if any.
  ++it->i4_;
  if (it->i4_ != 16) {
    it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
    return 1;
  }
  return 0;   // we're done
}
469
470 //------------------------------------------------------------------------------
471