• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // VP8Iterator: block iterator
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13 
14 #include <string.h>
15 
16 #include "src/dsp/cpu.h"
17 #include "src/enc/vp8i_enc.h"
18 
19 //------------------------------------------------------------------------------
20 // VP8Iterator
21 //------------------------------------------------------------------------------
22 
// Resets the left-border state of 'it' to its defaults:
//  - the 16 luma and 8 + 8 chroma left samples are set to 129,
//  - the three top-left corner samples (index -1) are set to 127 on the first
//    macroblock row (y_ == 0) and to 129 on any other row,
//  - left_nz_[8] is cleared,
//  - left_derr_ is zeroed, but only when it->top_derr_ is non-NULL.
static void InitLeft(VP8EncIterator* const it) {
  const uint8_t corner = (it->y_ > 0) ? 129 : 127;

  it->v_left_[-1] = corner;
  it->u_left_[-1] = corner;
  it->y_left_[-1] = corner;

  memset(it->y_left_, 129, 16);
  memset(it->u_left_, 129, 8);
  memset(it->v_left_, 129, 8);

  it->left_nz_[8] = 0;

  if (it->top_derr_ == NULL) return;
  memset(&it->left_derr_, 0, sizeof(it->left_derr_));
}
34 
// Resets the encoder-wide 'top' state shared by all iterators on it->enc_:
// the top samples are filled with 127, and the per-macroblock non-zero
// contexts (nz_) are cleared. top_derr_ is cleared as well when allocated.
static void InitTop(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  // 16 bytes per macroblock column. Twice that amount is filled starting at
  // y_top_. NOTE(review): this presumably relies on uv_top_ being stored
  // right after y_top_ -- confirm against the allocation code.
  const size_t top_size = enc->mb_w_ * 16;

  memset(enc->y_top_, 127, top_size * 2);
  memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));

  if (enc->top_derr_ == NULL) return;
  memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_));
}
44 
// Positions 'it' on the first macroblock (x_ = 0) of macroblock row 'y':
// every per-row cursor is rewound to the start of its row and the left-border
// state is reset through InitLeft().
void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
  VP8Encoder* const enc = it->enc_;

  // Position.
  it->y_ = y;
  it->x_ = 0;

  // Cursors that restart at the beginning of the encoder-wide arrays.
  it->uv_top_ = enc->uv_top_;
  it->y_top_ = enc->y_top_;
  it->nz_ = enc->nz_;

  // Cursors that depend on the row index: preds_ moves 4 rows of 'preds_w_'
  // entries per macroblock row, mb_ moves 'mb_w_' entries per macroblock row.
  it->mb_ = enc->mb_info_ + y * enc->mb_w_;
  it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;

  // The row index is masked with (num_parts_ - 1) to select the partition.
  // NOTE(review): acts as 'y % num_parts_' only if num_parts_ is a power of
  // two -- confirm.
  it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)];

  // Must come after y_ is set: the corner value depends on it.
  InitLeft(it);
}
57 
58 // restart a scan
VP8IteratorReset(VP8EncIterator * const it)59 static void VP8IteratorReset(VP8EncIterator* const it) {
60   VP8Encoder* const enc = it->enc_;
61   VP8IteratorSetRow(it, 0);
62   VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_);  // default
63   InitTop(it);
64   memset(it->bit_count_, 0, sizeof(it->bit_count_));
65   it->do_trellis_ = 0;
66 }
67 
// Sets the number of macroblocks left to process. The same value is recorded
// in count_down0_ as the initial total, which VP8IteratorProgress() uses to
// compute the completed fraction.
void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {
  it->count_down0_ = count_down;
  it->count_down_ = it->count_down0_;
}
71 
VP8IteratorIsDone(const VP8EncIterator * const it)72 int VP8IteratorIsDone(const VP8EncIterator* const it) {
73   return (it->count_down_ <= 0);
74 }
75 
// Binds 'it' to encoder 'enc' and prepares it for a first scan:
//  - carves four consecutive work areas of YUV_SIZE_ENC bytes each
//    (yuv_in_, yuv_out_, yuv_out2_, yuv_p_) out of the aligned yuv_mem_,
//  - lays out the left-sample arrays inside the aligned yuv_left_mem_,
//  - copies the shared pointers / starting percentage from the encoder,
//  - and finally calls VP8IteratorReset().
void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
  it->enc_ = enc;

  // Work areas, laid out back to back.
  it->yuv_in_ = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);
  it->yuv_out_ = it->yuv_in_ + YUV_SIZE_ENC;
  it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;
  it->yuv_p_ = it->yuv_out2_ + YUV_SIZE_ENC;

  // Left samples. The '+ 1' is applied before aligning, so index -1 (the
  // top-left corner sample) of y_left_ still falls inside yuv_left_mem_.
  // u_left_ starts 32 bytes after y_left_, v_left_ 16 bytes after u_left_.
  it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
  it->u_left_ = it->y_left_ + 16 + 16;
  it->v_left_ = it->u_left_ + 16;

  // State inherited from the encoder.
  it->top_derr_ = enc->top_derr_;
  it->percent0_ = enc->percent_;
  it->lf_stats_ = enc->lf_stats_;

  // Needs enc_ and top_derr_ to be set already.
  VP8IteratorReset(it);
}
90 
VP8IteratorProgress(const VP8EncIterator * const it,int delta)91 int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
92   VP8Encoder* const enc = it->enc_;
93   if (delta && enc->pic_->progress_hook != NULL) {
94     const int done = it->count_down0_ - it->count_down_;
95     const int percent = (it->count_down0_ <= 0)
96                       ? it->percent0_
97                       : it->percent0_ + delta * done / it->count_down0_;
98     return WebPReportProgress(enc->pic_, percent, &enc->percent_);
99   }
100   return 1;
101 }
102 
103 //------------------------------------------------------------------------------
104 // Import the source samples into the cache. Takes care of replicating
105 // boundary pixels if necessary.
106 
// Returns the smaller of 'a' and 'b'.
static WEBP_INLINE int MinSize(int a, int b) {
  if (b <= a) return b;
  return a;
}
108 
// Copies a 'w' x 'h' block of samples from 'src' (row pitch 'src_stride')
// into 'dst' (row pitch BPS) and extends it to 'size' x 'size':
// each copied row is padded on the right by repeating its last sample, and
// the missing bottom rows are copies of the row just above them.
// 'w' and 'h' are expected to be at least 1 (the padding reads sample w - 1
// and the row above).
static void ImportBlock(const uint8_t* src, int src_stride,
                        uint8_t* dst, int w, int h, int size) {
  const int right_pad = size - w;
  const uint8_t* in = src;
  uint8_t* out = dst;
  int row = 0;

  // Rows available in the source.
  while (row < h) {
    memcpy(out, in, w);
    if (right_pad > 0) {
      memset(out + w, out[w - 1], right_pad);
    }
    in += src_stride;
    out += BPS;
    ++row;
  }
  // Rows below the source: replicate the previous (already padded) row.
  while (row < size) {
    memcpy(out, out - BPS, size);
    out += BPS;
    ++row;
  }
}
125 
// Gathers 'len' samples from 'src', stepping 'src_stride' bytes between
// samples, into the contiguous array 'dst'. The remaining entries up to
// 'total_len' are filled with the last gathered sample, dst[len - 1].
static void ImportLine(const uint8_t* src, int src_stride,
                       uint8_t* dst, int len, int total_len) {
  const uint8_t* cursor = src;
  int pos = 0;

  while (pos < len) {
    dst[pos] = *cursor;
    cursor += src_stride;
    ++pos;
  }
  while (pos < total_len) {
    dst[pos] = dst[len - 1];
    ++pos;
  }
}
132 
// Loads the source samples of the current macroblock (it->x_, it->y_) into
// the yuv_in_ work area: a 16x16 luma block and two 8x8 chroma blocks,
// padded by replication when the macroblock extends past the picture.
//
// If 'tmp_32' is non-NULL, the boundary samples are additionally taken from
// the (uncompressed) source picture: the left arrays and corner samples are
// refreshed, and it->y_top_ / it->uv_top_ are redirected to 'tmp_32', which
// receives 16 luma, 8 U and 8 V top samples (32 bytes in total).
// If 'tmp_32' is NULL, only yuv_in_ is updated.
void VP8IteratorImport(VP8EncIterator* const it, uint8_t* const tmp_32) {
  const VP8Encoder* const enc = it->enc_;
  const WebPPicture* const pic = enc->pic_;
  const int x = it->x_, y = it->y_;
  // Top-left sample of the macroblock in each source plane.
  const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
  const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
  const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
  // Part of the macroblock that lies inside the picture.
  const int w = MinSize(pic->width - x * 16, 16);
  const int h = MinSize(pic->height - y * 16, 16);
  const int uv_w = (w + 1) >> 1;
  const int uv_h = (h + 1) >> 1;
  uint8_t* const y_top = tmp_32 + 0;
  uint8_t* const u_top = tmp_32 + 16;
  uint8_t* const v_top = tmp_32 + 16 + 8;

  ImportBlock(ysrc, pic->y_stride,  it->yuv_in_ + Y_OFF_ENC, w, h, 16);
  ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
  ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);

  if (tmp_32 == NULL) return;

  // Left boundary, imported from the source samples.
  if (x == 0) {
    // No left neighbour: fall back to the default values.
    InitLeft(it);
  } else {
    // Corner samples: the source sample above-left of the macroblock, or 127
    // on the first row.
    if (y != 0) {
      it->y_left_[-1] = ysrc[- 1 - pic->y_stride];
      it->u_left_[-1] = usrc[- 1 - pic->uv_stride];
      it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];
    } else {
      it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
    }
    // Column just left of the macroblock, read top to bottom.
    ImportLine(ysrc - 1, pic->y_stride,  it->y_left_, h,   16);
    ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
    ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
  }

  // Top boundary: goes to the caller's scratch buffer.
  it->y_top_  = y_top;
  it->uv_top_ = u_top;
  if (y != 0) {
    // Row just above the macroblock, read left to right.
    ImportLine(ysrc - pic->y_stride,  1, y_top, w,   16);
    ImportLine(usrc - pic->uv_stride, 1, u_top, uv_w, 8);
    ImportLine(vsrc - pic->uv_stride, 1, v_top, uv_w, 8);
  } else {
    memset(tmp_32, 127, 32 * sizeof(*tmp_32));
  }
}
177 
178 //------------------------------------------------------------------------------
179 // Copy back the compressed samples into user space if requested.
180 
// Copies 'h' rows of 'w' bytes from 'src' (row pitch BPS) to 'dst' (row pitch
// 'dst_stride'). Nothing is copied when 'h' is zero or negative.
static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
                        int w, int h) {
  const uint8_t* in = src;
  uint8_t* out = dst;
  int rows_left;
  for (rows_left = h; rows_left > 0; --rows_left) {
    memcpy(out, in, w);
    in += BPS;
    out += dst_stride;
  }
}
189 
VP8IteratorExport(const VP8EncIterator * const it)190 void VP8IteratorExport(const VP8EncIterator* const it) {
191   const VP8Encoder* const enc = it->enc_;
192   if (enc->config_->show_compressed) {
193     const int x = it->x_, y = it->y_;
194     const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
195     const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
196     const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
197     const WebPPicture* const pic = enc->pic_;
198     uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
199     uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
200     uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
201     int w = (pic->width - x * 16);
202     int h = (pic->height - y * 16);
203 
204     if (w > 16) w = 16;
205     if (h > 16) h = 16;
206 
207     // Luma plane
208     ExportBlock(ysrc, ydst, pic->y_stride, w, h);
209 
210     {   // U/V planes
211       const int uv_w = (w + 1) >> 1;
212       const int uv_h = (h + 1) >> 1;
213       ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);
214       ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);
215     }
216   }
217 }
218 
219 //------------------------------------------------------------------------------
220 // Non-zero contexts setup/teardown
221 
222 // Nz bits:
223 //  0  1  2  3  Y
224 //  4  5  6  7
225 //  8  9 10 11
226 // 12 13 14 15
227 // 16 17        U
228 // 18 19
229 // 20 21        V
230 // 22 23
231 // 24           DC-intra16
232 
233 // Convert packed context to byte array
234 #define BIT(nz, n) (!!((nz) & (1 << (n))))
235 
VP8IteratorNzToBytes(VP8EncIterator * const it)236 void VP8IteratorNzToBytes(VP8EncIterator* const it) {
237   const int tnz = it->nz_[0], lnz = it->nz_[-1];
238   int* const top_nz = it->top_nz_;
239   int* const left_nz = it->left_nz_;
240 
241   // Top-Y
242   top_nz[0] = BIT(tnz, 12);
243   top_nz[1] = BIT(tnz, 13);
244   top_nz[2] = BIT(tnz, 14);
245   top_nz[3] = BIT(tnz, 15);
246   // Top-U
247   top_nz[4] = BIT(tnz, 18);
248   top_nz[5] = BIT(tnz, 19);
249   // Top-V
250   top_nz[6] = BIT(tnz, 22);
251   top_nz[7] = BIT(tnz, 23);
252   // DC
253   top_nz[8] = BIT(tnz, 24);
254 
255   // left-Y
256   left_nz[0] = BIT(lnz,  3);
257   left_nz[1] = BIT(lnz,  7);
258   left_nz[2] = BIT(lnz, 11);
259   left_nz[3] = BIT(lnz, 15);
260   // left-U
261   left_nz[4] = BIT(lnz, 17);
262   left_nz[5] = BIT(lnz, 19);
263   // left-V
264   left_nz[6] = BIT(lnz, 21);
265   left_nz[7] = BIT(lnz, 23);
266   // left-DC is special, iterated separately
267 }
268 
// Packs the flag arrays top_nz_ / left_nz_ back into a single 32-bit context
// and stores it in the current entry, *it->nz_.
void VP8IteratorBytesToNz(VP8EncIterator* const it) {
  // Destination bit of top_nz[0..8]. The last one (bit 24) propagates the
  // _top_ DC bit, esp. for intra4.
  static const int kTopShift[9] = { 12, 13, 14, 15, 18, 19, 22, 23, 24 };
  // Left flags that get packed, and their destination bits.
  // left_nz[3], left_nz[5] and left_nz[7] are skipped: the bits
  // VP8IteratorNzToBytes() reads them from (15, 19, 23) are the ones already
  // written from top_nz[3], top_nz[5] and top_nz[7] above.
  static const int kLeftIndex[5] = { 0, 1, 2, 4, 6 };
  static const int kLeftShift[5] = { 3, 7, 11, 17, 21 };
  const int* const top_nz = it->top_nz_;
  const int* const left_nz = it->left_nz_;
  uint32_t nz = 0;
  int i;

  // top
  for (i = 0; i < 9; ++i) {
    nz |= top_nz[i] << kTopShift[i];
  }
  // left
  for (i = 0; i < 5; ++i) {
    nz |= left_nz[kLeftIndex[i]] << kLeftShift[i];
  }

  *it->nz_ = nz;
}
286 
287 #undef BIT
288 
289 //------------------------------------------------------------------------------
290 // Advance to the next position, doing the bookkeeping.
291 
// Saves the border samples of the macroblock held in yuv_out_ so they can
// serve as boundary for the following macroblocks:
//  - unless this is the last macroblock of the row, its right-most column
//    becomes the new left samples, and the last sample of the current top
//    row becomes the new top-left corner;
//  - unless this is the last row, its bottom row becomes the new top samples.
void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
  VP8Encoder* const enc = it->enc_;
  const int has_right_mb = (it->x_ < enc->mb_w_ - 1);
  const int has_lower_mb = (it->y_ < enc->mb_h_ - 1);
  const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
  const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;

  if (has_right_mb) {   // left
    int row;
    // Column 15 of luma; for chroma, columns 7 (U) and 15 (V) of the rows
    // starting at U_OFF_ENC.
    for (row = 0; row < 16; ++row) {
      it->y_left_[row] = ysrc[row * BPS + 15];
    }
    for (row = 0; row < 8; ++row) {
      it->u_left_[row] = uvsrc[row * BPS + 7];
      it->v_left_[row] = uvsrc[row * BPS + 15];
    }
    // top-left: must be read before the 'top' samples are overwritten below!
    it->y_left_[-1] = it->y_top_[15];
    it->u_left_[-1] = it->uv_top_[0 + 7];
    it->v_left_[-1] = it->uv_top_[8 + 7];
  }
  if (has_lower_mb) {  // top
    // Row 15 of luma (16 samples) and row 7 of chroma (8 U + 8 V samples).
    memcpy(it->y_top_, ysrc + 15 * BPS, 16);
    memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
  }
}
316 
// Moves 'it' to the next macroblock in raster order and decrements the
// countdown. When the end of a row is reached, the iterator is repositioned
// at the start of the next row with VP8IteratorSetRow().
// Returns non-zero while the countdown is still positive after the step.
int VP8IteratorNext(VP8EncIterator* const it) {
  it->x_ += 1;
  if (it->x_ != it->enc_->mb_w_) {
    // Same row: slide every cursor by one macroblock.
    it->uv_top_ += 16;
    it->y_top_ += 16;
    it->nz_ += 1;
    it->mb_ += 1;
    it->preds_ += 4;
  } else {
    // End of row: wrap to the beginning of the next one.
    it->y_ += 1;
    VP8IteratorSetRow(it, it->y_);
  }
  it->count_down_ -= 1;
  return (it->count_down_ > 0);
}
329 
330 //------------------------------------------------------------------------------
331 // Helper function to set mode properties
332 
VP8SetIntra16Mode(const VP8EncIterator * const it,int mode)333 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
334   uint8_t* preds = it->preds_;
335   int y;
336   for (y = 0; y < 4; ++y) {
337     memset(preds, mode, 4);
338     preds += it->enc_->preds_w_;
339   }
340   it->mb_->type_ = 1;
341 }
342 
VP8SetIntra4Mode(const VP8EncIterator * const it,const uint8_t * modes)343 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
344   uint8_t* preds = it->preds_;
345   int y;
346   for (y = 4; y > 0; --y) {
347     memcpy(preds, modes, 4 * sizeof(*modes));
348     preds += it->enc_->preds_w_;
349     modes += 4;
350   }
351   it->mb_->type_ = 0;
352 }
353 
VP8SetIntraUVMode(const VP8EncIterator * const it,int mode)354 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
355   it->mb_->uv_mode_ = mode;
356 }
357 
VP8SetSkip(const VP8EncIterator * const it,int skip)358 void VP8SetSkip(const VP8EncIterator* const it, int skip) {
359   it->mb_->skip_ = skip;
360 }
361 
VP8SetSegment(const VP8EncIterator * const it,int segment)362 void VP8SetSegment(const VP8EncIterator* const it, int segment) {
363   it->mb_->segment_ = segment;
364 }
365 
366 //------------------------------------------------------------------------------
367 // Intra4x4 sub-blocks iteration
368 //
369 //  We store and update the boundary samples in an array of 37 pixels. They
370 //  are updated as we iterate and reconstruct each intra4x4 block in turn.
371 //  The position of the samples has the following snake pattern:
372 //
373 // 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36  <- Top-right
374 // --+-----------+-----------+-----------+-----------+
375 // 15|         19|         23|         27|         31|
376 // 14|         18|         22|         26|         30|
377 // 13|         17|         21|         25|         29|
378 // 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
379 // --+-----------+-----------+-----------+-----------+
380 // 11|         15|         19|         23|         27|
381 // 10|         14|         18|         22|         26|
382 //  9|         13|         17|         21|         25|
383 //  8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
384 // --+-----------+-----------+-----------+-----------+
385 //  7|         11|         15|         19|         23|
386 //  6|         10|         14|         18|         22|
387 //  5|          9|         13|         17|         21|
388 //  4| 5  6  7  8| 9 10 11 12|13 14 15 16|17 18 19 20|
389 // --+-----------+-----------+-----------+-----------+
390 //  3|          7|         11|         15|         19|
391 //  2|          6|         10|         14|         18|
392 //  1|          5|          9|         13|         17|
393 //  0| 1  2  3  4| 5  6  7  8| 9 10 11 12|13 14 15 16|
394 // --+-----------+-----------+-----------+-----------+
395 
// For each of the 16 intra4x4 sub-blocks (in raster order, 4 per row), the
// offset inside i4_boundary_ of the first top sample to pass to the
// prediction functions in dsp.c. The offset decreases by 4 for each row of
// sub-blocks and increases by 4 for each column.
static const uint8_t VP8TopLeftI4[16] = {
  17, 21, 25, 29,   // sub-blocks  0 ..  3
  13, 17, 21, 25,   // sub-blocks  4 ..  7
   9, 13, 17, 21,   // sub-blocks  8 .. 11
   5,  9, 13, 17    // sub-blocks 12 .. 15
};
404 
// Prepares the iteration over the 16 intra4x4 sub-blocks of the current
// macroblock: selects sub-block #0, fills i4_boundary_ from the left and top
// samples, and imports the non-zero context.
// Layout of i4_boundary_ after this call:
//   [0..15]   left samples, bottom to top
//   [16]      top-left corner sample
//   [17..32]  the 16 top samples
//   [33..36]  the 4 top-right samples
void VP8IteratorStartI4(VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const left_bottom = it->y_left_ + 15;
  uint8_t* const top_dst = it->i4_boundary_ + 17;
  int i;

  it->i4_ = 0;    // first 4x4 sub-block
  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];

  // Left samples in reverse order; the 17th one is y_left_[-1], the corner.
  for (i = 0; i < 17; ++i) {
    it->i4_boundary_[i] = left_bottom[-i];
  }
  // Top samples.
  memcpy(top_dst, it->y_top_, 16);
  // Top-right samples have a special case on the far right of the picture.
  if (it->x_ < enc->mb_w_ - 1) {
    // They are the first 4 top samples of the next macroblock.
    memcpy(top_dst + 16, it->y_top_ + 16, 4);
  } else {
    // No macroblock on the right: repeat the last valid pixel four times.
    memset(top_dst + 16, top_dst[15], 4);
  }
#if WEBP_AARCH64 && BPS == 32 && defined(WEBP_MSAN)
  // Intra4Preds_NEON() reads 3 uninitialized bytes from i4_boundary_ when top
  // is positioned at offset 29 (VP8TopLeftI4[3]). The values are not used
  // meaningfully, but due to limitations in MemorySanitizer related to
  // modeling of tbl instructions, a warning will be issued. This can be
  // removed if MSan is updated to support the instructions. See
  // https://issues.webmproject.org/372109644.
  memset(it->i4_boundary_ + sizeof(it->i4_boundary_) - 3, 0xaa, 3);
#endif
  VP8IteratorNzToBytes(it);  // import the non-zero context
}
440 
// Finishes the current intra4x4 sub-block and moves to the next one.
// The samples of the sub-block in 'yuv_out' (located with VP8Scan[]) are
// stored into the boundary cache around it->i4_top_, then i4_ is incremented
// and i4_top_ is repositioned for the new sub-block.
// Returns 0 once all 16 sub-blocks have been visited, 1 otherwise.
int VP8IteratorRotateI4(VP8EncIterator* const it,
                        const uint8_t* const yuv_out) {
  const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];
  const uint8_t* const bottom_row = blk + 3 * BPS;
  const int on_right_edge = ((it->i4_ & 3) == 3);  // sub-blocks #3 #7 #11 #15
  uint8_t* const top = it->i4_top_;
  int i;

  // Bottom row of the sub-block: future top samples.
  for (i = 0; i < 4; ++i) {
    top[i - 4] = bottom_row[i];
  }
  if (on_right_edge) {
    // No sub-block on the right: replicate the top-right samples, as the
    // specification says.
    memcpy(top, top + 4, 4);
  } else {
    // Right-most column (rows 2, 1, 0): future left samples.
    top[0] = blk[3 + 2 * BPS];
    top[1] = blk[3 + 1 * BPS];
    top[2] = blk[3 + 0 * BPS];
  }

  // Move to the next sub-block.
  it->i4_ += 1;
  if (it->i4_ != 16) {
    it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
    return 1;
  }
  return 0;    // we're done
}
469 
470 //------------------------------------------------------------------------------
471