/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <math.h>
#include <string.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/intrapred_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"

// Vertical predictor: fills a bw x bh block by copying the first 'bw' pixels
// of 'above' into every output row.
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
//   above  - source row; at least 'bw' pixels are read.
//   left   - unused (kept for the common predictor signature).
static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;

  for (int r = 0; r < bh; r++) {
    memcpy(dst, above, bw);
    dst += stride;
  }
}

// Horizontal predictor: fills each row r of a bw x bh block with the single
// value left[r].
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
//   above  - unused (kept for the common predictor signature).
//   left   - source column; 'bh' pixels are read, one per output row.
static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;

  for (int r = 0; r < bh; r++) {
    memset(dst, left[r], bw);
    dst += stride;
  }
}

// Returns the absolute difference |a - b|, computed as larger minus smaller.
static INLINE int abs_diff(int a, int b) { return (a > b) ? a - b : b - a; }

// Paeth selection for one pixel. Forms base = top + left - top_left and
// returns whichever of 'left', 'top' and 'top_left' is nearest to it.
// Ties are resolved in favour of 'left', then 'top'.
static INLINE uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
                                              uint16_t top_left) {
  const int base = top + left - top_left;
  const int p_left = abs_diff(base, left);
  const int p_top = abs_diff(base, top);
  const int p_top_left = abs_diff(base, top_left);

  // Return nearest to base of left, top and top_left.
  return (p_left <= p_top && p_left <= p_top_left) ? left
         : (p_top <= p_top_left)                   ? top
                                                   : top_left;
}

// Paeth predictor for a bw x bh block: output pixel (r, c) is
// paeth_predictor_single(left[r], above[c], above[-1]).
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
//   above  - 'bw' pixels are read, plus above[-1] as the top-left neighbour,
//            so the caller must make that element addressable.
//   left   - 'bh' pixels are read, one per output row.
static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint8_t *above,
                                   const uint8_t *left) {
  const uint8_t ytop_left = above[-1];

  for (int r = 0; r < bh; r++) {
    for (int c = 0; c < bw; c++) {
      dst[c] = (uint8_t)paeth_predictor_single(left[r], above[c], ytop_left);
    }
    dst += stride;
  }
}

// Some basic checks on weights for smooth predictor.
// Asserts that the first weight of each table is below 'weights_scale', that
// entries [bw - 1] / [bh - 1] are non-zero, and that 'pred_scale' is below 31,
// which ensures no overflow when calculating the predictor.
// Note: 'bw' and 'bh' are not macro parameters; they are taken from the scope
// in which the macro is expanded.
// The body is wrapped in do { } while (0) so that the macro expands to a
// single statement and is safe inside an unbraced if/else.
#define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
                                 pred_scale)                          \
  do {                                                                \
    assert((weights_w)[0] < (weights_scale));                         \
    assert((weights_h)[0] < (weights_scale));                         \
    assert((weights_scale) - (weights_w)[bw - 1] < (weights_scale));  \
    assert((weights_scale) - (weights_h)[bh - 1] < (weights_scale));  \
    assert((pred_scale) < 31);                                        \
  } while (0)

// Divides 'value' by 2^bits, rounding to nearest (half rounds up), by adding
// 2^(bits - 1) before the right shift. 'bits' must be at least 1, and each
// argument is evaluated more than once.
#define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))

smooth_predictor(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left)84 static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
85                                     int bh, const uint8_t *above,
86                                     const uint8_t *left) {
87   const uint8_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
88   const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
89   const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
90   const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
91   // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
92   const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
93   const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
94   sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
95                            log2_scale + sizeof(*dst));
96   int r;
97   for (r = 0; r < bh; ++r) {
98     int c;
99     for (c = 0; c < bw; ++c) {
100       const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
101       const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
102                                   sm_weights_w[c], scale - sm_weights_w[c] };
103       uint32_t this_pred = 0;
104       int i;
105       assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
106       for (i = 0; i < 4; ++i) {
107         this_pred += weights[i] * pixels[i];
108       }
109       dst[c] = divide_round(this_pred, log2_scale);
110     }
111     dst += stride;
112   }
113 }
114 
smooth_v_predictor(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left)115 static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
116                                       int bh, const uint8_t *above,
117                                       const uint8_t *left) {
118   const uint8_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
119   const uint8_t *const sm_weights = smooth_weights + bh - 4;
120   // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
121   const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
122   const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
123   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
124                            log2_scale + sizeof(*dst));
125 
126   int r;
127   for (r = 0; r < bh; r++) {
128     int c;
129     for (c = 0; c < bw; ++c) {
130       const uint8_t pixels[] = { above[c], below_pred };
131       const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
132       uint32_t this_pred = 0;
133       assert(scale >= sm_weights[r]);
134       int i;
135       for (i = 0; i < 2; ++i) {
136         this_pred += weights[i] * pixels[i];
137       }
138       dst[c] = divide_round(this_pred, log2_scale);
139     }
140     dst += stride;
141   }
142 }
143 
smooth_h_predictor(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left)144 static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
145                                       int bh, const uint8_t *above,
146                                       const uint8_t *left) {
147   const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
148   const uint8_t *const sm_weights = smooth_weights + bw - 4;
149   // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
150   const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
151   const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
152   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
153                            log2_scale + sizeof(*dst));
154 
155   int r;
156   for (r = 0; r < bh; r++) {
157     int c;
158     for (c = 0; c < bw; ++c) {
159       const uint8_t pixels[] = { left[r], right_pred };
160       const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
161       uint32_t this_pred = 0;
162       assert(scale >= sm_weights[c]);
163       int i;
164       for (i = 0; i < 2; ++i) {
165         this_pred += weights[i] * pixels[i];
166       }
167       dst[c] = divide_round(this_pred, log2_scale);
168     }
169     dst += stride;
170   }
171 }
172 
// Fills a bw x bh block with the constant 128. Neither 'above' nor 'left' is
// read.
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  (void)above;
  (void)left;

  for (int r = 0; r < bh; r++) {
    memset(dst, 128, bw);
    dst += stride;
  }
}

// DC predictor using only the left column: fills a bw x bh block with the
// rounded mean of left[0 .. bh - 1]. 'above' is not read.
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                     int bh, const uint8_t *above,
                                     const uint8_t *left) {
  int sum = 0;
  (void)above;

  for (int i = 0; i < bh; i++) sum += left[i];
  // Adding half the divisor rounds the mean to nearest.
  const int expected_dc = (sum + (bh >> 1)) / bh;

  for (int r = 0; r < bh; r++) {
    memset(dst, expected_dc, bw);
    dst += stride;
  }
}

// DC predictor using only the row above: fills a bw x bh block with the
// rounded mean of above[0 .. bw - 1]. 'left' is not read.
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  int sum = 0;
  (void)left;

  for (int i = 0; i < bw; i++) sum += above[i];
  // Adding half the divisor rounds the mean to nearest.
  const int expected_dc = (sum + (bw >> 1)) / bw;

  for (int r = 0; r < bh; r++) {
    memset(dst, expected_dc, bw);
    dst += stride;
  }
}

// DC predictor: fills a bw x bh block with the rounded mean of the 'bw'
// pixels in 'above' and the 'bh' pixels in 'left'. The mean is computed with
// an integer division by bw + bh.
//   dst    - top-left pixel of the output block.
//   stride - distance, in pixels, between the starts of consecutive rows.
static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                                const uint8_t *above, const uint8_t *left) {
  int sum = 0;
  const int count = bw + bh;

  for (int i = 0; i < bw; i++) {
    sum += above[i];
  }
  for (int i = 0; i < bh; i++) {
    sum += left[i];
  }

  // Adding half the divisor rounds the mean to nearest.
  const int expected_dc = (sum + (count >> 1)) / count;

  for (int r = 0; r < bh; r++) {
    memset(dst, expected_dc, bw);
    dst += stride;
  }
}

// Returns ((num >> shift1) * multiplier) >> shift2, i.e. a division carried
// out as shift, multiply, shift. The product is formed in 'int', so callers
// must keep (num >> shift1) * multiplier within the range of 'int'.
static INLINE int divide_using_multiply_shift(int num, int shift1,
                                              int multiplier, int shift2) {
  const int interm = num >> shift1;
  return (interm * multiplier) >> shift2;
}

// The constants (multiplier and shifts) for a given block size are obtained
// as follows:
// - Let sum_w_h =  block width + block height.
// - Shift 'sum_w_h' right until we reach an odd number. Let the number of
// shifts for that block size be called 'shift1' (see the parameter in
// dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2
// possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect
// block].
// - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
// using the "Algorithm 1" in:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
// by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd
// shift will be 16, regardless of the block size.

// Note: For low bitdepth, assembly code may be optimized by using smaller
// constants for smaller block sizes, where the range of the 'sum' is
// restricted to fewer bits.

#define DC_MULTIPLIER_1X2 0x5556
#define DC_MULTIPLIER_1X4 0x3334

#define DC_SHIFT2 16

dc_predictor_rect(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left,int shift1,int multiplier)265 static INLINE void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
266                                      int bh, const uint8_t *above,
267                                      const uint8_t *left, int shift1,
268                                      int multiplier) {
269   int sum = 0;
270 
271   for (int i = 0; i < bw; i++) {
272     sum += above[i];
273   }
274   for (int i = 0; i < bh; i++) {
275     sum += left[i];
276   }
277 
278   const int expected_dc = divide_using_multiply_shift(
279       sum + ((bw + bh) >> 1), shift1, multiplier, DC_SHIFT2);
280   assert(expected_dc < (1 << 8));
281 
282   for (int r = 0; r < bh; r++) {
283     memset(dst, expected_dc, bw);
284     dst += stride;
285   }
286 }
287 
288 #undef DC_SHIFT2
289 
aom_dc_predictor_4x8_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)290 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
291                             const uint8_t *above, const uint8_t *left) {
292   dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2);
293 }
294 
aom_dc_predictor_8x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)295 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
296                             const uint8_t *above, const uint8_t *left) {
297   dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2);
298 }
299 
aom_dc_predictor_4x16_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)300 void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
301                              const uint8_t *above, const uint8_t *left) {
302   dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4);
303 }
304 
aom_dc_predictor_16x4_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)305 void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
306                              const uint8_t *above, const uint8_t *left) {
307   dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4);
308 }
309 
aom_dc_predictor_8x16_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)310 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
311                              const uint8_t *above, const uint8_t *left) {
312   dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2);
313 }
314 
aom_dc_predictor_16x8_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)315 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
316                              const uint8_t *above, const uint8_t *left) {
317   dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2);
318 }
319 
aom_dc_predictor_8x32_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)320 void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
321                              const uint8_t *above, const uint8_t *left) {
322   dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4);
323 }
324 
aom_dc_predictor_32x8_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)325 void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
326                              const uint8_t *above, const uint8_t *left) {
327   dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4);
328 }
329 
aom_dc_predictor_16x32_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)330 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
331                               const uint8_t *above, const uint8_t *left) {
332   dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2);
333 }
334 
aom_dc_predictor_32x16_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)335 void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
336                               const uint8_t *above, const uint8_t *left) {
337   dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2);
338 }
339 
aom_dc_predictor_16x64_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)340 void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
341                               const uint8_t *above, const uint8_t *left) {
342   dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4);
343 }
344 
aom_dc_predictor_64x16_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)345 void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
346                               const uint8_t *above, const uint8_t *left) {
347   dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4);
348 }
349 
aom_dc_predictor_32x64_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)350 void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
351                               const uint8_t *above, const uint8_t *left) {
352   dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2);
353 }
354 
aom_dc_predictor_64x32_c(uint8_t * dst,ptrdiff_t stride,const uint8_t * above,const uint8_t * left)355 void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
356                               const uint8_t *above, const uint8_t *left) {
357   dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2);
358 }
359 
360 #undef DC_MULTIPLIER_1X2
361 #undef DC_MULTIPLIER_1X4
362 
// High-bitdepth vertical predictor: fills a bw x bh block of 16-bit pixels by
// copying the first 'bw' pixels of 'above' into every output row.
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
//   left   - unused.
//   bd     - unused.
static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint16_t *above,
                                      const uint16_t *left, int bd) {
  (void)left;
  (void)bd;
  for (int r = 0; r < bh; r++) {
    memcpy(dst, above, bw * sizeof(uint16_t));
    dst += stride;
  }
}

// High-bitdepth horizontal predictor: fills each row r of a bw x bh block of
// 16-bit pixels with the single value left[r], using aom_memset16().
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
//   above  - unused.
//   bd     - unused.
static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint16_t *above,
                                      const uint16_t *left, int bd) {
  (void)above;
  (void)bd;
  for (int r = 0; r < bh; r++) {
    aom_memset16(dst, left[r], bw);
    dst += stride;
  }
}

// High-bitdepth Paeth predictor for a bw x bh block: output pixel (r, c) is
// paeth_predictor_single(left[r], above[c], above[-1]).
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
//   above  - 'bw' pixels are read, plus above[-1] as the top-left neighbour,
//            so the caller must make that element addressable.
//   left   - 'bh' pixels are read, one per output row.
//   bd     - unused.
static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
                                          int bw, int bh, const uint16_t *above,
                                          const uint16_t *left, int bd) {
  const uint16_t ytop_left = above[-1];
  (void)bd;

  for (int r = 0; r < bh; r++) {
    for (int c = 0; c < bw; c++) {
      dst[c] = paeth_predictor_single(left[r], above[c], ytop_left);
    }
    dst += stride;
  }
}

highbd_smooth_predictor(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd)401 static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
402                                            int bw, int bh,
403                                            const uint16_t *above,
404                                            const uint16_t *left, int bd) {
405   (void)bd;
406   const uint16_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
407   const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
408   const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
409   const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
410   // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
411   const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
412   const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
413   sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
414                            log2_scale + sizeof(*dst));
415   int r;
416   for (r = 0; r < bh; ++r) {
417     int c;
418     for (c = 0; c < bw; ++c) {
419       const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
420       const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
421                                   sm_weights_w[c], scale - sm_weights_w[c] };
422       uint32_t this_pred = 0;
423       int i;
424       assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
425       for (i = 0; i < 4; ++i) {
426         this_pred += weights[i] * pixels[i];
427       }
428       dst[c] = divide_round(this_pred, log2_scale);
429     }
430     dst += stride;
431   }
432 }
433 
highbd_smooth_v_predictor(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd)434 static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
435                                              int bw, int bh,
436                                              const uint16_t *above,
437                                              const uint16_t *left, int bd) {
438   (void)bd;
439   const uint16_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
440   const uint8_t *const sm_weights = smooth_weights + bh - 4;
441   // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
442   const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
443   const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
444   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
445                            log2_scale + sizeof(*dst));
446 
447   int r;
448   for (r = 0; r < bh; r++) {
449     int c;
450     for (c = 0; c < bw; ++c) {
451       const uint16_t pixels[] = { above[c], below_pred };
452       const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
453       uint32_t this_pred = 0;
454       assert(scale >= sm_weights[r]);
455       int i;
456       for (i = 0; i < 2; ++i) {
457         this_pred += weights[i] * pixels[i];
458       }
459       dst[c] = divide_round(this_pred, log2_scale);
460     }
461     dst += stride;
462   }
463 }
464 
highbd_smooth_h_predictor(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd)465 static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
466                                              int bw, int bh,
467                                              const uint16_t *above,
468                                              const uint16_t *left, int bd) {
469   (void)bd;
470   const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
471   const uint8_t *const sm_weights = smooth_weights + bw - 4;
472   // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
473   const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
474   const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
475   sm_weights_sanity_checks(sm_weights, sm_weights, scale,
476                            log2_scale + sizeof(*dst));
477 
478   int r;
479   for (r = 0; r < bh; r++) {
480     int c;
481     for (c = 0; c < bw; ++c) {
482       const uint16_t pixels[] = { left[r], right_pred };
483       const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
484       uint32_t this_pred = 0;
485       assert(scale >= sm_weights[c]);
486       int i;
487       for (i = 0; i < 2; ++i) {
488         this_pred += weights[i] * pixels[i];
489       }
490       dst[c] = divide_round(this_pred, log2_scale);
491     }
492     dst += stride;
493   }
494 }
495 
// Fills a bw x bh block of 16-bit pixels with the constant 128 << (bd - 8),
// i.e. 2^(bd - 1). Neither 'above' nor 'left' is read.
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
//   bd     - bit depth; must be at least 8, otherwise the shift count is
//            negative.
static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
                                           int bw, int bh,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  (void)above;
  (void)left;

  for (int r = 0; r < bh; r++) {
    aom_memset16(dst, 128 << (bd - 8), bw);
    dst += stride;
  }
}

// High-bitdepth DC predictor using only the left column: fills a bw x bh
// block with the rounded mean of left[0 .. bh - 1]. 'above' and 'bd' are
// unused.
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd) {
  int sum = 0;
  (void)above;
  (void)bd;

  for (int i = 0; i < bh; i++) sum += left[i];
  // Adding half the divisor rounds the mean to nearest.
  const int expected_dc = (sum + (bh >> 1)) / bh;

  for (int r = 0; r < bh; r++) {
    aom_memset16(dst, expected_dc, bw);
    dst += stride;
  }
}

// High-bitdepth DC predictor using only the row above: fills a bw x bh block
// with the rounded mean of above[0 .. bw - 1]. 'left' and 'bd' are unused.
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
                                           int bw, int bh,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  int sum = 0;
  (void)left;
  (void)bd;

  for (int i = 0; i < bw; i++) sum += above[i];
  // Adding half the divisor rounds the mean to nearest.
  const int expected_dc = (sum + (bw >> 1)) / bw;

  for (int r = 0; r < bh; r++) {
    aom_memset16(dst, expected_dc, bw);
    dst += stride;
  }
}

// High-bitdepth DC predictor: fills a bw x bh block with the rounded mean of
// the 'bw' pixels in 'above' and the 'bh' pixels in 'left', using an integer
// division by bw + bh. 'bd' is unused.
//   dst    - top-left pixel of the output block.
//   stride - distance, in uint16_t elements, between consecutive rows.
static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                       int bh, const uint16_t *above,
                                       const uint16_t *left, int bd) {
  int sum = 0;
  const int count = bw + bh;
  (void)bd;

  for (int i = 0; i < bw; i++) {
    sum += above[i];
  }
  for (int i = 0; i < bh; i++) {
    sum += left[i];
  }

  // Adding half the divisor rounds the mean to nearest.
  const int expected_dc = (sum + (count >> 1)) / count;

  for (int r = 0; r < bh; r++) {
    aom_memset16(dst, expected_dc, bw);
    dst += stride;
  }
}

// Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but
// assume 2nd shift of 17 bits instead of 16.
// Note: Strictly speaking, 2nd shift needs to be 17 only when:
// - bit depth == 12, and
// - bw + bh is divisible by 5 (as opposed to divisible by 3).
// All other cases can use half the multipliers with a shift of 16 instead.
// This special optimization can be used when writing assembly code.
#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
// Note: This constant is odd, but a smaller even constant (0x199a) with the
// appropriate shift should work for neon in 8/10-bit.
#define HIGHBD_DC_MULTIPLIER_1X4 0x6667

#define HIGHBD_DC_SHIFT2 17

highbd_dc_predictor_rect(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd,int shift1,uint32_t multiplier)580 static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
581                                             int bw, int bh,
582                                             const uint16_t *above,
583                                             const uint16_t *left, int bd,
584                                             int shift1, uint32_t multiplier) {
585   int sum = 0;
586   (void)bd;
587 
588   for (int i = 0; i < bw; i++) {
589     sum += above[i];
590   }
591   for (int i = 0; i < bh; i++) {
592     sum += left[i];
593   }
594 
595   const int expected_dc = divide_using_multiply_shift(
596       sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2);
597   assert(expected_dc < (1 << bd));
598 
599   for (int r = 0; r < bh; r++) {
600     aom_memset16(dst, expected_dc, bw);
601     dst += stride;
602   }
603 }
604 
605 #undef HIGHBD_DC_SHIFT2
606 
aom_highbd_dc_predictor_4x8_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)607 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
608                                    const uint16_t *above, const uint16_t *left,
609                                    int bd) {
610   highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
611                            HIGHBD_DC_MULTIPLIER_1X2);
612 }
613 
aom_highbd_dc_predictor_8x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)614 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
615                                    const uint16_t *above, const uint16_t *left,
616                                    int bd) {
617   highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
618                            HIGHBD_DC_MULTIPLIER_1X2);
619 }
620 
aom_highbd_dc_predictor_4x16_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)621 void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
622                                     const uint16_t *above, const uint16_t *left,
623                                     int bd) {
624   highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
625                            HIGHBD_DC_MULTIPLIER_1X4);
626 }
627 
aom_highbd_dc_predictor_16x4_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)628 void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
629                                     const uint16_t *above, const uint16_t *left,
630                                     int bd) {
631   highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
632                            HIGHBD_DC_MULTIPLIER_1X4);
633 }
634 
aom_highbd_dc_predictor_8x16_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)635 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
636                                     const uint16_t *above, const uint16_t *left,
637                                     int bd) {
638   highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
639                            HIGHBD_DC_MULTIPLIER_1X2);
640 }
641 
aom_highbd_dc_predictor_16x8_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)642 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
643                                     const uint16_t *above, const uint16_t *left,
644                                     int bd) {
645   highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
646                            HIGHBD_DC_MULTIPLIER_1X2);
647 }
648 
aom_highbd_dc_predictor_8x32_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)649 void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
650                                     const uint16_t *above, const uint16_t *left,
651                                     int bd) {
652   highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
653                            HIGHBD_DC_MULTIPLIER_1X4);
654 }
655 
aom_highbd_dc_predictor_32x8_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)656 void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
657                                     const uint16_t *above, const uint16_t *left,
658                                     int bd) {
659   highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
660                            HIGHBD_DC_MULTIPLIER_1X4);
661 }
662 
aom_highbd_dc_predictor_16x32_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)663 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
664                                      const uint16_t *above,
665                                      const uint16_t *left, int bd) {
666   highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
667                            HIGHBD_DC_MULTIPLIER_1X2);
668 }
669 
aom_highbd_dc_predictor_32x16_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)670 void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
671                                      const uint16_t *above,
672                                      const uint16_t *left, int bd) {
673   highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
674                            HIGHBD_DC_MULTIPLIER_1X2);
675 }
676 
aom_highbd_dc_predictor_16x64_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)677 void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
678                                      const uint16_t *above,
679                                      const uint16_t *left, int bd) {
680   highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
681                            HIGHBD_DC_MULTIPLIER_1X4);
682 }
683 
aom_highbd_dc_predictor_64x16_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)684 void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
685                                      const uint16_t *above,
686                                      const uint16_t *left, int bd) {
687   highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
688                            HIGHBD_DC_MULTIPLIER_1X4);
689 }
690 
aom_highbd_dc_predictor_32x64_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)691 void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
692                                      const uint16_t *above,
693                                      const uint16_t *left, int bd) {
694   highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
695                            HIGHBD_DC_MULTIPLIER_1X2);
696 }
697 
aom_highbd_dc_predictor_64x32_c(uint16_t * dst,ptrdiff_t stride,const uint16_t * above,const uint16_t * left,int bd)698 void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
699                                      const uint16_t *above,
700                                      const uint16_t *left, int bd) {
701   highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
702                            HIGHBD_DC_MULTIPLIER_1X2);
703 }
704 
// The multipliers are only needed by the rectangular high bit-depth DC
// wrappers above; retire both names here so they are not visible below.
#undef HIGHBD_DC_MULTIPLIER_1X4
#undef HIGHBD_DC_MULTIPLIER_1X2
707 
// Generates the exported low bit-depth entry point
// aom_<type>_predictor_<width>x<height>_c, a thin wrapper that calls the
// generic <type>_predictor with the block dimensions filled in. Because every
// generated wrapper has the same (dst, stride, above, left) signature, the
// predictors can all be reached through a single array of function pointers.
// A given predictor does not necessarily read both the above and the left
// boundary.
#define intra_pred_sized(type, width, height)                       \
  void aom_##type##_predictor_##width##x##height##_c(               \
      uint8_t *dst, ptrdiff_t stride,                               \
      const uint8_t *above, const uint8_t *left) {                  \
    type##_predictor(dst, stride, width, height, above, left);      \
  }
717 
// High bit-depth counterpart of intra_pred_sized: generates
// aom_highbd_<type>_predictor_<width>x<height>_c, which works on uint16_t
// samples and forwards one extra argument, bd, to the generic
// highbd_<type>_predictor together with the block dimensions.
// NOTE(review): bd is presumably the sample bit depth -- confirm against the
// highbd_*_predictor helpers.
#define intra_pred_highbd_sized(type, width, height)                         \
  void aom_highbd_##type##_predictor_##width##x##height##_c(                 \
      uint16_t *dst, ptrdiff_t stride,                                       \
      const uint16_t *above, const uint16_t *left,                           \
      int bd) {                                                              \
    highbd_##type##_predictor(dst, stride, width, height, above, left, bd);  \
  }
724 
725 /* clang-format off */
// Expands to the wrappers of predictor `type` for all fourteen non-square
// block sizes: first the low bit-depth set, then the high bit-depth set. Each
// row below lists one size next to its transpose.
#define intra_pred_rectangular(type)                                          \
  /* low bit-depth, 1:2 and 2:1 */                                            \
  intra_pred_sized(type, 4, 8)           intra_pred_sized(type, 8, 4)         \
  intra_pred_sized(type, 8, 16)          intra_pred_sized(type, 16, 8)        \
  intra_pred_sized(type, 16, 32)         intra_pred_sized(type, 32, 16)       \
  intra_pred_sized(type, 32, 64)         intra_pred_sized(type, 64, 32)       \
  /* low bit-depth, 1:4 and 4:1 */                                            \
  intra_pred_sized(type, 4, 16)          intra_pred_sized(type, 16, 4)        \
  intra_pred_sized(type, 8, 32)          intra_pred_sized(type, 32, 8)        \
  intra_pred_sized(type, 16, 64)         intra_pred_sized(type, 64, 16)       \
  /* high bit-depth, 1:2 and 2:1 */                                           \
  intra_pred_highbd_sized(type, 4, 8)    intra_pred_highbd_sized(type, 8, 4)  \
  intra_pred_highbd_sized(type, 8, 16)   intra_pred_highbd_sized(type, 16, 8) \
  intra_pred_highbd_sized(type, 16, 32)  intra_pred_highbd_sized(type, 32, 16) \
  intra_pred_highbd_sized(type, 32, 64)  intra_pred_highbd_sized(type, 64, 32) \
  /* high bit-depth, 1:4 and 4:1 */                                           \
  intra_pred_highbd_sized(type, 4, 16)   intra_pred_highbd_sized(type, 16, 4) \
  intra_pred_highbd_sized(type, 8, 32)   intra_pred_highbd_sized(type, 32, 8) \
  intra_pred_highbd_sized(type, 16, 64)  intra_pred_highbd_sized(type, 64, 16)
755 
// Expands to every wrapper of predictor `type` except the low bit-depth 4x4
// one: low bit-depth squares 8x8 through 64x64, high bit-depth squares 4x4
// through 64x64, and finally all rectangular sizes.
#define intra_pred_above_4x4(type)                                            \
  /* low bit-depth squares (4x4 intentionally absent) */                      \
  intra_pred_sized(type, 8, 8)           intra_pred_sized(type, 16, 16)       \
  intra_pred_sized(type, 32, 32)         intra_pred_sized(type, 64, 64)       \
  /* high bit-depth squares */                                                \
  intra_pred_highbd_sized(type, 4, 4)                                         \
  intra_pred_highbd_sized(type, 8, 8)    intra_pred_highbd_sized(type, 16, 16) \
  intra_pred_highbd_sized(type, 32, 32)  intra_pred_highbd_sized(type, 64, 64) \
  /* both bit depths, non-square */                                           \
  intra_pred_rectangular(type)
// Expands to the complete wrapper set of predictor `type`: the low bit-depth
// 4x4 wrapper followed by everything intra_pred_above_4x4 produces.
#define intra_pred_allsizes(type)                                             \
  intra_pred_sized(type, 4, 4) intra_pred_above_4x4(type)
// Expands to the square-only wrappers of predictor `type`, 4x4 through 64x64:
// the low bit-depth set first, then the high bit-depth set.
#define intra_pred_square(type)                                               \
  /* low bit-depth */                                                         \
  intra_pred_sized(type, 4, 4)                                                \
  intra_pred_sized(type, 8, 8)           intra_pred_sized(type, 16, 16)       \
  intra_pred_sized(type, 32, 32)         intra_pred_sized(type, 64, 64)       \
  /* high bit-depth */                                                        \
  intra_pred_highbd_sized(type, 4, 4)                                         \
  intra_pred_highbd_sized(type, 8, 8)    intra_pred_highbd_sized(type, 16, 16) \
  intra_pred_highbd_sized(type, 32, 32)  intra_pred_highbd_sized(type, 64, 64)
781 
782 intra_pred_allsizes(v)
783 intra_pred_allsizes(h)
784 intra_pred_allsizes(smooth)
785 intra_pred_allsizes(smooth_v)
786 intra_pred_allsizes(smooth_h)
787 intra_pred_allsizes(paeth)
788 intra_pred_allsizes(dc_128)
789 intra_pred_allsizes(dc_left)
790 intra_pred_allsizes(dc_top)
791 intra_pred_square(dc)
792 /* clang-format on */
793 #undef intra_pred_allsizes
794