1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14
15 #include "config/aom_config.h"
16 #include "config/aom_dsp_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_dsp/intrapred_common.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/bitops.h"
22
// V_PRED: replicate the row of pixels directly above the block into every
// row of the bh x bw destination block. The left neighbors are unused.
static inline void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                               const uint8_t *above, const uint8_t *left) {
  (void)left;  // Not used by the vertical predictor.
  for (int row = 0; row < bh; ++row, dst += stride) {
    memcpy(dst, above, bw);
  }
}
33
// H_PRED: fill row r of the block with the left-column neighbor left[r].
// The above neighbors are unused.
static inline void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                               const uint8_t *above, const uint8_t *left) {
  (void)above;  // Not used by the horizontal predictor.
  for (int row = 0; row < bh; ++row, dst += stride) {
    memset(dst, left[row], bw);
  }
}
44
// Absolute difference |a - b| of two ints (operands are pixel-range values,
// so the subtraction cannot overflow).
static inline int abs_diff(int a, int b) {
  return (a < b) ? (b - a) : (a - b);
}
46
// Paeth prediction for one pixel: of {left, top, top_left}, pick the value
// nearest to base = top + left - top_left. Ties are broken in the order
// left, then top, then top_left (this order is normative for AV1).
static inline uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
                                              uint16_t top_left) {
  const int base = top + left - top_left;
  const int p_left = (base > left) ? base - left : left - base;
  const int p_top = (base > top) ? base - top : top - base;
  const int p_top_left = (base > top_left) ? base - top_left : top_left - base;

  if (p_left <= p_top && p_left <= p_top_left) return left;
  if (p_top <= p_top_left) return top;
  return top_left;
}
59
// PAETH_PRED for an 8-bit bw x bh block: each output pixel is whichever of
// {left[r], above[c], top-left} is closest to above[c] + left[r] - top_left,
// with ties resolved as left, then top, then top-left.
static inline void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint8_t *above,
                                   const uint8_t *left) {
  const uint8_t top_left = above[-1];  // Pixel diagonally above-left.

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const int base = above[c] + left[r] - top_left;
      const int p_left = (base > left[r]) ? base - left[r] : left[r] - base;
      const int p_top = (base > above[c]) ? base - above[c] : above[c] - base;
      const int p_top_left =
          (base > top_left) ? base - top_left : top_left - base;
      uint8_t pred;
      if (p_left <= p_top && p_left <= p_top_left) {
        pred = left[r];
      } else if (p_top <= p_top_left) {
        pred = above[c];
      } else {
        pred = top_left;
      }
      dst[c] = pred;
    }
  }
}
72
// Some basic checks on weights for smooth predictor. Note this macro also
// reads `bw` and `bh` from the enclosing scope. It is wrapped in
// do { } while (0) so it expands to exactly one statement and remains safe
// inside an unbraced if/else; call sites (`sm_weights_sanity_checks(...);`)
// are unaffected.
#define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
                                 pred_scale)                          \
  do {                                                                \
    assert(weights_w[0] < weights_scale);                             \
    assert(weights_h[0] < weights_scale);                             \
    assert(weights_scale - weights_w[bw - 1] < weights_scale);        \
    assert(weights_scale - weights_h[bh - 1] < weights_scale);        \
    /* Ensures no overflow when calculating predictor. */             \
    assert(pred_scale < 31);                                          \
  } while (0)

// Rounded division by a power of two: (value + 2^(bits-1)) >> bits.
#define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))
83
smooth_predictor(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left)84 static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
85 int bh, const uint8_t *above,
86 const uint8_t *left) {
87 const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
88 const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel
89 const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
90 const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
91 // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
92 const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
93 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
94 sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
95 log2_scale + sizeof(*dst));
96 int r;
97 for (r = 0; r < bh; ++r) {
98 int c;
99 for (c = 0; c < bw; ++c) {
100 const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
101 const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
102 sm_weights_w[c], scale - sm_weights_w[c] };
103 uint32_t this_pred = 0;
104 int i;
105 assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
106 for (i = 0; i < 4; ++i) {
107 this_pred += weights[i] * pixels[i];
108 }
109 dst[c] = divide_round(this_pred, log2_scale);
110 }
111 dst += stride;
112 }
113 }
114
smooth_v_predictor(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left)115 static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
116 int bh, const uint8_t *above,
117 const uint8_t *left) {
118 const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
119 const uint8_t *const sm_weights = smooth_weights + bh - 4;
120 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
121 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
122 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
123 sm_weights_sanity_checks(sm_weights, sm_weights, scale,
124 log2_scale + sizeof(*dst));
125
126 int r;
127 for (r = 0; r < bh; r++) {
128 int c;
129 for (c = 0; c < bw; ++c) {
130 const uint8_t pixels[] = { above[c], below_pred };
131 const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
132 uint32_t this_pred = 0;
133 assert(scale >= sm_weights[r]);
134 int i;
135 for (i = 0; i < 2; ++i) {
136 this_pred += weights[i] * pixels[i];
137 }
138 dst[c] = divide_round(this_pred, log2_scale);
139 }
140 dst += stride;
141 }
142 }
143
smooth_h_predictor(uint8_t * dst,ptrdiff_t stride,int bw,int bh,const uint8_t * above,const uint8_t * left)144 static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
145 int bh, const uint8_t *above,
146 const uint8_t *left) {
147 const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel
148 const uint8_t *const sm_weights = smooth_weights + bw - 4;
149 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
150 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
151 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
152 sm_weights_sanity_checks(sm_weights, sm_weights, scale,
153 log2_scale + sizeof(*dst));
154
155 int r;
156 for (r = 0; r < bh; r++) {
157 int c;
158 for (c = 0; c < bw; ++c) {
159 const uint8_t pixels[] = { left[r], right_pred };
160 const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
161 uint32_t this_pred = 0;
162 assert(scale >= sm_weights[c]);
163 int i;
164 for (i = 0; i < 2; ++i) {
165 this_pred += weights[i] * pixels[i];
166 }
167 dst[c] = divide_round(this_pred, log2_scale);
168 }
169 dst += stride;
170 }
171 }
172
// DC_128: no neighbors are available, so fill the block with 128 — the
// midpoint of the 8-bit pixel range.
static inline void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  (void)above;
  (void)left;
  for (int row = 0; row < bh; ++row, dst += stride) memset(dst, 128, bw);
}
185
// DC_LEFT: fill the block with the rounded average of the left column
// (used when the above row is unavailable).
static inline void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                     int bh, const uint8_t *above,
                                     const uint8_t *left) {
  (void)above;

  int sum = 0;
  for (int i = 0; i < bh; ++i) sum += left[i];
  const int expected_dc = (sum + (bh >> 1)) / bh;  // Round to nearest.

  for (int row = 0; row < bh; ++row, dst += stride)
    memset(dst, expected_dc, bw);
}
200
// DC_TOP: fill the block with the rounded average of the above row
// (used when the left column is unavailable).
static inline void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
                                    int bh, const uint8_t *above,
                                    const uint8_t *left) {
  (void)left;

  int sum = 0;
  for (int i = 0; i < bw; ++i) sum += above[i];
  const int expected_dc = (sum + (bw >> 1)) / bw;  // Round to nearest.

  for (int row = 0; row < bh; ++row, dst += stride)
    memset(dst, expected_dc, bw);
}
215
// DC_PRED (square blocks): fill with the rounded average of all above-row
// and left-column neighbors. Rectangular sizes use dc_predictor_rect, which
// replaces the division by (bw + bh) with a multiply-and-shift.
static inline void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                                const uint8_t *above, const uint8_t *left) {
  const int count = bw + bh;

  int sum = 0;
  for (int i = 0; i < bw; ++i) sum += above[i];
  for (int i = 0; i < bh; ++i) sum += left[i];
  const int expected_dc = (sum + (count >> 1)) / count;  // Round to nearest.

  for (int row = 0; row < bh; ++row, dst += stride)
    memset(dst, expected_dc, bw);
}
235
// Approximate num / d for d = (odd) << shift1 as a shift, a fixed-point
// multiply by the reciprocal of the odd factor, and a second shift.
// See the derivation comment below for how the constants are chosen.
static inline int divide_using_multiply_shift(int num, int shift1,
                                              int multiplier, int shift2) {
  return ((num >> shift1) * multiplier) >> shift2;
}
241
// The constants (multiplier and shifts) for a given block size are obtained
// as follows:
// - Let sum_w_h = block width + block height.
// - Shift 'sum_w_h' right until we reach an odd number. Let the number of
// shifts for that block size be called 'shift1' (see the parameter in
// dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2
// possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect
// block].
// - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
// using the "Algorithm 1" in:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
// by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd
// shift will be 16, regardless of the block size.

// Note: For low bitdepth, assembly code may be optimized by using smaller
// constants for smaller block sizes, where the range of the 'sum' is
// restricted to fewer bits.

// Q16 fixed-point reciprocals of 3 (1:2 aspect) and 5 (1:4 aspect).
#define DC_MULTIPLIER_1X2 0x5556
#define DC_MULTIPLIER_1X4 0x3334

#define DC_SHIFT2 16
264
// DC prediction for rectangular 8-bit blocks. The division by
// count = bw + bh is performed without an integer divide: first shift out
// the power-of-two factor (shift1), then multiply by the Q16 reciprocal of
// the remaining odd factor and shift right by 16 (DC_SHIFT2).
static inline void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
                                     int bh, const uint8_t *above,
                                     const uint8_t *left, int shift1,
                                     int multiplier) {
  int sum = 0;
  for (int i = 0; i < bw; ++i) sum += above[i];
  for (int i = 0; i < bh; ++i) sum += left[i];

  // Rounded sum, divided by count via multiply-and-shift (2nd shift is
  // DC_SHIFT2 == 16; see the derivation comment above).
  const int interm = (sum + ((bw + bh) >> 1)) >> shift1;
  const int expected_dc = (interm * multiplier) >> 16;
  assert(expected_dc < (1 << 8));

  for (int r = 0; r < bh; ++r, dst += stride) memset(dst, expected_dc, bw);
}

#undef DC_SHIFT2
289
// DC predictors for the rectangular block sizes. For each WxH, 'shift1' is
// the number of trailing zero bits in (W + H), and the multiplier is the Q16
// reciprocal of the remaining odd factor (3 -> DC_MULTIPLIER_1X2,
// 5 -> DC_MULTIPLIER_1X4); see the derivation comment above.
void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4);
}

void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4);
}

void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4);
}

void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4);
}

void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4);
}

void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4);
}

void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2);
}

void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2);
}

#undef DC_MULTIPLIER_1X2
#undef DC_MULTIPLIER_1X4
362
// High-bitdepth V_PRED: replicate the above row into every row of the block.
// Left neighbors and bit depth are unused.
static inline void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint16_t *above,
                                      const uint16_t *left, int bd) {
  (void)left;
  (void)bd;
  for (int row = 0; row < bh; ++row, dst += stride)
    memcpy(dst, above, bw * sizeof(*dst));
}
374
// High-bitdepth H_PRED: fill row r with left[r]. The fill loop is the
// 16-bit equivalent of memset (what aom_memset16 does).
static inline void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
                                      int bh, const uint16_t *above,
                                      const uint16_t *left, int bd) {
  (void)above;
  (void)bd;
  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) dst[c] = left[r];
  }
}
386
// High-bitdepth PAETH_PRED: per-pixel Paeth selection among left[r],
// above[c] and the top-left neighbor. Bit depth is unused (values already
// fit in uint16_t).
static inline void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
                                          int bw, int bh,
                                          const uint16_t *above,
                                          const uint16_t *left, int bd) {
  const uint16_t top_left = above[-1];
  (void)bd;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      dst[c] = paeth_predictor_single(left[r], above[c], top_left);
    }
  }
}
400
highbd_smooth_predictor(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd)401 static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
402 int bw, int bh,
403 const uint16_t *above,
404 const uint16_t *left, int bd) {
405 (void)bd;
406 const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
407 const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
408 const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
409 const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
410 // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
411 const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
412 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
413 sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
414 log2_scale + sizeof(*dst));
415 int r;
416 for (r = 0; r < bh; ++r) {
417 int c;
418 for (c = 0; c < bw; ++c) {
419 const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
420 const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
421 sm_weights_w[c], scale - sm_weights_w[c] };
422 uint32_t this_pred = 0;
423 int i;
424 assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
425 for (i = 0; i < 4; ++i) {
426 this_pred += weights[i] * pixels[i];
427 }
428 dst[c] = divide_round(this_pred, log2_scale);
429 }
430 dst += stride;
431 }
432 }
433
highbd_smooth_v_predictor(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd)434 static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
435 int bw, int bh,
436 const uint16_t *above,
437 const uint16_t *left, int bd) {
438 (void)bd;
439 const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
440 const uint8_t *const sm_weights = smooth_weights + bh - 4;
441 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
442 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
443 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
444 sm_weights_sanity_checks(sm_weights, sm_weights, scale,
445 log2_scale + sizeof(*dst));
446
447 int r;
448 for (r = 0; r < bh; r++) {
449 int c;
450 for (c = 0; c < bw; ++c) {
451 const uint16_t pixels[] = { above[c], below_pred };
452 const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
453 uint32_t this_pred = 0;
454 assert(scale >= sm_weights[r]);
455 int i;
456 for (i = 0; i < 2; ++i) {
457 this_pred += weights[i] * pixels[i];
458 }
459 dst[c] = divide_round(this_pred, log2_scale);
460 }
461 dst += stride;
462 }
463 }
464
highbd_smooth_h_predictor(uint16_t * dst,ptrdiff_t stride,int bw,int bh,const uint16_t * above,const uint16_t * left,int bd)465 static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
466 int bw, int bh,
467 const uint16_t *above,
468 const uint16_t *left, int bd) {
469 (void)bd;
470 const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
471 const uint8_t *const sm_weights = smooth_weights + bw - 4;
472 // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
473 const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
474 const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
475 sm_weights_sanity_checks(sm_weights, sm_weights, scale,
476 log2_scale + sizeof(*dst));
477
478 int r;
479 for (r = 0; r < bh; r++) {
480 int c;
481 for (c = 0; c < bw; ++c) {
482 const uint16_t pixels[] = { left[r], right_pred };
483 const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
484 uint32_t this_pred = 0;
485 assert(scale >= sm_weights[c]);
486 int i;
487 for (i = 0; i < 2; ++i) {
488 this_pred += weights[i] * pixels[i];
489 }
490 dst[c] = divide_round(this_pred, log2_scale);
491 }
492 dst += stride;
493 }
494 }
495
// High-bitdepth DC_128: no neighbors available, so fill with the midpoint of
// the bd-bit range (128 scaled up from 8 bits). The fill loop is the 16-bit
// equivalent of memset (what aom_memset16 does).
static inline void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
                                           int bw, int bh,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  (void)above;
  (void)left;
  const uint16_t mid = (uint16_t)(128 << (bd - 8));
  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) dst[c] = mid;
  }
}
509
// High-bitdepth DC_LEFT: fill the block with the rounded average of the
// left column. The fill loop matches aom_memset16's behavior.
static inline void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd) {
  (void)above;
  (void)bd;

  int sum = 0;
  for (int i = 0; i < bh; ++i) sum += left[i];
  const int expected_dc = (sum + (bh >> 1)) / bh;  // Round to nearest.

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) dst[c] = (uint16_t)expected_dc;
  }
}
526
// High-bitdepth DC_TOP: fill the block with the rounded average of the
// above row. The fill loop matches aom_memset16's behavior.
static inline void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
                                           int bw, int bh,
                                           const uint16_t *above,
                                           const uint16_t *left, int bd) {
  (void)left;
  (void)bd;

  int sum = 0;
  for (int i = 0; i < bw; ++i) sum += above[i];
  const int expected_dc = (sum + (bw >> 1)) / bw;  // Round to nearest.

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) dst[c] = (uint16_t)expected_dc;
  }
}
543
// High-bitdepth DC_PRED (square blocks): fill with the rounded average of
// all above-row and left-column neighbors. Rectangular sizes use
// highbd_dc_predictor_rect below. Fill loop matches aom_memset16's behavior.
static inline void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride,
                                       int bw, int bh, const uint16_t *above,
                                       const uint16_t *left, int bd) {
  const int count = bw + bh;
  (void)bd;

  int sum = 0;
  for (int i = 0; i < bw; ++i) sum += above[i];
  for (int i = 0; i < bh; ++i) sum += left[i];
  const int expected_dc = (sum + (count >> 1)) / count;  // Round to nearest.

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) dst[c] = (uint16_t)expected_dc;
  }
}
565
// Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but
// assume 2nd shift of 17 bits instead of 16.
// Note: Strictly speaking, 2nd shift needs to be 17 only when:
// - bit depth == 12, and
// - bw + bh is divisible by 5 (as opposed to divisible by 3).
// All other cases can use half the multipliers with a shift of 16 instead.
// This special optimization can be used when writing assembly code.
#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
// Note: This constant is odd, but a smaller even constant (0x199a) with the
// appropriate shift should work for neon in 8/10-bit.
#define HIGHBD_DC_MULTIPLIER_1X4 0x6667

#define HIGHBD_DC_SHIFT2 17
579
// DC prediction for rectangular high-bitdepth blocks. The division by
// count = bw + bh is a right shift by 'shift1' (removing the power-of-two
// factor) followed by a fixed-point multiply by the reciprocal of the odd
// factor; the second shift is HIGHBD_DC_SHIFT2 == 17. The fill loop matches
// aom_memset16's behavior.
static inline void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd,
                                            int shift1, uint32_t multiplier) {
  (void)bd;  // Only used by the assert below in debug builds.

  int sum = 0;
  for (int i = 0; i < bw; ++i) sum += above[i];
  for (int i = 0; i < bh; ++i) sum += left[i];

  const int interm = (sum + ((bw + bh) >> 1)) >> shift1;
  const int expected_dc = (interm * (int)multiplier) >> 17;
  assert(expected_dc < (1 << bd));

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) dst[c] = (uint16_t)expected_dc;
  }
}

#undef HIGHBD_DC_SHIFT2
606
// High-bitdepth DC predictors for the rectangular block sizes. As in the
// low-bitdepth wrappers above, 'shift1' is the number of trailing zero bits
// in (W + H) and the multiplier is the fixed-point reciprocal of the
// remaining odd factor (here Q17 instead of Q16).
void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd) {
  highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd) {
  highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
                                    const uint16_t *above, const uint16_t *left,
                                    int bd) {
  highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
                           HIGHBD_DC_MULTIPLIER_1X4);
}

void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
                                    const uint16_t *above, const uint16_t *left,
                                    int bd) {
  highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
                           HIGHBD_DC_MULTIPLIER_1X4);
}

void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
                                    const uint16_t *above, const uint16_t *left,
                                    int bd) {
  highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
                                    const uint16_t *above, const uint16_t *left,
                                    int bd) {
  highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
                                    const uint16_t *above, const uint16_t *left,
                                    int bd) {
  highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
                           HIGHBD_DC_MULTIPLIER_1X4);
}

void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
                                    const uint16_t *above, const uint16_t *left,
                                    int bd) {
  highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
                           HIGHBD_DC_MULTIPLIER_1X4);
}

void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
                           HIGHBD_DC_MULTIPLIER_1X4);
}

void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
                           HIGHBD_DC_MULTIPLIER_1X4);
}

void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
                                     const uint16_t *above,
                                     const uint16_t *left, int bd) {
  highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
                           HIGHBD_DC_MULTIPLIER_1X2);
}

#undef HIGHBD_DC_MULTIPLIER_1X2
#undef HIGHBD_DC_MULTIPLIER_1X4
707
// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
// above and left are not necessarily used all the time.
#define intra_pred_sized(type, width, height)                  \
  void aom_##type##_predictor_##width##x##height##_c(          \
      uint8_t *dst, ptrdiff_t stride, const uint8_t *above,    \
      const uint8_t *left) {                                   \
    type##_predictor(dst, stride, width, height, above, left); \
  }

#define intra_pred_highbd_sized(type, width, height)                        \
  void aom_highbd_##type##_predictor_##width##x##height##_c(                \
      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,               \
      const uint16_t *left, int bd) {                                       \
    highbd_##type##_predictor(dst, stride, width, height, above, left, bd); \
  }

/* clang-format off */
// All rectangular sizes, in both low- and high-bitdepth variants.
#define intra_pred_rectangular(type) \
  intra_pred_sized(type, 4, 8) \
  intra_pred_sized(type, 8, 4) \
  intra_pred_sized(type, 8, 16) \
  intra_pred_sized(type, 16, 8) \
  intra_pred_sized(type, 16, 32) \
  intra_pred_sized(type, 32, 16) \
  intra_pred_sized(type, 32, 64) \
  intra_pred_sized(type, 64, 32) \
  intra_pred_sized(type, 4, 16) \
  intra_pred_sized(type, 16, 4) \
  intra_pred_sized(type, 8, 32) \
  intra_pred_sized(type, 32, 8) \
  intra_pred_sized(type, 16, 64) \
  intra_pred_sized(type, 64, 16) \
  intra_pred_highbd_sized(type, 4, 8) \
  intra_pred_highbd_sized(type, 8, 4) \
  intra_pred_highbd_sized(type, 8, 16) \
  intra_pred_highbd_sized(type, 16, 8) \
  intra_pred_highbd_sized(type, 16, 32) \
  intra_pred_highbd_sized(type, 32, 16) \
  intra_pred_highbd_sized(type, 32, 64) \
  intra_pred_highbd_sized(type, 64, 32) \
  intra_pred_highbd_sized(type, 4, 16) \
  intra_pred_highbd_sized(type, 16, 4) \
  intra_pred_highbd_sized(type, 8, 32) \
  intra_pred_highbd_sized(type, 32, 8) \
  intra_pred_highbd_sized(type, 16, 64) \
  intra_pred_highbd_sized(type, 64, 16)

// Every size except low-bitdepth 4x4 (which intra_pred_allsizes adds).
#define intra_pred_above_4x4(type) \
  intra_pred_sized(type, 8, 8) \
  intra_pred_sized(type, 16, 16) \
  intra_pred_sized(type, 32, 32) \
  intra_pred_sized(type, 64, 64) \
  intra_pred_highbd_sized(type, 4, 4) \
  intra_pred_highbd_sized(type, 8, 8) \
  intra_pred_highbd_sized(type, 16, 16) \
  intra_pred_highbd_sized(type, 32, 32) \
  intra_pred_highbd_sized(type, 64, 64) \
  intra_pred_rectangular(type)
#define intra_pred_allsizes(type) \
  intra_pred_sized(type, 4, 4) \
  intra_pred_above_4x4(type)
#define intra_pred_square(type) \
  intra_pred_sized(type, 4, 4) \
  intra_pred_sized(type, 8, 8) \
  intra_pred_sized(type, 16, 16) \
  intra_pred_sized(type, 32, 32) \
  intra_pred_sized(type, 64, 64) \
  intra_pred_highbd_sized(type, 4, 4) \
  intra_pred_highbd_sized(type, 8, 8) \
  intra_pred_highbd_sized(type, 16, 16) \
  intra_pred_highbd_sized(type, 32, 32) \
  intra_pred_highbd_sized(type, 64, 64)

intra_pred_allsizes(v)
intra_pred_allsizes(h)
intra_pred_allsizes(smooth)
intra_pred_allsizes(smooth_v)
intra_pred_allsizes(smooth_h)
intra_pred_allsizes(paeth)
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
// dc only gets the square sizes here: the rectangular dc variants are
// hand-written above in terms of dc_predictor_rect.
intra_pred_square(dc)
/* clang-format on */
#undef intra_pred_allsizes
794