1 /*
2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <stdlib.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16
17 #include "aom/aom_integer.h"
18 #include "aom_ports/mem.h"
19 #include "aom_dsp/blend.h"
20
masked_sad(const uint8_t * src,int src_stride,const uint8_t * a,int a_stride,const uint8_t * b,int b_stride,const uint8_t * m,int m_stride,int width,int height)21 static INLINE unsigned int masked_sad(const uint8_t *src, int src_stride,
22 const uint8_t *a, int a_stride,
23 const uint8_t *b, int b_stride,
24 const uint8_t *m, int m_stride, int width,
25 int height) {
26 int y, x;
27 unsigned int sad = 0;
28 for (y = 0; y < height; y++) {
29 for (x = 0; x < width; x++) {
30 const int16_t pred = AOM_BLEND_A64(m[x], a[x], b[x]);
31 sad += abs(pred - src[x]);
32 }
33 src += src_stride;
34 a += a_stride;
35 b += b_stride;
36 m += m_stride;
37 }
38 return sad;
39 }
40
/*
 * MASKSADMxN(m, n) emits two C reference functions for an m-wide, n-high block:
 *
 *   aom_masked_sad<m>x<n>_c    - masked SAD of src against one reference.
 *   aom_masked_sad<m>x<n>x4d_c - the same for four references; the result for
 *                                ref[i] is stored in sads[i].
 *
 * second_pred is always read with a row stride of m. When invert_mask is zero
 * the reference is passed to masked_sad() as the first predictor and
 * second_pred as the second; a nonzero invert_mask swaps the two.
 */
#define MASKSADMxN(m, n)                                                       \
  unsigned int aom_masked_sad##m##x##n##_c(                                    \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,  \
      const uint8_t *second_pred, const uint8_t *msk, int msk_stride,          \
      int invert_mask) {                                                       \
    if (invert_mask) {                                                         \
      return masked_sad(src, src_stride, second_pred, m, ref, ref_stride, msk, \
                        msk_stride, m, n);                                     \
    }                                                                          \
    return masked_sad(src, src_stride, ref, ref_stride, second_pred, m, msk,   \
                      msk_stride, m, n);                                       \
  }                                                                            \
  void aom_masked_sad##m##x##n##x4d_c(                                         \
      const uint8_t *src, int src_stride, const uint8_t *ref[4],               \
      int ref_stride, const uint8_t *second_pred, const uint8_t *msk,          \
      int msk_stride, int invert_mask, unsigned sads[4]) {                     \
    for (int k = 0; k < 4; ++k) {                                              \
      sads[k] = invert_mask                                                    \
                    ? masked_sad(src, src_stride, second_pred, m, ref[k],      \
                                 ref_stride, msk, msk_stride, m, n)            \
                    : masked_sad(src, src_stride, ref[k], ref_stride,          \
                                 second_pred, m, msk, msk_stride, m, n);       \
    }                                                                          \
  }

/* Instantiate the 8-bit masked SAD functions for every block size used. */
/* clang-format off */
MASKSADMxN(128, 128)
MASKSADMxN(128, 64)
MASKSADMxN(64, 128)
MASKSADMxN(64, 64)
MASKSADMxN(64, 32)
MASKSADMxN(32, 64)
MASKSADMxN(32, 32)
MASKSADMxN(32, 16)
MASKSADMxN(16, 32)
MASKSADMxN(16, 16)
MASKSADMxN(16, 8)
MASKSADMxN(8, 16)
MASKSADMxN(8, 8)
MASKSADMxN(8, 4)
MASKSADMxN(4, 8)
MASKSADMxN(4, 4)
MASKSADMxN(4, 16)
MASKSADMxN(16, 4)
MASKSADMxN(8, 32)
MASKSADMxN(32, 8)
MASKSADMxN(16, 64)
MASKSADMxN(64, 16)
/* clang-format on */
93
94 #if CONFIG_AV1_HIGHBITDEPTH
95 static INLINE
highbd_masked_sad(const uint8_t * src8,int src_stride,const uint8_t * a8,int a_stride,const uint8_t * b8,int b_stride,const uint8_t * m,int m_stride,int width,int height)96 unsigned int highbd_masked_sad(const uint8_t *src8, int src_stride,
97 const uint8_t *a8, int a_stride,
98 const uint8_t *b8, int b_stride,
99 const uint8_t *m, int m_stride, int width,
100 int height) {
101 int y, x;
102 unsigned int sad = 0;
103 const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
104 const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
105 const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
106
107 for (y = 0; y < height; y++) {
108 for (x = 0; x < width; x++) {
109 const uint16_t pred = AOM_BLEND_A64(m[x], a[x], b[x]);
110 sad += abs(pred - src[x]);
111 }
112
113 src += src_stride;
114 a += a_stride;
115 b += b_stride;
116 m += m_stride;
117 }
118
119 return sad;
120 }
121
/*
 * HIGHBD_MASKSADMXN(m, n) emits aom_highbd_masked_sad<m>x<n>_c, the C
 * reference high-bitdepth masked SAD for an m-wide, n-high block.
 *
 * second_pred8 is always read with a row stride of m. When invert_mask is
 * zero the reference is passed to highbd_masked_sad() as the first predictor
 * and second_pred8 as the second; a nonzero invert_mask swaps the two.
 */
#define HIGHBD_MASKSADMXN(m, n)                                               \
  unsigned int aom_highbd_masked_sad##m##x##n##_c(                            \
      const uint8_t *src8, int src_stride, const uint8_t *ref8,               \
      int ref_stride, const uint8_t *second_pred8, const uint8_t *msk,        \
      int msk_stride, int invert_mask) {                                      \
    if (invert_mask) {                                                        \
      return highbd_masked_sad(src8, src_stride, second_pred8, m, ref8,       \
                               ref_stride, msk, msk_stride, m, n);            \
    }                                                                         \
    return highbd_masked_sad(src8, src_stride, ref8, ref_stride,              \
                             second_pred8, m, msk, msk_stride, m, n);         \
  }

/* Instantiate the high-bitdepth masked SAD for every block size used. */
HIGHBD_MASKSADMXN(128, 128)
HIGHBD_MASKSADMXN(128, 64)
HIGHBD_MASKSADMXN(64, 128)
HIGHBD_MASKSADMXN(64, 64)
HIGHBD_MASKSADMXN(64, 32)
HIGHBD_MASKSADMXN(32, 64)
HIGHBD_MASKSADMXN(32, 32)
HIGHBD_MASKSADMXN(32, 16)
HIGHBD_MASKSADMXN(16, 32)
HIGHBD_MASKSADMXN(16, 16)
HIGHBD_MASKSADMXN(16, 8)
HIGHBD_MASKSADMXN(8, 16)
HIGHBD_MASKSADMXN(8, 8)
HIGHBD_MASKSADMXN(8, 4)
HIGHBD_MASKSADMXN(4, 8)
HIGHBD_MASKSADMXN(4, 4)
HIGHBD_MASKSADMXN(4, 16)
HIGHBD_MASKSADMXN(16, 4)
HIGHBD_MASKSADMXN(8, 32)
HIGHBD_MASKSADMXN(32, 8)
HIGHBD_MASKSADMXN(16, 64)
HIGHBD_MASKSADMXN(64, 16)
157 #endif // CONFIG_AV1_HIGHBITDEPTH
158
159 #if !CONFIG_REALTIME_ONLY
160 // pre: predictor being evaluated
161 // wsrc: target weighted prediction (has been *4096 to keep precision)
162 // mask: 2d weights (scaled by 4096)
obmc_sad(const uint8_t * pre,int pre_stride,const int32_t * wsrc,const int32_t * mask,int width,int height)163 static INLINE unsigned int obmc_sad(const uint8_t *pre, int pre_stride,
164 const int32_t *wsrc, const int32_t *mask,
165 int width, int height) {
166 int y, x;
167 unsigned int sad = 0;
168
169 for (y = 0; y < height; y++) {
170 for (x = 0; x < width; x++)
171 sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
172
173 pre += pre_stride;
174 wsrc += width;
175 mask += width;
176 }
177
178 return sad;
179 }
180
/*
 * OBMCSADMxN(m, n) emits aom_obmc_sad<m>x<n>_c, a fixed-size wrapper that
 * forwards to obmc_sad() with width m and height n.
 */
#define OBMCSADMxN(m, n)                                                   \
  unsigned int aom_obmc_sad##m##x##n##_c(                                  \
      const uint8_t *ref, int ref_stride, const int32_t *wsrc,             \
      const int32_t *mask) {                                               \
    const unsigned int sad = obmc_sad(ref, ref_stride, wsrc, mask, m, n);  \
    return sad;                                                            \
  }

/* Instantiate the 8-bit wrapper for every block size used. */
/* clang-format off */
OBMCSADMxN(128, 128)
OBMCSADMxN(128, 64)
OBMCSADMxN(64, 128)
OBMCSADMxN(64, 64)
OBMCSADMxN(64, 32)
OBMCSADMxN(32, 64)
OBMCSADMxN(32, 32)
OBMCSADMxN(32, 16)
OBMCSADMxN(16, 32)
OBMCSADMxN(16, 16)
OBMCSADMxN(16, 8)
OBMCSADMxN(8, 16)
OBMCSADMxN(8, 8)
OBMCSADMxN(8, 4)
OBMCSADMxN(4, 8)
OBMCSADMxN(4, 4)
OBMCSADMxN(4, 16)
OBMCSADMxN(16, 4)
OBMCSADMxN(8, 32)
OBMCSADMxN(32, 8)
OBMCSADMxN(16, 64)
OBMCSADMxN(64, 16)
/* clang-format on */
212
213 #if CONFIG_AV1_HIGHBITDEPTH
214 static INLINE
highbd_obmc_sad(const uint8_t * pre8,int pre_stride,const int32_t * wsrc,const int32_t * mask,int width,int height)215 unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
216 const int32_t *wsrc, const int32_t *mask,
217 int width, int height) {
218 int y, x;
219 unsigned int sad = 0;
220 const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
221
222 for (y = 0; y < height; y++) {
223 for (x = 0; x < width; x++)
224 sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
225
226 pre += pre_stride;
227 wsrc += width;
228 mask += width;
229 }
230
231 return sad;
232 }
233
/*
 * HIGHBD_OBMCSADMXN(m, n) emits aom_highbd_obmc_sad<m>x<n>_c, a fixed-size
 * wrapper that forwards to highbd_obmc_sad() with width m and height n.
 */
#define HIGHBD_OBMCSADMXN(m, n)                                        \
  unsigned int aom_highbd_obmc_sad##m##x##n##_c(                       \
      const uint8_t *ref, int ref_stride, const int32_t *wsrc,         \
      const int32_t *mask) {                                           \
    const unsigned int sad =                                           \
        highbd_obmc_sad(ref, ref_stride, wsrc, mask, m, n);            \
    return sad;                                                        \
  }

/* Instantiate the high-bitdepth wrapper for every block size used. */
/* clang-format off */
HIGHBD_OBMCSADMXN(128, 128)
HIGHBD_OBMCSADMXN(128, 64)
HIGHBD_OBMCSADMXN(64, 128)
HIGHBD_OBMCSADMXN(64, 64)
HIGHBD_OBMCSADMXN(64, 32)
HIGHBD_OBMCSADMXN(32, 64)
HIGHBD_OBMCSADMXN(32, 32)
HIGHBD_OBMCSADMXN(32, 16)
HIGHBD_OBMCSADMXN(16, 32)
HIGHBD_OBMCSADMXN(16, 16)
HIGHBD_OBMCSADMXN(16, 8)
HIGHBD_OBMCSADMXN(8, 16)
HIGHBD_OBMCSADMXN(8, 8)
HIGHBD_OBMCSADMXN(8, 4)
HIGHBD_OBMCSADMXN(4, 8)
HIGHBD_OBMCSADMXN(4, 4)
HIGHBD_OBMCSADMXN(4, 16)
HIGHBD_OBMCSADMXN(16, 4)
HIGHBD_OBMCSADMXN(8, 32)
HIGHBD_OBMCSADMXN(32, 8)
HIGHBD_OBMCSADMXN(16, 64)
HIGHBD_OBMCSADMXN(64, 16)
/* clang-format on */
265 #endif // CONFIG_AV1_HIGHBITDEPTH
266 #endif // !CONFIG_REALTIME_ONLY
267