1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License") ;
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 /*
22 File: dp_enc.c
23
24 Contains: Dynamic Predictor encode routines
25
26 Copyright: (c) 2001-2011 Apple, Inc.
27 */
28
29 #include <string.h>
30
31 #include "dplib.h"
32 #include "shift.h"
33
/*
 * ALWAYS_INLINE is the compiler-specific "force inline" decoration used on
 * small helpers in this file: the GNU always_inline attribute when __GNUC__
 * is non-zero, __forceinline under MSVC, and nothing on any other compiler.
 */
#if __GNUC__
#define ALWAYS_INLINE __attribute__ ((always_inline))
#elif defined _MSC_VER
#define ALWAYS_INLINE __forceinline
#else
#define ALWAYS_INLINE
#endif

/*
 * LOOP_ALIGN expands to nothing here; it appears as the first statement of
 * each prediction loop in pc_block.
 */
#define LOOP_ALIGN
43
44 void
init_coefs(int16_t * coefs,uint32_t denshift,int32_t numPairs)45 init_coefs (int16_t * coefs, uint32_t denshift, int32_t numPairs)
46 {
47 int32_t k ;
48 int32_t den = 1 << denshift ;
49
50 coefs [0] = (AINIT * den) >> 4 ;
51 coefs [1] = (BINIT * den) >> 4 ;
52 coefs [2] = (CINIT * den) >> 4 ;
53 for (k = 3 ; k < numPairs ; k++)
54 coefs [k] = 0 ;
55 }
56
/*
 * copy_coefs - duplicate the first numPairs coefficients of srcCoefs into
 * dstCoefs, lowest index first.  A zero or negative numPairs copies nothing.
 */
void
copy_coefs (const int16_t * srcCoefs, int16_t * dstCoefs, int32_t numPairs)
{
	const int16_t *	from = srcCoefs ;
	int16_t *		to = dstCoefs ;
	int32_t			remaining ;

	for (remaining = numPairs ; remaining > 0 ; remaining--)
		*to++ = *from++ ;
}
65
sign_of_int(int32_t i)66 static inline int32_t ALWAYS_INLINE sign_of_int (int32_t i)
67 {
68 int32_t negishift ;
69
70 negishift = ((uint32_t) - i) >> 31 ;
71 return negishift | (i >> 31) ;
72 }
73
74 void
pc_block(int32_t * in,int32_t * pc1,int32_t num,int16_t * coefs,int32_t numactive,uint32_t chanbits,uint32_t denshift)75 pc_block (int32_t * in, int32_t * pc1, int32_t num, int16_t * coefs, int32_t numactive, uint32_t chanbits, uint32_t denshift)
76 {
77 register int16_t a0, a1, a2, a3 ;
78 register int32_t b0, b1, b2, b3 ;
79 int32_t j, k, lim ;
80 int32_t * pin ;
81 int32_t sum1, dd ;
82 int32_t sg, sgn ;
83 int32_t top ;
84 int32_t del, del0 ;
85 uint32_t chanshift = 32 - chanbits ;
86 int32_t denhalf = 1 << (denshift - 1) ;
87
88 pc1 [0] = in [0] ;
89 if (numactive == 0)
90 {
91 // just copy if numactive == 0 (but don't bother if in/out pointers the same)
92 if ((num > 1) && (in != pc1))
93 memcpy (&pc1 [1], &in [1], (num - 1) * sizeof (int32_t)) ;
94 return ;
95 }
96 if (numactive == 31)
97 {
98 // short-circuit if numactive == 31
99 for (j = 1 ; j < num ; j++)
100 {
101 del = in [j] - in [j-1] ;
102 pc1 [j] = (del << chanshift) >> chanshift ;
103 }
104 return ;
105 }
106
107 for (j = 1 ; j <= numactive ; j++)
108 {
109 del = in [j] - in [j-1] ;
110 pc1 [j] = arith_shift_left (del, chanshift) >> chanshift ;
111 }
112
113 lim = numactive + 1 ;
114
115 if (numactive == 4)
116 {
117 // optimization for numactive == 4
118 a0 = coefs [0] ;
119 a1 = coefs [1] ;
120 a2 = coefs [2] ;
121 a3 = coefs [3] ;
122
123 for (j = lim ; j < num ; j++)
124 {
125 LOOP_ALIGN
126
127 top = in [j - lim] ;
128 pin = in + j - 1 ;
129
130 b0 = top - pin [0] ;
131 b1 = top - pin [-1] ;
132 b2 = top - pin [-2] ;
133 b3 = top - pin [-3] ;
134
135 sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3) >> denshift ;
136
137 del = in [j] - top - sum1 ;
138 del = arith_shift_left (del, chanshift) >> chanshift ;
139 pc1 [j] = del ;
140 del0 = del ;
141
142 sg = sign_of_int (del) ;
143 if (sg > 0)
144 {
145 sgn = sign_of_int (b3) ;
146 a3 -= sgn ;
147 del0 -= (4 - 3) * ((sgn * b3) >> denshift) ;
148 if (del0 <= 0)
149 continue ;
150
151 sgn = sign_of_int (b2) ;
152 a2 -= sgn ;
153 del0 -= (4 - 2) * ((sgn * b2) >> denshift) ;
154 if (del0 <= 0)
155 continue ;
156
157 sgn = sign_of_int (b1) ;
158 a1 -= sgn ;
159 del0 -= (4 - 1) * ((sgn * b1) >> denshift) ;
160 if (del0 <= 0)
161 continue ;
162
163 a0 -= sign_of_int (b0) ;
164 }
165 else if (sg < 0)
166 {
167 // note: to avoid unnecessary negations, we flip the value of "sgn"
168 sgn = -sign_of_int (b3) ;
169 a3 -= sgn ;
170 del0 -= (4 - 3) * ((sgn * b3) >> denshift) ;
171 if (del0 >= 0)
172 continue ;
173
174 sgn = -sign_of_int (b2) ;
175 a2 -= sgn ;
176 del0 -= (4 - 2) * ((sgn * b2) >> denshift) ;
177 if (del0 >= 0)
178 continue ;
179
180 sgn = -sign_of_int (b1) ;
181 a1 -= sgn ;
182 del0 -= (4 - 1) * ((sgn * b1) >> denshift) ;
183 if (del0 >= 0)
184 continue ;
185
186 a0 += sign_of_int (b0) ;
187 }
188 }
189
190 coefs [0] = a0 ;
191 coefs [1] = a1 ;
192 coefs [2] = a2 ;
193 coefs [3] = a3 ;
194 }
195 else if (numactive == 8)
196 {
197 // optimization for numactive == 8
198 register int16_t a4, a5, a6, a7 ;
199 register int32_t b4, b5, b6, b7 ;
200
201 a0 = coefs [0] ;
202 a1 = coefs [1] ;
203 a2 = coefs [2] ;
204 a3 = coefs [3] ;
205 a4 = coefs [4] ;
206 a5 = coefs [5] ;
207 a6 = coefs [6] ;
208 a7 = coefs [7] ;
209
210 for (j = lim ; j < num ; j++)
211 {
212 LOOP_ALIGN
213
214 top = in [j - lim] ;
215 pin = in + j - 1 ;
216
217 b0 = top - (*pin--) ;
218 b1 = top - (*pin--) ;
219 b2 = top - (*pin--) ;
220 b3 = top - (*pin--) ;
221 b4 = top - (*pin--) ;
222 b5 = top - (*pin--) ;
223 b6 = top - (*pin--) ;
224 b7 = top - (*pin) ;
225 pin += 8 ;
226
227 sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3
228 - a4 * b4 - a5 * b5 - a6 * b6 - a7 * b7) >> denshift ;
229
230 del = in [j] - top - sum1 ;
231 del = arith_shift_left (del, chanshift) >> chanshift ;
232 pc1 [j] = del ;
233 del0 = del ;
234
235 sg = sign_of_int (del) ;
236 if (sg > 0)
237 {
238 sgn = sign_of_int (b7) ;
239 a7 -= sgn ;
240 del0 -= 1 * ((sgn * b7) >> denshift) ;
241 if (del0 <= 0)
242 continue ;
243
244 sgn = sign_of_int (b6) ;
245 a6 -= sgn ;
246 del0 -= 2 * ((sgn * b6) >> denshift) ;
247 if (del0 <= 0)
248 continue ;
249
250 sgn = sign_of_int (b5) ;
251 a5 -= sgn ;
252 del0 -= 3 * ((sgn * b5) >> denshift) ;
253 if (del0 <= 0)
254 continue ;
255
256 sgn = sign_of_int (b4) ;
257 a4 -= sgn ;
258 del0 -= 4 * ((sgn * b4) >> denshift) ;
259 if (del0 <= 0)
260 continue ;
261
262 sgn = sign_of_int (b3) ;
263 a3 -= sgn ;
264 del0 -= 5 * ((sgn * b3) >> denshift) ;
265 if (del0 <= 0)
266 continue ;
267
268 sgn = sign_of_int (b2) ;
269 a2 -= sgn ;
270 del0 -= 6 * ((sgn * b2) >> denshift) ;
271 if (del0 <= 0)
272 continue ;
273
274 sgn = sign_of_int (b1) ;
275 a1 -= sgn ;
276 del0 -= 7 * ((sgn * b1) >> denshift) ;
277 if (del0 <= 0)
278 continue ;
279
280 a0 -= sign_of_int (b0) ;
281 }
282 else if (sg < 0)
283 {
284 // note: to avoid unnecessary negations, we flip the value of "sgn"
285 sgn = -sign_of_int (b7) ;
286 a7 -= sgn ;
287 del0 -= 1 * ((sgn * b7) >> denshift) ;
288 if (del0 >= 0)
289 continue ;
290
291 sgn = -sign_of_int (b6) ;
292 a6 -= sgn ;
293 del0 -= 2 * ((sgn * b6) >> denshift) ;
294 if (del0 >= 0)
295 continue ;
296
297 sgn = -sign_of_int (b5) ;
298 a5 -= sgn ;
299 del0 -= 3 * ((sgn * b5) >> denshift) ;
300 if (del0 >= 0)
301 continue ;
302
303 sgn = -sign_of_int (b4) ;
304 a4 -= sgn ;
305 del0 -= 4 * ((sgn * b4) >> denshift) ;
306 if (del0 >= 0)
307 continue ;
308
309 sgn = -sign_of_int (b3) ;
310 a3 -= sgn ;
311 del0 -= 5 * ((sgn * b3) >> denshift) ;
312 if (del0 >= 0)
313 continue ;
314
315 sgn = -sign_of_int (b2) ;
316 a2 -= sgn ;
317 del0 -= 6 * ((sgn * b2) >> denshift) ;
318 if (del0 >= 0)
319 continue ;
320
321 sgn = -sign_of_int (b1) ;
322 a1 -= sgn ;
323 del0 -= 7 * ((sgn * b1) >> denshift) ;
324 if (del0 >= 0)
325 continue ;
326
327 a0 += sign_of_int (b0) ;
328 }
329 }
330
331 coefs [0] = a0 ;
332 coefs [1] = a1 ;
333 coefs [2] = a2 ;
334 coefs [3] = a3 ;
335 coefs [4] = a4 ;
336 coefs [5] = a5 ;
337 coefs [6] = a6 ;
338 coefs [7] = a7 ;
339 }
340 else
341 {
342 //pc_block_general:
343 // general case
344 for (j = lim ; j < num ; j++)
345 {
346 LOOP_ALIGN
347
348 top = in [j - lim] ;
349 pin = in + j - 1 ;
350
351 sum1 = 0 ;
352 for (k = 0 ; k < numactive ; k++)
353 sum1 -= coefs [k] * (top - pin [-k]) ;
354
355 del = in [j] - top - ((sum1 + denhalf) >> denshift) ;
356 del = (del << chanshift) >> chanshift ;
357 pc1 [j] = del ;
358 del0 = del ;
359
360 sg = sign_of_int (del) ;
361 if (sg > 0)
362 {
363 for (k = (numactive - 1) ; k >= 0 ; k--)
364 {
365 dd = top - pin [-k] ;
366 sgn = sign_of_int (dd) ;
367 coefs [k] -= sgn ;
368 del0 -= (numactive - k) * ((sgn * dd) >> denshift) ;
369 if (del0 <= 0)
370 break ;
371 }
372 }
373 else if (sg < 0)
374 {
375 for (k = (numactive - 1) ; k >= 0 ; k--)
376 {
377 dd = top - pin [-k] ;
378 sgn = sign_of_int (dd) ;
379 coefs [k] += sgn ;
380 del0 -= (numactive - k) * ((-sgn * dd) >> denshift) ;
381 if (del0 >= 0)
382 break ;
383 }
384 }
385 }
386 }
387 }
388