1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License") ;
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 /*
22 File: dp_enc.c
23
24 Contains: Dynamic Predictor encode routines
25
26 Copyright: (c) 2001-2011 Apple, Inc.
27 */
28
29 #include <string.h>
30
31 #include "dplib.h"
32 #include "shift.h"
33
/*
** ALWAYS_INLINE expands to the GCC-style always_inline function attribute
** when __GNUC__ evaluates to non-zero in the preprocessor, and to nothing
** otherwise.
*/
#if __GNUC__
#define ALWAYS_INLINE __attribute__ ((always_inline))
#else
#define ALWAYS_INLINE
#endif

/*
** LOOP_ALIGN is defined empty here, so its uses at the top of the loop
** bodies in this file expand to nothing.
*/
#define LOOP_ALIGN
41
42 void
init_coefs(int16_t * coefs,uint32_t denshift,int32_t numPairs)43 init_coefs (int16_t * coefs, uint32_t denshift, int32_t numPairs)
44 {
45 int32_t k ;
46 int32_t den = 1 << denshift ;
47
48 coefs [0] = (AINIT * den) >> 4 ;
49 coefs [1] = (BINIT * den) >> 4 ;
50 coefs [2] = (CINIT * den) >> 4 ;
51 for (k = 3 ; k < numPairs ; k++)
52 coefs [k] = 0 ;
53 }
54
/*
** copy_coefs
**
** Copy numPairs coefficients from srcCoefs to dstCoefs, one element at a
** time in ascending index order. Nothing is copied when numPairs <= 0.
*/
void
copy_coefs (const int16_t * srcCoefs, int16_t * dstCoefs, int32_t numPairs)
{
	const int16_t * from = srcCoefs ;
	int16_t * to = dstCoefs ;
	int32_t remaining ;

	for (remaining = numPairs ; remaining > 0 ; remaining--)
		*to++ = *from++ ;
}
63
sign_of_int(int32_t i)64 static inline int32_t ALWAYS_INLINE sign_of_int (int32_t i)
65 {
66 int32_t negishift ;
67
68 negishift = ((uint32_t) - i) >> 31 ;
69 return negishift | (i >> 31) ;
70 }
71
72 void
pc_block(int32_t * in,int32_t * pc1,int32_t num,int16_t * coefs,int32_t numactive,uint32_t chanbits,uint32_t denshift)73 pc_block (int32_t * in, int32_t * pc1, int32_t num, int16_t * coefs, int32_t numactive, uint32_t chanbits, uint32_t denshift)
74 {
75 register int16_t a0, a1, a2, a3 ;
76 register int32_t b0, b1, b2, b3 ;
77 int32_t j, k, lim ;
78 int32_t * pin ;
79 int32_t sum1, dd ;
80 int32_t sg, sgn ;
81 int32_t top ;
82 int32_t del, del0 ;
83 uint32_t chanshift = 32 - chanbits ;
84 int32_t denhalf = 1 << (denshift - 1) ;
85
86 pc1 [0] = in [0] ;
87 if (numactive == 0)
88 {
89 // just copy if numactive == 0 (but don't bother if in/out pointers the same)
90 if ((num > 1) && (in != pc1))
91 memcpy (&pc1 [1], &in [1], (num - 1) * sizeof (int32_t)) ;
92 return ;
93 }
94 if (numactive == 31)
95 {
96 // short-circuit if numactive == 31
97 for (j = 1 ; j < num ; j++)
98 {
99 del = in [j] - in [j-1] ;
100 pc1 [j] = (del << chanshift) >> chanshift ;
101 }
102 return ;
103 }
104
105 for (j = 1 ; j <= numactive ; j++)
106 {
107 del = in [j] - in [j-1] ;
108 pc1 [j] = arith_shift_left (del, chanshift) >> chanshift ;
109 }
110
111 lim = numactive + 1 ;
112
113 if (numactive == 4)
114 {
115 // optimization for numactive == 4
116 a0 = coefs [0] ;
117 a1 = coefs [1] ;
118 a2 = coefs [2] ;
119 a3 = coefs [3] ;
120
121 for (j = lim ; j < num ; j++)
122 {
123 LOOP_ALIGN
124
125 top = in [j - lim] ;
126 pin = in + j - 1 ;
127
128 b0 = top - pin [0] ;
129 b1 = top - pin [-1] ;
130 b2 = top - pin [-2] ;
131 b3 = top - pin [-3] ;
132
133 sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3) >> denshift ;
134
135 del = in [j] - top - sum1 ;
136 del = arith_shift_left (del, chanshift) >> chanshift ;
137 pc1 [j] = del ;
138 del0 = del ;
139
140 sg = sign_of_int (del) ;
141 if (sg > 0)
142 {
143 sgn = sign_of_int (b3) ;
144 a3 -= sgn ;
145 del0 -= (4 - 3) * ((sgn * b3) >> denshift) ;
146 if (del0 <= 0)
147 continue ;
148
149 sgn = sign_of_int (b2) ;
150 a2 -= sgn ;
151 del0 -= (4 - 2) * ((sgn * b2) >> denshift) ;
152 if (del0 <= 0)
153 continue ;
154
155 sgn = sign_of_int (b1) ;
156 a1 -= sgn ;
157 del0 -= (4 - 1) * ((sgn * b1) >> denshift) ;
158 if (del0 <= 0)
159 continue ;
160
161 a0 -= sign_of_int (b0) ;
162 }
163 else if (sg < 0)
164 {
165 // note: to avoid unnecessary negations, we flip the value of "sgn"
166 sgn = -sign_of_int (b3) ;
167 a3 -= sgn ;
168 del0 -= (4 - 3) * ((sgn * b3) >> denshift) ;
169 if (del0 >= 0)
170 continue ;
171
172 sgn = -sign_of_int (b2) ;
173 a2 -= sgn ;
174 del0 -= (4 - 2) * ((sgn * b2) >> denshift) ;
175 if (del0 >= 0)
176 continue ;
177
178 sgn = -sign_of_int (b1) ;
179 a1 -= sgn ;
180 del0 -= (4 - 1) * ((sgn * b1) >> denshift) ;
181 if (del0 >= 0)
182 continue ;
183
184 a0 += sign_of_int (b0) ;
185 }
186 }
187
188 coefs [0] = a0 ;
189 coefs [1] = a1 ;
190 coefs [2] = a2 ;
191 coefs [3] = a3 ;
192 }
193 else if (numactive == 8)
194 {
195 // optimization for numactive == 8
196 register int16_t a4, a5, a6, a7 ;
197 register int32_t b4, b5, b6, b7 ;
198
199 a0 = coefs [0] ;
200 a1 = coefs [1] ;
201 a2 = coefs [2] ;
202 a3 = coefs [3] ;
203 a4 = coefs [4] ;
204 a5 = coefs [5] ;
205 a6 = coefs [6] ;
206 a7 = coefs [7] ;
207
208 for (j = lim ; j < num ; j++)
209 {
210 LOOP_ALIGN
211
212 top = in [j - lim] ;
213 pin = in + j - 1 ;
214
215 b0 = top - (*pin--) ;
216 b1 = top - (*pin--) ;
217 b2 = top - (*pin--) ;
218 b3 = top - (*pin--) ;
219 b4 = top - (*pin--) ;
220 b5 = top - (*pin--) ;
221 b6 = top - (*pin--) ;
222 b7 = top - (*pin) ;
223 pin += 8 ;
224
225 sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3
226 - a4 * b4 - a5 * b5 - a6 * b6 - a7 * b7) >> denshift ;
227
228 del = in [j] - top - sum1 ;
229 del = arith_shift_left (del, chanshift) >> chanshift ;
230 pc1 [j] = del ;
231 del0 = del ;
232
233 sg = sign_of_int (del) ;
234 if (sg > 0)
235 {
236 sgn = sign_of_int (b7) ;
237 a7 -= sgn ;
238 del0 -= 1 * ((sgn * b7) >> denshift) ;
239 if (del0 <= 0)
240 continue ;
241
242 sgn = sign_of_int (b6) ;
243 a6 -= sgn ;
244 del0 -= 2 * ((sgn * b6) >> denshift) ;
245 if (del0 <= 0)
246 continue ;
247
248 sgn = sign_of_int (b5) ;
249 a5 -= sgn ;
250 del0 -= 3 * ((sgn * b5) >> denshift) ;
251 if (del0 <= 0)
252 continue ;
253
254 sgn = sign_of_int (b4) ;
255 a4 -= sgn ;
256 del0 -= 4 * ((sgn * b4) >> denshift) ;
257 if (del0 <= 0)
258 continue ;
259
260 sgn = sign_of_int (b3) ;
261 a3 -= sgn ;
262 del0 -= 5 * ((sgn * b3) >> denshift) ;
263 if (del0 <= 0)
264 continue ;
265
266 sgn = sign_of_int (b2) ;
267 a2 -= sgn ;
268 del0 -= 6 * ((sgn * b2) >> denshift) ;
269 if (del0 <= 0)
270 continue ;
271
272 sgn = sign_of_int (b1) ;
273 a1 -= sgn ;
274 del0 -= 7 * ((sgn * b1) >> denshift) ;
275 if (del0 <= 0)
276 continue ;
277
278 a0 -= sign_of_int (b0) ;
279 }
280 else if (sg < 0)
281 {
282 // note: to avoid unnecessary negations, we flip the value of "sgn"
283 sgn = -sign_of_int (b7) ;
284 a7 -= sgn ;
285 del0 -= 1 * ((sgn * b7) >> denshift) ;
286 if (del0 >= 0)
287 continue ;
288
289 sgn = -sign_of_int (b6) ;
290 a6 -= sgn ;
291 del0 -= 2 * ((sgn * b6) >> denshift) ;
292 if (del0 >= 0)
293 continue ;
294
295 sgn = -sign_of_int (b5) ;
296 a5 -= sgn ;
297 del0 -= 3 * ((sgn * b5) >> denshift) ;
298 if (del0 >= 0)
299 continue ;
300
301 sgn = -sign_of_int (b4) ;
302 a4 -= sgn ;
303 del0 -= 4 * ((sgn * b4) >> denshift) ;
304 if (del0 >= 0)
305 continue ;
306
307 sgn = -sign_of_int (b3) ;
308 a3 -= sgn ;
309 del0 -= 5 * ((sgn * b3) >> denshift) ;
310 if (del0 >= 0)
311 continue ;
312
313 sgn = -sign_of_int (b2) ;
314 a2 -= sgn ;
315 del0 -= 6 * ((sgn * b2) >> denshift) ;
316 if (del0 >= 0)
317 continue ;
318
319 sgn = -sign_of_int (b1) ;
320 a1 -= sgn ;
321 del0 -= 7 * ((sgn * b1) >> denshift) ;
322 if (del0 >= 0)
323 continue ;
324
325 a0 += sign_of_int (b0) ;
326 }
327 }
328
329 coefs [0] = a0 ;
330 coefs [1] = a1 ;
331 coefs [2] = a2 ;
332 coefs [3] = a3 ;
333 coefs [4] = a4 ;
334 coefs [5] = a5 ;
335 coefs [6] = a6 ;
336 coefs [7] = a7 ;
337 }
338 else
339 {
340 //pc_block_general:
341 // general case
342 for (j = lim ; j < num ; j++)
343 {
344 LOOP_ALIGN
345
346 top = in [j - lim] ;
347 pin = in + j - 1 ;
348
349 sum1 = 0 ;
350 for (k = 0 ; k < numactive ; k++)
351 sum1 -= coefs [k] * (top - pin [-k]) ;
352
353 del = in [j] - top - ((sum1 + denhalf) >> denshift) ;
354 del = (del << chanshift) >> chanshift ;
355 pc1 [j] = del ;
356 del0 = del ;
357
358 sg = sign_of_int (del) ;
359 if (sg > 0)
360 {
361 for (k = (numactive - 1) ; k >= 0 ; k--)
362 {
363 dd = top - pin [-k] ;
364 sgn = sign_of_int (dd) ;
365 coefs [k] -= sgn ;
366 del0 -= (numactive - k) * ((sgn * dd) >> denshift) ;
367 if (del0 <= 0)
368 break ;
369 }
370 }
371 else if (sg < 0)
372 {
373 for (k = (numactive - 1) ; k >= 0 ; k--)
374 {
375 dd = top - pin [-k] ;
376 sgn = sign_of_int (dd) ;
377 coefs [k] += sgn ;
378 del0 -= (numactive - k) * ((-sgn * dd) >> denshift) ;
379 if (del0 >= 0)
380 break ;
381 }
382 }
383 }
384 }
385 }
386