• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License") ;
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *	 http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 /*
22 	File:		dp_enc.c
23 
24 	Contains:	Dynamic Predictor encode routines
25 
26 	Copyright:	(c) 2001-2011 Apple, Inc.
27 */
28 
29 #include <string.h>
30 
31 #include "dplib.h"
32 #include "shift.h"
33 
/*
	ALWAYS_INLINE: compiler-specific "force inline" marker used on sign_of_int below.
	Expands to the always_inline attribute when __GNUC__ is non-zero, to __forceinline
	when _MSC_VER is defined, and to nothing otherwise.
*/
34 #if __GNUC__
35 #define ALWAYS_INLINE		__attribute__ ((always_inline))
36 #elif defined _MSC_VER
37 #define ALWAYS_INLINE		__forceinline
38 #else
39 #define ALWAYS_INLINE
40 #endif
41 
/*
	LOOP_ALIGN: defined empty here, so it expands to nothing where it appears at the
	top of the per-sample loops in pc_block.
*/
42 #define LOOP_ALIGN
43 
44 void
init_coefs(int16_t * coefs,uint32_t denshift,int32_t numPairs)45 init_coefs (int16_t * coefs, uint32_t denshift, int32_t numPairs)
46 {
47 	int32_t		k ;
48 	int32_t		den = 1 << denshift ;
49 
50 	coefs [0] = (AINIT * den) >> 4 ;
51 	coefs [1] = (BINIT * den) >> 4 ;
52 	coefs [2] = (CINIT * den) >> 4 ;
53 	for (k = 3 ; k < numPairs ; k++)
54 		coefs [k] = 0 ;
55 }
56 
/*
	copy_coefs

	Copies the first numPairs entries of srcCoefs into dstCoefs, lowest index first.
	Nothing is copied when numPairs is zero or negative.
*/
void
copy_coefs (const int16_t * srcCoefs, int16_t * dstCoefs, int32_t numPairs)
{
	const int16_t *	from = srcCoefs ;
	int16_t *		to = dstCoefs ;
	int32_t			remaining ;

	for (remaining = numPairs ; remaining > 0 ; remaining--)
		*to++ = *from++ ;
}
65 
sign_of_int(int32_t i)66 static inline int32_t ALWAYS_INLINE sign_of_int (int32_t i)
67 {
68 	int32_t negishift ;
69 
70 	negishift = ((uint32_t) - i) >> 31 ;
71 	return negishift | (i >> 31) ;
72 }
73 
74 void
pc_block(int32_t * in,int32_t * pc1,int32_t num,int16_t * coefs,int32_t numactive,uint32_t chanbits,uint32_t denshift)75 pc_block (int32_t * in, int32_t * pc1, int32_t num, int16_t * coefs, int32_t numactive, uint32_t chanbits, uint32_t denshift)
76 {
77 	register int16_t	a0, a1, a2, a3 ;
78 	register int32_t	b0, b1, b2, b3 ;
79 	int32_t					j, k, lim ;
80 	int32_t *			pin ;
81 	int32_t				sum1, dd ;
82 	int32_t				sg, sgn ;
83 	int32_t				top ;
84 	int32_t				del, del0 ;
85 	uint32_t			chanshift = 32 - chanbits ;
86 	int32_t				denhalf = 1 << (denshift - 1) ;
87 
88 	pc1 [0] = in [0] ;
89 	if (numactive == 0)
90 	{
91 		// just copy if numactive == 0 (but don't bother if in/out pointers the same)
92 		if ((num > 1) && (in != pc1))
93 			memcpy (&pc1 [1], &in [1], (num - 1) * sizeof (int32_t)) ;
94 		return ;
95 	}
96 	if (numactive == 31)
97 	{
98 		// short-circuit if numactive == 31
99 		for (j = 1 ; j < num ; j++)
100 		{
101 			del = in [j] - in [j-1] ;
102 			pc1 [j] = (del << chanshift) >> chanshift ;
103 		}
104 		return ;
105 	}
106 
107 	for (j = 1 ; j <= numactive ; j++)
108 	{
109 		del = in [j] - in [j-1] ;
110 		pc1 [j] = arith_shift_left (del, chanshift) >> chanshift ;
111 	}
112 
113 	lim = numactive + 1 ;
114 
115 	if (numactive == 4)
116 	{
117 		// optimization for numactive == 4
118 		a0 = coefs [0] ;
119 		a1 = coefs [1] ;
120 		a2 = coefs [2] ;
121 		a3 = coefs [3] ;
122 
123 		for (j = lim ; j < num ; j++)
124 		{
125 			LOOP_ALIGN
126 
127 			top = in [j - lim] ;
128 			pin = in + j - 1 ;
129 
130 			b0 = top - pin [0] ;
131 			b1 = top - pin [-1] ;
132 			b2 = top - pin [-2] ;
133 			b3 = top - pin [-3] ;
134 
135 			sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3) >> denshift ;
136 
137 			del = in [j] - top - sum1 ;
138 			del = arith_shift_left (del, chanshift) >> chanshift ;
139 			pc1 [j] = del ;
140 			del0 = del ;
141 
142 			sg = sign_of_int (del) ;
143 			if (sg > 0)
144 			{
145 				sgn = sign_of_int (b3) ;
146 				a3 -= sgn ;
147 				del0 -= (4 - 3) * ((sgn * b3) >> denshift) ;
148 				if (del0 <= 0)
149 					continue ;
150 
151 				sgn = sign_of_int (b2) ;
152 				a2 -= sgn ;
153 				del0 -= (4 - 2) * ((sgn * b2) >> denshift) ;
154 				if (del0 <= 0)
155 					continue ;
156 
157 				sgn = sign_of_int (b1) ;
158 				a1 -= sgn ;
159 				del0 -= (4 - 1) * ((sgn * b1) >> denshift) ;
160 				if (del0 <= 0)
161 					continue ;
162 
163 				a0 -= sign_of_int (b0) ;
164 			}
165 			else if (sg < 0)
166 			{
167 				// note: to avoid unnecessary negations, we flip the value of "sgn"
168 				sgn = -sign_of_int (b3) ;
169 				a3 -= sgn ;
170 				del0 -= (4 - 3) * ((sgn * b3) >> denshift) ;
171 				if (del0 >= 0)
172 					continue ;
173 
174 				sgn = -sign_of_int (b2) ;
175 				a2 -= sgn ;
176 				del0 -= (4 - 2) * ((sgn * b2) >> denshift) ;
177 				if (del0 >= 0)
178 					continue ;
179 
180 				sgn = -sign_of_int (b1) ;
181 				a1 -= sgn ;
182 				del0 -= (4 - 1) * ((sgn * b1) >> denshift) ;
183 				if (del0 >= 0)
184 					continue ;
185 
186 				a0 += sign_of_int (b0) ;
187 			}
188 		}
189 
190 		coefs [0] = a0 ;
191 		coefs [1] = a1 ;
192 		coefs [2] = a2 ;
193 		coefs [3] = a3 ;
194 	}
195 	else if (numactive == 8)
196 	{
197 		// optimization for numactive == 8
198 		register int16_t	a4, a5, a6, a7 ;
199 		register int32_t	b4, b5, b6, b7 ;
200 
201 		a0 = coefs [0] ;
202 		a1 = coefs [1] ;
203 		a2 = coefs [2] ;
204 		a3 = coefs [3] ;
205 		a4 = coefs [4] ;
206 		a5 = coefs [5] ;
207 		a6 = coefs [6] ;
208 		a7 = coefs [7] ;
209 
210 		for (j = lim ; j < num ; j++)
211 		{
212 			LOOP_ALIGN
213 
214 			top = in [j - lim] ;
215 			pin = in + j - 1 ;
216 
217 			b0 = top - (*pin--) ;
218 			b1 = top - (*pin--) ;
219 			b2 = top - (*pin--) ;
220 			b3 = top - (*pin--) ;
221 			b4 = top - (*pin--) ;
222 			b5 = top - (*pin--) ;
223 			b6 = top - (*pin--) ;
224 			b7 = top - (*pin) ;
225 			pin += 8 ;
226 
227 			sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3
228 					- a4 * b4 - a5 * b5 - a6 * b6 - a7 * b7) >> denshift ;
229 
230 			del = in [j] - top - sum1 ;
231 			del = arith_shift_left (del, chanshift) >> chanshift ;
232 			pc1 [j] = del ;
233 			del0 = del ;
234 
235 			sg = sign_of_int (del) ;
236 			if (sg > 0)
237 			{
238 				sgn = sign_of_int (b7) ;
239 				a7 -= sgn ;
240 				del0 -= 1 * ((sgn * b7) >> denshift) ;
241 				if (del0 <= 0)
242 					continue ;
243 
244 				sgn = sign_of_int (b6) ;
245 				a6 -= sgn ;
246 				del0 -= 2 * ((sgn * b6) >> denshift) ;
247 				if (del0 <= 0)
248 					continue ;
249 
250 				sgn = sign_of_int (b5) ;
251 				a5 -= sgn ;
252 				del0 -= 3 * ((sgn * b5) >> denshift) ;
253 				if (del0 <= 0)
254 					continue ;
255 
256 				sgn = sign_of_int (b4) ;
257 				a4 -= sgn ;
258 				del0 -= 4 * ((sgn * b4) >> denshift) ;
259 				if (del0 <= 0)
260 					continue ;
261 
262 				sgn = sign_of_int (b3) ;
263 				a3 -= sgn ;
264 				del0 -= 5 * ((sgn * b3) >> denshift) ;
265 				if (del0 <= 0)
266 					continue ;
267 
268 				sgn = sign_of_int (b2) ;
269 				a2 -= sgn ;
270 				del0 -= 6 * ((sgn * b2) >> denshift) ;
271 				if (del0 <= 0)
272 					continue ;
273 
274 				sgn = sign_of_int (b1) ;
275 				a1 -= sgn ;
276 				del0 -= 7 * ((sgn * b1) >> denshift) ;
277 				if (del0 <= 0)
278 					continue ;
279 
280 				a0 -= sign_of_int (b0) ;
281 			}
282 			else if (sg < 0)
283 			{
284 				// note: to avoid unnecessary negations, we flip the value of "sgn"
285 				sgn = -sign_of_int (b7) ;
286 				a7 -= sgn ;
287 				del0 -= 1 * ((sgn * b7) >> denshift) ;
288 				if (del0 >= 0)
289 					continue ;
290 
291 				sgn = -sign_of_int (b6) ;
292 				a6 -= sgn ;
293 				del0 -= 2 * ((sgn * b6) >> denshift) ;
294 				if (del0 >= 0)
295 					continue ;
296 
297 				sgn = -sign_of_int (b5) ;
298 				a5 -= sgn ;
299 				del0 -= 3 * ((sgn * b5) >> denshift) ;
300 				if (del0 >= 0)
301 					continue ;
302 
303 				sgn = -sign_of_int (b4) ;
304 				a4 -= sgn ;
305 				del0 -= 4 * ((sgn * b4) >> denshift) ;
306 				if (del0 >= 0)
307 					continue ;
308 
309 				sgn = -sign_of_int (b3) ;
310 				a3 -= sgn ;
311 				del0 -= 5 * ((sgn * b3) >> denshift) ;
312 				if (del0 >= 0)
313 					continue ;
314 
315 				sgn = -sign_of_int (b2) ;
316 				a2 -= sgn ;
317 				del0 -= 6 * ((sgn * b2) >> denshift) ;
318 				if (del0 >= 0)
319 					continue ;
320 
321 				sgn = -sign_of_int (b1) ;
322 				a1 -= sgn ;
323 				del0 -= 7 * ((sgn * b1) >> denshift) ;
324 				if (del0 >= 0)
325 					continue ;
326 
327 				a0 += sign_of_int (b0) ;
328 			}
329 		}
330 
331 		coefs [0] = a0 ;
332 		coefs [1] = a1 ;
333 		coefs [2] = a2 ;
334 		coefs [3] = a3 ;
335 		coefs [4] = a4 ;
336 		coefs [5] = a5 ;
337 		coefs [6] = a6 ;
338 		coefs [7] = a7 ;
339 	}
340 	else
341 	{
342 //pc_block_general:
343 		// general case
344 		for (j = lim ; j < num ; j++)
345 		{
346 			LOOP_ALIGN
347 
348 			top = in [j - lim] ;
349 			pin = in + j - 1 ;
350 
351 			sum1 = 0 ;
352 			for (k = 0 ; k < numactive ; k++)
353 				sum1 -= coefs [k] * (top - pin [-k]) ;
354 
355 			del = in [j] - top - ((sum1 + denhalf) >> denshift) ;
356 			del = (del << chanshift) >> chanshift ;
357 			pc1 [j] = del ;
358 			del0 = del ;
359 
360 			sg = sign_of_int (del) ;
361 			if (sg > 0)
362 			{
363 				for (k = (numactive - 1) ; k >= 0 ; k--)
364 				{
365 					dd = top - pin [-k] ;
366 					sgn = sign_of_int (dd) ;
367 					coefs [k] -= sgn ;
368 					del0 -= (numactive - k) * ((sgn * dd) >> denshift) ;
369 					if (del0 <= 0)
370 						break ;
371 				}
372 			}
373 			else if (sg < 0)
374 			{
375 				for (k = (numactive - 1) ; k >= 0 ; k--)
376 				{
377 					dd = top - pin [-k] ;
378 					sgn = sign_of_int (dd) ;
379 					coefs [k] += sgn ;
380 					del0 -= (numactive - k) * ((-sgn * dd) >> denshift) ;
381 					if (del0 >= 0)
382 						break ;
383 				}
384 			}
385 		}
386 	}
387 }
388