1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2012-2015 Erik de Castro Lopo <erikd@mega-nerd.com>
4 *
5 * @APPLE_APACHE_LICENSE_HEADER_START@
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License") ;
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 * @APPLE_APACHE_LICENSE_HEADER_END@
20 */
21
22 /*
23 File: ALACEncoder.cpp
24 */
25
26 // build stuff
27 #define VERBOSE_DEBUG 0
28 #define DebugMsg printf
29
30 // headers
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include "sfendian.h"
36
37 #include "alac_codec.h"
38
39 #include "aglib.h"
40 #include "dplib.h"
41 #include "matrixlib.h"
42
43 #include "ALACBitUtilities.h"
44 #include "ALACAudioTypes.h"
45 #include "EndianPortable.h"
46
/*
	Fallback boolean type for translation units that do not already have one.
	- C23 makes bool / true / false keywords, and <stdbool.h> defines them as
	  macros (announcing itself through __bool_true_false_are_defined). In both
	  situations declaring them as enumerators is a hard compile error, so the
	  enum is only provided when neither applies.
	- C++ has the same keywords built in, hence the __cplusplus test.
*/
#if !defined (__cplusplus) && !defined (__bool_true_false_are_defined) && \
	!(defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 202311L))
typedef enum
{
	false = 0,
	true = 1
} bool ;
#endif
52
53 static void GetConfig (ALAC_ENCODER *p, ALACSpecificConfig * config) ;
54
55 static int32_t EncodeStereo (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) ;
56 static int32_t EncodeStereoFast (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) ;
57 static int32_t EncodeStereoEscape (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t numSamples) ;
58 static int32_t EncodeMono (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * input, uint32_t stride, uint32_t channelIndex, uint32_t numSamples) ;
59
60
61
62 // Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
63 // picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
64 typedef int16_t (*SearchCoefs) [kALACMaxCoefs] ;
65
66 // defines/constants
67 const uint32_t kALACEncoderMagic = MAKE_MARKER ('d', 'p', 'g', 'e') ;
68 const uint32_t kMaxSampleSize = 32 ; // max allowed bit width is 32
69 const uint32_t kDefaultMixBits = 2 ;
70 const uint32_t kDefaultMixRes = 0 ;
71 const uint32_t kMaxRes = 4 ;
72 const uint32_t kDefaultNumUV = 8 ;
73 const uint32_t kMinUV = 4 ;
74 const uint32_t kMaxUV = 8 ;
75
76 // static functions
77 #if VERBOSE_DEBUG
78 static void AddFiller (BitBuffer * bits, int32_t numBytes) ;
79 #endif
80
81
82 /*
83 Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
84 at the beginning of the frame for that channel. Indicates whether SCE, CPE, or LFE.
85 Each particular field is accessed via the current channel indx. Note that the channel
86 indx increments by two for channel pairs.
87
88 For example:
89
90 C L R 3-channel input = (ID_CPE << 3) | (ID_SCE)
91 indx 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
92 indx 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
93
94 C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
95 indx 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
96 indx 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
97 indx 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
98 indx 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
99 indx 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
100 */
101 static const uint32_t sChannelMaps [kALACMaxChannels] =
102 {
103 ID_SCE,
104 ID_CPE,
105 (ID_CPE << 3) | (ID_SCE),
106 (ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
107 (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
108 (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
109 (ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
110 (ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
111 } ;
112
113 #if PRAGMA_MARK
114 #pragma mark -
115 #endif
116
117 void
alac_set_fastmode(ALAC_ENCODER * p,int32_t fast)118 alac_set_fastmode (ALAC_ENCODER * p, int32_t fast)
119 {
120 p->mFastMode = fast ;
121 }
122
123
124 /*
125 HEADER SPECIFICATION
126
127 For every segment we adopt the following header:
128
129 1 byte reserved (always 0)
130 1 byte flags (see below)
131 [4 byte frame length] (optional, see below)
132 ---Next, the per-segment ALAC parameters---
133 1 byte mixBits (middle-side parameter)
134 1 byte mixRes (middle-side parameter, interpreted as signed char)
135
136 1 byte shiftU (4 bits modeU, 4 bits denShiftU)
137 1 byte filterU (3 bits pbFactorU, 5 bits numU)
138 (numU) shorts (signed DP coefficients for U channel)
139 ---Next, 2nd-channel ALAC parameters in case of stereo mode---
140 1 byte shiftV (4 bits modeV, 4 bits denShiftV)
141 1 byte filterV (3 bits pbFactorV, 5 bits numV)
142 (numV) shorts (signed DP coefficients for V channel)
143 ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
144 ---Then comes the AG-compressor bitstream---
145
146
147 FLAGS
148 -----
149
150 The presence of certain flag bits changes the header format such that the parameters might
151 not even be sent. The currently defined flags format is:
152
153 0000psse
154
155 where 0 = reserved, must be 0
156 p = 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
157 ss = 2-bit field indicating "number of shift-off bytes ignored by compression"
158 e = 1-bit field indicating "escape"
159
160 The "partial frame" flag means that the following segment is not equal to the frame length specified
161 in the out-of-band decoder configuration. This allows the decoder to deal with end-of-file partial
162 segments without incurring the 32-bit overhead for each segment.
163
164 The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
165 uncompressed. The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
166 quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
167 uncompressed size. However, by shifting the input values down and running the remaining bits through
168 the normal compression algorithm, a net win can be achieved. If this field is non-zero, it means that
169 the shifted-off bytes follow after the parameter section of the header and before the compressed
170 bitstream. Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
171 bytes are shifted off the bottom which helps the eventual compression ratio. For stereo channels,
172 the shifted off bytes are interleaved.
173
174 The "escape" flag means that this segment was not compressed b/c the compressed size would be
175 >= uncompressed size. In that case, the audio data was passed through uncompressed after the header.
176 The other header parameter bytes will not be sent.
177
178
179 PARAMETERS
180 ----------
181
182 If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
183
184 4 + (2 + 2 * numU) (mono mode)
185 4 + (2 + 2 * numU) + (2 + 2 * numV) (stereo mode)
186
187 where the ALAC filter-lengths numU, numV are bounded by a
188 constant (in the current source, numU, numV <= NUMCOEPAIRS), and
189 this forces an absolute upper bound on header size.
190
191 Each segment-decode process loads up these bytes from the front of the
192 local stream, in the above order, then follows with the entropy-encoded
193 bits for the given segment.
194
195 To generalize middle-side, there are various mixing modes including middle-side, each lossless,
196 as embodied in the mix () and unmix () functions. These functions exploit a generalized middle-side
197 transformation:
198
199 u := [(rL + (m-r)R)/m] ;
200 v := L - R ;
201
202 where [ ] denotes integer floor. The (lossless) inverse is
203
204 L = u + v - [rv/m] ;
205 R = L - v ;
206
207 In the segment header, m and r are encoded in mixBits and mixRes.
208 Classical "middle-side" is obtained with m = 2, r = 1, but now
209 we have more generalized mixes.
210
211 NOTES
212 -----
213 The relevance of the ALAC coefficients is explained in detail
214 in patent documents.
215 */
216
217 /*
218 EncodeStereo ()
219 - encode a channel pair
220 */
221 static int32_t
EncodeStereo(ALAC_ENCODER * p,struct BitBuffer * bitstream,const int32_t * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)222 EncodeStereo (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples)
223 {
224 BitBuffer workBits ;
225 BitBuffer startBits = *bitstream ; // squirrel away copy of current state in case we need to go back and do an escape packet
226 AGParamRec agParams ;
227 uint32_t bits1, bits2 ;
228 uint32_t dilate ;
229 int32_t mixBits, mixRes, maxRes ;
230 uint32_t minBits, minBits1, minBits2 ;
231 uint32_t numU, numV ;
232 uint32_t mode ;
233 uint32_t pbFactor ;
234 uint32_t chanBits ;
235 uint8_t bytesShifted ;
236 SearchCoefs coefsU ;
237 SearchCoefs coefsV ;
238 uint32_t indx ;
239 uint8_t partialFrame ;
240 uint32_t escapeBits ;
241 bool doEscape ;
242 int32_t status = ALAC_noErr ;
243 int32_t bestRes ;
244
245 // make sure we handle this bit-depth before we get going
246 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ;
247
248 // reload coefs pointers for this channel pair
249 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
250 // actually results in better overall compression
251 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
252 // different coefs for the different passes of "mixRes" results in even better compression
253 coefsU = (SearchCoefs) p->mCoefsU [channelIndex] ;
254 coefsV = (SearchCoefs) p->mCoefsV [channelIndex] ;
255
256 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
257 // so enable 16-bit "shift off" and encode in 17-bit mode
258 // - in addition, 24-bit mode really improves with one byte shifted off
259 if (p->mBitDepth == 32)
260 bytesShifted = 2 ;
261 else if (p->mBitDepth >= 24)
262 bytesShifted = 1 ;
263 else
264 bytesShifted = 0 ;
265
266 chanBits = p->mBitDepth - (bytesShifted * 8) + 1 ;
267
268 // flag whether or not this is a partial frame
269 partialFrame = (numSamples == p->mFrameSize) ? 0 : 1 ;
270
271 // brute-force encode optimization loop
272 // - run over variations of the encoding params to find the best choice
273 mixBits = kDefaultMixBits ;
274 maxRes = kMaxRes ;
275 numU = numV = kDefaultNumUV ;
276 mode = 0 ;
277 pbFactor = 4 ;
278 dilate = 8 ;
279
280 minBits = minBits1 = minBits2 = 1ul << 31 ;
281
282 bestRes = p->mLastMixRes [channelIndex] ;
283
284 for (mixRes = 0 ; mixRes <= maxRes ; mixRes++)
285 {
286 // mix the stereo inputs
287 switch (p->mBitDepth)
288 {
289 case 16:
290 mix16 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate, mixBits, mixRes) ;
291 break ;
292 case 20:
293 mix20 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate, mixBits, mixRes) ;
294 break ;
295 case 24:
296 // includes extraction of shifted-off bytes
297 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate,
298 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ;
299 break ;
300 case 32:
301 // includes extraction of shifted-off bytes
302 mix32 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples / dilate,
303 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ;
304 break ;
305 }
306
307 BitBufferInit (&workBits, p->mWorkBuffer, p->mMaxOutputBytes) ;
308
309 // run the dynamic predictors
310 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ;
311 pc_block (p->mMixBufferV, p->mPredictorV, numSamples / dilate, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ;
312
313 // run the lossless compressor on each channel
314 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ;
315 status = dyn_comp (&agParams, p->mPredictorU, &workBits, numSamples / dilate, chanBits, &bits1) ;
316 RequireNoErr (status, goto Exit ;) ;
317
318 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ;
319 status = dyn_comp (&agParams, p->mPredictorV, &workBits, numSamples / dilate, chanBits, &bits2) ;
320 RequireNoErr (status, goto Exit ;) ;
321
322 // look for best match
323 if ((bits1 + bits2) < minBits1)
324 {
325 minBits1 = bits1 + bits2 ;
326 bestRes = mixRes ;
327 }
328 }
329
330 p->mLastMixRes [channelIndex] = (int16_t) bestRes ;
331
332 // mix the stereo inputs with the current best mixRes
333 mixRes = p->mLastMixRes [channelIndex] ;
334 switch (p->mBitDepth)
335 {
336 case 16:
337 mix16 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ;
338 break ;
339 case 20:
340 mix20 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ;
341 break ;
342 case 24:
343 // also extracts the shifted off bytes into the shift buffers
344 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples,
345 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ;
346 break ;
347 case 32:
348 // also extracts the shifted off bytes into the shift buffers
349 mix32 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples,
350 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ;
351 break ;
352 }
353
354 // now it's time for the predictor coefficient search loop
355 numU = numV = kMinUV ;
356 minBits1 = minBits2 = 1ul << 31 ;
357
358 for (uint32_t numUV = kMinUV ; numUV <= kMaxUV ; numUV += 4)
359 {
360 BitBufferInit (&workBits, p->mWorkBuffer, p->mMaxOutputBytes) ;
361
362 dilate = 32 ;
363
364 // run the predictor over the same data multiple times to help it converge
365 for (uint32_t converge = 0 ; converge < 8 ; converge++)
366 {
367 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numUV-1], numUV, chanBits, DENSHIFT_DEFAULT) ;
368 pc_block (p->mMixBufferV, p->mPredictorV, numSamples / dilate, coefsV [numUV-1], numUV, chanBits, DENSHIFT_DEFAULT) ;
369 }
370
371 dilate = 8 ;
372
373 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ;
374 status = dyn_comp (&agParams, p->mPredictorU, &workBits, numSamples / dilate, chanBits, &bits1) ;
375
376 if ((bits1 * dilate + 16 * numUV) < minBits1)
377 {
378 minBits1 = bits1 * dilate + 16 * numUV ;
379 numU = numUV ;
380 }
381
382 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ;
383 status = dyn_comp (&agParams, p->mPredictorV, &workBits, numSamples / dilate, chanBits, &bits2) ;
384
385 if ((bits2 * dilate + 16 * numUV) < minBits2)
386 {
387 minBits2 = bits2 * dilate + 16 * numUV ;
388 numV = numUV ;
389 }
390 }
391
392 // test for escape hatch if best calculated compressed size turns out to be more than the input size
393 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0) ;
394 if (bytesShifted != 0)
395 minBits += (numSamples * (bytesShifted * 8) * 2) ;
396
397 escapeBits = (numSamples * p->mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8) ; /* 2 common header bytes */
398
399 doEscape = (minBits >= escapeBits) ? true : false ;
400
401 if (doEscape == false)
402 {
403 // write bitstream header and coefs
404 BitBufferWrite (bitstream, 0, 12) ;
405 BitBufferWrite (bitstream, (partialFrame << 3) | (bytesShifted << 1), 4) ;
406 if (partialFrame)
407 BitBufferWrite (bitstream, numSamples, 32) ;
408 BitBufferWrite (bitstream, mixBits, 8) ;
409 BitBufferWrite (bitstream, mixRes, 8) ;
410
411 //Assert ((mode < 16) && (DENSHIFT_DEFAULT < 16)) ;
412 //Assert ((pbFactor < 8) && (numU < 32)) ;
413 //Assert ((pbFactor < 8) && (numV < 32)) ;
414
415 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ;
416 BitBufferWrite (bitstream, (pbFactor << 5) | numU, 8) ;
417 for (indx = 0 ; indx < numU ; indx++)
418 BitBufferWrite (bitstream, coefsU [numU - 1][indx], 16) ;
419
420 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ;
421 BitBufferWrite (bitstream, (pbFactor << 5) | numV, 8) ;
422 for (indx = 0 ; indx < numV ; indx++)
423 BitBufferWrite (bitstream, coefsV [numV - 1][indx], 16) ;
424
425 // if shift active, write the interleaved shift buffers
426 if (bytesShifted != 0)
427 {
428 uint32_t bitShift = bytesShifted * 8 ;
429
430 //Assert (bitShift <= 16) ;
431
432 for (indx = 0 ; indx < (numSamples * 2) ; indx += 2)
433 {
434 uint32_t shiftedVal ;
435
436 shiftedVal = ((uint32_t) p->mShiftBufferUV [indx + 0] << bitShift) | (uint32_t) p->mShiftBufferUV [indx + 1] ;
437 BitBufferWrite (bitstream, shiftedVal, bitShift * 2) ;
438 }
439 }
440
441 // run the dynamic predictor and lossless compression for the "left" channel
442 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
443 // of only using "U" buffers for the U-channel and "V" buffers for the V-channel
444 if (mode == 0)
445 {
446 pc_block (p->mMixBufferU, p->mPredictorU, numSamples, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ;
447 }
448 else
449 {
450 pc_block (p->mMixBufferU, p->mPredictorV, numSamples, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ;
451 pc_block (p->mPredictorV, p->mPredictorU, numSamples, NULL, 31, chanBits, 0) ;
452 }
453
454 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ;
455 status = dyn_comp (&agParams, p->mPredictorU, bitstream, numSamples, chanBits, &bits1) ;
456 RequireNoErr (status, goto Exit ;) ;
457
458 // run the dynamic predictor and lossless compression for the "right" channel
459 if (mode == 0)
460 {
461 pc_block (p->mMixBufferV, p->mPredictorV, numSamples, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ;
462 }
463 else
464 {
465 pc_block (p->mMixBufferV, p->mPredictorU, numSamples, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ;
466 pc_block (p->mPredictorU, p->mPredictorV, numSamples, NULL, 31, chanBits, 0) ;
467 }
468
469 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ;
470 status = dyn_comp (&agParams, p->mPredictorV, bitstream, numSamples, chanBits, &bits2) ;
471 RequireNoErr (status, goto Exit ;) ;
472
473 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
474 chuck it and do an escape packet
475 */
476 minBits = BitBufferGetPosition (bitstream) - BitBufferGetPosition (&startBits) ;
477 if (minBits >= escapeBits)
478 {
479 *bitstream = startBits ; // reset bitstream state
480 doEscape = true ;
481 printf ("compressed frame too big: %u vs. %u \n", minBits, escapeBits) ;
482 }
483 }
484
485 if (doEscape == true)
486 {
487 /* escape */
488 status = EncodeStereoEscape (p, bitstream, inputBuffer, stride, numSamples) ;
489
490 #if VERBOSE_DEBUG
491 DebugMsg ("escape!: %u vs %u\n", minBits, escapeBits) ;
492 #endif
493 }
494
495 Exit:
496 return status ;
497 }
498
499 /*
500 EncodeStereoFast ()
501 - encode a channel pair without the search loop for maximum possible speed
502 */
503 static int32_t
EncodeStereoFast(ALAC_ENCODER * p,struct BitBuffer * bitstream,const int32_t * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)504 EncodeStereoFast (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples)
505 {
506 BitBuffer startBits = *bitstream ; // squirrel away current bit position in case we decide to use escape hatch
507 AGParamRec agParams ;
508 uint32_t bits1, bits2 ;
509 int32_t mixBits, mixRes ;
510 uint32_t minBits, minBits1, minBits2 ;
511 uint32_t numU, numV ;
512 uint32_t mode ;
513 uint32_t pbFactor ;
514 uint32_t chanBits ;
515 uint8_t bytesShifted ;
516 SearchCoefs coefsU ;
517 SearchCoefs coefsV ;
518 uint32_t indx ;
519 uint8_t partialFrame ;
520 uint32_t escapeBits ;
521 bool doEscape ;
522 int32_t status ;
523
524 // make sure we handle this bit-depth before we get going
525 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ;
526
527 // reload coefs pointers for this channel pair
528 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
529 // actually results in better overall compression
530 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
531 // different coefs for the different passes of "mixRes" results in even better compression
532 coefsU = (SearchCoefs) p->mCoefsU [channelIndex] ;
533 coefsV = (SearchCoefs) p->mCoefsV [channelIndex] ;
534
535 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
536 // so enable 16-bit "shift off" and encode in 17-bit mode
537 // - in addition, 24-bit mode really improves with one byte shifted off
538 if (p->mBitDepth == 32)
539 bytesShifted = 2 ;
540 else if (p->mBitDepth >= 24)
541 bytesShifted = 1 ;
542 else
543 bytesShifted = 0 ;
544
545 chanBits = p->mBitDepth - (bytesShifted * 8) + 1 ;
546
547 // flag whether or not this is a partial frame
548 partialFrame = (numSamples == p->mFrameSize) ? 0 : 1 ;
549
550 // set up default encoding parameters for "fast" mode
551 mixBits = kDefaultMixBits ;
552 mixRes = kDefaultMixRes ;
553 numU = numV = kDefaultNumUV ;
554 mode = 0 ;
555 pbFactor = 4 ;
556
557 minBits = minBits1 = minBits2 = 1ul << 31 ;
558
559 // mix the stereo inputs with default mixBits/mixRes
560 switch (p->mBitDepth)
561 {
562 case 16:
563 mix16 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ;
564 break ;
565 case 20:
566 mix20 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, mixBits, mixRes) ;
567 break ;
568 case 24:
569 // also extracts the shifted off bytes into the shift buffers
570 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples,
571 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ;
572 break ;
573 case 32:
574 // also extracts the shifted off bytes into the shift buffers
575 mix32 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples,
576 mixBits, mixRes, p->mShiftBufferUV, bytesShifted) ;
577 break ;
578 }
579
580 /* speculatively write the bitstream assuming the compressed version will be smaller */
581
582 // write bitstream header and coefs
583 BitBufferWrite (bitstream, 0, 12) ;
584 BitBufferWrite (bitstream, (partialFrame << 3) | (bytesShifted << 1), 4) ;
585 if (partialFrame)
586 BitBufferWrite (bitstream, numSamples, 32) ;
587 BitBufferWrite (bitstream, mixBits, 8) ;
588 BitBufferWrite (bitstream, mixRes, 8) ;
589
590 //Assert ((mode < 16) && (DENSHIFT_DEFAULT < 16)) ;
591 //Assert ((pbFactor < 8) && (numU < 32)) ;
592 //Assert ((pbFactor < 8) && (numV < 32)) ;
593
594 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ;
595 BitBufferWrite (bitstream, (pbFactor << 5) | numU, 8) ;
596 for (indx = 0 ; indx < numU ; indx++)
597 BitBufferWrite (bitstream, coefsU [numU - 1][indx], 16) ;
598
599 BitBufferWrite (bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8) ;
600 BitBufferWrite (bitstream, (pbFactor << 5) | numV, 8) ;
601 for (indx = 0 ; indx < numV ; indx++)
602 BitBufferWrite (bitstream, coefsV [numV - 1][indx], 16) ;
603
604 // if shift active, write the interleaved shift buffers
605 if (bytesShifted != 0)
606 {
607 uint32_t bitShift = bytesShifted * 8 ;
608
609 //Assert (bitShift <= 16) ;
610
611 for (indx = 0 ; indx < (numSamples * 2) ; indx += 2)
612 {
613 uint32_t shiftedVal ;
614
615 shiftedVal = ((uint32_t) p->mShiftBufferUV [indx + 0] << bitShift) | (uint32_t) p->mShiftBufferUV [indx + 1] ;
616 BitBufferWrite (bitstream, shiftedVal, bitShift * 2) ;
617 }
618 }
619
620 // run the dynamic predictor and lossless compression for the "left" channel
621 // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
622 pc_block (p->mMixBufferU, p->mPredictorU, numSamples, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ;
623
624 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ;
625 status = dyn_comp (&agParams, p->mPredictorU, bitstream, numSamples, chanBits, &bits1) ;
626 RequireNoErr (status, goto Exit ;) ;
627
628 // run the dynamic predictor and lossless compression for the "right" channel
629 pc_block (p->mMixBufferV, p->mPredictorV, numSamples, coefsV [numV - 1], numV, chanBits, DENSHIFT_DEFAULT) ;
630
631 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT) ;
632 status = dyn_comp (&agParams, p->mPredictorV, bitstream, numSamples, chanBits, &bits2) ;
633 RequireNoErr (status, goto Exit ;) ;
634
635 // do bit requirement calculations
636 minBits1 = bits1 + (numU * sizeof (int16_t) * 8) ;
637 minBits2 = bits2 + (numV * sizeof (int16_t) * 8) ;
638
639 // test for escape hatch if best calculated compressed size turns out to be more than the input size
640 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0) ;
641 if (bytesShifted != 0)
642 minBits += (numSamples * (bytesShifted * 8) * 2) ;
643
644 escapeBits = (numSamples * p->mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8) ; /* 2 common header bytes */
645
646 doEscape = (minBits >= escapeBits) ? true : false ;
647
648 if (doEscape == false)
649 {
650 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
651 chuck it and do an escape packet
652 */
653 minBits = BitBufferGetPosition (bitstream) - BitBufferGetPosition (&startBits) ;
654 if (minBits >= escapeBits)
655 {
656 doEscape = true ;
657 printf ("compressed frame too big: %u vs. %u\n", minBits, escapeBits) ;
658 }
659
660 }
661
662 if (doEscape == true)
663 {
664 /* escape */
665
666 // reset bitstream position since we speculatively wrote the compressed version
667 *bitstream = startBits ;
668
669 // write escape frame
670 status = EncodeStereoEscape (p, bitstream, inputBuffer, stride, numSamples) ;
671
672 #if VERBOSE_DEBUG
673 DebugMsg ("escape!: %u vs %u\n", minBits, (numSamples * p->mBitDepth * 2)) ;
674 #endif
675 }
676
677 Exit:
678 return status ;
679 }
680
681 /*
682 EncodeStereoEscape ()
683 - encode stereo escape frame
684 */
685 static int32_t
EncodeStereoEscape(ALAC_ENCODER * p,struct BitBuffer * bitstream,const int32_t * inputBuffer,uint32_t stride,uint32_t numSamples)686 EncodeStereoEscape (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t numSamples)
687 {
688 uint8_t partialFrame ;
689 uint32_t indx ;
690
691 // flag whether or not this is a partial frame
692 partialFrame = (numSamples == p->mFrameSize) ? 0 : 1 ;
693
694 // write bitstream header
695 BitBufferWrite (bitstream, 0, 12) ;
696 BitBufferWrite (bitstream, (partialFrame << 3) | 1, 4) ; // LSB = 1 means "frame not compressed"
697 if (partialFrame)
698 BitBufferWrite (bitstream, numSamples, 32) ;
699
700 // just copy the input data to the output buffer
701 switch (p->mBitDepth)
702 {
703 case 16:
704 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride)
705 {
706 BitBufferWrite (bitstream, inputBuffer [indx + 0] >> 16, 16) ;
707 BitBufferWrite (bitstream, inputBuffer [indx + 1] >> 16, 16) ;
708 }
709 break ;
710 case 20:
711 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride)
712 {
713 BitBufferWrite (bitstream, inputBuffer [indx + 0] >> 12, 16) ;
714 BitBufferWrite (bitstream, inputBuffer [indx + 1] >> 12, 16) ;
715 }
716 break ;
717 case 24:
718 // mix24 () with mixres param = 0 means de-interleave so use it to simplify things
719 mix24 (inputBuffer, stride, p->mMixBufferU, p->mMixBufferV, numSamples, 0, 0, p->mShiftBufferUV, 0) ;
720 for (indx = 0 ; indx < numSamples ; indx++)
721 {
722 BitBufferWrite (bitstream, p->mMixBufferU [indx] >> 8, 24) ;
723 BitBufferWrite (bitstream, p->mMixBufferV [indx] >> 8, 24) ;
724 }
725 break ;
726 case 32:
727 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride)
728 {
729 BitBufferWrite (bitstream, inputBuffer [indx + 0], 32) ;
730 BitBufferWrite (bitstream, inputBuffer [indx + 1], 32) ;
731 }
732 break ;
733 }
734
735 return ALAC_noErr ;
736 }
737
738 /*
739 EncodeMono ()
740 - encode a mono input buffer
741 */
742 static int32_t
EncodeMono(ALAC_ENCODER * p,struct BitBuffer * bitstream,const int32_t * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)743 EncodeMono (ALAC_ENCODER *p, struct BitBuffer * bitstream, const int32_t * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples)
744 {
745 BitBuffer startBits = *bitstream ; // squirrel away copy of current state in case we need to go back and do an escape packet
746 AGParamRec agParams ;
747 uint32_t bits1 ;
748 uint32_t numU ;
749 SearchCoefs coefsU ;
750 uint32_t dilate ;
751 uint32_t minBits, bestU ;
752 uint32_t minU, maxU ;
753 uint32_t indx, indx2 ;
754 uint8_t bytesShifted ;
755 uint32_t shift ;
756 uint32_t mask ;
757 uint32_t chanBits ;
758 uint8_t pbFactor ;
759 uint8_t partialFrame ;
760 uint32_t escapeBits ;
761 bool doEscape ;
762 int32_t status = ALAC_noErr ;
763
764
765 // make sure we handle this bit-depth before we get going
766 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ;
767
768 // reload coefs array from previous frame
769 coefsU = (SearchCoefs) p->mCoefsU [channelIndex] ;
770
771 // pick bit depth for actual encoding
772 // - we lop off the lower byte (s) for 24-/32-bit encodings
773 if (p->mBitDepth == 32)
774 bytesShifted = 2 ;
775 else if (p->mBitDepth >= 24)
776 bytesShifted = 1 ;
777 else
778 bytesShifted = 0 ;
779
780 shift = bytesShifted * 8 ;
781 mask = (1ul << shift) - 1 ;
782 chanBits = p->mBitDepth - (bytesShifted * 8) ;
783
784 // flag whether or not this is a partial frame
785 partialFrame = (numSamples == p->mFrameSize) ? 0 : 1 ;
786
787 // convert N-bit data to 32-bit for predictor
788 switch (p->mBitDepth)
789 {
790 case 16:
791 // convert 16-bit data to 32-bit for predictor
792 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride)
793 p->mMixBufferU [indx] = inputBuffer [indx2] >> 16 ;
794 break ;
795
796 case 20:
797 // convert 20-bit data to 32-bit for predictor
798 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride)
799 p->mMixBufferU [indx] = inputBuffer [indx2] >> 12 ;
800 break ;
801 case 24:
802 // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte (s)
803 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride)
804 {
805 p->mMixBufferU [indx] = inputBuffer [indx2] >> 8 ;
806 p->mShiftBufferUV [indx] = (uint16_t) (p->mMixBufferU [indx] & mask) ;
807 p->mMixBufferU [indx] >>= shift ;
808 }
809
810 break ;
811 case 32:
812 // just copy the 32-bit input data for the predictor and extract the shifted off byte (s)
813 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride)
814 {
815 p->mShiftBufferUV [indx] = (uint16_t) (inputBuffer [indx2] & mask) ;
816 p->mMixBufferU [indx] = inputBuffer [indx2] >> shift ;
817 }
818 break ;
819 }
820
821 // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
822 // - run over variations of the encoding params to find the best choice
823 minU = 4 ;
824 maxU = 8 ;
825 minBits = 1ul << 31 ;
826 pbFactor = 4 ;
827
828 bestU = minU ;
829
830 for (numU = minU ; numU <= maxU ; numU += 4)
831 {
832 BitBuffer workBits ;
833 uint32_t numBits ;
834
835 BitBufferInit (&workBits, p->mWorkBuffer, p->mMaxOutputBytes) ;
836
837 dilate = 32 ;
838 for (uint32_t converge = 0 ; converge < 7 ; converge++)
839 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ;
840
841 dilate = 8 ;
842 pc_block (p->mMixBufferU, p->mPredictorU, numSamples / dilate, coefsU [numU - 1], numU, chanBits, DENSHIFT_DEFAULT) ;
843
844 set_ag_params (&agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples / dilate, numSamples / dilate, MAX_RUN_DEFAULT) ;
845 status = dyn_comp (&agParams, p->mPredictorU, &workBits, numSamples / dilate, chanBits, &bits1) ;
846 RequireNoErr (status, goto Exit ;) ;
847
848 numBits = (dilate * bits1) + (16 * numU) ;
849 if (numBits < minBits)
850 {
851 bestU = numU ;
852 minBits = numBits ;
853 }
854 }
855
856 // test for escape hatch if best calculated compressed size turns out to be more than the input size
857 // - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU
858 minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0) ;
859 if (bytesShifted != 0)
860 minBits += (numSamples * (bytesShifted * 8)) ;
861
862 escapeBits = (numSamples * p->mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8) ; /* 2 common header bytes */
863
864 doEscape = (minBits >= escapeBits) ? true : false ;
865
866 if (doEscape == false)
867 {
868 // write bitstream header
869 BitBufferWrite (bitstream, 0, 12) ;
870 BitBufferWrite (bitstream, (partialFrame << 3) | (bytesShifted << 1), 4) ;
871 if (partialFrame)
872 BitBufferWrite (bitstream, numSamples, 32) ;
873 BitBufferWrite (bitstream, 0, 16) ; // mixBits = mixRes = 0
874
875 // write the params and predictor coefs
876 numU = bestU ;
877 BitBufferWrite (bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8) ; // modeU = 0
878 BitBufferWrite (bitstream, (pbFactor << 5) | numU, 8) ;
879 for (indx = 0 ; indx < numU ; indx++)
880 BitBufferWrite (bitstream, coefsU [numU-1][indx], 16) ;
881
882 // if shift active, write the interleaved shift buffers
883 if (bytesShifted != 0)
884 {
885 for (indx = 0 ; indx < numSamples ; indx++)
886 BitBufferWrite (bitstream, p->mShiftBufferUV [indx], shift) ;
887 }
888
889 // run the dynamic predictor with the best result
890 pc_block (p->mMixBufferU, p->mPredictorU, numSamples, coefsU [numU-1], numU, chanBits, DENSHIFT_DEFAULT) ;
891
892 // do lossless compression
893 set_standard_ag_params (&agParams, numSamples, numSamples) ;
894 status = dyn_comp (&agParams, p->mPredictorU, bitstream, numSamples, chanBits, &bits1) ;
895 //AssertNoErr (status) ;
896
897
898 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
899 chuck it and do an escape packet
900 */
901 minBits = BitBufferGetPosition (bitstream) - BitBufferGetPosition (&startBits) ;
902 if (minBits >= escapeBits)
903 {
904 *bitstream = startBits ; // reset bitstream state
905 doEscape = true ;
906 printf ("compressed frame too big: %u vs. %u\n", minBits, escapeBits) ;
907 }
908 }
909
910 if (doEscape == true)
911 {
912 // write bitstream header and coefs
913 BitBufferWrite (bitstream, 0, 12) ;
914 BitBufferWrite (bitstream, (partialFrame << 3) | 1, 4) ; // LSB = 1 means "frame not compressed"
915 if (partialFrame)
916 BitBufferWrite (bitstream, numSamples, 32) ;
917
918 // just copy the input data to the output buffer
919 switch (p->mBitDepth)
920 {
921 case 16:
922 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride)
923 BitBufferWrite (bitstream, inputBuffer [indx] >> 16, 16) ;
924 break ;
925 case 20:
926 // convert 20-bit data to 32-bit for simplicity
927 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride)
928 BitBufferWrite (bitstream, inputBuffer [indx] >> 12, 20) ;
929 break ;
930 case 24:
931 // convert 24-bit data to 32-bit for simplicity
932 for (indx = 0, indx2 = 0 ; indx < numSamples ; indx++, indx2 += stride)
933 {
934 p->mMixBufferU [indx] = inputBuffer [indx2] >> 8 ;
935 BitBufferWrite (bitstream, p->mMixBufferU [indx], 24) ;
936 }
937 break ;
938 case 32:
939 for (indx = 0 ; indx < (numSamples * stride) ; indx += stride)
940 BitBufferWrite (bitstream, inputBuffer [indx], 32) ;
941 break ;
942 }
943 #if VERBOSE_DEBUG
944 DebugMsg ("escape!: %u vs %u\n", minBits, (numSamples * p->mBitDepth)) ;
945 #endif
946 }
947
948 Exit:
949 return status ;
950 }
951
952 #if PRAGMA_MARK
953 #pragma mark -
954 #endif
955
956 /*
957 Encode ()
958 - encode the next block of samples
959 */
960 int32_t
alac_encode(ALAC_ENCODER * p,uint32_t numSamples,const int32_t * theReadBuffer,unsigned char * theWriteBuffer,uint32_t * ioNumBytes)961 alac_encode (ALAC_ENCODER *p, uint32_t numSamples,
962 const int32_t * theReadBuffer, unsigned char * theWriteBuffer, uint32_t * ioNumBytes)
963 {
964 uint32_t outputSize ;
965 BitBuffer bitstream ;
966 int32_t status ;
967 uint32_t numChannels = p->mNumChannels ;
968
969 // make sure we handle this bit-depth before we get going
970 RequireAction ((p->mBitDepth == 16) || (p->mBitDepth == 20) || (p->mBitDepth == 24) || (p->mBitDepth == 32), return kALAC_ParamError ;) ;
971
972 // create a bit buffer structure pointing to our output buffer
973 BitBufferInit (&bitstream, theWriteBuffer, p->mMaxOutputBytes) ;
974
975 if (numChannels == 2)
976 {
977 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
978 BitBufferWrite (&bitstream, ID_CPE, 3) ;
979 BitBufferWrite (&bitstream, 0, 4) ;
980
981 // encode stereo input buffer
982 if (p->mFastMode == false)
983 status = EncodeStereo (p, &bitstream, theReadBuffer, 2, 0, numSamples) ;
984 else
985 status = EncodeStereoFast (p, &bitstream, theReadBuffer, 2, 0, numSamples) ;
986 RequireNoErr (status, goto Exit ;) ;
987 }
988 else if (numChannels == 1)
989 {
990 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
991 BitBufferWrite (&bitstream, ID_SCE, 3) ;
992 BitBufferWrite (&bitstream, 0, 4) ;
993
994 // encode mono input buffer
995 status = EncodeMono (p, &bitstream, theReadBuffer, 1, 0, numSamples) ;
996 RequireNoErr (status, goto Exit ;) ;
997 }
998 else
999 {
1000 const int32_t * inputBuffer ;
1001 uint32_t tag ;
1002 uint32_t channelIndex ;
1003 uint8_t stereoElementTag ;
1004 uint8_t monoElementTag ;
1005 uint8_t lfeElementTag ;
1006
1007 inputBuffer = theReadBuffer ;
1008
1009 stereoElementTag = 0 ;
1010 monoElementTag = 0 ;
1011 lfeElementTag = 0 ;
1012
1013 for (channelIndex = 0 ; channelIndex < numChannels ;)
1014 {
1015 tag = (sChannelMaps [numChannels - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3) ;
1016
1017 BitBufferWrite (&bitstream, tag, 3) ;
1018 switch (tag)
1019 {
1020 case ID_SCE:
1021 // mono
1022 BitBufferWrite (&bitstream, monoElementTag, 4) ;
1023
1024 status = EncodeMono (p, &bitstream, inputBuffer, numChannels, channelIndex, numSamples) ;
1025
1026 inputBuffer += 1 ;
1027 channelIndex++ ;
1028 monoElementTag++ ;
1029 break ;
1030
1031 case ID_CPE:
1032 // stereo
1033 BitBufferWrite (&bitstream, stereoElementTag, 4) ;
1034
1035 status = EncodeStereo (p, &bitstream, inputBuffer, numChannels, channelIndex, numSamples) ;
1036
1037 inputBuffer += 2 ;
1038 channelIndex += 2 ;
1039 stereoElementTag++ ;
1040 break ;
1041
1042 case ID_LFE:
1043 // LFE channel (subwoofer)
1044 BitBufferWrite (&bitstream, lfeElementTag, 4) ;
1045
1046 status = EncodeMono (p, &bitstream, inputBuffer, numChannels, channelIndex, numSamples) ;
1047
1048 inputBuffer += 1 ;
1049 channelIndex++ ;
1050 lfeElementTag++ ;
1051 break ;
1052
1053 default:
1054 printf ("That ain't right! (%u)\n", tag) ;
1055 status = kALAC_ParamError ;
1056 goto Exit ;
1057 }
1058
1059 RequireNoErr (status, goto Exit ;) ;
1060 }
1061 }
1062
1063 #if VERBOSE_DEBUG
1064 {
1065 // if there is room left in the output buffer, add some random fill data to test decoder
1066 int32_t bitsLeft ;
1067 int32_t bytesLeft ;
1068
1069 bitsLeft = BitBufferGetPosition (&bitstream) - 3 ; // - 3 for ID_END tag
1070 bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8) ;
1071
1072 if ((bytesLeft > 20) && ((bytesLeft & 0x4u) != 0))
1073 AddFiller (&bitstream, bytesLeft) ;
1074 }
1075 #endif
1076
1077 // add 3-bit frame end tag: ID_END
1078 BitBufferWrite (&bitstream, ID_END, 3) ;
1079
1080 // byte-align the output data
1081 BitBufferByteAlign (&bitstream, true) ;
1082
1083 outputSize = BitBufferGetPosition (&bitstream) / 8 ;
1084 //Assert (outputSize <= mMaxOutputBytes) ;
1085
1086
1087 // all good, let iTunes know what happened and remember the total number of input sample frames
1088 *ioNumBytes = outputSize ;
1089 //mEncodedFrames += encodeMsg->numInputSamples ;
1090
1091 // gather encoding stats
1092 p->mTotalBytesGenerated += outputSize ;
1093 p->mMaxFrameBytes = MAX (p->mMaxFrameBytes, outputSize) ;
1094
1095 status = ALAC_noErr ;
1096
1097 Exit:
1098 return status ;
1099 }
1100
1101
1102 #if PRAGMA_MARK
1103 #pragma mark -
1104 #endif
1105
1106 /*
1107 GetConfig ()
1108 */
1109 void
GetConfig(ALAC_ENCODER * p,ALACSpecificConfig * config)1110 GetConfig (ALAC_ENCODER *p, ALACSpecificConfig * config)
1111 {
1112 config->frameLength = Swap32NtoB (p->mFrameSize) ;
1113 config->compatibleVersion = (uint8_t) kALACCompatibleVersion ;
1114 config->bitDepth = (uint8_t) p->mBitDepth ;
1115 config->pb = (uint8_t) PB0 ;
1116 config->kb = (uint8_t) KB0 ;
1117 config->mb = (uint8_t) MB0 ;
1118 config->numChannels = (uint8_t) p->mNumChannels ;
1119 config->maxRun = Swap16NtoB ((uint16_t) MAX_RUN_DEFAULT) ;
1120 config->maxFrameBytes = Swap32NtoB (p->mMaxFrameBytes) ;
1121 config->avgBitRate = Swap32NtoB (p->mAvgBitRate) ;
1122 config->sampleRate = Swap32NtoB (p->mOutputSampleRate) ;
1123 }
1124
1125 uint32_t
alac_get_magic_cookie_size(uint32_t inNumChannels)1126 alac_get_magic_cookie_size (uint32_t inNumChannels)
1127 {
1128 if (inNumChannels > 2)
1129 {
1130 return sizeof (ALACSpecificConfig) + kChannelAtomSize + sizeof (ALACAudioChannelLayout) ;
1131 }
1132 else
1133 {
1134 return sizeof (ALACSpecificConfig) ;
1135 }
1136 }
1137
1138 void
alac_get_magic_cookie(ALAC_ENCODER * p,void * outCookie,uint32_t * ioSize)1139 alac_get_magic_cookie (ALAC_ENCODER *p, void * outCookie, uint32_t * ioSize)
1140 {
1141 ALACSpecificConfig theConfig = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } ;
1142 ALACAudioChannelLayout theChannelLayout = { 0, 0, 0 } ;
1143 uint8_t theChannelAtom [kChannelAtomSize] = { 0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0 } ;
1144 uint32_t theCookieSize = sizeof (ALACSpecificConfig) ;
1145 uint8_t * theCookiePointer = (uint8_t *) outCookie ;
1146
1147 GetConfig (p, &theConfig) ;
1148 if (theConfig.numChannels > 2)
1149 {
1150 theChannelLayout.mChannelLayoutTag = Swap32NtoB (ALACChannelLayoutTags [theConfig.numChannels - 1]) ;
1151 theCookieSize += (sizeof (ALACAudioChannelLayout) + kChannelAtomSize) ;
1152 }
1153 if (*ioSize >= theCookieSize)
1154 {
1155 memcpy (theCookiePointer, &theConfig, sizeof (ALACSpecificConfig)) ;
1156 theChannelAtom [3] = (sizeof (ALACAudioChannelLayout) + kChannelAtomSize) ;
1157 if (theConfig.numChannels > 2)
1158 {
1159 theCookiePointer += sizeof (ALACSpecificConfig) ;
1160 memcpy (theCookiePointer, theChannelAtom, kChannelAtomSize) ;
1161 theCookiePointer += kChannelAtomSize ;
1162 memcpy (theCookiePointer, &theChannelLayout, sizeof (ALACAudioChannelLayout)) ;
1163 }
1164 *ioSize = theCookieSize ;
1165 }
1166 else
1167 {
1168 *ioSize = 0 ; // no incomplete cookies
1169 }
1170 }
1171
1172 /*
1173 alac_encoder_init ()
1174 - initialize the encoder component with the current config
1175 */
1176 int32_t
alac_encoder_init(ALAC_ENCODER * p,uint32_t samplerate,uint32_t channels,uint32_t format_flags,uint32_t frameSize)1177 alac_encoder_init (ALAC_ENCODER *p, uint32_t samplerate, uint32_t channels, uint32_t format_flags, uint32_t frameSize)
1178 {
1179 int32_t status ;
1180
1181 p->mFrameSize = (frameSize > 0 && frameSize <= ALAC_FRAME_LENGTH) ? frameSize : ALAC_FRAME_LENGTH ;
1182
1183 p->mOutputSampleRate = samplerate ;
1184 p->mNumChannels = channels ;
1185 switch (format_flags)
1186 {
1187 case 1:
1188 p->mBitDepth = 16 ;
1189 break ;
1190 case 2:
1191 p->mBitDepth = 20 ;
1192 break ;
1193 case 3:
1194 p->mBitDepth = 24 ;
1195 break ;
1196 case 4:
1197 p->mBitDepth = 32 ;
1198 break ;
1199 default:
1200 break ;
1201 }
1202
1203 // set up default encoding parameters and state
1204 // - note: mFrameSize is set in the constructor or via alac_set_frame_size () which must be called before this routine
1205 for (uint32_t indx = 0 ; indx < kALACMaxChannels ; indx++)
1206 p->mLastMixRes [indx] = kDefaultMixRes ;
1207
1208 // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1209 // but note that this can be bigger than the input size!
1210 // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
1211 p->mMaxOutputBytes = p->mFrameSize * p->mNumChannels * ((10 + kMaxSampleSize) / 8) + 1 ;
1212
1213 status = ALAC_noErr ;
1214
1215 // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
1216 for (int32_t channel = 0 ; channel < (int32_t) p->mNumChannels ; channel++)
1217 {
1218 for (int32_t search = 0 ; search < kALACMaxSearches ; search++)
1219 {
1220 init_coefs (p->mCoefsU [channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs) ;
1221 init_coefs (p->mCoefsV [channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs) ;
1222 }
1223 }
1224
1225 return status ;
1226 }
1227
1228 /*
1229 alac_get_source_format ()
1230 - given the input format, return one of our supported formats
1231 */
1232 void
alac_get_source_format(ALAC_ENCODER * p,const AudioFormatDescription * source,AudioFormatDescription * output)1233 alac_get_source_format (ALAC_ENCODER *p, const AudioFormatDescription * source, AudioFormatDescription * output)
1234 {
1235 (void) output ;
1236 // default is 16-bit native endian
1237 // - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1238 // to encode to 16-bit since the source was lossy in the first place
1239 // - note: if not a supported bit depth, find the closest supported bit depth to the input one
1240 if ((source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) || (source->mBitsPerChannel <= 16))
1241 p->mBitDepth = 16 ;
1242 else if (source->mBitsPerChannel <= 20)
1243 p->mBitDepth = 20 ;
1244 else if (source->mBitsPerChannel <= 24)
1245 p->mBitDepth = 24 ;
1246 else
1247 p->mBitDepth = 32 ;
1248
1249 // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1250 // and sample rate were specified when we were configured
1251 /*
1252 MakeUncompressedAudioFormat (mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output) ;
1253 */
1254 }
1255
1256
1257
1258 #if VERBOSE_DEBUG
1259
1260 #if PRAGMA_MARK
1261 #pragma mark -
1262 #endif
1263
1264 /*
1265 AddFiller ()
1266 - add fill and data stream elements to the bitstream to test the decoder
1267 */
AddFiller(BitBuffer * bits,int32_t numBytes)1268 static void AddFiller (BitBuffer * bits, int32_t numBytes)
1269 {
1270 uint8_t tag ;
1271 int32_t indx ;
1272
1273 // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1274 numBytes -= 6 ;
1275 if (numBytes <= 0)
1276 return ;
1277
1278 // randomly pick Fill or Data Stream Element based on numBytes requested
1279 tag = (numBytes & 0x8) ? ID_FIL : ID_DSE ;
1280
1281 BitBufferWrite (bits, tag, 3) ;
1282 if (tag == ID_FIL)
1283 {
1284 // can't write more than 269 bytes in a fill element
1285 numBytes = (numBytes > 269) ? 269 : numBytes ;
1286
1287 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1288 if (numBytes >= 15)
1289 {
1290 uint16_t extensionSize ;
1291
1292 BitBufferWrite (bits, 15, 4) ;
1293
1294 // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1295 // - otherwise, there's no way to represent 15
1296 // - for example, to really mean 15 bytes you must encode extensionSize = 1
1297 // - why it's not like data stream elements I have no idea
1298 extensionSize = (numBytes - 15) + 1 ;
1299 //Assert (extensionSize <= 255) ;
1300 BitBufferWrite (bits, extensionSize, 8) ;
1301 }
1302 else
1303 BitBufferWrite (bits, numBytes, 4) ;
1304
1305 BitBufferWrite (bits, 0x10, 8) ; // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1306 for (indx = 0 ; indx < (numBytes - 1) ; indx++)
1307 BitBufferWrite (bits, 0xa5, 8) ; // fill_byte = b10100101 = 0xa5
1308 }
1309 else
1310 {
1311 // can't write more than 510 bytes in a data stream element
1312 numBytes = (numBytes > 510) ? 510 : numBytes ;
1313
1314 BitBufferWrite (bits, 0, 4) ; // element instance tag
1315 BitBufferWrite (bits, 1, 1) ; // byte-align flag = true
1316
1317 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1318 if (numBytes >= 255)
1319 {
1320 BitBufferWrite (bits, 255, 8) ;
1321 BitBufferWrite (bits, numBytes - 255, 8) ;
1322 }
1323 else
1324 BitBufferWrite (bits, numBytes, 8) ;
1325
1326 BitBufferByteAlign (bits, true) ; // byte-align with zeros
1327
1328 for (indx = 0 ; indx < numBytes ; indx++)
1329 BitBufferWrite (bits, 0x5a, 8) ;
1330 }
1331 }
1332
1333 #endif /* VERBOSE_DEBUG */
1334