1 /* Copyright (c) 2001-2011 Timothy B. Terriberry
2 */
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "entcode.h"
33 #include "arch.h"
34
35 #if !defined(EC_CLZ)
/*This is a fallback for systems where we don't know how to access
   a BSR or CLZ instruction (see ecintrin.h).
  If you are optimizing Opus on a new platform and it has a native CLZ or
   BSR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
   an easy performance win.*/
int ec_ilog(opus_uint32 _v){
  /*Integer logarithm: returns 0 when _v is zero, otherwise one plus the index
     of the highest set bit among bits 0..31 (so 1 for _v==1, 2 for _v==2 or 3,
     and 32 when bit 31 is set).
    The code is deliberately branch-free: the original author's note reports
     that on a Pentium M this form tested fastest on 1,000,000,000 random
     32-bit integers, edging out a similar version with branches and a
     256-entry LUT version.*/
  int nbits;
  int shift;
  /*Any non-zero input needs at least one bit.*/
  nbits=(_v!=0);
  /*Binary search for the top set bit: whenever the upper half of the current
     window is non-empty, move it down and record the distance moved.
    The distances (16, 8, 4, 2) are distinct powers of two, so OR-ing them
     into the result is the same as adding them.*/
  shift=((_v&0xFFFF0000)!=0)<<4;
  _v>>=shift;
  nbits|=shift;
  shift=((_v&0xFF00)!=0)<<3;
  _v>>=shift;
  nbits|=shift;
  shift=((_v&0xF0)!=0)<<2;
  _v>>=shift;
  nbits|=shift;
  shift=((_v&0xC)!=0)<<1;
  _v>>=shift;
  nbits|=shift;
  /*Only bits 0 and 1 can still be set; bit 1 contributes the last unit.*/
  nbits+=((_v&0x2)!=0);
  return nbits;
}
63 #endif
64
65 #if 1
66 /* This is a faster version of ec_tell_frac() that takes advantage
67 of the low (1/8 bit) resolution to use just a linear function
68 followed by a lookup to determine the exact transition thresholds. */
ec_tell_frac(ec_ctx * _this)69 opus_uint32 ec_tell_frac(ec_ctx *_this){
70 static const unsigned correction[8] =
71 {35733, 38967, 42495, 46340,
72 50535, 55109, 60097, 65535};
73 opus_uint32 nbits;
74 opus_uint32 r;
75 int l;
76 unsigned b;
77 nbits=_this->nbits_total<<BITRES;
78 l=EC_ILOG(_this->rng);
79 r=_this->rng>>(l-16);
80 b = (r>>12)-8;
81 b += r>correction[b];
82 l = (l<<3)+b;
83 return nbits-l;
84 }
85 #else
ec_tell_frac(ec_ctx * _this)86 opus_uint32 ec_tell_frac(ec_ctx *_this){
87 opus_uint32 nbits;
88 opus_uint32 r;
89 int l;
90 int i;
91 /*To handle the non-integral number of bits still left in the encoder/decoder
92 state, we compute the worst-case number of bits of val that must be
93 encoded to ensure that the value is inside the range for any possible
94 subsequent bits.
95 The computation here is independent of val itself (the decoder does not
96 even track that value), even though the real number of bits used after
97 ec_enc_done() may be 1 smaller if rng is a power of two and the
98 corresponding trailing bits of val are all zeros.
99 If we did try to track that special case, then coding a value with a
100 probability of 1/(1<<n) might sometimes appear to use more than n bits.
101 This may help explain the surprising result that a newly initialized
102 encoder or decoder claims to have used 1 bit.*/
103 nbits=_this->nbits_total<<BITRES;
104 l=EC_ILOG(_this->rng);
105 r=_this->rng>>(l-16);
106 for(i=BITRES;i-->0;){
107 int b;
108 r=r*r>>15;
109 b=(int)(r>>16);
110 l=l<<1|b;
111 r>>=b;
112 }
113 return nbits-l;
114 }
115 #endif
116
117 #ifdef USE_SMALL_DIV_TABLE
118 /* Result of 2^32/(2*i+1), except for i=0. */
119 const opus_uint32 SMALL_DIV_TABLE[129] = {
120 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
121 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
122 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
123 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
124 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
125 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
126 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
127 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
128 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
129 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
130 0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
131 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
132 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
133 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
134 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
135 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
136 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
137 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
138 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
139 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
140 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
141 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
142 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
143 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
144 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
145 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
146 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
147 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
148 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
149 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
150 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
151 0x01073260, 0x0105197F, 0x0103091B, 0x01010101
152 };
153 #endif
154