• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2014, Cisco Systems, INC
2    Written by XiangMingZhu WeiZhou MinPeng YanWang
3 
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifndef MAIN_SSE_H
29 # define MAIN_SSE_H
30 
31 # ifdef HAVE_CONFIG_H
32 #  include "config.h"
33 # endif
34 
35 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
36 
37 void silk_VQ_WMat_EC_sse4_1(
38     opus_int8                   *ind,                           /* O    index of best codebook vector               */
39     opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
40     opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
41     opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
42     const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
43     const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
44     const opus_int8             *cb_Q7,                         /* I    codebook                                    */
45     const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
46     const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
47     const opus_int              subfr_len,                      /* I    number of samples per subframe              */
48     const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
49     const opus_int              L                               /* I    number of vectors in codebook               */
50 );
51 
/* Selection of the silk_VQ_WMat_EC() implementation:
 *   - OPUS_X86_PRESUME_SSE4_1: expands to a direct call of
 *     silk_VQ_WMat_EC_sse4_1(); `arch` is evaluated and then discarded.
 *   - else OPUS_HAVE_RTCD: calls through the SILK_VQ_WMAT_EC_IMPL[] table,
 *     indexed by (arch) & OPUS_ARCHMASK.
 *   - neither: this header defines no silk_VQ_WMat_EC macro.
 * NOTE(review): OVERRIDE_silk_VQ_WMat_EC presumably tells the generic SILK
 * header not to provide its own silk_VQ_WMat_EC -- confirm against that header. */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_VQ_WMat_EC
#   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                           subfr_len, max_gain_Q7, L, arch) \
    ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                          subfr_len, max_gain_Q7, L))

#  elif defined(OPUS_HAVE_RTCD)

/* Table of OPUS_ARCHMASK + 1 implementations; only declared here, the
 * definition lives in another translation unit. */
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
    opus_int8                   *ind,                           /* O    index of best codebook vector               */
    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
    const opus_int              L                               /* I    number of vectors in codebook               */
);

#   define OVERRIDE_silk_VQ_WMat_EC
#   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                           subfr_len, max_gain_Q7, L, arch) \
    ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                          subfr_len, max_gain_Q7, L))

#  endif
84 
85 void silk_NSQ_sse4_1(
86     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
87     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
88     SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
89     const opus_int16            x16[],                                        /* I    Input                           */
90     opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
91     const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],            /* I    Short term prediction coefs     */
92     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
93     const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
94     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
95     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
96     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
97     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
98     const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
99     const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
100     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
101 );
102 
/* Selection of the silk_NSQ() implementation:
 *   - OPUS_X86_PRESUME_SSE4_1: expands to a direct call of silk_NSQ_sse4_1();
 *     `arch` is evaluated and then discarded.
 *   - else OPUS_HAVE_RTCD: calls through the SILK_NSQ_IMPL[] table, indexed
 *     by (arch) & OPUS_ARCHMASK.
 *   - neither: this header defines no silk_NSQ macro.
 * Macro parameter names follow the silk_NSQ_sse4_1() prototype (x16, AR_Q13).
 * NOTE(review): OVERRIDE_silk_NSQ presumably tells the generic SILK header
 * not to provide its own silk_NSQ -- confirm against that header. */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_NSQ
#   define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  elif defined(OPUS_HAVE_RTCD)

/* Table of OPUS_ARCHMASK + 1 implementations; only declared here, the
 * definition lives in another translation unit. */
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    const opus_int16            x16[],                                        /* I    Input                           */
    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],            /* I    Short term prediction coefs     */
    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
);

#   define OVERRIDE_silk_NSQ
#   define silk_NSQ(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                   HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  endif
138 
139 void silk_NSQ_del_dec_sse4_1(
140     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
141     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
142     SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
143     const opus_int16            x16[],                                        /* I    Input                           */
144     opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
145     const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],            /* I    Short term prediction coefs     */
146     const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
147     const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
148     const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
149     const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
150     const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
151     const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
152     const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
153     const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
154     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
155 );
156 
/* Selection of the silk_NSQ_del_dec() implementation:
 *   - OPUS_X86_PRESUME_SSE4_1: expands to a direct call of
 *     silk_NSQ_del_dec_sse4_1(); `arch` is evaluated and then discarded.
 *   - else OPUS_HAVE_RTCD: calls through the SILK_NSQ_DEL_DEC_IMPL[] table,
 *     indexed by (arch) & OPUS_ARCHMASK.
 *   - neither: this header defines no silk_NSQ_del_dec macro.
 * NOTE(review): OVERRIDE_silk_NSQ_del_dec presumably tells the generic SILK
 * header not to provide its own silk_NSQ_del_dec -- confirm against that header. */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_NSQ_del_dec
#   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  elif defined(OPUS_HAVE_RTCD)

/* Table of OPUS_ARCHMASK + 1 implementations; only declared here, the
 * definition lives in another translation unit. */
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
    const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
    silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
    SideInfoIndices             *psIndices,                                   /* I/O  Quantization Indices            */
    const opus_int16            x16[],                                        /* I    Input                           */
    opus_int8                   pulses[],                                     /* O    Quantized pulse signal          */
    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],            /* I    Short term prediction coefs     */
    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    Long term prediction coefs      */
    const opus_int16            AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I    Noise shaping coefs             */
    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],            /* I    Long term shaping coefs         */
    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                     /* I    Spectral tilt                   */
    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                   /* I    Low frequency shaping coefs     */
    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                    /* I    Quantization step sizes         */
    const opus_int              pitchL[ MAX_NB_SUBFR ],                       /* I    Pitch lags                      */
    const opus_int              Lambda_Q10,                                   /* I    Rate/distortion tradeoff        */
    const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
);

#   define OVERRIDE_silk_NSQ_del_dec
#   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
    ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                           HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))

#  endif
192 
193 void silk_noise_shape_quantizer(
194     silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
195     opus_int            signalType,             /* I    Signal type                     */
196     const opus_int32    x_sc_Q10[],             /* I                                    */
197     opus_int8           pulses[],               /* O                                    */
198     opus_int16          xq[],                   /* O                                    */
199     opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
200     const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
201     const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
202     const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
203     opus_int            lag,                    /* I    Pitch lag                       */
204     opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
205     opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
206     opus_int32          LF_shp_Q14,             /* I                                    */
207     opus_int32          Gain_Q16,               /* I                                    */
208     opus_int            Lambda_Q10,             /* I                                    */
209     opus_int            offset_Q10,             /* I                                    */
210     opus_int            length,                 /* I    Input length                    */
211     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
212     opus_int            predictLPCOrder,        /* I    Prediction filter order         */
213     int                 arch                    /* I    Architecture                    */
214 );
215 
216 /**************************/
217 /* Noise level estimation */
218 /**************************/
219 void silk_VAD_GetNoiseLevels(
220     const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
221     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
222 );
223 
224 opus_int silk_VAD_GetSA_Q8_sse4_1(
225     silk_encoder_state *psEnC,
226     const opus_int16   pIn[]
227 );
228 
/* Selection of the silk_VAD_GetSA_Q8() implementation:
 *   - OPUS_X86_PRESUME_SSE4_1: expands to a direct call of
 *     silk_VAD_GetSA_Q8_sse4_1(); `arch` is evaluated and then discarded.
 *   - else OPUS_HAVE_RTCD: calls through the SILK_VAD_GETSA_Q8_IMPL[] table,
 *     indexed by (arch) & OPUS_ARCHMASK.
 *   - neither: this header defines no silk_VAD_GetSA_Q8 macro.
 * Both expansions yield the opus_int returned by the selected function.
 * NOTE(review): OVERRIDE_silk_VAD_GetSA_Q8 presumably tells the generic SILK
 * header not to provide its own silk_VAD_GetSA_Q8 -- confirm against that header. */
#  if defined(OPUS_X86_PRESUME_SSE4_1)

#   define OVERRIDE_silk_VAD_GetSA_Q8
#   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))

#  elif defined(OPUS_HAVE_RTCD)

/* Table of OPUS_ARCHMASK + 1 implementations; only declared here, the
 * definition lives in another translation unit. */
extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
     silk_encoder_state *psEnC,
     const opus_int16   pIn[]);

#   define OVERRIDE_silk_VAD_GetSA_Q8
#   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
      ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))

#  endif
245 
246 # endif
247 #endif
248