1 /* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19 #ifndef AVUTIL_FLOAT_DSP_H 20 #define AVUTIL_FLOAT_DSP_H 21 22 #include "config.h" 23 24 typedef struct AVFloatDSPContext { 25 /** 26 * Calculate the entry wise product of two vectors of floats and store the result in 27 * a vector of floats. 28 * 29 * @param dst output vector 30 * constraints: 32-byte aligned 31 * @param src0 first input vector 32 * constraints: 32-byte aligned 33 * @param src1 second input vector 34 * constraints: 32-byte aligned 35 * @param len number of elements in the input 36 * constraints: multiple of 16 37 */ 38 void (*vector_fmul)(float *dst, const float *src0, const float *src1, 39 int len); 40 41 /** 42 * Multiply a vector of floats by a scalar float and add to 43 * destination vector. Source and destination vectors must 44 * overlap exactly or not at all. 45 * 46 * @param dst result vector 47 * constraints: 32-byte aligned 48 * @param src input vector 49 * constraints: 32-byte aligned 50 * @param mul scalar value 51 * @param len length of vector 52 * constraints: multiple of 16 53 */ 54 void (*vector_fmac_scalar)(float *dst, const float *src, float mul, 55 int len); 56 57 /** 58 * Multiply a vector of doubles by a scalar double and add to 59 * destination vector. Source and destination vectors must 60 * overlap exactly or not at all. 61 * 62 * @param dst result vector 63 * constraints: 32-byte aligned 64 * @param src input vector 65 * constraints: 32-byte aligned 66 * @param mul scalar value 67 * @param len length of vector 68 * constraints: multiple of 16 69 */ 70 void (*vector_dmac_scalar)(double *dst, const double *src, double mul, 71 int len); 72 73 /** 74 * Multiply a vector of floats by a scalar float. Source and 75 * destination vectors must overlap exactly or not at all. 76 * 77 * @param dst result vector 78 * constraints: 16-byte aligned 79 * @param src input vector 80 * constraints: 16-byte aligned 81 * @param mul scalar value 82 * @param len length of vector 83 * constraints: multiple of 4 84 */ 85 void (*vector_fmul_scalar)(float *dst, const float *src, float mul, 86 int len); 87 88 /** 89 * Multiply a vector of double by a scalar double. Source and 90 * destination vectors must overlap exactly or not at all. 91 * 92 * @param dst result vector 93 * constraints: 32-byte aligned 94 * @param src input vector 95 * constraints: 32-byte aligned 96 * @param mul scalar value 97 * @param len length of vector 98 * constraints: multiple of 8 99 */ 100 void (*vector_dmul_scalar)(double *dst, const double *src, double mul, 101 int len); 102 103 /** 104 * Overlap/add with window function. 105 * Used primarily by MDCT-based audio codecs. 106 * Source and destination vectors must overlap exactly or not at all. 107 * 108 * @param dst result vector 109 * constraints: 16-byte aligned 110 * @param src0 first source vector 111 * constraints: 16-byte aligned 112 * @param src1 second source vector 113 * constraints: 16-byte aligned 114 * @param win half-window vector 115 * constraints: 16-byte aligned 116 * @param len length of vector 117 * constraints: multiple of 4 118 */ 119 void (*vector_fmul_window)(float *dst, const float *src0, 120 const float *src1, const float *win, int len); 121 122 /** 123 * Calculate the entry wise product of two vectors of floats, add a third vector of 124 * floats and store the result in a vector of floats. 125 * 126 * @param dst output vector 127 * constraints: 32-byte aligned 128 * @param src0 first input vector 129 * constraints: 32-byte aligned 130 * @param src1 second input vector 131 * constraints: 32-byte aligned 132 * @param src2 third input vector 133 * constraints: 32-byte aligned 134 * @param len number of elements in the input 135 * constraints: multiple of 16 136 */ 137 void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, 138 const float *src2, int len); 139 140 /** 141 * Calculate the entry wise product of two vectors of floats, and store the result 142 * in a vector of floats. The second vector of floats is iterated over 143 * in reverse order. 144 * 145 * @param dst output vector 146 * constraints: 32-byte aligned 147 * @param src0 first input vector 148 * constraints: 32-byte aligned 149 * @param src1 second input vector 150 * constraints: 32-byte aligned 151 * @param len number of elements in the input 152 * constraints: multiple of 16 153 */ 154 void (*vector_fmul_reverse)(float *dst, const float *src0, 155 const float *src1, int len); 156 157 /** 158 * Calculate the sum and difference of two vectors of floats. 159 * 160 * @param v1 first input vector, sum output, 16-byte aligned 161 * @param v2 second input vector, difference output, 16-byte aligned 162 * @param len length of vectors, multiple of 4 163 */ 164 void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len); 165 166 /** 167 * Calculate the scalar product of two vectors of floats. 168 * 169 * @param v1 first vector, 16-byte aligned 170 * @param v2 second vector, 16-byte aligned 171 * @param len length of vectors, multiple of 4 172 * 173 * @return sum of elementwise products 174 */ 175 float (*scalarproduct_float)(const float *v1, const float *v2, int len); 176 177 /** 178 * Calculate the entry wise product of two vectors of doubles and store the result in 179 * a vector of doubles. 180 * 181 * @param dst output vector 182 * constraints: 32-byte aligned 183 * @param src0 first input vector 184 * constraints: 32-byte aligned 185 * @param src1 second input vector 186 * constraints: 32-byte aligned 187 * @param len number of elements in the input 188 * constraints: multiple of 16 189 */ 190 void (*vector_dmul)(double *dst, const double *src0, const double *src1, 191 int len); 192 } AVFloatDSPContext; 193 194 /** 195 * Return the scalar product of two vectors. 196 * 197 * @param v1 first input vector 198 * @param v2 first input vector 199 * @param len number of elements 200 * 201 * @return sum of elementwise products 202 */ 203 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); 204 205 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp); 206 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); 207 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); 208 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); 209 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp); 210 211 /** 212 * Allocate a float DSP context. 213 * 214 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant 215 */ 216 AVFloatDSPContext *avpriv_float_dsp_alloc(int strict); 217 218 #endif /* AVUTIL_FLOAT_DSP_H */ 219