• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <immintrin.h>
13 
14 #include "config/aom_dsp_rtcd.h"
15 #include "aom_dsp/aom_dsp_common.h"
16 #include "aom_dsp/fft_common.h"
17 
// SSE2 helpers that are only declared here; their definitions are not in this
// file. The AVX2 entry points below hand them to aom_fft_2d_gen() and
// aom_ifft_2d_gen() as function arguments instead of providing AVX2-specific
// versions.
extern void aom_transpose_float_sse2(const float *A, float *B, int n);
extern void aom_fft_unpack_2d_output_sse2(const float *col_fft, float *output,
                                          int n);
21 
// Generate the 1d forward transforms for float using _mm256.
// Every invocation passes the same building blocks to the generator macro:
// `static INLINE void` as the function qualifiers/return type, the suffix
// `avx2`, element type float, vector type __m256, and the AVX load, store,
// broadcast, add, subtract and multiply intrinsics. The 2D wrappers in this
// file refer to the results as aom_fft1d_8_avx2, aom_fft1d_16_avx2 and
// aom_fft1d_32_avx2. The macro bodies are not in this file (presumably
// aom_dsp/fft_common.h -- confirm).
// NOTE(review): _mm256_load_ps/_mm256_store_ps are the aligned variants, so
// the buffers the generated kernels touch presumably must be 32-byte aligned;
// verify against the callers.
GEN_FFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
          _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
          _mm256_mul_ps);
GEN_FFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
           _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
           _mm256_mul_ps);
GEN_FFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
           _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
           _mm256_mul_ps);
32 
aom_fft8x8_float_avx2(const float * input,float * temp,float * output)33 void aom_fft8x8_float_avx2(const float *input, float *temp, float *output) {
34   aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_avx2,
35                  aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
36 }
37 
aom_fft16x16_float_avx2(const float * input,float * temp,float * output)38 void aom_fft16x16_float_avx2(const float *input, float *temp, float *output) {
39   aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_avx2,
40                  aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
41 }
42 
aom_fft32x32_float_avx2(const float * input,float * temp,float * output)43 void aom_fft32x32_float_avx2(const float *input, float *temp, float *output) {
44   aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_avx2,
45                  aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
46 }
47 
// Generate the 1d inverse transforms for float using _mm256.
// The argument list mirrors the forward GEN_FFT_* invocations in this file:
// `static INLINE void`, the suffix `avx2`, element type float, vector type
// __m256, and the AVX load, store, broadcast, add, subtract and multiply
// intrinsics. The inverse 2D wrappers in this file refer to the results as
// aom_ifft1d_8_avx2, aom_ifft1d_16_avx2 and aom_ifft1d_32_avx2. The macro
// bodies are not in this file (presumably aom_dsp/fft_common.h -- confirm).
// NOTE(review): _mm256_load_ps/_mm256_store_ps are the aligned variants, so
// the buffers the generated kernels touch presumably must be 32-byte aligned;
// verify against the callers.
GEN_IFFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
           _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
           _mm256_mul_ps);
GEN_IFFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
            _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
            _mm256_mul_ps);
GEN_IFFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
            _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
            _mm256_mul_ps);
58 
aom_ifft8x8_float_avx2(const float * input,float * temp,float * output)59 void aom_ifft8x8_float_avx2(const float *input, float *temp, float *output) {
60   aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_avx2,
61                   aom_ifft1d_8_avx2, aom_transpose_float_sse2, 8);
62 }
63 
aom_ifft16x16_float_avx2(const float * input,float * temp,float * output)64 void aom_ifft16x16_float_avx2(const float *input, float *temp, float *output) {
65   aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
66                   aom_fft1d_16_avx2, aom_ifft1d_16_avx2,
67                   aom_transpose_float_sse2, 8);
68 }
69 
aom_ifft32x32_float_avx2(const float * input,float * temp,float * output)70 void aom_ifft32x32_float_avx2(const float *input, float *temp, float *output) {
71   aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
72                   aom_fft1d_32_avx2, aom_ifft1d_32_avx2,
73                   aom_transpose_float_sse2, 8);
74 }
75