1 /*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <immintrin.h>
13
14 #include "config/aom_dsp_rtcd.h"
15 #include "aom_dsp/aom_dsp_common.h"
16 #include "aom_dsp/fft_common.h"
17
// Prototypes for the *_sse2 helpers that this file passes as the transpose and
// unpack callbacks to the 2D drivers below. They are only declared here; no
// definition appears in this file.
extern void aom_transpose_float_sse2(const float *A, float *B, int n);
extern void aom_fft_unpack_2d_output_sse2(const float *col_fft, float *output,
                                          int n);
21
// Generate the 1d forward transforms for float using _mm256.
//
// Each invocation is handed, in order: the function qualifiers/return type,
// the name suffix (avx2), the scalar type (float), the vector type (__m256),
// and the intrinsics used for load, store, broadcast (set1), add, subtract
// and multiply. The 2D wrappers in this file reference
// aom_fft1d_{8,16,32}_avx2, which are expected to come from these expansions.
// NOTE(review): argument roles are inferred from their names and order; the
// GEN_FFT_* definitions are not visible here -- confirm in
// aom_dsp/fft_common.h.
// NOTE(review): _mm256_load_ps / _mm256_store_ps are the aligned variants, so
// the buffers given to the generated kernels presumably must be 32-byte
// aligned -- confirm against the macro bodies and the callers.
GEN_FFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
          _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
          _mm256_mul_ps);
GEN_FFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
           _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
           _mm256_mul_ps);
GEN_FFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
           _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
           _mm256_mul_ps);
32
aom_fft8x8_float_avx2(const float * input,float * temp,float * output)33 void aom_fft8x8_float_avx2(const float *input, float *temp, float *output) {
34 aom_fft_2d_gen(input, temp, output, 8, aom_fft1d_8_avx2,
35 aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
36 }
37
aom_fft16x16_float_avx2(const float * input,float * temp,float * output)38 void aom_fft16x16_float_avx2(const float *input, float *temp, float *output) {
39 aom_fft_2d_gen(input, temp, output, 16, aom_fft1d_16_avx2,
40 aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
41 }
42
aom_fft32x32_float_avx2(const float * input,float * temp,float * output)43 void aom_fft32x32_float_avx2(const float *input, float *temp, float *output) {
44 aom_fft_2d_gen(input, temp, output, 32, aom_fft1d_32_avx2,
45 aom_transpose_float_sse2, aom_fft_unpack_2d_output_sse2, 8);
46 }
47
// Generate the 1d inverse transforms for float using _mm256.
//
// The argument list mirrors the forward GEN_FFT_* invocations in this file:
// qualifiers/return type, name suffix (avx2), scalar type (float), vector
// type (__m256), then the load, store, broadcast (set1), add, subtract and
// multiply intrinsics. The inverse 2D wrappers in this file reference
// aom_ifft1d_{8,16,32}_avx2, which are expected to come from these expansions.
// NOTE(review): argument roles are inferred from their names and order; the
// GEN_IFFT_* definitions are not visible here -- confirm in
// aom_dsp/fft_common.h.
GEN_IFFT_8(static INLINE void, avx2, float, __m256, _mm256_load_ps,
           _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
           _mm256_mul_ps);
GEN_IFFT_16(static INLINE void, avx2, float, __m256, _mm256_load_ps,
            _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
            _mm256_mul_ps);
GEN_IFFT_32(static INLINE void, avx2, float, __m256, _mm256_load_ps,
            _mm256_store_ps, _mm256_set1_ps, _mm256_add_ps, _mm256_sub_ps,
            _mm256_mul_ps);
58
aom_ifft8x8_float_avx2(const float * input,float * temp,float * output)59 void aom_ifft8x8_float_avx2(const float *input, float *temp, float *output) {
60 aom_ifft_2d_gen(input, temp, output, 8, aom_fft1d_8_float, aom_fft1d_8_avx2,
61 aom_ifft1d_8_avx2, aom_transpose_float_sse2, 8);
62 }
63
aom_ifft16x16_float_avx2(const float * input,float * temp,float * output)64 void aom_ifft16x16_float_avx2(const float *input, float *temp, float *output) {
65 aom_ifft_2d_gen(input, temp, output, 16, aom_fft1d_16_float,
66 aom_fft1d_16_avx2, aom_ifft1d_16_avx2,
67 aom_transpose_float_sse2, 8);
68 }
69
aom_ifft32x32_float_avx2(const float * input,float * temp,float * output)70 void aom_ifft32x32_float_avx2(const float *input, float *temp, float *output) {
71 aom_ifft_2d_gen(input, temp, output, 32, aom_fft1d_32_float,
72 aom_fft1d_32_avx2, aom_ifft1d_32_avx2,
73 aom_transpose_float_sse2, 8);
74 }
75