1 /*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
13 #define AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
14
15 #include <smmintrin.h>
16
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20
av1_round_shift_32_sse4_1(__m128i vec,int bit)21 static INLINE __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) {
22 __m128i tmp, round;
23 round = _mm_set1_epi32(1 << (bit - 1));
24 tmp = _mm_add_epi32(vec, round);
25 return _mm_srai_epi32(tmp, bit);
26 }
27
av1_round_shift_array_32_sse4_1(__m128i * input,__m128i * output,const int size,const int bit)28 static INLINE void av1_round_shift_array_32_sse4_1(__m128i *input,
29 __m128i *output,
30 const int size,
31 const int bit) {
32 if (bit > 0) {
33 int i;
34 for (i = 0; i < size; i++) {
35 output[i] = av1_round_shift_32_sse4_1(input[i], bit);
36 }
37 } else {
38 int i;
39 for (i = 0; i < size; i++) {
40 output[i] = _mm_slli_epi32(input[i], -bit);
41 }
42 }
43 }
44
av1_round_shift_rect_array_32_sse4_1(__m128i * input,__m128i * output,const int size,const int bit,const int val)45 static INLINE void av1_round_shift_rect_array_32_sse4_1(__m128i *input,
46 __m128i *output,
47 const int size,
48 const int bit,
49 const int val) {
50 const __m128i sqrt2 = _mm_set1_epi32(val);
51 if (bit > 0) {
52 int i;
53 for (i = 0; i < size; i++) {
54 const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit);
55 const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
56 output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
57 }
58 } else {
59 int i;
60 for (i = 0; i < size; i++) {
61 const __m128i r0 = _mm_slli_epi32(input[i], -bit);
62 const __m128i r1 = _mm_mullo_epi32(sqrt2, r0);
63 output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits);
64 }
65 }
66 }
67
68 #ifdef __cplusplus
69 }
70 #endif
71
72 #endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_
73