1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ideint_cac_ssse3.c
24 *
25 * @brief
* This file includes the definition of the combing artifact check function
* of the de-interlacer and some variants of it.
28 *
29 * @author
30 * Ittiam
31 *
32 * @par List of Functions:
33 * cac_4x8()
34 * ideint_cac()
35 *
36 * @remarks
* In the de-interlacer workspace, cac is not a separate assembly module, as
* it comes along with the de_int_decision() function. But in the C model, to
* keep things cleaner, it was made a separate function during
* cac experiments long after the assembly was written by Mudit.
41 *
42 *******************************************************************************
43 */
44 /*****************************************************************************/
45 /* File Includes */
46 /*****************************************************************************/
47 /* System include files */
48 #include <stdio.h>
49 #include <stdint.h>
50 #include <string.h>
51 #include <stdlib.h>
52 #include <immintrin.h>
53
54 /* User include files */
55 #include "icv_datatypes.h"
56 #include "icv_macros.h"
57 #include "icv.h"
58 #include "icv_variance.h"
59 #include "icv_sad.h"
60 #include "ideint.h"
61 #include "ideint_defs.h"
62 #include "ideint_structs.h"
63 #include "ideint_cac.h"
64
65 /**
66 *******************************************************************************
67 *
68 * @brief
69 * Combing artifact check function for 8x8 block
70 *
71 * @par Description
72 * Determines CAC for 8x8 block by calling 8x4 CAC function
73 *
74 * @param[in] pu1_top
75 * Top field
76 *
77 * @param[in] pu1_bot
78 * Bottom field
79 *
80 * @param[in] top_strd
81 * Top field Stride
82 *
83 * @param[in] bot_strd
84 * Bottom field stride
85 *
86 * @returns
87 * combing artifact flag (1 = detected, 0 = not detected)
88 *
89 * @remarks
90 *
91 *******************************************************************************
92 */
ideint_cac_8x8_ssse3(UWORD8 * pu1_top,UWORD8 * pu1_bot,WORD32 top_strd,WORD32 bot_strd)93 WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
94 UWORD8 *pu1_bot,
95 WORD32 top_strd,
96 WORD32 bot_strd)
97 {
98 WORD32 ca; /* combing artifact result */
99 WORD32 i;
100 WORD32 adj[2] = {0};
101 WORD32 alt[2] = {0};
102 WORD32 sum_1, sum_2, sum_3, sum_4;
103 WORD32 sum_diff, diff_sum;
104
105 __m128i top[4];
106 __m128i bot[4];
107 __m128i sum_t[4];
108 __m128i sum_b[4];
109 __m128i zero;
110
111
112 zero = _mm_setzero_si128();
113
114 for(i = 0; i < 4; i++)
115 {
116 /* Load top */
117 top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top));
118 pu1_top += top_strd;
119
120 /* Load bottom */
121 bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot));
122 pu1_bot += bot_strd;
123
124 /* Unpack */
125 top[i] = _mm_unpacklo_epi8(top[i], zero);
126 bot[i] = _mm_unpacklo_epi8(bot[i], zero);
127
128 /* Compute row sums */
129 sum_t[i] = _mm_sad_epu8(top[i], zero);
130 sum_b[i] = _mm_sad_epu8(bot[i], zero);
131 }
132
133 /* Compute row based alt and adj */
134 for(i = 0; i < 4; i += 2)
135 {
136 sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]);
137 sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]);
138 sum_diff = ABS_DIF(sum_1, sum_2);
139 if(sum_diff >= RSUM_CSUM_THRESH)
140 adj[0] += sum_diff;
141
142 sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]);
143 sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]);
144 sum_diff = ABS_DIF(sum_3, sum_4);
145 if(sum_diff >= RSUM_CSUM_THRESH)
146 adj[0] += sum_diff;
147
148 alt[0] += ABS_DIF(sum_1, sum_3);
149 alt[0] += ABS_DIF(sum_2, sum_4);
150
151 sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8));
152 sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8));
153 sum_diff = ABS_DIF(sum_1, sum_2);
154 if(sum_diff >= RSUM_CSUM_THRESH)
155 adj[1] += sum_diff;
156
157 sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8));
158 sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8));
159 sum_diff = ABS_DIF(sum_3, sum_4);
160 if(sum_diff >= RSUM_CSUM_THRESH)
161 adj[1] += sum_diff;
162
163 alt[1] += ABS_DIF(sum_1, sum_3);
164 alt[1] += ABS_DIF(sum_2, sum_4);
165 }
166
167 /* Compute column based adj */
168 {
169 __m128i avg1, avg2;
170 __m128i top_avg, bot_avg;
171 __m128i min, max, diff, thresh;
172 __m128i mask;
173 avg1 = _mm_avg_epu8(top[0], top[1]);
174 avg2 = _mm_avg_epu8(top[2], top[3]);
175 top_avg = _mm_avg_epu8(avg1, avg2);
176
177 avg1 = _mm_avg_epu8(bot[0], bot[1]);
178 avg2 = _mm_avg_epu8(bot[2], bot[3]);
179 bot_avg = _mm_avg_epu8(avg1, avg2);
180
181 min = _mm_min_epu8(top_avg, bot_avg);
182 max = _mm_max_epu8(top_avg, bot_avg);
183
184 diff = _mm_sub_epi16(max, min);
185 thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
186
187 mask = _mm_cmpgt_epi16(diff, thresh);
188 diff = _mm_and_si128(diff, mask);
189
190 diff_sum = _mm_extract_epi16(diff, 0);
191 diff_sum += _mm_extract_epi16(diff, 1);
192 diff_sum += _mm_extract_epi16(diff, 2);
193 diff_sum += _mm_extract_epi16(diff, 3);
194
195 adj[0] += diff_sum << 2;
196
197 diff_sum = _mm_extract_epi16(diff, 4);
198 diff_sum += _mm_extract_epi16(diff, 5);
199 diff_sum += _mm_extract_epi16(diff, 6);
200 diff_sum += _mm_extract_epi16(diff, 7);
201
202 adj[1] += diff_sum << 2;
203
204 }
205
206 /* Compute column based alt */
207 {
208 __m128i avg1, avg2;
209 __m128i even_avg, odd_avg, diff;
210 avg1 = _mm_avg_epu8(top[0], bot[0]);
211 avg2 = _mm_avg_epu8(top[2], bot[2]);
212 even_avg = _mm_avg_epu8(avg1, avg2);
213
214 avg1 = _mm_avg_epu8(top[1], bot[1]);
215 avg2 = _mm_avg_epu8(top[3], bot[3]);
216 odd_avg = _mm_avg_epu8(avg1, avg2);
217
218 diff = _mm_sad_epu8(even_avg, odd_avg);
219
220
221 diff_sum = _mm_cvtsi128_si32(diff);
222 alt[0] += diff_sum << 2;
223
224 diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8));
225 alt[1] += diff_sum << 2;
226
227 }
228 alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
229 alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
230
231 ca = (alt[0] < adj[0]);
232 ca |= (alt[1] < adj[1]);
233
234 return ca;
235 }
236
237