1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 #include "precomp.hpp"
45
46 #include <limits>
47
48 #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
49
50 namespace cv
51 {
52
53
54 //////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////
55
56 template<typename T>
57 class SIMDBayerStubInterpolator_
58 {
59 public:
bayer2Gray(const T *,int,T *,int,int,int,int) const60 int bayer2Gray(const T*, int, T*, int, int, int, int) const
61 {
62 return 0;
63 }
64
bayer2RGB(const T *,int,T *,int,int) const65 int bayer2RGB(const T*, int, T*, int, int) const
66 {
67 return 0;
68 }
69
bayer2RGBA(const T *,int,T *,int,int) const70 int bayer2RGBA(const T*, int, T*, int, int) const
71 {
72 return 0;
73 }
74
bayer2RGB_EA(const T *,int,T *,int,int) const75 int bayer2RGB_EA(const T*, int, T*, int, int) const
76 {
77 return 0;
78 }
79 };
80
81 #if CV_SSE2
82 class SIMDBayerInterpolator_8u
83 {
84 public:
SIMDBayerInterpolator_8u()85 SIMDBayerInterpolator_8u()
86 {
87 use_simd = checkHardwareSupport(CV_CPU_SSE2);
88 }
89
bayer2Gray(const uchar * bayer,int bayer_step,uchar * dst,int width,int bcoeff,int gcoeff,int rcoeff) const90 int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
91 int width, int bcoeff, int gcoeff, int rcoeff) const
92 {
93 if( !use_simd )
94 return 0;
95
96 __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
97 __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
98 __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
99 const uchar* bayer_end = bayer + width;
100
101 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
102 {
103 __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
104 __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
105 __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
106
107 __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
108 _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
109 __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
110 b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);
111
112 __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
113 __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
114 g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
115 g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);
116
117 r0 = _mm_srli_epi16(r1, 8);
118 r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
119 r0 = _mm_slli_epi16(r0, 3);
120
121 g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
122 g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
123 g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
124 g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
125 g0 = _mm_srli_epi16(g0, 2);
126 g1 = _mm_srli_epi16(g1, 2);
127 g0 = _mm_packus_epi16(g0, g0);
128 g1 = _mm_packus_epi16(g1, g1);
129 g0 = _mm_unpacklo_epi8(g0, g1);
130 _mm_storeu_si128((__m128i*)dst, g0);
131 }
132
133 return (int)(bayer - (bayer_end - width));
134 }
135
bayer2RGB(const uchar * bayer,int bayer_step,uchar * dst,int width,int blue) const136 int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
137 {
138 if( !use_simd )
139 return 0;
140 /*
141 B G B G | B G B G | B G B G | B G B G
142 G R G R | G R G R | G R G R | G R G R
143 B G B G | B G B G | B G B G | B G B G
144 */
145
146 __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
147 __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
148 __m128i masklo = _mm_set1_epi16(0x00ff);
149 const uchar* bayer_end = bayer + width;
150
151 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
152 {
153 __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
154 __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
155 __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
156
157 __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
158 __m128i nextb1 = _mm_srli_si128(b1, 2);
159 __m128i b0 = _mm_add_epi16(b1, nextb1);
160 b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
161 b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
162 // b0 b2 ... b14 b1 b3 ... b15
163 b0 = _mm_packus_epi16(b0, b1);
164
165 __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
166 __m128i g1 = _mm_and_si128(r1, masklo);
167 g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
168 g1 = _mm_srli_si128(g1, 2);
169 g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
170 // g0 g2 ... g14 g1 g3 ... g15
171 g0 = _mm_packus_epi16(g0, g1);
172
173 r0 = _mm_srli_epi16(r1, 8);
174 r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
175 r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
176 // r0 r2 ... r14 r1 r3 ... r15
177 r0 = _mm_packus_epi16(r0, r1);
178
179 b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
180 b0 = _mm_xor_si128(b0, b1);
181 r0 = _mm_xor_si128(r0, b1);
182
183 // b1 g1 b3 g3 b5 g5...
184 b1 = _mm_unpackhi_epi8(b0, g0);
185 // b0 g0 b2 g2 b4 g4 ....
186 b0 = _mm_unpacklo_epi8(b0, g0);
187
188 // r1 0 r3 0 r5 0 ...
189 r1 = _mm_unpackhi_epi8(r0, z);
190 // r0 0 r2 0 r4 0 ...
191 r0 = _mm_unpacklo_epi8(r0, z);
192
193 // 0 b0 g0 r0 0 b2 g2 r2 ...
194 g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
195 // 0 b8 g8 r8 0 b10 g10 r10 ...
196 g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
197
198 // b1 g1 r1 0 b3 g3 r3 0 ...
199 r0 = _mm_unpacklo_epi16(b1, r1);
200 // b9 g9 r9 0 b11 g11 r11 0 ...
201 r1 = _mm_unpackhi_epi16(b1, r1);
202
203 // 0 b0 g0 r0 b1 g1 r1 0 ...
204 b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
205 // 0 b4 g4 r4 b5 g5 r5 0 ...
206 b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
207
208 _mm_storel_epi64((__m128i*)(dst-1+0), b0);
209 _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
210 _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
211 _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));
212
213 // 0 b8 g8 r8 b9 g9 r9 0 ...
214 g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
215 // 0 b12 g12 r12 b13 g13 r13 0 ...
216 g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
217
218 _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
219 _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));
220
221 _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
222 }
223
224 return (int)(bayer - (bayer_end - width));
225 }
226
bayer2RGBA(const uchar *,int,uchar *,int,int) const227 int bayer2RGBA(const uchar*, int, uchar*, int, int) const
228 {
229 return 0;
230 }
231
bayer2RGB_EA(const uchar * bayer,int bayer_step,uchar * dst,int width,int blue) const232 int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
233 {
234 if (!use_simd)
235 return 0;
236
237 const uchar* bayer_end = bayer + width;
238 __m128i masklow = _mm_set1_epi16(0x00ff);
239 __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
240 __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
241 __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);
242
243 for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
244 {
245 /*
246 B G B G | B G B G | B G B G | B G B G
247 G R G R | G R G R | G R G R | G R G R
248 B G B G | B G B G | B G B G | B G B G
249 */
250
251 __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
252 __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
253 __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));
254
255 __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
256 __m128i nextb1 = _mm_srli_si128(b1, 2);
257 __m128i b0 = _mm_add_epi16(b1, nextb1);
258 b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
259 b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
260 // b0 b2 ... b14 b1 b3 ... b15
261 b0 = _mm_packus_epi16(b0, b1);
262
263 // vertical sum
264 __m128i r0g = _mm_srli_epi16(r0, 8);
265 __m128i r2g = _mm_srli_epi16(r2, 8);
266 __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
267 // gorizontal sum
268 __m128i g1 = _mm_and_si128(masklow, r1);
269 __m128i nextg1 = _mm_srli_si128(g1, 2);
270 __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);
271
272 // gradients
273 __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
274 __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
275 __m128i gmask = _mm_cmpgt_epi16(gradg, gradv);
276
277 __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
278 // g0 g2 ... g14 g1 g3 ...
279 g0 = _mm_packus_epi16(g0, nextg1);
280
281 r0 = _mm_srli_epi16(r1, 8);
282 r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
283 r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
284 // r0 r2 ... r14 r1 r3 ... r15
285 r0 = _mm_packus_epi16(r0, r1);
286
287 b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
288 b0 = _mm_xor_si128(b0, b1);
289 r0 = _mm_xor_si128(r0, b1);
290
291 // b1 g1 b3 g3 b5 g5...
292 b1 = _mm_unpackhi_epi8(b0, g0);
293 // b0 g0 b2 g2 b4 g4 ....
294 b0 = _mm_unpacklo_epi8(b0, g0);
295
296 // r1 0 r3 0 r5 0 ...
297 r1 = _mm_unpackhi_epi8(r0, z);
298 // r0 0 r2 0 r4 0 ...
299 r0 = _mm_unpacklo_epi8(r0, z);
300
301 // 0 b0 g0 r0 0 b2 g2 r2 ...
302 g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
303 // 0 b8 g8 r8 0 b10 g10 r10 ...
304 g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
305
306 // b1 g1 r1 0 b3 g3 r3 0 ...
307 r0 = _mm_unpacklo_epi16(b1, r1);
308 // b9 g9 r9 0 b11 g11 r11 0 ...
309 r1 = _mm_unpackhi_epi16(b1, r1);
310
311 // 0 b0 g0 r0 b1 g1 r1 0 ...
312 b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
313 // 0 b4 g4 r4 b5 g5 r5 0 ...
314 b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
315
316 _mm_storel_epi64((__m128i*)(dst+0), b0);
317 _mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));
318 _mm_storel_epi64((__m128i*)(dst+6*2), b1);
319 _mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));
320
321 // 0 b8 g8 r8 b9 g9 r9 0 ...
322 g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
323 // 0 b12 g12 r12 b13 g13 r13 0 ...
324 g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
325
326 _mm_storel_epi64((__m128i*)(dst+6*4), g0);
327 _mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));
328
329 _mm_storel_epi64((__m128i*)(dst+6*6), g1);
330 }
331
332 return int(bayer - (bayer_end - width));
333 }
334
335 bool use_simd;
336 };
337 #elif CV_NEON
338 class SIMDBayerInterpolator_8u
339 {
340 public:
SIMDBayerInterpolator_8u()341 SIMDBayerInterpolator_8u()
342 {
343 }
344
bayer2Gray(const uchar * bayer,int bayer_step,uchar * dst,int width,int bcoeff,int gcoeff,int rcoeff) const345 int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
346 int width, int bcoeff, int gcoeff, int rcoeff) const
347 {
348 /*
349 B G B G | B G B G | B G B G | B G B G
350 G R G R | G R G R | G R G R | G R G R
351 B G B G | B G B G | B G B G | B G B G
352 */
353
354 uint16x8_t masklo = vdupq_n_u16(255);
355 const uchar* bayer_end = bayer + width;
356
357 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
358 {
359 uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
360 uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
361 uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));
362
363 uint16x8_t b1_ = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
364 uint16x8_t b1 = vextq_u16(b1_, b1_, 1);
365 uint16x8_t b0 = vaddq_u16(b1_, b1);
366 // b0 = b0 b2 b4 ...
367 // b1 = b1 b3 b5 ...
368
369 uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
370 uint16x8_t g1 = vandq_u16(r1, masklo);
371 g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
372 uint16x8_t rot = vextq_u16(g1, g1, 1);
373 g1 = vshlq_n_u16(rot, 2);
374 // g0 = b0 b2 b4 ...
375 // g1 = b1 b3 b5 ...
376
377 r0 = vshrq_n_u16(r1, 8);
378 r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
379 r0 = vshlq_n_u16(r0, 2);
380 // r0 = r0 r2 r4 ...
381 // r1 = r1 r3 r5 ...
382
383 b0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b0), (short)(rcoeff*2)));
384 b1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b1), (short)(rcoeff*4)));
385
386 g0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g0), (short)(gcoeff*2)));
387 g1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g1), (short)(gcoeff*2)));
388
389 r0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r0), (short)(bcoeff*2)));
390 r1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r1), (short)(bcoeff*4)));
391
392 g0 = vaddq_u16(vaddq_u16(g0, b0), r0);
393 g1 = vaddq_u16(vaddq_u16(g1, b1), r1);
394
395 uint8x8x2_t p = vzip_u8(vrshrn_n_u16(g0, 2), vrshrn_n_u16(g1, 2));
396 vst1_u8(dst, p.val[0]);
397 vst1_u8(dst + 8, p.val[1]);
398 }
399
400 return (int)(bayer - (bayer_end - width));
401 }
402
bayer2RGB(const uchar * bayer,int bayer_step,uchar * dst,int width,int blue) const403 int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
404 {
405 /*
406 B G B G | B G B G | B G B G | B G B G
407 G R G R | G R G R | G R G R | G R G R
408 B G B G | B G B G | B G B G | B G B G
409 */
410 uint16x8_t masklo = vdupq_n_u16(255);
411 uint8x16x3_t pix;
412 const uchar* bayer_end = bayer + width;
413
414 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
415 {
416 uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
417 uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
418 uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));
419
420 uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
421 uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
422 uint16x8_t b0 = vaddq_u16(b1, nextb1);
423 // b0 b1 b2 ...
424 uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
425 pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);
426
427 uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
428 uint16x8_t g1 = vandq_u16(r1, masklo);
429 g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
430 g1 = vextq_u16(g1, g1, 1);
431 // g0 g1 g2 ...
432 uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
433 pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);
434
435 r0 = vshrq_n_u16(r1, 8);
436 r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
437 // r0 r1 r2 ...
438 uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
439 pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);
440
441 vst3q_u8(dst-1, pix);
442 }
443
444 return (int)(bayer - (bayer_end - width));
445 }
446
bayer2RGBA(const uchar * bayer,int bayer_step,uchar * dst,int width,int blue) const447 int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
448 {
449 /*
450 B G B G | B G B G | B G B G | B G B G
451 G R G R | G R G R | G R G R | G R G R
452 B G B G | B G B G | B G B G | B G B G
453 */
454 uint16x8_t masklo = vdupq_n_u16(255);
455 uint8x16x4_t pix;
456 const uchar* bayer_end = bayer + width;
457 pix.val[3] = vdupq_n_u8(255);
458
459 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
460 {
461 uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
462 uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
463 uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));
464
465 uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
466 uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
467 uint16x8_t b0 = vaddq_u16(b1, nextb1);
468 // b0 b1 b2 ...
469 uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
470 pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);
471
472 uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
473 uint16x8_t g1 = vandq_u16(r1, masklo);
474 g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
475 g1 = vextq_u16(g1, g1, 1);
476 // g0 g1 g2 ...
477 uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
478 pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);
479
480 r0 = vshrq_n_u16(r1, 8);
481 r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
482 // r0 r1 r2 ...
483 uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
484 pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);
485
486 vst4q_u8(dst-1, pix);
487 }
488
489 return (int)(bayer - (bayer_end - width));
490 }
491
bayer2RGB_EA(const uchar *,int,uchar *,int,int) const492 int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
493 {
494 return 0;
495 }
496 };
497 #else
498 typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;
499 #endif
500
501
502 template<typename T, class SIMDInterpolator>
503 class Bayer2Gray_Invoker :
504 public ParallelLoopBody
505 {
506 public:
Bayer2Gray_Invoker(const Mat & _srcmat,Mat & _dstmat,int _start_with_green,bool _brow,const Size & _size,int _bcoeff,int _rcoeff)507 Bayer2Gray_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, bool _brow,
508 const Size& _size, int _bcoeff, int _rcoeff) :
509 ParallelLoopBody(), srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green),
510 Brow(_brow), size(_size), Bcoeff(_bcoeff), Rcoeff(_rcoeff)
511 {
512 }
513
operator ()(const Range & range) const514 virtual void operator ()(const Range& range) const
515 {
516 SIMDInterpolator vecOp;
517 const int G2Y = 9617;
518 const int SHIFT = 14;
519
520 const T* bayer0 = srcmat.ptr<T>();
521 int bayer_step = (int)(srcmat.step/sizeof(T));
522 T* dst0 = (T*)dstmat.data;
523 int dst_step = (int)(dstmat.step/sizeof(T));
524 int bcoeff = Bcoeff, rcoeff = Rcoeff;
525 int start_with_green = Start_with_green;
526 bool brow = Brow;
527
528 dst0 += dst_step + 1;
529
530 if (range.start % 2)
531 {
532 brow = !brow;
533 std::swap(bcoeff, rcoeff);
534 start_with_green = !start_with_green;
535 }
536
537 bayer0 += range.start * bayer_step;
538 dst0 += range.start * dst_step;
539
540 for(int i = range.start ; i < range.end; ++i, bayer0 += bayer_step, dst0 += dst_step )
541 {
542 unsigned t0, t1, t2;
543 const T* bayer = bayer0;
544 T* dst = dst0;
545 const T* bayer_end = bayer + size.width;
546
547 if( size.width <= 0 )
548 {
549 dst[-1] = dst[size.width] = 0;
550 continue;
551 }
552
553 if( start_with_green )
554 {
555 t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;
556 t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;
557 t2 = bayer[bayer_step+1]*(2*G2Y);
558
559 dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
560 bayer++;
561 dst++;
562 }
563
564 int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);
565 bayer += delta;
566 dst += delta;
567
568 for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )
569 {
570 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
571 t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
572 t2 = bayer[bayer_step+1]*(4*bcoeff);
573 dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
574
575 t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;
576 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;
577 t2 = bayer[bayer_step+2]*(2*G2Y);
578 dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
579 }
580
581 if( bayer < bayer_end )
582 {
583 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
584 t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
585 t2 = bayer[bayer_step+1]*(4*bcoeff);
586 dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
587 bayer++;
588 dst++;
589 }
590
591 dst0[-1] = dst0[0];
592 dst0[size.width] = dst0[size.width-1];
593
594 brow = !brow;
595 std::swap(bcoeff, rcoeff);
596 start_with_green = !start_with_green;
597 }
598 }
599
600 private:
601 Mat srcmat;
602 Mat dstmat;
603 int Start_with_green;
604 bool Brow;
605 Size size;
606 int Bcoeff, Rcoeff;
607 };
608
609 template<typename T, typename SIMDInterpolator>
Bayer2Gray_(const Mat & srcmat,Mat & dstmat,int code)610 static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )
611 {
612 const int R2Y = 4899;
613 const int B2Y = 1868;
614
615 Size size = srcmat.size();
616 int bcoeff = B2Y, rcoeff = R2Y;
617 int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;
618 bool brow = true;
619
620 if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )
621 {
622 brow = false;
623 std::swap(bcoeff, rcoeff);
624 }
625 size.height -= 2;
626 size.width -= 2;
627
628 if (size.height > 0)
629 {
630 Range range(0, size.height);
631 Bayer2Gray_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat,
632 start_with_green, brow, size, bcoeff, rcoeff);
633 parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
634 }
635
636 size = dstmat.size();
637 T* dst0 = dstmat.ptr<T>();
638 int dst_step = (int)(dstmat.step/sizeof(T));
639 if( size.height > 2 )
640 for( int i = 0; i < size.width; i++ )
641 {
642 dst0[i] = dst0[i + dst_step];
643 dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
644 }
645 else
646 for( int i = 0; i < size.width; i++ )
647 dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
648 }
649
650 template <typename T>
651 struct Alpha
652 {
valuecv::Alpha653 static T value() { return std::numeric_limits<T>::max(); }
654 };
655
656 template <>
657 struct Alpha<float>
658 {
valuecv::Alpha659 static float value() { return 1.0f; }
660 };
661
662 template <typename T, typename SIMDInterpolator>
663 class Bayer2RGB_Invoker :
664 public ParallelLoopBody
665 {
666 public:
Bayer2RGB_Invoker(const Mat & _srcmat,Mat & _dstmat,int _start_with_green,int _blue,const Size & _size)667 Bayer2RGB_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, int _blue, const Size& _size) :
668 ParallelLoopBody(),
669 srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), Blue(_blue), size(_size)
670 {
671 }
672
operator ()(const Range & range) const673 virtual void operator() (const Range& range) const
674 {
675 SIMDInterpolator vecOp;
676 T alpha = Alpha<T>::value();
677 int dcn = dstmat.channels();
678 int dcn2 = dcn << 1;
679
680 int bayer_step = (int)(srcmat.step/sizeof(T));
681 const T* bayer0 = srcmat.ptr<T>() + bayer_step * range.start;
682
683 int dst_step = (int)(dstmat.step/sizeof(T));
684 T* dst0 = reinterpret_cast<T*>(dstmat.data) + (range.start + 1) * dst_step + dcn + 1;
685
686 int blue = Blue, start_with_green = Start_with_green;
687 if (range.start % 2)
688 {
689 blue = -blue;
690 start_with_green = !start_with_green;
691 }
692
693 for (int i = range.start; i < range.end; bayer0 += bayer_step, dst0 += dst_step, ++i )
694 {
695 int t0, t1;
696 const T* bayer = bayer0;
697 T* dst = dst0;
698 const T* bayer_end = bayer + size.width;
699
700 // in case of when size.width <= 2
701 if( size.width <= 0 )
702 {
703 if (dcn == 3)
704 {
705 dst[-4] = dst[-3] = dst[-2] = dst[size.width*dcn-1] =
706 dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
707 }
708 else
709 {
710 dst[-5] = dst[-4] = dst[-3] = dst[size.width*dcn-1] =
711 dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
712 dst[-2] = dst[size.width*dcn+2] = alpha;
713 }
714 continue;
715 }
716
717 if( start_with_green )
718 {
719 t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;
720 t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1;
721
722 dst[-blue] = (T)t0;
723 dst[0] = bayer[bayer_step+1];
724 dst[blue] = (T)t1;
725 if (dcn == 4)
726 dst[2] = alpha; // alpha channel
727
728 bayer++;
729 dst += dcn;
730 }
731
732 // simd optimization only for dcn == 3
733 int delta = dcn == 4 ?
734 vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
735 vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
736 bayer += delta;
737 dst += delta*dcn;
738
739 if (dcn == 3) // Bayer to BGR
740 {
741 if( blue > 0 )
742 {
743 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
744 {
745 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
746 bayer[bayer_step*2+2] + 2) >> 2;
747 t1 = (bayer[1] + bayer[bayer_step] +
748 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
749 dst[-1] = (T)t0;
750 dst[0] = (T)t1;
751 dst[1] = bayer[bayer_step+1];
752
753 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
754 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
755 dst[2] = (T)t0;
756 dst[3] = bayer[bayer_step+2];
757 dst[4] = (T)t1;
758 }
759 }
760 else
761 {
762 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
763 {
764 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
765 bayer[bayer_step*2+2] + 2) >> 2;
766 t1 = (bayer[1] + bayer[bayer_step] +
767 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
768 dst[1] = (T)t0;
769 dst[0] = (T)t1;
770 dst[-1] = bayer[bayer_step+1];
771
772 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
773 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
774 dst[4] = (T)t0;
775 dst[3] = bayer[bayer_step+2];
776 dst[2] = (T)t1;
777 }
778 }
779 }
780 else // Bayer to BGRA
781 {
782 // if current row does not contain Blue pixels
783 if( blue > 0 )
784 {
785 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
786 {
787 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
788 bayer[bayer_step*2+2] + 2) >> 2;
789 t1 = (bayer[1] + bayer[bayer_step] +
790 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
791 dst[-1] = (T)t0;
792 dst[0] = (T)t1;
793 dst[1] = bayer[bayer_step+1];
794 dst[2] = alpha; // alpha channel
795
796 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
797 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
798 dst[3] = (T)t0;
799 dst[4] = bayer[bayer_step+2];
800 dst[5] = (T)t1;
801 dst[6] = alpha; // alpha channel
802 }
803 }
804 else // if current row contains Blue pixels
805 {
806 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
807 {
808 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
809 bayer[bayer_step*2+2] + 2) >> 2;
810 t1 = (bayer[1] + bayer[bayer_step] +
811 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
812 dst[-1] = bayer[bayer_step+1];
813 dst[0] = (T)t1;
814 dst[1] = (T)t0;
815 dst[2] = alpha; // alpha channel
816
817 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
818 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
819 dst[3] = (T)t1;
820 dst[4] = bayer[bayer_step+2];
821 dst[5] = (T)t0;
822 dst[6] = alpha; // alpha channel
823 }
824 }
825 }
826
827 // if skip one pixel at the end of row
828 if( bayer < bayer_end )
829 {
830 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
831 bayer[bayer_step*2+2] + 2) >> 2;
832 t1 = (bayer[1] + bayer[bayer_step] +
833 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
834 dst[-blue] = (T)t0;
835 dst[0] = (T)t1;
836 dst[blue] = bayer[bayer_step+1];
837 if (dcn == 4)
838 dst[2] = alpha; // alpha channel
839 bayer++;
840 dst += dcn;
841 }
842
843 // fill the last and the first pixels of row accordingly
844 if (dcn == 3)
845 {
846 dst0[-4] = dst0[-1];
847 dst0[-3] = dst0[0];
848 dst0[-2] = dst0[1];
849 dst0[size.width*dcn-1] = dst0[size.width*dcn-4];
850 dst0[size.width*dcn] = dst0[size.width*dcn-3];
851 dst0[size.width*dcn+1] = dst0[size.width*dcn-2];
852 }
853 else
854 {
855 dst0[-5] = dst0[-1];
856 dst0[-4] = dst0[0];
857 dst0[-3] = dst0[1];
858 dst0[-2] = dst0[2]; // alpha channel
859 dst0[size.width*dcn-1] = dst0[size.width*dcn-5];
860 dst0[size.width*dcn] = dst0[size.width*dcn-4];
861 dst0[size.width*dcn+1] = dst0[size.width*dcn-3];
862 dst0[size.width*dcn+2] = dst0[size.width*dcn-2]; // alpha channel
863 }
864
865 blue = -blue;
866 start_with_green = !start_with_green;
867 }
868 }
869
870 private:
871 Mat srcmat;
872 Mat dstmat;
873 int Start_with_green, Blue;
874 Size size;
875 };
876
877 template<typename T, class SIMDInterpolator>
Bayer2RGB_(const Mat & srcmat,Mat & dstmat,int code)878 static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )
879 {
880 int dst_step = (int)(dstmat.step/sizeof(T));
881 Size size = srcmat.size();
882 int blue = code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ? -1 : 1;
883 int start_with_green = code == CV_BayerGB2BGR || code == CV_BayerGR2BGR;
884
885 int dcn = dstmat.channels();
886 size.height -= 2;
887 size.width -= 2;
888
889 if (size.height > 0)
890 {
891 Range range(0, size.height);
892 Bayer2RGB_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, start_with_green, blue, size);
893 parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
894 }
895
896 // filling the first and the last rows
897 size = dstmat.size();
898 T* dst0 = dstmat.ptr<T>();
899 if( size.height > 2 )
900 for( int i = 0; i < size.width*dcn; i++ )
901 {
902 dst0[i] = dst0[i + dst_step];
903 dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
904 }
905 else
906 for( int i = 0; i < size.width*dcn; i++ )
907 dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
908 }
909
910
911 /////////////////// Demosaicing using Variable Number of Gradients ///////////////////////
912
Bayer2RGB_VNG_8u(const Mat & srcmat,Mat & dstmat,int code)913 static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
914 {
915 const uchar* bayer = srcmat.ptr();
916 int bstep = (int)srcmat.step;
917 uchar* dst = dstmat.ptr();
918 int dststep = (int)dstmat.step;
919 Size size = srcmat.size();
920
921 int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2;
922 bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG;
923
924 // for too small images use the simple interpolation algorithm
925 if( MIN(size.width, size.height) < 8 )
926 {
927 Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code );
928 return;
929 }
930
931 const int brows = 3, bcn = 7;
932 int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7;
933 int i, bufstep = N7*bcn;
934 cv::AutoBuffer<ushort> _buf(bufstep*brows);
935 ushort* buf = (ushort*)_buf;
936
937 bayer += bstep*2;
938
939 #if CV_SSE2
940 bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2);
941 #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a))
942 #endif
943
944 for( int y = 2; y < size.height - 4; y++ )
945 {
946 uchar* dstrow = dst + dststep*y + 6;
947 const uchar* srow;
948
949 for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ )
950 {
951 ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1;
952 srow = bayer + (y+dy)*bstep + 1;
953
954 for( i = 0; i < bcn; i++ )
955 brow[N*i-1] = brow[(N-2) + N*i] = 0;
956
957 i = 1;
958
959 #if CV_SSE2
960 if( haveSSE )
961 {
962 __m128i z = _mm_setzero_si128();
963 for( ; i <= N-9; i += 8, srow += 8, brow += 8 )
964 {
965 __m128i s1, s2, s3, s4, s6, s7, s8, s9;
966
967 s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z);
968 s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z);
969 s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z);
970
971 s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z);
972 s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z);
973
974 s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z);
975 s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z);
976 s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z);
977
978 __m128i b0, b1, b2, b3, b4, b5, b6;
979
980 b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1),
981 _mm_adds_epu16(_mm_absdiff_epu16(s1, s7),
982 _mm_absdiff_epu16(s3, s9)));
983 b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1),
984 _mm_adds_epu16(_mm_absdiff_epu16(s1, s3),
985 _mm_absdiff_epu16(s7, s9)));
986 b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1);
987 b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1);
988
989 _mm_storeu_si128((__m128i*)brow, b0);
990 _mm_storeu_si128((__m128i*)(brow + N), b1);
991 _mm_storeu_si128((__m128i*)(brow + N2), b2);
992 _mm_storeu_si128((__m128i*)(brow + N3), b3);
993
994 b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4),
995 _mm_absdiff_epu16(s6, s8)));
996 b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6),
997 _mm_absdiff_epu16(s4, s8)));
998 b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8));
999 b6 = _mm_srli_epi16(b6, 1);
1000
1001 _mm_storeu_si128((__m128i*)(brow + N4), b4);
1002 _mm_storeu_si128((__m128i*)(brow + N5), b5);
1003 _mm_storeu_si128((__m128i*)(brow + N6), b6);
1004 }
1005 }
1006 #endif
1007
1008 for( ; i < N-1; i++, srow++, brow++ )
1009 {
1010 brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) +
1011 std::abs(srow[-bstep] - srow[+bstep])*2 +
1012 std::abs(srow[1-bstep] - srow[1+bstep]));
1013 brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) +
1014 std::abs(srow[-1] - srow[1])*2 +
1015 std::abs(srow[-1+bstep] - srow[1+bstep]));
1016 brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2);
1017 brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2);
1018 brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) +
1019 std::abs(srow[+bstep] - srow[1]));
1020 brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) +
1021 std::abs(srow[+bstep] - srow[-1]));
1022 brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1);
1023 }
1024 }
1025
1026 const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2;
1027 const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2;
1028 const ushort* brow2 = buf + (y % brows)*bufstep + 2;
1029 static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f };
1030 srow = bayer + y*bstep + 2;
1031 bool greenCell = greenCell0;
1032
1033 i = 2;
1034 #if CV_SSE2
1035 int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2;
1036 #else
1037 int limit = N - 2;
1038 #endif
1039
1040 do
1041 {
1042 for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 )
1043 {
1044 int gradN = brow0[0] + brow1[0];
1045 int gradS = brow1[0] + brow2[0];
1046 int gradW = brow1[N-1] + brow1[N];
1047 int gradE = brow1[N] + brow1[N+1];
1048 int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
1049 int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
1050 int R, G, B;
1051
1052 if( !greenCell )
1053 {
1054 int gradNE = brow0[N4+1] + brow1[N4];
1055 int gradSW = brow1[N4] + brow2[N4-1];
1056 int gradNW = brow0[N5-1] + brow1[N5];
1057 int gradSE = brow1[N5] + brow2[N5+1];
1058
1059 minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
1060 maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
1061 int T = minGrad + MAX(maxGrad/2, 1);
1062
1063 int Rs = 0, Gs = 0, Bs = 0, ng = 0;
1064 if( gradN < T )
1065 {
1066 Rs += srow[-bstep*2] + srow[0];
1067 Gs += srow[-bstep]*2;
1068 Bs += srow[-bstep-1] + srow[-bstep+1];
1069 ng++;
1070 }
1071 if( gradS < T )
1072 {
1073 Rs += srow[bstep*2] + srow[0];
1074 Gs += srow[bstep]*2;
1075 Bs += srow[bstep-1] + srow[bstep+1];
1076 ng++;
1077 }
1078 if( gradW < T )
1079 {
1080 Rs += srow[-2] + srow[0];
1081 Gs += srow[-1]*2;
1082 Bs += srow[-bstep-1] + srow[bstep-1];
1083 ng++;
1084 }
1085 if( gradE < T )
1086 {
1087 Rs += srow[2] + srow[0];
1088 Gs += srow[1]*2;
1089 Bs += srow[-bstep+1] + srow[bstep+1];
1090 ng++;
1091 }
1092 if( gradNE < T )
1093 {
1094 Rs += srow[-bstep*2+2] + srow[0];
1095 Gs += brow0[N6+1];
1096 Bs += srow[-bstep+1]*2;
1097 ng++;
1098 }
1099 if( gradSW < T )
1100 {
1101 Rs += srow[bstep*2-2] + srow[0];
1102 Gs += brow2[N6-1];
1103 Bs += srow[bstep-1]*2;
1104 ng++;
1105 }
1106 if( gradNW < T )
1107 {
1108 Rs += srow[-bstep*2-2] + srow[0];
1109 Gs += brow0[N6-1];
1110 Bs += srow[-bstep+1]*2;
1111 ng++;
1112 }
1113 if( gradSE < T )
1114 {
1115 Rs += srow[bstep*2+2] + srow[0];
1116 Gs += brow2[N6+1];
1117 Bs += srow[-bstep+1]*2;
1118 ng++;
1119 }
1120 R = srow[0];
1121 G = R + cvRound((Gs - Rs)*scale[ng]);
1122 B = R + cvRound((Bs - Rs)*scale[ng]);
1123 }
1124 else
1125 {
1126 int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
1127 int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
1128 int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
1129 int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
1130
1131 minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE);
1132 maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE);
1133 int T = minGrad + MAX(maxGrad/2, 1);
1134
1135 int Rs = 0, Gs = 0, Bs = 0, ng = 0;
1136 if( gradN < T )
1137 {
1138 Rs += srow[-bstep*2-1] + srow[-bstep*2+1];
1139 Gs += srow[-bstep*2] + srow[0];
1140 Bs += srow[-bstep]*2;
1141 ng++;
1142 }
1143 if( gradS < T )
1144 {
1145 Rs += srow[bstep*2-1] + srow[bstep*2+1];
1146 Gs += srow[bstep*2] + srow[0];
1147 Bs += srow[bstep]*2;
1148 ng++;
1149 }
1150 if( gradW < T )
1151 {
1152 Rs += srow[-1]*2;
1153 Gs += srow[-2] + srow[0];
1154 Bs += srow[-bstep-2]+srow[bstep-2];
1155 ng++;
1156 }
1157 if( gradE < T )
1158 {
1159 Rs += srow[1]*2;
1160 Gs += srow[2] + srow[0];
1161 Bs += srow[-bstep+2]+srow[bstep+2];
1162 ng++;
1163 }
1164 if( gradNE < T )
1165 {
1166 Rs += srow[-bstep*2+1] + srow[1];
1167 Gs += srow[-bstep+1]*2;
1168 Bs += srow[-bstep] + srow[-bstep+2];
1169 ng++;
1170 }
1171 if( gradSW < T )
1172 {
1173 Rs += srow[bstep*2-1] + srow[-1];
1174 Gs += srow[bstep-1]*2;
1175 Bs += srow[bstep] + srow[bstep-2];
1176 ng++;
1177 }
1178 if( gradNW < T )
1179 {
1180 Rs += srow[-bstep*2-1] + srow[-1];
1181 Gs += srow[-bstep-1]*2;
1182 Bs += srow[-bstep-2]+srow[-bstep];
1183 ng++;
1184 }
1185 if( gradSE < T )
1186 {
1187 Rs += srow[bstep*2+1] + srow[1];
1188 Gs += srow[bstep+1]*2;
1189 Bs += srow[bstep+2]+srow[bstep];
1190 ng++;
1191 }
1192 G = srow[0];
1193 R = G + cvRound((Rs - Gs)*scale[ng]);
1194 B = G + cvRound((Bs - Gs)*scale[ng]);
1195 }
1196 dstrow[blueIdx] = cv::saturate_cast<uchar>(B);
1197 dstrow[1] = cv::saturate_cast<uchar>(G);
1198 dstrow[blueIdx^2] = cv::saturate_cast<uchar>(R);
1199 greenCell = !greenCell;
1200 }
1201
1202 #if CV_SSE2
1203 if( !haveSSE )
1204 break;
1205
1206 __m128i emask = _mm_set1_epi32(0x0000ffff),
1207 omask = _mm_set1_epi32(0xffff0000),
1208 z = _mm_setzero_si128(),
1209 one = _mm_set1_epi16(1);
1210 __m128 _0_5 = _mm_set1_ps(0.5f);
1211
1212 #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)
1213 #define _mm_cvtloepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f)
1214 #define _mm_cvthiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f)
1215 #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts
1216
1217 // process 8 pixels at once
1218 for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 )
1219 {
1220 //int gradN = brow0[0] + brow1[0];
1221 __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1));
1222
1223 //int gradS = brow1[0] + brow2[0];
1224 __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2));
1225
1226 //int gradW = brow1[N-1] + brow1[N];
1227 __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N)));
1228
1229 //int gradE = brow1[N+1] + brow1[N];
1230 __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N)));
1231
1232 //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
1233 //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
1234 __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE));
1235 __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE));
1236
1237 __m128i grad0, grad1;
1238
1239 //int gradNE = brow0[N4+1] + brow1[N4];
1240 //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
1241 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
1242 grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))),
1243 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1))));
1244 __m128i gradNE = _mm_merge_epi16(grad0, grad1);
1245
1246 //int gradSW = brow1[N4] + brow2[N4-1];
1247 //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
1248 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4)));
1249 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))),
1250 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1))));
1251 __m128i gradSW = _mm_merge_epi16(grad0, grad1);
1252
1253 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW);
1254 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW);
1255
1256 //int gradNW = brow0[N5-1] + brow1[N5];
1257 //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
1258 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
1259 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))),
1260 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1))));
1261 __m128i gradNW = _mm_merge_epi16(grad0, grad1);
1262
1263 //int gradSE = brow1[N5] + brow2[N5+1];
1264 //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
1265 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5)));
1266 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))),
1267 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1))));
1268 __m128i gradSE = _mm_merge_epi16(grad0, grad1);
1269
1270 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE);
1271 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE);
1272
1273 //int T = minGrad + maxGrad/2;
1274 __m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad);
1275
1276 __m128i RGs = z, GRs = z, Bs = z, ng = z;
1277
1278 __m128i x0 = _mm_loadl_u8_s16(srow, +0 );
1279 __m128i x1 = _mm_loadl_u8_s16(srow, -1 - bstep );
1280 __m128i x2 = _mm_loadl_u8_s16(srow, -1 - bstep*2);
1281 __m128i x3 = _mm_loadl_u8_s16(srow, - bstep );
1282 __m128i x4 = _mm_loadl_u8_s16(srow, +1 - bstep*2);
1283 __m128i x5 = _mm_loadl_u8_s16(srow, +1 - bstep );
1284 __m128i x6 = _mm_loadl_u8_s16(srow, +2 - bstep );
1285 __m128i x7 = _mm_loadl_u8_s16(srow, +1 );
1286 __m128i x8 = _mm_loadl_u8_s16(srow, +2 + bstep );
1287 __m128i x9 = _mm_loadl_u8_s16(srow, +1 + bstep );
1288 __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2);
1289 __m128i x11 = _mm_loadl_u8_s16(srow, + bstep );
1290 __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2);
1291 __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep );
1292 __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep );
1293 __m128i x15 = _mm_loadl_u8_s16(srow, -1 );
1294 __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep );
1295
1296 __m128i t0, t1, mask;
1297
1298 // gradN ***********************************************
1299 mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN
1300 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradN)
1301
1302 t0 = _mm_slli_epi16(x3, 1); // srow[-bstep]*2
1303 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0); // srow[-bstep*2] + srow[0]
1304
1305 // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)
1306 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
1307 // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)
1308 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask));
1309 // Bs += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)
1310 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask));
1311
1312 // gradNE **********************************************
1313 mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE
1314 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNE)
1315
1316 t0 = _mm_slli_epi16(x5, 1); // srow[-bstep+1]*2
1317 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0); // srow[-bstep*2+2] + srow[0]
1318
1319 // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)
1320 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
1321 // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)
1322 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask));
1323 // Bs += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE)
1324 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask));
1325
1326 // gradE ***********************************************
1327 mask = _mm_cmpgt_epi16(T, gradE); // mask = T>gradE
1328 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradE)
1329
1330 t0 = _mm_slli_epi16(x7, 1); // srow[1]*2
1331 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0); // srow[2] + srow[0]
1332
1333 // RGs += (srow[2] + srow[0]) * (T>gradE)
1334 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
1335 // GRs += (srow[1]*2) * (T>gradE)
1336 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
1337 // Bs += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)
1338 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask));
1339
1340 // gradSE **********************************************
1341 mask = _mm_cmpgt_epi16(T, gradSE); // mask = T>gradSE
1342 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSE)
1343
1344 t0 = _mm_slli_epi16(x9, 1); // srow[bstep+1]*2
1345 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0); // srow[bstep*2+2] + srow[0]
1346
1347 // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)
1348 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
1349 // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)
1350 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask));
1351 // Bs += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)
1352 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask));
1353
1354 // gradS ***********************************************
1355 mask = _mm_cmpgt_epi16(T, gradS); // mask = T>gradS
1356 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradS)
1357
1358 t0 = _mm_slli_epi16(x11, 1); // srow[bstep]*2
1359 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0); // srow[bstep*2]+srow[0]
1360
1361 // RGs += (srow[bstep*2]+srow[0]) * (T>gradS)
1362 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
1363 // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)
1364 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask));
1365 // Bs += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)
1366 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask));
1367
1368 // gradSW **********************************************
1369 mask = _mm_cmpgt_epi16(T, gradSW); // mask = T>gradSW
1370 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSW)
1371
1372 t0 = _mm_slli_epi16(x13, 1); // srow[bstep-1]*2
1373 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0); // srow[bstep*2-2]+srow[0]
1374
1375 // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)
1376 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
1377 // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)
1378 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask));
1379 // Bs += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)
1380 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask));
1381
1382 // gradW ***********************************************
1383 mask = _mm_cmpgt_epi16(T, gradW); // mask = T>gradW
1384 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradW)
1385
1386 t0 = _mm_slli_epi16(x15, 1); // srow[-1]*2
1387 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0); // srow[-2]+srow[0]
1388
1389 // RGs += (srow[-2]+srow[0]) * (T>gradW)
1390 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask));
1391 // GRs += (srow[-1]*2) * (T>gradW)
1392 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask));
1393 // Bs += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)
1394 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask));
1395
1396 // gradNW **********************************************
1397 mask = _mm_cmpgt_epi16(T, gradNW); // mask = T>gradNW
1398 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNW)
1399
1400 t0 = _mm_slli_epi16(x1, 1); // srow[-bstep-1]*2
1401 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0); // srow[-bstep*2-2]+srow[0]
1402
1403 // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)
1404 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask));
1405 // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)
1406 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask));
1407 // Bs += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)
1408 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask));
1409
1410 __m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng));
1411 __m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng));
1412
1413 // now interpolate r, g & b
1414 t0 = _mm_subs_epi16(GRs, RGs);
1415 t1 = _mm_subs_epi16(Bs, RGs);
1416
1417 t0 = _mm_add_epi16(x0, _mm_packs_epi32(
1418 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)),
1419 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1))));
1420
1421 t1 = _mm_add_epi16(x0, _mm_packs_epi32(
1422 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)),
1423 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1))));
1424
1425 x1 = _mm_merge_epi16(x0, t0);
1426 x2 = _mm_merge_epi16(t0, x0);
1427
1428 uchar R[8], G[8], B[8];
1429
1430 _mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z));
1431 _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z));
1432 _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z));
1433
1434 for( int j = 0; j < 8; j++, dstrow += 3 )
1435 {
1436 dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j];
1437 }
1438 }
1439 #endif
1440
1441 limit = N - 2;
1442 }
1443 while( i < N - 2 );
1444
1445 for( i = 0; i < 6; i++ )
1446 {
1447 dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i];
1448 dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i];
1449 }
1450
1451 greenCell0 = !greenCell0;
1452 blueIdx ^= 2;
1453 }
1454
1455 for( i = 0; i < size.width*3; i++ )
1456 {
1457 dst[i] = dst[i + dststep] = dst[i + dststep*2];
1458 dst[i + dststep*(size.height-4)] =
1459 dst[i + dststep*(size.height-3)] =
1460 dst[i + dststep*(size.height-2)] =
1461 dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)];
1462 }
1463 }
1464
1465 //////////////////////////////// Edge-Aware Demosaicing //////////////////////////////////
1466
1467 template <typename T, typename SIMDInterpolator>
1468 class Bayer2RGB_EdgeAware_T_Invoker :
1469 public cv::ParallelLoopBody
1470 {
1471 public:
Bayer2RGB_EdgeAware_T_Invoker(const Mat & _src,Mat & _dst,const Size & _size,int _blue,int _start_with_green)1472 Bayer2RGB_EdgeAware_T_Invoker(const Mat& _src, Mat& _dst, const Size& _size,
1473 int _blue, int _start_with_green) :
1474 ParallelLoopBody(),
1475 src(_src), dst(_dst), size(_size), Blue(_blue), Start_with_green(_start_with_green)
1476 {
1477 }
1478
operator ()(const Range & range) const1479 virtual void operator()(const Range& range) const
1480 {
1481 int dcn = dst.channels();
1482 int dcn2 = dcn<<1;
1483 int start_with_green = Start_with_green, blue = Blue;
1484 int sstep = int(src.step / src.elemSize1()), dstep = int(dst.step / dst.elemSize1());
1485 SIMDInterpolator vecOp;
1486
1487 const T* S = src.ptr<T>(range.start + 1) + 1;
1488 T* D = reinterpret_cast<T*>(dst.data + (range.start + 1) * dst.step) + dcn;
1489
1490 if (range.start % 2)
1491 {
1492 start_with_green ^= 1;
1493 blue ^= 1;
1494 }
1495
1496 // to BGR
1497 for (int y = range.start; y < range.end; ++y)
1498 {
1499 int x = 1;
1500 if (start_with_green)
1501 {
1502 D[blue<<1] = (S[-sstep] + S[sstep]) >> 1;
1503 D[1] = S[0];
1504 D[2-(blue<<1)] = (S[-1] + S[1]) >> 1;
1505 D += dcn;
1506 ++S;
1507 ++x;
1508 }
1509
1510 int delta = vecOp.bayer2RGB_EA(S - sstep - 1, sstep, D, size.width, blue);
1511 x += delta;
1512 S += delta;
1513 D += dcn * delta;
1514
1515 if (blue)
1516 for (; x < size.width; x += 2, S += 2, D += dcn2)
1517 {
1518 D[0] = S[0];
1519 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
1520 D[2] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1]) >> 2;
1521
1522 D[3] = (S[0] + S[2] + 1) >> 1;
1523 D[4] = S[1];
1524 D[5] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;
1525 }
1526 else
1527 for (; x < size.width; x += 2, S += 2, D += dcn2)
1528 {
1529 D[0] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;
1530 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
1531 D[2] = S[0];
1532
1533 D[3] = (S[-sstep+1] + S[sstep+1] + 1) >> 1;
1534 D[4] = S[1];
1535 D[5] = (S[0] + S[2] + 1) >> 1;
1536 }
1537
1538 if (x <= size.width)
1539 {
1540 D[blue<<1] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2;
1541 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1;
1542 D[2-(blue<<1)] = S[0];
1543 D += dcn;
1544 ++S;
1545 }
1546
1547 for (int i = 0; i < dcn; ++i)
1548 {
1549 D[i] = D[-dcn + i];
1550 D[-dstep+dcn+i] = D[-dstep+(dcn<<1)+i];
1551 }
1552
1553 start_with_green ^= 1;
1554 blue ^= 1;
1555 S += 2;
1556 D += dcn2;
1557 }
1558 }
1559
1560 private:
1561 Mat src;
1562 Mat dst;
1563 Size size;
1564 int Blue, Start_with_green;
1565 };
1566
1567 template <typename T, typename SIMDInterpolator>
Bayer2RGB_EdgeAware_T(const Mat & src,Mat & dst,int code)1568 static void Bayer2RGB_EdgeAware_T(const Mat& src, Mat& dst, int code)
1569 {
1570 Size size = src.size();
1571
1572 // for small sizes
1573 if (size.width <= 2 || size.height <= 2)
1574 {
1575 dst = Scalar::all(0);
1576 return;
1577 }
1578
1579 size.width -= 2;
1580 size.height -= 2;
1581
1582 int start_with_green = code == CV_BayerGB2BGR_EA || code == CV_BayerGR2BGR_EA ? 1 : 0;
1583 int blue = code == CV_BayerGB2BGR_EA || code == CV_BayerBG2BGR_EA ? 1 : 0;
1584
1585 if (size.height > 0)
1586 {
1587 Bayer2RGB_EdgeAware_T_Invoker<T, SIMDInterpolator> invoker(src, dst, size, blue, start_with_green);
1588 Range range(0, size.height);
1589 parallel_for_(range, invoker, dst.total()/static_cast<double>(1<<16));
1590 }
1591 size = dst.size();
1592 size.width *= dst.channels();
1593 size_t dstep = dst.step / dst.elemSize1();
1594 T* firstRow = dst.ptr<T>();
1595 T* lastRow = dst.ptr<T>() + (size.height-1) * dstep;
1596
1597 if (size.height > 2)
1598 {
1599 for (int x = 0; x < size.width; ++x)
1600 {
1601 firstRow[x] = (firstRow+dstep)[x];
1602 lastRow[x] = (lastRow-dstep)[x];
1603 }
1604 }
1605 else
1606 for (int x = 0; x < size.width; ++x)
1607 firstRow[x] = lastRow[x] = 0;
1608 }
1609
1610 } // end namespace cv
1611
1612 //////////////////////////////////////////////////////////////////////////////////////////
1613 // The main Demosaicing function //
1614 //////////////////////////////////////////////////////////////////////////////////////////
1615
demosaicing(InputArray _src,OutputArray _dst,int code,int dcn)1616 void cv::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn)
1617 {
1618 Mat src = _src.getMat(), dst;
1619 Size sz = src.size();
1620 int scn = src.channels(), depth = src.depth();
1621
1622 CV_Assert(depth == CV_8U || depth == CV_16U);
1623 CV_Assert(!src.empty());
1624
1625 switch (code)
1626 {
1627 case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
1628 if (dcn <= 0)
1629 dcn = 1;
1630 CV_Assert( scn == 1 && dcn == 1 );
1631
1632 _dst.create(sz, CV_MAKETYPE(depth, dcn));
1633 dst = _dst.getMat();
1634
1635 if( depth == CV_8U )
1636 Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
1637 else if( depth == CV_16U )
1638 Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
1639 else
1640 CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types");
1641 break;
1642
1643 case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
1644 case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
1645 {
1646 if (dcn <= 0)
1647 dcn = 3;
1648 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4) );
1649
1650 _dst.create(sz, CV_MAKE_TYPE(depth, dcn));
1651 Mat dst_ = _dst.getMat();
1652
1653 if( code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ||
1654 code == CV_BayerRG2BGR || code == CV_BayerGR2BGR )
1655 {
1656 if( depth == CV_8U )
1657 Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code);
1658 else if( depth == CV_16U )
1659 Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code);
1660 else
1661 CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types");
1662 }
1663 else
1664 {
1665 CV_Assert( depth == CV_8U );
1666 Bayer2RGB_VNG_8u(src, dst_, code);
1667 }
1668 }
1669 break;
1670
1671 case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
1672 if (dcn <= 0)
1673 dcn = 3;
1674
1675 CV_Assert(scn == 1 && dcn == 3);
1676 _dst.create(sz, CV_MAKETYPE(depth, dcn));
1677 dst = _dst.getMat();
1678
1679 if (depth == CV_8U)
1680 Bayer2RGB_EdgeAware_T<uchar, SIMDBayerInterpolator_8u>(src, dst, code);
1681 else if (depth == CV_16U)
1682 Bayer2RGB_EdgeAware_T<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code);
1683 else
1684 CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB Edge-Aware demosaicing only currently supports 8u and 16u types");
1685
1686 break;
1687
1688 default:
1689 CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" );
1690 }
1691 }
1692