• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "ColorConverter"
19 #include <android-base/macros.h>
20 #include <utils/Log.h>
21 
22 #include <media/stagefright/foundation/ADebug.h>
23 #include <media/stagefright/foundation/ALooper.h>
24 #include <media/stagefright/foundation/ColorUtils.h>
25 #include <media/stagefright/ColorConverter.h>
26 #include <media/stagefright/MediaCodecConstants.h>
27 #include <media/stagefright/MediaErrors.h>
28 
29 #include "libyuv/convert_from.h"
30 #include "libyuv/convert_argb.h"
31 #include "libyuv/planar_functions.h"
32 #include "libyuv/video_common.h"
33 #include <functional>
34 #include <sys/time.h>
35 
36 #define USE_LIBYUV
37 #define PERF_PROFILING 0
38 
39 
40 #if defined(__aarch64__) || defined(__ARM_NEON__)
41 #define USE_NEON_Y410 1
42 #else
43 #define USE_NEON_Y410 0
44 #endif
45 
46 #if USE_NEON_Y410
47 #include <arm_neon.h>
48 #endif
49 
50 namespace android {
51 
isRGB(OMX_COLOR_FORMATTYPE colorFormat)52 static bool isRGB(OMX_COLOR_FORMATTYPE colorFormat) {
53     return colorFormat == OMX_COLOR_Format16bitRGB565
54             || colorFormat == OMX_COLOR_Format32BitRGBA8888
55             || colorFormat == OMX_COLOR_Format32bitBGRA8888
56             || colorFormat == COLOR_Format32bitABGR2101010;
57 }
58 
isBt2020() const59 bool ColorConverter::ColorSpace::isBt2020() const {
60     return (mStandard == ColorUtils::kColorStandardBT2020);
61 }
62 
isH420() const63 bool ColorConverter::ColorSpace::isH420() const {
64     return (mStandard == ColorUtils::kColorStandardBT709)
65             && (mRange == ColorUtils::kColorRangeLimited);
66 }
67 
68 // the matrix coefficients are the same for both 601.625 and 601.525 standards
isI420() const69 bool ColorConverter::ColorSpace::isI420() const {
70     return ((mStandard == ColorUtils::kColorStandardBT601_625)
71             || (mStandard == ColorUtils::kColorStandardBT601_525))
72             && (mRange == ColorUtils::kColorRangeLimited);
73 }
74 
isJ420() const75 bool ColorConverter::ColorSpace::isJ420() const {
76     return ((mStandard == ColorUtils::kColorStandardBT601_625)
77             || (mStandard == ColorUtils::kColorStandardBT601_525))
78             && (mRange == ColorUtils::kColorRangeFull);
79 }
80 
81 /**
82  * This class approximates the standard YUV to RGB conversions by factoring the matrix
83  * coefficients to 1/256th-s (as dividing by 256 is easy to do with right shift). The chosen value
84  * of 256 is somewhat arbitrary and was not dependent on the bit-depth, but it does limit the
85  * precision of the matrix coefficients (KR & KB).
86  *
87  * The maximum color error after clipping from using 256 is a distance of:
88  *   0.4 (8-bit) / 1.4 (10-bit) for greens in BT.601
89  *   0.5 (8-bit) / 1.9 (10-bit) for cyans in BT.709, and
90  *   0.3 (8-bit) / 1.3 (10-bit) for violets in BT.2020 (it is 0.4 for 10-bit BT.2020 limited)
91  *
92  * Note for reference: libyuv is using a divisor of 64 instead of 256 to ensure no overflow in
93  * 16-bit math. The maximum color error for libyuv is 3.5 / 14.
94  *
95  * The clamping is done using a lookup vector where negative indices are mapped to 0
96  * and indices > 255 are mapped to 255. (For 10-bit these are clamped to 0 to 1023)
97  *
98  * The matrices are assumed to be of the following format (note the sign on the 2nd row):
99  *
100  * [ R ]     [ _y     0    _r_v ]   [ Y -  C16 ]
101  * [ G ]  =  [ _y  -_g_u  -_g_v ] * [ U - C128 ]
102  * [ B ]     [ _y   _b_u     0  ]   [ V - C128 ]
103  *
104  * C16 is 1 << (bitdepth - 4) for limited range, and 0 for full range
105  * C128 is 1 << (bitdepth - 1)
106  * C255 is (1 << bitdepth) - 1
107  *
108  * The min and max values from these equations determine the clip range needed for clamping:
109  *
110  * min = - (_y * C16 + max((_g_u + _g_v) * (C255-C128), max(_r_v, _b_u) * C128)) / 256
111  * max = (_y * (C255 - C16) + max((_g_u + _g_v) * C128, max(_r_v, _b_u) * (C255-C128)) + 128) / 256
112  */
113 
114 struct ColorConverter::Coeffs {
115     int32_t _y;
116     int32_t _r_v;
117     int32_t _g_u;
118     int32_t _g_v;
119     int32_t _b_u;
120 };
121 
122 /*
123 
124 Color conversion rules are dictated by ISO (e.g. ISO:IEC 23008:2)
125 
126 Limited range means Y is in [16, 235], U and V are in [16, 240] corresponding to [-0.5 to 0.5].
127 
128 Full range means Y is in [0, 255], U and V are in [0.5, 255.5] corresponding to [-0.5 to .5].
129 
130 RGB is always in full range ([0, 255])
131 
132 The color primaries determine the KR and KB values:
133 
134 
135 For full range (assuming 8-bits) ISO defines:
136 
137 (   Y   )   (  KR      1-KR-KB       KB  )
138 (       )   (                            )   (R)
139 (       )   (-KR/2   -(1-KR-KB)/2        )   ( )
140 (U - 128) = (-----   ------------    0.5 ) * (G)
141 (       )   ((1-KB)     (1-KB)           )   ( )
142 (       )   (                            )   (B)
143 (       )   (        -(1-KR-KB)/2  -KB/2 )
144 (V - 128)   ( 0.5    ------------  ----- )
145             (           (1-KR)     (1-KR))
146 
147 (the math is rounded, 128 is (1 << (bitdepth - 1)) )
148 
149 From this
150 
151 (R)      ( 1       0        2*(1-KR)   )   (   Y   )
152 ( )      (                             )   (       )
153 ( )      (    2*KB*(KB-1)  2*KR*(KR-1) )   (       )
154 (G)  =   ( 1  -----------  ----------- ) * (U - 128)
155 ( )      (      1-KR-KB      1-KR-KB   )   (       )
156 ( )      (                             )   (       )
157 (B)      ( 1   2*(1-KB)         0      )   (V - 128)
158 
159 For limited range, this becomes
160 
161 (R)      ( 1       0        2*(1-KR)   )   (255/219  0  0)   (Y -  16)
162 ( )      (                             )   (             )   (       )
163 ( )      (    2*KB*(KB-1)  2*KR*(KR-1) )   (             )   (       )
164 (G)  =   ( 1  -----------  ----------- ) * (0  255/224  0) * (U - 128)
165 ( )      (      1-KR-KB      1-KR-KB   )   (             )   (       )
166 ( )      (                             )   (             )   (       )
167 (B)      ( 1   2*(1-KB)         0      )   (0  0  255/224)   (V - 128)
168 
169 ( For non-8-bit, 16 is (1 << (bitdepth - 4)), 128 is (1 << (bitdepth - 1)),
170   255 is ((1 << bitdepth) - 1), 219 is (219 << (bitdepth - 8)) and
171   224 is (224 << (bitdepth - 8)), so the matrix coefficients slightly change. )
172 
173 */
174 
175 namespace {
176 
177 /**
178  * BT.601:  K_R = 0.299;  K_B = 0.114
179  *
180  * clip range 8-bit: [-277, 535], 10-bit: [-1111, 2155]
181  */
182 const struct ColorConverter::Coeffs BT601_FULL      = { 256, 359,  88, 183, 454 };
183 const struct ColorConverter::Coeffs BT601_LIMITED   = { 298, 409, 100, 208, 516 };
184 const struct ColorConverter::Coeffs BT601_LTD_10BIT = { 299, 410, 101, 209, 518 };
185 
186 /**
187  * BT.709:  K_R = 0.2126; K_B = 0.0722
188  *
189  * clip range 8-bit: [-289, 547], 10-bit: [-1159, 2202]
190  */
191 const struct ColorConverter::Coeffs BT709_FULL      = { 256, 403,  48, 120, 475 };
192 const struct ColorConverter::Coeffs BT709_LIMITED   = { 298, 459,  55, 136, 541 };
193 const struct ColorConverter::Coeffs BT709_LTD_10BIT = { 290, 460,  55, 137, 542 };
194 
195 /**
196  * BT.2020:  K_R = 0.2627; K_B = 0.0593
197  *
198  * clip range 8-bit: [-294, 552], 10-bit: [-1175, 2218]
199  *
200  * This is the largest clip range.
201  */
202 const struct ColorConverter::Coeffs BT2020_FULL      = { 256, 377,  42, 146, 482 };
203 const struct ColorConverter::Coeffs BT2020_LIMITED   = { 298, 430,  48, 167, 548 };
204 const struct ColorConverter::Coeffs BT2020_LTD_10BIT = { 299, 431,  48, 167, 550 };
205 
206 constexpr int CLIP_RANGE_MIN_8BIT = -294;
207 constexpr int CLIP_RANGE_MAX_8BIT = 552;
208 
209 constexpr int CLIP_RANGE_MIN_10BIT = -1175;
210 constexpr int CLIP_RANGE_MAX_10BIT = 2218;
211 
212 }
213 
ColorConverter(OMX_COLOR_FORMATTYPE from,OMX_COLOR_FORMATTYPE to)214 ColorConverter::ColorConverter(
215         OMX_COLOR_FORMATTYPE from, OMX_COLOR_FORMATTYPE to)
216     : mSrcFormat(from),
217       mDstFormat(to),
218       mSrcColorSpace({0, 0, 0}),
219       mClip(NULL),
220       mClip10Bit(NULL) {
221 }
222 
~ColorConverter()223 ColorConverter::~ColorConverter() {
224     delete[] mClip;
225     mClip = NULL;
226     delete[] mClip10Bit;
227     mClip10Bit = NULL;
228 }
229 
isValid() const230 bool ColorConverter::isValid() const {
231     switch ((int32_t)mSrcFormat) {
232         case OMX_COLOR_FormatYUV420Planar16:
233             if (mDstFormat == OMX_COLOR_FormatYUV444Y410) {
234                 return true;
235             }
236             FALLTHROUGH_INTENDED;
237         case OMX_COLOR_FormatYUV420Planar:
238             return mDstFormat == OMX_COLOR_Format16bitRGB565
239                     || mDstFormat == OMX_COLOR_Format32BitRGBA8888
240                     || mDstFormat == OMX_COLOR_Format32bitBGRA8888;
241 
242         case OMX_COLOR_FormatCbYCrY:
243         case OMX_QCOM_COLOR_FormatYVU420SemiPlanar:
244         case OMX_TI_COLOR_FormatYUV420PackedSemiPlanar:
245             return mDstFormat == OMX_COLOR_Format16bitRGB565;
246 
247         case OMX_COLOR_FormatYUV420SemiPlanar:
248 #ifdef USE_LIBYUV
249             return mDstFormat == OMX_COLOR_Format16bitRGB565
250                     || mDstFormat == OMX_COLOR_Format32BitRGBA8888
251                     || mDstFormat == OMX_COLOR_Format32bitBGRA8888;
252 #else
253             return mDstFormat == OMX_COLOR_Format16bitRGB565;
254 #endif
255         case COLOR_FormatYUVP010:
256             return mDstFormat == COLOR_Format32bitABGR2101010;
257 
258         default:
259             return false;
260     }
261 }
262 
isDstRGB() const263 bool ColorConverter::isDstRGB() const {
264     return isRGB(mDstFormat);
265 }
266 
setSrcColorSpace(uint32_t standard,uint32_t range,uint32_t transfer)267 void ColorConverter::setSrcColorSpace(
268         uint32_t standard, uint32_t range, uint32_t transfer) {
269     if (isRGB(mSrcFormat)) {
270         ALOGW("Can't set color space on RGB source");
271         return;
272     }
273     mSrcColorSpace.mStandard = standard;
274     mSrcColorSpace.mRange = range;
275     mSrcColorSpace.mTransfer = transfer;
276 }
277 
278 /*
279  * If stride is non-zero, client's stride will be used. For planar
280  * or semi-planar YUV formats, stride must be even numbers.
281  * If stride is zero, it will be calculated based on width and bpp
282  * of the format, assuming no padding on the right edge.
283  */
BitmapParams(void * bits,size_t width,size_t height,size_t stride,size_t cropLeft,size_t cropTop,size_t cropRight,size_t cropBottom,OMX_COLOR_FORMATTYPE colorFromat)284 ColorConverter::BitmapParams::BitmapParams(
285         void *bits,
286         size_t width, size_t height, size_t stride,
287         size_t cropLeft, size_t cropTop,
288         size_t cropRight, size_t cropBottom,
289         OMX_COLOR_FORMATTYPE colorFromat)
290     : mBits(bits),
291       mColorFormat(colorFromat),
292       mWidth(width),
293       mHeight(height),
294       mCropLeft(cropLeft),
295       mCropTop(cropTop),
296       mCropRight(cropRight),
297       mCropBottom(cropBottom) {
298     switch((int32_t)mColorFormat) {
299     case OMX_COLOR_Format16bitRGB565:
300     case OMX_COLOR_FormatYUV420Planar16:
301     case COLOR_FormatYUVP010:
302     case OMX_COLOR_FormatCbYCrY:
303         mBpp = 2;
304         mStride = 2 * mWidth;
305         break;
306 
307     case OMX_COLOR_Format32bitBGRA8888:
308     case OMX_COLOR_Format32BitRGBA8888:
309     case COLOR_Format32bitABGR2101010:
310     case OMX_COLOR_FormatYUV444Y410:
311         mBpp = 4;
312         mStride = 4 * mWidth;
313         break;
314 
315     case OMX_COLOR_FormatYUV420Planar:
316     case OMX_QCOM_COLOR_FormatYVU420SemiPlanar:
317     case OMX_COLOR_FormatYUV420SemiPlanar:
318     case OMX_TI_COLOR_FormatYUV420PackedSemiPlanar:
319         mBpp = 1;
320         mStride = mWidth;
321         break;
322 
323     default:
324         ALOGE("Unsupported color format %d", mColorFormat);
325         mBpp = 1;
326         mStride = mWidth;
327         break;
328     }
329     // use client's stride if it's specified.
330     if (stride != 0) {
331         mStride = stride;
332     }
333 }
334 
cropWidth() const335 size_t ColorConverter::BitmapParams::cropWidth() const {
336     return mCropRight - mCropLeft + 1;
337 }
338 
cropHeight() const339 size_t ColorConverter::BitmapParams::cropHeight() const {
340     return mCropBottom - mCropTop + 1;
341 }
342 
isValid() const343 bool ColorConverter::BitmapParams::isValid() const {
344     if (!((mStride & 1) == 0  // stride must be even
345         && mStride >= mBpp * cropWidth())) {
346             return false;
347     }
348     return true;
349 }
350 
convert(const void * srcBits,size_t srcWidth,size_t srcHeight,size_t srcStride,size_t srcCropLeft,size_t srcCropTop,size_t srcCropRight,size_t srcCropBottom,void * dstBits,size_t dstWidth,size_t dstHeight,size_t dstStride,size_t dstCropLeft,size_t dstCropTop,size_t dstCropRight,size_t dstCropBottom)351 status_t ColorConverter::convert(
352         const void *srcBits,
353         size_t srcWidth, size_t srcHeight, size_t srcStride,
354         size_t srcCropLeft, size_t srcCropTop,
355         size_t srcCropRight, size_t srcCropBottom,
356         void *dstBits,
357         size_t dstWidth, size_t dstHeight, size_t dstStride,
358         size_t dstCropLeft, size_t dstCropTop,
359         size_t dstCropRight, size_t dstCropBottom) {
360     BitmapParams src(
361             const_cast<void *>(srcBits),
362             srcWidth, srcHeight, srcStride,
363             srcCropLeft, srcCropTop, srcCropRight, srcCropBottom, mSrcFormat);
364 
365     BitmapParams dst(
366             dstBits,
367             dstWidth, dstHeight, dstStride,
368             dstCropLeft, dstCropTop, dstCropRight, dstCropBottom, mDstFormat);
369 
370     if (!(src.isValid()
371             && dst.isValid()
372             && (src.mCropLeft & 1) == 0
373             && src.cropWidth() == dst.cropWidth()
374             && src.cropHeight() == dst.cropHeight())) {
375         return ERROR_UNSUPPORTED;
376     }
377 
378     status_t err;
379 
380     switch ((int32_t)mSrcFormat) {
381         case OMX_COLOR_FormatYUV420Planar:
382 #ifdef USE_LIBYUV
383             err = convertYUV420PlanarUseLibYUV(src, dst);
384 #else
385             err = convertYUV420Planar(src, dst);
386 #endif
387             break;
388 
389         case OMX_COLOR_FormatYUV420Planar16:
390         {
391 #if PERF_PROFILING
392             int64_t startTimeUs = ALooper::GetNowUs();
393 #endif
394             err = convertYUV420Planar16(src, dst);
395 #if PERF_PROFILING
396             int64_t endTimeUs = ALooper::GetNowUs();
397             ALOGD("convertYUV420Planar16 took %lld us", (long long) (endTimeUs - startTimeUs));
398 #endif
399             break;
400         }
401 
402         case COLOR_FormatYUVP010:
403         {
404 #if PERF_PROFILING
405             int64_t startTimeUs = ALooper::GetNowUs();
406 #endif
407             err = convertYUVP010(src, dst);
408 #if PERF_PROFILING
409             int64_t endTimeUs = ALooper::GetNowUs();
410             ALOGD("convertYUVP010 took %lld us", (long long) (endTimeUs - startTimeUs));
411 #endif
412             break;
413         }
414 
415         case OMX_COLOR_FormatCbYCrY:
416             err = convertCbYCrY(src, dst);
417             break;
418 
419         case OMX_QCOM_COLOR_FormatYVU420SemiPlanar:
420             err = convertQCOMYUV420SemiPlanar(src, dst);
421             break;
422 
423         case OMX_COLOR_FormatYUV420SemiPlanar:
424 #ifdef USE_LIBYUV
425             err = convertYUV420SemiPlanarUseLibYUV(src, dst);
426 #else
427             err = convertYUV420SemiPlanar(src, dst);
428 #endif
429             break;
430 
431         case OMX_TI_COLOR_FormatYUV420PackedSemiPlanar:
432             err = convertTIYUV420PackedSemiPlanar(src, dst);
433             break;
434 
435         default:
436         {
437             CHECK(!"Should not be here. Unknown color conversion.");
438             break;
439         }
440     }
441 
442     return err;
443 }
444 
getMatrix() const445 const struct ColorConverter::Coeffs *ColorConverter::getMatrix() const {
446     const bool isFullRange = mSrcColorSpace.mRange == ColorUtils::kColorRangeFull;
447     const bool is10Bit = (mSrcFormat == COLOR_FormatYUVP010
448             || mSrcFormat == OMX_COLOR_FormatYUV420Planar16);
449 
450     switch (mSrcColorSpace.mStandard) {
451     case ColorUtils::kColorStandardBT601_525:
452     case ColorUtils::kColorStandardBT601_625:
453         return (isFullRange ? &BT601_FULL :
454                 is10Bit ? &BT601_LTD_10BIT : &BT601_LIMITED);
455 
456     case ColorUtils::kColorStandardBT709:
457         return (isFullRange ? &BT709_FULL :
458                 is10Bit ? &BT709_LTD_10BIT : &BT709_LIMITED);
459 
460     case ColorUtils::kColorStandardBT2020:
461         return (isFullRange ? &BT2020_FULL :
462                 is10Bit ? &BT2020_LTD_10BIT : &BT2020_LIMITED);
463 
464     default:
465         // for now use the default matrices for unhandled color spaces
466         // TODO: fail?
467         // return nullptr;
468         [[fallthrough]];
469 
470     case ColorUtils::kColorStandardUnspecified:
471         return is10Bit ? &BT2020_LTD_10BIT : &BT601_LIMITED;
472 
473     }
474 }
475 
476 // Interleaved YUV 422 CbYCrY to RGB565
convertCbYCrY(const BitmapParams & src,const BitmapParams & dst)477 status_t ColorConverter::convertCbYCrY(
478         const BitmapParams &src, const BitmapParams &dst) {
479     // XXX Untested
480 
481     const struct Coeffs *matrix = getMatrix();
482     if (!matrix) {
483         return ERROR_UNSUPPORTED;
484     }
485 
486     signed _b_u = matrix->_b_u;
487     signed _neg_g_u = -matrix->_g_u;
488     signed _neg_g_v = -matrix->_g_v;
489     signed _r_v = matrix->_r_v;
490     signed _y = matrix->_y;
491     signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 16 : 0;
492 
493     uint8_t *kAdjustedClip = initClip();
494 
495     uint16_t *dst_ptr = (uint16_t *)dst.mBits
496         + dst.mCropTop * dst.mWidth + dst.mCropLeft;
497 
498     const uint8_t *src_ptr = (const uint8_t *)src.mBits
499         + (src.mCropTop * src.mWidth + src.mCropLeft) * 2;
500 
501     for (size_t y = 0; y < src.cropHeight(); ++y) {
502         for (size_t x = 0; x < src.cropWidth() - 1; x += 2) {
503             signed y1 = (signed)src_ptr[2 * x + 1] - _c16;
504             signed y2 = (signed)src_ptr[2 * x + 3] - _c16;
505             signed u = (signed)src_ptr[2 * x] - 128;
506             signed v = (signed)src_ptr[2 * x + 2] - 128;
507 
508             signed u_b = u * _b_u;
509             signed u_g = u * _neg_g_u;
510             signed v_g = v * _neg_g_v;
511             signed v_r = v * _r_v;
512 
513             signed tmp1 = y1 * _y + 128;
514             signed b1 = (tmp1 + u_b) / 256;
515             signed g1 = (tmp1 + v_g + u_g) / 256;
516             signed r1 = (tmp1 + v_r) / 256;
517 
518             signed tmp2 = y2 * _y + 128;
519             signed b2 = (tmp2 + u_b) / 256;
520             signed g2 = (tmp2 + v_g + u_g) / 256;
521             signed r2 = (tmp2 + v_r) / 256;
522 
523             uint32_t rgb1 =
524                 ((kAdjustedClip[r1] >> 3) << 11)
525                 | ((kAdjustedClip[g1] >> 2) << 5)
526                 | (kAdjustedClip[b1] >> 3);
527 
528             uint32_t rgb2 =
529                 ((kAdjustedClip[r2] >> 3) << 11)
530                 | ((kAdjustedClip[g2] >> 2) << 5)
531                 | (kAdjustedClip[b2] >> 3);
532 
533             if (x + 1 < src.cropWidth()) {
534                 *(uint32_t *)(&dst_ptr[x]) = (rgb2 << 16) | rgb1;
535             } else {
536                 dst_ptr[x] = rgb1;
537             }
538         }
539 
540         src_ptr += src.mWidth * 2;
541         dst_ptr += dst.mWidth;
542     }
543 
544     return OK;
545 }
546 
/*
    libyuv supports the following color spaces:

    I420: BT.601 limited range
    J420: BT.601 full range (jpeg)
    H420: BT.709 limited range

    DECLARE_YUV2RGBFUNC(fn, fmt) declares a function pointer named |fn| and points it at
    the libyuv planar conversion to |fmt| that matches mSrcColorSpace: H420To<fmt> or
    J420To<fmt> when the color space is one of those, I420To<fmt> for everything else.
*/

#define DECLARE_YUV2RGBFUNC(fn, fmt)                            \
        int (*fn)(const uint8_t*, int, const uint8_t*, int,     \
                  const uint8_t*, int, uint8_t*, int, int, int) \
            = mSrcColorSpace.isH420() ? libyuv::H420To##fmt     \
            : mSrcColorSpace.isJ420() ? libyuv::J420To##fmt     \
            : libyuv::I420To##fmt
562 
convertYUV420PlanarUseLibYUV(const BitmapParams & src,const BitmapParams & dst)563 status_t ColorConverter::convertYUV420PlanarUseLibYUV(
564         const BitmapParams &src, const BitmapParams &dst) {
565     // Fall back to our conversion if libyuv does not support the color space.
566     // I420 (BT.601 limited) is default, so don't fall back if we end up using it anyway.
567     if (!mSrcColorSpace.isH420() && !mSrcColorSpace.isJ420()
568             // && !mSrcColorSpace.isI420() /* same as line below */
569             && getMatrix() != &BT601_LIMITED) {
570         return convertYUV420Planar(src, dst);
571     }
572 
573     uint8_t *dst_ptr = (uint8_t *)dst.mBits
574         + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
575 
576     const uint8_t *src_y =
577         (const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft;
578 
579     const uint8_t *src_u =
580         (const uint8_t *)src.mBits + src.mStride * src.mHeight
581         + (src.mCropTop / 2) * (src.mStride / 2) + (src.mCropLeft / 2);
582 
583     const uint8_t *src_v =
584         src_u + (src.mStride / 2) * (src.mHeight / 2);
585 
586     switch (mDstFormat) {
587     case OMX_COLOR_Format16bitRGB565:
588     {
589         DECLARE_YUV2RGBFUNC(func, RGB565);
590         (*func)(src_y, src.mStride, src_u, src.mStride / 2, src_v, src.mStride / 2,
591                 (uint8_t *)dst_ptr, dst.mStride, src.cropWidth(), src.cropHeight());
592         break;
593     }
594 
595     case OMX_COLOR_Format32BitRGBA8888:
596     {
597         DECLARE_YUV2RGBFUNC(func, ABGR);
598         (*func)(src_y, src.mStride, src_u, src.mStride / 2, src_v, src.mStride / 2,
599                 (uint8_t *)dst_ptr, dst.mStride, src.cropWidth(), src.cropHeight());
600         break;
601     }
602 
603     case OMX_COLOR_Format32bitBGRA8888:
604     {
605         DECLARE_YUV2RGBFUNC(func, ARGB);
606         (*func)(src_y, src.mStride, src_u, src.mStride / 2, src_v, src.mStride / 2,
607                 (uint8_t *)dst_ptr, dst.mStride, src.cropWidth(), src.cropHeight());
608         break;
609     }
610 
611     default:
612         return ERROR_UNSUPPORTED;
613     }
614 
615     return OK;
616 }
617 
convertYUV420SemiPlanarUseLibYUV(const BitmapParams & src,const BitmapParams & dst)618 status_t ColorConverter::convertYUV420SemiPlanarUseLibYUV(
619         const BitmapParams &src, const BitmapParams &dst) {
620     // Fall back to our conversion if libyuv does not support the color space.
621     // libyuv only supports BT.601 limited range NV12. Don't fall back if we end up using it anyway.
622     if (// !mSrcColorSpace.isI420() && /* same as below */
623         getMatrix() != &BT601_LIMITED) {
624         return convertYUV420SemiPlanar(src, dst);
625     }
626 
627     uint8_t *dst_ptr = (uint8_t *)dst.mBits
628         + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
629 
630     const uint8_t *src_y =
631         (const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft;
632 
633     const uint8_t *src_u =
634         (const uint8_t *)src.mBits + src.mStride * src.mHeight
635         + (src.mCropTop / 2) * src.mStride + src.mCropLeft;
636 
637     switch (mDstFormat) {
638     case OMX_COLOR_Format16bitRGB565:
639         libyuv::NV12ToRGB565(src_y, src.mStride, src_u, src.mStride, (uint8_t *)dst_ptr,
640                 dst.mStride, src.cropWidth(), src.cropHeight());
641         break;
642 
643     case OMX_COLOR_Format32bitBGRA8888:
644         libyuv::NV12ToARGB(src_y, src.mStride, src_u, src.mStride, (uint8_t *)dst_ptr,
645                 dst.mStride, src.cropWidth(), src.cropHeight());
646         break;
647 
648     case OMX_COLOR_Format32BitRGBA8888:
649         libyuv::NV12ToABGR(src_y, src.mStride, src_u, src.mStride, (uint8_t *)dst_ptr,
650                 dst.mStride, src.cropWidth(), src.cropHeight());
651         break;
652 
653     default:
654         return ERROR_UNSUPPORTED;
655    }
656 
657    return OK;
658 }
659 
660 std::function<void (void *, void *, void *, size_t,
661                     signed *, signed *, signed *, signed *)>
getReadFromSrc(OMX_COLOR_FORMATTYPE srcFormat)662 getReadFromSrc(OMX_COLOR_FORMATTYPE srcFormat) {
663     switch(srcFormat) {
664     case OMX_COLOR_FormatYUV420Planar:
665         return [](void *src_y, void *src_u, void *src_v, size_t x,
666                   signed *y1, signed *y2, signed *u, signed *v) {
667             *y1 = ((uint8_t*)src_y)[x];
668             *y2 = ((uint8_t*)src_y)[x + 1];
669             *u = ((uint8_t*)src_u)[x / 2] - 128;
670             *v = ((uint8_t*)src_v)[x / 2] - 128;
671         };
672     // this format stores 10 bits content with 16 bits
673     // converting it to 8 bits src
674     case OMX_COLOR_FormatYUV420Planar16:
675         return [](void *src_y, void *src_u, void *src_v, size_t x,
676                 signed *y1, signed *y2, signed *u, signed *v) {
677             *y1 = (uint8_t)(((uint16_t*)src_y)[x] >> 2);
678             *y2 = (uint8_t)(((uint16_t*)src_y)[x + 1] >> 2);
679             *u = (uint8_t)(((uint16_t*)src_u)[x / 2] >> 2) - 128;
680             *v = (uint8_t)(((uint16_t*)src_v)[x / 2] >> 2) - 128;
681         };
682     default:
683         TRESPASS();
684     }
685     return nullptr;
686 }
687 
688 // TRICKY: this method only supports RGBA_1010102 output for 10-bit sources, and all other outputs
689 // for 8-bit sources as the type of kAdjustedClip is hardcoded based on output, not input.
690 std::function<void (void *, bool, signed, signed, signed, signed, signed, signed)>
getWriteToDst(OMX_COLOR_FORMATTYPE dstFormat,void * kAdjustedClip)691 getWriteToDst(OMX_COLOR_FORMATTYPE dstFormat, void *kAdjustedClip) {
692     switch ((int)dstFormat) {
693     case OMX_COLOR_Format16bitRGB565:
694     {
695         return [kAdjustedClip](void *dst_ptr, bool uncropped,
696                                signed r1, signed g1, signed b1,
697                                signed r2, signed g2, signed b2) {
698             uint32_t rgb1 =
699                 ((((uint8_t *)kAdjustedClip)[r1] >> 3) << 11)
700                 | ((((uint8_t *)kAdjustedClip)[g1] >> 2) << 5)
701                 | (((uint8_t *)kAdjustedClip)[b1] >> 3);
702 
703             if (uncropped) {
704                 uint32_t rgb2 =
705                     ((((uint8_t *)kAdjustedClip)[r2] >> 3) << 11)
706                     | ((((uint8_t *)kAdjustedClip)[g2] >> 2) << 5)
707                     | (((uint8_t *)kAdjustedClip)[b2] >> 3);
708 
709                 *(uint32_t *)dst_ptr = (rgb2 << 16) | rgb1;
710             } else {
711                 *(uint16_t *)dst_ptr = rgb1;
712             }
713         };
714     }
715     case OMX_COLOR_Format32BitRGBA8888:
716     {
717         return [kAdjustedClip](void *dst_ptr, bool uncropped,
718                                signed r1, signed g1, signed b1,
719                                signed r2, signed g2, signed b2) {
720             ((uint32_t *)dst_ptr)[0] =
721                     (((uint8_t *)kAdjustedClip)[r1])
722                     | (((uint8_t *)kAdjustedClip)[g1] << 8)
723                     | (((uint8_t *)kAdjustedClip)[b1] << 16)
724                     | (0xFF << 24);
725 
726             if (uncropped) {
727                 ((uint32_t *)dst_ptr)[1] =
728                         (((uint8_t *)kAdjustedClip)[r2])
729                         | (((uint8_t *)kAdjustedClip)[g2] << 8)
730                         | (((uint8_t *)kAdjustedClip)[b2] << 16)
731                         | (0xFF << 24);
732             }
733         };
734     }
735     case OMX_COLOR_Format32bitBGRA8888:
736     {
737         return [kAdjustedClip](void *dst_ptr, bool uncropped,
738                                signed r1, signed g1, signed b1,
739                                signed r2, signed g2, signed b2) {
740             ((uint32_t *)dst_ptr)[0] =
741                     (((uint8_t *)kAdjustedClip)[b1])
742                     | (((uint8_t *)kAdjustedClip)[g1] << 8)
743                     | (((uint8_t *)kAdjustedClip)[r1] << 16)
744                     | (0xFF << 24);
745 
746             if (uncropped) {
747                 ((uint32_t *)dst_ptr)[1] =
748                         (((uint8_t *)kAdjustedClip)[b2])
749                         | (((uint8_t *)kAdjustedClip)[g2] << 8)
750                         | (((uint8_t *)kAdjustedClip)[r2] << 16)
751                         | (0xFF << 24);
752             }
753         };
754     }
755     case COLOR_Format32bitABGR2101010:
756     {
757         return [kAdjustedClip](void *dst_ptr, bool uncropped,
758                                signed r1, signed g1, signed b1,
759                                signed r2, signed g2, signed b2) {
760             ((uint32_t *)dst_ptr)[0] =
761                     (((uint16_t *)kAdjustedClip)[r1])
762                     | (((uint16_t *)kAdjustedClip)[g1] << 10)
763                     | (((uint16_t *)kAdjustedClip)[b1] << 20)
764                     | (3 << 30);
765 
766             if (uncropped) {
767                 ((uint32_t *)dst_ptr)[1] =
768                         (((uint16_t *)kAdjustedClip)[r2])
769                         | (((uint16_t *)kAdjustedClip)[g2] << 10)
770                         | (((uint16_t *)kAdjustedClip)[b2] << 20)
771                         | (3 << 30);
772             }
773         };
774     }
775 
776     default:
777         TRESPASS();
778     }
779     return nullptr;
780 }
781 
convertYUV420Planar(const BitmapParams & src,const BitmapParams & dst)782 status_t ColorConverter::convertYUV420Planar(
783         const BitmapParams &src, const BitmapParams &dst) {
784     const struct Coeffs *matrix = getMatrix();
785     if (!matrix) {
786         return ERROR_UNSUPPORTED;
787     }
788 
789     signed _b_u = matrix->_b_u;
790     signed _neg_g_u = -matrix->_g_u;
791     signed _neg_g_v = -matrix->_g_v;
792     signed _r_v = matrix->_r_v;
793     signed _y = matrix->_y;
794     signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 16 : 0;
795 
796     uint8_t *kAdjustedClip = initClip();
797 
798     auto readFromSrc = getReadFromSrc(mSrcFormat);
799     auto writeToDst = getWriteToDst(mDstFormat, (void *)kAdjustedClip);
800 
801     uint8_t *dst_ptr = (uint8_t *)dst.mBits
802             + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
803 
804     uint8_t *src_y = (uint8_t *)src.mBits
805             + src.mCropTop * src.mStride + src.mCropLeft * src.mBpp;
806 
807     uint8_t *src_u = (uint8_t *)src.mBits + src.mStride * src.mHeight
808             + (src.mCropTop / 2) * (src.mStride / 2) + src.mCropLeft / 2 * src.mBpp;
809 
810     uint8_t *src_v = src_u + (src.mStride / 2) * (src.mHeight / 2);
811 
812     for (size_t y = 0; y < src.cropHeight(); ++y) {
813         for (size_t x = 0; x < src.cropWidth(); x += 2) {
814             signed y1, y2, u, v;
815             readFromSrc(src_y, src_u, src_v, x, &y1, &y2, &u, &v);
816 
817             signed u_b = u * _b_u;
818             signed u_g = u * _neg_g_u;
819             signed v_g = v * _neg_g_v;
820             signed v_r = v * _r_v;
821 
822             signed tmp1 = (y1 - _c16) * _y + 128;
823             signed b1 = (tmp1 + u_b) / 256;
824             signed g1 = (tmp1 + v_g + u_g) / 256;
825             signed r1 = (tmp1 + v_r) / 256;
826 
827             signed tmp2 = (y2 - _c16) * _y + 128;
828             signed b2 = (tmp2 + u_b) / 256;
829             signed g2 = (tmp2 + v_g + u_g) / 256;
830             signed r2 = (tmp2 + v_r) / 256;
831 
832             bool uncropped = x + 1 < src.cropWidth();
833             writeToDst(dst_ptr + x * dst.mBpp, uncropped, r1, g1, b1, r2, g2, b2);
834         }
835 
836         src_y += src.mStride;
837 
838         if (y & 1) {
839             src_u += src.mStride / 2;
840             src_v += src.mStride / 2;
841         }
842 
843         dst_ptr += dst.mStride;
844     }
845 
846     return OK;
847 }
848 
convertYUV420Planar16(const BitmapParams & src,const BitmapParams & dst)849 status_t ColorConverter::convertYUV420Planar16(
850         const BitmapParams &src, const BitmapParams &dst) {
851     if (mDstFormat == OMX_COLOR_FormatYUV444Y410) {
852         return convertYUV420Planar16ToY410(src, dst);
853     }
854 
855     return convertYUV420Planar(src, dst);
856 }
857 
convertYUVP010(const BitmapParams & src,const BitmapParams & dst)858 status_t ColorConverter::convertYUVP010(
859         const BitmapParams &src, const BitmapParams &dst) {
860     if (mDstFormat == COLOR_Format32bitABGR2101010) {
861         return convertYUVP010ToRGBA1010102(src, dst);
862     }
863 
864     return ERROR_UNSUPPORTED;
865 }
866 
convertYUVP010ToRGBA1010102(const BitmapParams & src,const BitmapParams & dst)867 status_t ColorConverter::convertYUVP010ToRGBA1010102(
868         const BitmapParams &src, const BitmapParams &dst) {
869     const struct Coeffs *matrix = getMatrix();
870     if (!matrix) {
871         return ERROR_UNSUPPORTED;
872     }
873 
874     signed _b_u = matrix->_b_u;
875     signed _neg_g_u = -matrix->_g_u;
876     signed _neg_g_v = -matrix->_g_v;
877     signed _r_v = matrix->_r_v;
878     signed _y = matrix->_y;
879     signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 64 : 0;
880 
881     uint16_t *kAdjustedClip10bit = initClip10Bit();
882 
883 //    auto readFromSrc = getReadFromSrc(mSrcFormat);
884     auto writeToDst = getWriteToDst(mDstFormat, (void *)kAdjustedClip10bit);
885 
886     uint8_t *dst_ptr = (uint8_t *)dst.mBits
887             + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
888 
889     uint16_t *src_y = (uint16_t *)((uint8_t *)src.mBits
890             + src.mCropTop * src.mStride + src.mCropLeft * src.mBpp);
891 
892     uint16_t *src_uv = (uint16_t *)((uint8_t *)src.mBits
893             + src.mStride * src.mHeight
894             + (src.mCropTop / 2) * src.mStride + src.mCropLeft * src.mBpp);
895 
896     for (size_t y = 0; y < src.cropHeight(); ++y) {
897         for (size_t x = 0; x < src.cropWidth(); x += 2) {
898             signed y1, y2, u, v;
899             y1 = (src_y[x] >> 6) - _c16;
900             y2 = (src_y[x + 1] >> 6) - _c16;
901             u = int(src_uv[x] >> 6) - 512;
902             v = int(src_uv[x + 1] >> 6) - 512;
903 
904             signed u_b = u * _b_u;
905             signed u_g = u * _neg_g_u;
906             signed v_g = v * _neg_g_v;
907             signed v_r = v * _r_v;
908 
909             signed tmp1 = y1 * _y + 128;
910             signed b1 = (tmp1 + u_b) / 256;
911             signed g1 = (tmp1 + v_g + u_g) / 256;
912             signed r1 = (tmp1 + v_r) / 256;
913 
914             signed tmp2 = y2 * _y + 128;
915             signed b2 = (tmp2 + u_b) / 256;
916             signed g2 = (tmp2 + v_g + u_g) / 256;
917             signed r2 = (tmp2 + v_r) / 256;
918 
919             bool uncropped = x + 1 < src.cropWidth();
920 
921             writeToDst(dst_ptr + x * dst.mBpp, uncropped, r1, g1, b1, r2, g2, b2);
922         }
923 
924         src_y += src.mStride / 2;
925 
926         if (y & 1) {
927             src_uv += src.mStride / 2;
928         }
929 
930         dst_ptr += dst.mStride;
931     }
932 
933     return OK;
934 }
935 
936 
937 #if !USE_NEON_Y410
938 
convertYUV420Planar16ToY410(const BitmapParams & src,const BitmapParams & dst)939 status_t ColorConverter::convertYUV420Planar16ToY410(
940         const BitmapParams &src, const BitmapParams &dst) {
941     uint8_t *dst_ptr = (uint8_t *)dst.mBits
942         + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
943 
944     const uint8_t *src_y =
945         (const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft * src.mBpp;
946 
947     const uint8_t *src_u =
948         (const uint8_t *)src.mBits + src.mStride * src.mHeight
949         + (src.mCropTop / 2) * (src.mStride / 2) + (src.mCropLeft / 2) * src.mBpp;
950 
951     const uint8_t *src_v =
952         src_u + (src.mStride / 2) * (src.mHeight / 2);
953 
954     // Converting two lines at a time, slightly faster
955     for (size_t y = 0; y < src.cropHeight(); y += 2) {
956         uint32_t *dst_top = (uint32_t *) dst_ptr;
957         uint32_t *dst_bot = (uint32_t *) (dst_ptr + dst.mStride);
958         uint16_t *ptr_ytop = (uint16_t*) src_y;
959         uint16_t *ptr_ybot = (uint16_t*) (src_y + src.mStride);
960         uint16_t *ptr_u = (uint16_t*) src_u;
961         uint16_t *ptr_v = (uint16_t*) src_v;
962 
963         uint32_t u01, v01, y01, y23, y45, y67, uv0, uv1;
964         size_t x = 0;
965         for (; x < src.cropWidth() - 3; x += 4) {
966             u01 = *((uint32_t*)ptr_u); ptr_u += 2;
967             v01 = *((uint32_t*)ptr_v); ptr_v += 2;
968 
969             y01 = *((uint32_t*)ptr_ytop); ptr_ytop += 2;
970             y23 = *((uint32_t*)ptr_ytop); ptr_ytop += 2;
971             y45 = *((uint32_t*)ptr_ybot); ptr_ybot += 2;
972             y67 = *((uint32_t*)ptr_ybot); ptr_ybot += 2;
973 
974             uv0 = (u01 & 0x3FF) | ((v01 & 0x3FF) << 20);
975             uv1 = (u01 >> 16) | ((v01 >> 16) << 20);
976 
977             *dst_top++ = ((y01 & 0x3FF) << 10) | uv0;
978             *dst_top++ = ((y01 >> 16) << 10) | uv0;
979             *dst_top++ = ((y23 & 0x3FF) << 10) | uv1;
980             *dst_top++ = ((y23 >> 16) << 10) | uv1;
981 
982             *dst_bot++ = ((y45 & 0x3FF) << 10) | uv0;
983             *dst_bot++ = ((y45 >> 16) << 10) | uv0;
984             *dst_bot++ = ((y67 & 0x3FF) << 10) | uv1;
985             *dst_bot++ = ((y67 >> 16) << 10) | uv1;
986         }
987 
988         // There should be at most 2 more pixels to process. Note that we don't
989         // need to consider odd case as the buffer is always aligned to even.
990         if (x < src.cropWidth()) {
991             u01 = *ptr_u;
992             v01 = *ptr_v;
993             y01 = *((uint32_t*)ptr_ytop);
994             y45 = *((uint32_t*)ptr_ybot);
995             uv0 = (u01 & 0x3FF) | ((v01 & 0x3FF) << 20);
996             *dst_top++ = ((y01 & 0x3FF) << 10) | uv0;
997             *dst_top++ = ((y01 >> 16) << 10) | uv0;
998             *dst_bot++ = ((y45 & 0x3FF) << 10) | uv0;
999             *dst_bot++ = ((y45 >> 16) << 10) | uv0;
1000         }
1001 
1002         src_y += src.mStride * 2;
1003         src_u += src.mStride / 2;
1004         src_v += src.mStride / 2;
1005         dst_ptr += dst.mStride * 2;
1006     }
1007 
1008     return OK;
1009 }
1010 
1011 #else
1012 
convertYUV420Planar16ToY410(const BitmapParams & src,const BitmapParams & dst)1013 status_t ColorConverter::convertYUV420Planar16ToY410(
1014         const BitmapParams &src, const BitmapParams &dst) {
1015     uint8_t *out = (uint8_t *)dst.mBits
1016         + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
1017 
1018     const uint8_t *src_y =
1019         (const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft * src.mBpp;
1020 
1021     const uint8_t *src_u =
1022         (const uint8_t *)src.mBits + src.mStride * src.mHeight
1023         + (src.mCropTop / 2) * (src.mStride / 2) + (src.mCropLeft / 2) * src.mBpp;
1024 
1025     const uint8_t *src_v =
1026         src_u + (src.mStride / 2) * (src.mHeight / 2);
1027 
1028     for (size_t y = 0; y < src.cropHeight(); y++) {
1029         uint16_t *ptr_y = (uint16_t*) src_y;
1030         uint16_t *ptr_u = (uint16_t*) src_u;
1031         uint16_t *ptr_v = (uint16_t*) src_v;
1032         uint32_t *ptr_out = (uint32_t *) out;
1033 
1034         // Process 16-pixel at a time.
1035         uint32_t *ptr_limit = ptr_out + (src.cropWidth() & ~15);
1036         while (ptr_out < ptr_limit) {
1037             uint16x4_t u0123 = vld1_u16(ptr_u); ptr_u += 4;
1038             uint16x4_t u4567 = vld1_u16(ptr_u); ptr_u += 4;
1039             uint16x4_t v0123 = vld1_u16(ptr_v); ptr_v += 4;
1040             uint16x4_t v4567 = vld1_u16(ptr_v); ptr_v += 4;
1041             uint16x4_t y0123 = vld1_u16(ptr_y); ptr_y += 4;
1042             uint16x4_t y4567 = vld1_u16(ptr_y); ptr_y += 4;
1043             uint16x4_t y89ab = vld1_u16(ptr_y); ptr_y += 4;
1044             uint16x4_t ycdef = vld1_u16(ptr_y); ptr_y += 4;
1045 
1046             uint32x2_t uvtempl;
1047             uint32x4_t uvtempq;
1048 
1049             uvtempq = vaddw_u16(vshll_n_u16(v0123, 20), u0123);
1050 
1051             uvtempl = vget_low_u32(uvtempq);
1052             uint32x4_t uv0011 = vreinterpretq_u32_u64(
1053                     vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
1054 
1055             uvtempl = vget_high_u32(uvtempq);
1056             uint32x4_t uv2233 = vreinterpretq_u32_u64(
1057                     vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
1058 
1059             uvtempq = vaddw_u16(vshll_n_u16(v4567, 20), u4567);
1060 
1061             uvtempl = vget_low_u32(uvtempq);
1062             uint32x4_t uv4455 = vreinterpretq_u32_u64(
1063                     vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
1064 
1065             uvtempl = vget_high_u32(uvtempq);
1066             uint32x4_t uv6677 = vreinterpretq_u32_u64(
1067                     vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
1068 
1069             uint32x4_t dsttemp;
1070 
1071             dsttemp = vorrq_u32(uv0011, vshll_n_u16(y0123, 10));
1072             vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
1073 
1074             dsttemp = vorrq_u32(uv2233, vshll_n_u16(y4567, 10));
1075             vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
1076 
1077             dsttemp = vorrq_u32(uv4455, vshll_n_u16(y89ab, 10));
1078             vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
1079 
1080             dsttemp = vorrq_u32(uv6677, vshll_n_u16(ycdef, 10));
1081             vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
1082         }
1083 
1084         src_y += src.mStride;
1085         if (y & 1) {
1086             src_u += src.mStride / 2;
1087             src_v += src.mStride / 2;
1088         }
1089         out += dst.mStride;
1090     }
1091 
1092     // Process the left-overs out-of-loop, 2-pixel at a time. Note that we don't
1093     // need to consider odd case as the buffer is always aligned to even.
1094     if (src.cropWidth() & 15) {
1095         size_t xstart = (src.cropWidth() & ~15);
1096 
1097         uint8_t *out = (uint8_t *)dst.mBits + dst.mCropTop * dst.mStride
1098                 + (dst.mCropLeft + xstart) * dst.mBpp;
1099 
1100         const uint8_t *src_y = (const uint8_t *)src.mBits + src.mCropTop * src.mStride
1101                 + (src.mCropLeft + xstart) * src.mBpp;
1102 
1103         const uint8_t *src_u = (const uint8_t *)src.mBits + src.mStride * src.mHeight
1104             + (src.mCropTop / 2) * (src.mStride / 2)
1105             + ((src.mCropLeft + xstart) / 2) * src.mBpp;
1106 
1107         const uint8_t *src_v = src_u + (src.mStride / 2) * (src.mHeight / 2);
1108 
1109         for (size_t y = 0; y < src.cropHeight(); y++) {
1110             uint16_t *ptr_y = (uint16_t*) src_y;
1111             uint16_t *ptr_u = (uint16_t*) src_u;
1112             uint16_t *ptr_v = (uint16_t*) src_v;
1113             uint32_t *ptr_out = (uint32_t *) out;
1114             for (size_t x = xstart; x < src.cropWidth(); x += 2) {
1115                 uint16_t u = *ptr_u++;
1116                 uint16_t v = *ptr_v++;
1117                 uint32_t y01 = *((uint32_t*)ptr_y); ptr_y += 2;
1118                 uint32_t uv = u | (((uint32_t)v) << 20);
1119                 *ptr_out++ = ((y01 & 0x3FF) << 10) | uv;
1120                 *ptr_out++ = ((y01 >> 16) << 10) | uv;
1121             }
1122             src_y += src.mStride;
1123             if (y & 1) {
1124                 src_u += src.mStride / 2;
1125                 src_v += src.mStride / 2;
1126             }
1127             out += dst.mStride;
1128         }
1129     }
1130 
1131     return OK;
1132 }
1133 
1134 #endif // USE_NEON_Y410
1135 
convertQCOMYUV420SemiPlanar(const BitmapParams & src,const BitmapParams & dst)1136 status_t ColorConverter::convertQCOMYUV420SemiPlanar(
1137         const BitmapParams &src, const BitmapParams &dst) {
1138     /* QCOMYUV420SemiPlanar is NV21, while MediaCodec uses NV12 */
1139     return convertYUV420SemiPlanarBase(
1140             src, dst, src.mWidth /* row_inc */, true /* isNV21 */);
1141 }
1142 
convertTIYUV420PackedSemiPlanar(const BitmapParams & src,const BitmapParams & dst)1143 status_t ColorConverter::convertTIYUV420PackedSemiPlanar(
1144         const BitmapParams &src, const BitmapParams &dst) {
1145     return convertYUV420SemiPlanarBase(
1146             src, dst, src.mWidth /* row_inc */);
1147 }
1148 
convertYUV420SemiPlanar(const BitmapParams & src,const BitmapParams & dst)1149 status_t ColorConverter::convertYUV420SemiPlanar(
1150         const BitmapParams &src, const BitmapParams &dst) {
1151     return convertYUV420SemiPlanarBase(
1152             src, dst, src.mStride /* row_inc */);
1153 }
1154 
convertYUV420SemiPlanarBase(const BitmapParams & src,const BitmapParams & dst,size_t row_inc,bool isNV21)1155 status_t ColorConverter::convertYUV420SemiPlanarBase(const BitmapParams &src,
1156         const BitmapParams &dst, size_t row_inc, bool isNV21) {
1157     const struct Coeffs *matrix = getMatrix();
1158     if (!matrix) {
1159         return ERROR_UNSUPPORTED;
1160     }
1161 
1162     signed _b_u = matrix->_b_u;
1163     signed _neg_g_u = -matrix->_g_u;
1164     signed _neg_g_v = -matrix->_g_v;
1165     signed _r_v = matrix->_r_v;
1166     signed _y = matrix->_y;
1167     signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 16 : 0;
1168 
1169     uint8_t *kAdjustedClip = initClip();
1170 
1171     uint16_t *dst_ptr = (uint16_t *)((uint8_t *)
1172             dst.mBits + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp);
1173 
1174     const uint8_t *src_y =
1175         (const uint8_t *)src.mBits + src.mCropTop * row_inc + src.mCropLeft;
1176 
1177     const uint8_t *src_u = (const uint8_t *)src.mBits + src.mHeight * row_inc +
1178         (src.mCropTop / 2) * row_inc + src.mCropLeft;
1179 
1180     for (size_t y = 0; y < src.cropHeight(); ++y) {
1181         for (size_t x = 0; x < src.cropWidth(); x += 2) {
1182             signed y1 = (signed)src_y[x] - _c16;
1183             signed y2 = (signed)src_y[x + 1] - _c16;
1184 
1185             signed u = (signed)src_u[(x & ~1) + isNV21] - 128;
1186             signed v = (signed)src_u[(x & ~1) + !isNV21] - 128;
1187 
1188             signed u_b = u * _b_u;
1189             signed u_g = u * _neg_g_u;
1190             signed v_g = v * _neg_g_v;
1191             signed v_r = v * _r_v;
1192 
1193             signed tmp1 = y1 * _y + 128;
1194             signed b1 = (tmp1 + u_b) / 256;
1195             signed g1 = (tmp1 + v_g + u_g) / 256;
1196             signed r1 = (tmp1 + v_r) / 256;
1197 
1198             signed tmp2 = y2 * _y + 128;
1199             signed b2 = (tmp2 + u_b) / 256;
1200             signed g2 = (tmp2 + v_g + u_g) / 256;
1201             signed r2 = (tmp2 + v_r) / 256;
1202 
1203             uint32_t rgb1 =
1204                 ((kAdjustedClip[r1] >> 3) << 11)
1205                 | ((kAdjustedClip[g1] >> 2) << 5)
1206                 | (kAdjustedClip[b1] >> 3);
1207 
1208             uint32_t rgb2 =
1209                 ((kAdjustedClip[r2] >> 3) << 11)
1210                 | ((kAdjustedClip[g2] >> 2) << 5)
1211                 | (kAdjustedClip[b2] >> 3);
1212 
1213             if (x + 1 < src.cropWidth()) {
1214                 *(uint32_t *)(&dst_ptr[x]) = (rgb2 << 16) | rgb1;
1215             } else {
1216                 dst_ptr[x] = rgb1;
1217             }
1218         }
1219 
1220         src_y += row_inc;
1221 
1222         if (y & 1) {
1223             src_u += row_inc;
1224         }
1225 
1226         dst_ptr = (uint16_t*)((uint8_t*)dst_ptr + dst.mStride);
1227     }
1228 
1229     return OK;
1230 }
1231 
initClip()1232 uint8_t *ColorConverter::initClip() {
1233     if (mClip == NULL) {
1234         mClip = new uint8_t[CLIP_RANGE_MAX_8BIT - CLIP_RANGE_MIN_8BIT + 1];
1235 
1236         for (signed i = CLIP_RANGE_MIN_8BIT; i <= CLIP_RANGE_MAX_8BIT; ++i) {
1237             mClip[i - CLIP_RANGE_MIN_8BIT] = (i < 0) ? 0 : (i > 255) ? 255 : (uint8_t)i;
1238         }
1239     }
1240 
1241     return &mClip[-CLIP_RANGE_MIN_8BIT];
1242 }
1243 
initClip10Bit()1244 uint16_t *ColorConverter::initClip10Bit() {
1245     if (mClip10Bit == NULL) {
1246         mClip10Bit = new uint16_t[CLIP_RANGE_MAX_10BIT - CLIP_RANGE_MIN_10BIT + 1];
1247 
1248         for (signed i = CLIP_RANGE_MIN_10BIT; i <= CLIP_RANGE_MAX_10BIT; ++i) {
1249             mClip10Bit[i - CLIP_RANGE_MIN_10BIT] = (i < 0) ? 0 : (i > 1023) ? 1023 : (uint16_t)i;
1250         }
1251     }
1252 
1253     return &mClip10Bit[-CLIP_RANGE_MIN_10BIT];
1254 }
1255 
1256 }  // namespace android
1257