• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "securec.h"
17 #include "avcodec_errors.h"
18 #include "avcodec_log.h"
19 #include "avcodec_trace.h"
20 #include "avc_encoder_convert.h"
21 
22 #if defined(ARMV8)
23 #include <arm_neon.h>
24 #endif
25 
26 namespace OHOS {
27 namespace MediaAVCodec {
28 namespace Codec {
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 namespace {
35 constexpr OHOS::HiviewDFX::HiLogLabel LABEL = {LOG_CORE, LOG_DOMAIN_FRAMEWORK, "AvcEncoder"};
36 } // namespace
37 
38 // Matrix coefficient to convert RGB to planar YUV data
39 // Each sub-array represents to 3x3 coeff use with R G B
40 static const int16_t BT601_MATRIX[2][3][3] = {
41     {{76, 150, 29}, {-43, -85, 128}, {128, -107, -21}},     // RANGE_FULL
42     {{66, 129, 25}, {-38, -74, 112}, {112, -94, -18}},      // RANGE_LIMITED
43 };
44 
45 static const int16_t BT709_MATRIX[2][3][3] = {
46     {{54, 183, 18}, {-29, -99, 128}, {128, -116, -12}},     // RANGE_FULL
47     {{47, 157, 16}, {-26, -86, 112}, {112, -102, -10}},     // RANGE_LIMITED
48 };
49 
Clip3(uint8_t min,uint8_t value,uint8_t max)50 inline uint8_t Clip3(uint8_t min, uint8_t value, uint8_t max)
51 {
52     if (value < min) {
53         return min;
54     }
55     if (value > max) {
56         return max;
57     }
58     return value;
59 }
60 
61 #if defined(ARMV8)
ConvertRgbToYuv420Neon(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,RgbImageData & rgbData)62 int32_t ConvertRgbToYuv420Neon(uint8_t *dstData, int32_t width, int32_t height,
63     int32_t bufferSize, RgbImageData &rgbData)
64 {
65     const uint8_t *srcData = rgbData.data;
66     int32_t srcStride = rgbData.stride;
67     int32_t bytesPerPixel = rgbData.bytesPerPixel;
68     COLOR_MATRIX colorMatrix = rgbData.matrix;
69     COLOR_RANGE colorRange = rgbData.range;
70     int32_t dstStride = width;
71 
72     if (dstData == nullptr || srcData == nullptr) {
73         return AVCS_ERR_INVALID_VAL;
74     }
75     if ((dstStride * height * 3 / 2) > bufferSize) {  // yuv bufferSize dstStride * height * 3 / 2
76         AVCODEC_LOGE("conversion buffer is too small for converting from RGB to YUV");
77         return AVCS_ERR_NO_MEMORY;
78     }
79     colorRange = (colorRange != RANGE_FULL && colorRange != RANGE_LIMITED) ? RANGE_LIMITED : colorRange;
80     const int16_t(*weights)[3] =
81         (colorMatrix == MATRIX_BT709) ? BT709_MATRIX[colorRange - 1] : BT601_MATRIX[colorRange - 1];
82 
83     uint8_t zeroLvl = colorRange == RANGE_FULL ? 0 : 16;
84 
85     const uint8x16_t u8_16 = vdupq_n_u8(zeroLvl);
86 
87     const uint16x8_t mask = vdupq_n_s16(255);
88 
89     const int8x8_t s8_rounding = vdup_n_s8(-128);
90 
91     uint8x8_t scalar_Yr = vdup_n_u8((uint8_t)weights[0][0]);    // 0: Y
92     uint8x8_t scalar_Yg = vdup_n_u8((uint8_t)weights[0][1]);    // 1: U
93     uint8x8_t scalar_Yb = vdup_n_u8((uint8_t)weights[0][2]);    // 2: V
94 
95     int16x8_t u2_scalar = vdupq_n_s16(weights[1][1]);   // u scalar = -74
96     int16x8_t v2_scalar = vdupq_n_s16(weights[2][1]);   // v scalar = -94
97 
98     int16x8_t u3_scalar = vdupq_n_s16(weights[1][2]);   // u scalar = 112
99     int16x8_t v3_scalar = vdupq_n_s16(weights[2][2]);   // v saclar = -18
100 
101     int patch = width >> 4;                 // each 16 pixel for a patch.rigth shift 4 bits
102     int widthLess = width - (patch << 4);   // left shift 4 bits to get patch width
103     const int isPad = (widthLess > 0);
104 
105     const uint8_t *rgbaBuf = srcData;
106     int rgbaOffset = 0;
107 
108     int stridePadding = srcStride - width;
109 
110     int yStride = 0;
111     int uStride = yStride >> 1;
112 
113     int lumaOffset = 0;
114     int chromaUOffset = dstStride * height;
115     int chromaVOffset = chromaUOffset + (dstStride >> 1) * (height >> 1);
116     const int batchOffset = 16;            // each time process 16 pixels
117 
118     for (int j = 0; j < height; ++j) {
119         for (int i = 0; i < patch + isPad; ++i) {
120             uint8x16x3_t pixel_rgb;
121             if (bytesPerPixel == RGBA_COLINC) {
122                 uint8x16x4_t pixel_rgba = vld4q_u8(rgbaBuf + rgbaOffset);
123                 pixel_rgb.val[0] = pixel_rgba.val[0];   // get val[0] from rgba buf
124                 pixel_rgb.val[1] = pixel_rgba.val[1];   // get val[1] from rgba buf
125                 pixel_rgb.val[2] = pixel_rgba.val[2];   // get val[2] from rgba buf
126             } else {
127                 pixel_rgb = vld3q_u8(rgbaBuf + rgbaOffset);
128             }
129 
130             uint8x8x2_t uint8_r;
131             uint8x8x2_t uint8_g;
132             uint8x8x2_t uint8_b;
133             uint8_r.val[0] = vget_low_u8(pixel_rgb.val[0]);     // val[0]: r value
134             uint8_r.val[1] = vget_high_u8(pixel_rgb.val[0]);    // val[0]: r value
135             uint8_g.val[0] = vget_low_u8(pixel_rgb.val[1]);     // val[1]: g value
136             uint8_g.val[1] = vget_high_u8(pixel_rgb.val[1]);    // val[1]: g value
137             uint8_b.val[0] = vget_low_u8(pixel_rgb.val[2]);     // val[2]: b value
138             uint8_b.val[1] = vget_high_u8(pixel_rgb.val[2]);    // val[2]: b value
139 
140             uint16x8x2_t uint16_y;
141             uint8x16_t pixel_y;
142 
143             uint16_y.val[0] = vmull_u8(uint8_r.val[0], scalar_Yr);
144             uint16_y.val[1] = vmull_u8(uint8_r.val[1], scalar_Yr);      // Y = R * 66
145 
146             uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_g.val[0], scalar_Yg);
147             uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_g.val[1], scalar_Yg);
148 
149             uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_b.val[0], scalar_Yb);
150             uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_b.val[1], scalar_Yb);
151 
152             pixel_y = vcombine_u8(vqshrn_n_u16(uint16_y.val[0], 8), vqshrn_n_u16(uint16_y.val[1], 8));  // 8 bits
153             pixel_y = vaddq_u8(pixel_y, u8_16);
154 
155             vst1q_u8(dstData + lumaOffset, pixel_y);
156 
157             if (j % 2 == 0) {   // calc 2 components: u, v
158                 int16x8_t r = vreinterpretq_s16_u16(vandq_u16(vreinterpretq_u16_u8(pixel_rgb.val[0]), mask));
159                 int16x8_t g = vreinterpretq_s16_u16(vandq_u16(vreinterpretq_u16_u8(pixel_rgb.val[1]), mask));
160                 int16x8_t b = vreinterpretq_s16_u16(vandq_u16(vreinterpretq_u16_u8(pixel_rgb.val[2]), mask));
161 
162                 int16x8_t signed_u;
163                 int16x8_t signed_v;
164                 uint8x8_t result[2]; // 2 row for u & v
165                 signed_u = vmulq_n_s16(r, weights[1][0]);   // u coeff = -28
166                 signed_v = vmulq_n_s16(r, weights[2][0]);   // v coeff = 112
167 
168                 signed_u = vmlaq_s16(signed_u, g, u2_scalar);
169                 signed_v = vmlaq_s16(signed_v, g, v2_scalar);
170 
171                 signed_u = vmlaq_s16(signed_u, b, u3_scalar);
172                 signed_v = vmlaq_s16(signed_v, b, v3_scalar);
173 
174                 result[0] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(signed_u, 8), s8_rounding));  // 8 bits
175                 result[1] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(signed_v, 8), s8_rounding));  // 8 bits
176 
177                 vst1_u8(dstData + chromaUOffset, result[0]);
178                 vst1_u8(dstData + chromaVOffset, result[1]);
179 
180                 chromaUOffset += (i == 0 && isPad) ? (widthLess >> 1) : (batchOffset >> 1);
181                 chromaVOffset += (i == 0 && isPad) ? (widthLess >> 1) : (batchOffset >> 1);
182             }
183             if (i == 0 && isPad) {
184                 rgbaOffset += bytesPerPixel * widthLess;
185                 lumaOffset += widthLess;
186             } else {
187                 rgbaOffset += bytesPerPixel * batchOffset;
188                 lumaOffset += batchOffset;
189             }
190         }
191 
192         if ((j & 1) == 0) {
193             chromaUOffset += uStride;
194             chromaVOffset += uStride;
195         }
196 
197         lumaOffset += yStride;
198         rgbaOffset += stridePadding * bytesPerPixel;
199     }
200     return AVCS_ERR_OK;
201 }
202 #endif
203 
ConvertRgbToYuv420(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,RgbImageData & rgbData)204 int32_t ConvertRgbToYuv420(uint8_t *dstData, int32_t width, int32_t height,
205     int32_t bufferSize, RgbImageData &rgbData)
206 {
207     const uint8_t *srcData = rgbData.data;
208     int32_t srcStride = rgbData.stride;
209     int32_t bytesPerPixel = rgbData.bytesPerPixel;
210     COLOR_MATRIX colorMatrix = rgbData.matrix;
211     COLOR_RANGE colorRange = rgbData.range;
212     int32_t dstStride = width;
213 
214     if (dstData == nullptr || srcData == nullptr) {
215         return AVCS_ERR_INVALID_VAL;
216     }
217     if ((dstStride * height * 3 / 2) > bufferSize) {    // yuv bufferSize dstStride * height * 3 / 2
218         AVCODEC_LOGE("conversion buffer is too small for converting from RGB to YUV");
219         return AVCS_ERR_NO_MEMORY;
220     }
221 
222     uint8_t *dstU = dstData + dstStride * height;
223     uint8_t *dstV = dstU + (dstStride >> 1) * (height >> 1);
224 
225     const uint8_t *pRed = srcData;          // r data 0
226     const uint8_t *pGreen = srcData + 1;    // g data 1
227     const uint8_t *pBlue = srcData + 2;     // b data 2
228 
229     colorRange = (colorRange != RANGE_FULL && colorRange != RANGE_LIMITED) ? RANGE_LIMITED : colorRange;
230     const int16_t(*weights)[3] =
231         (colorMatrix == MATRIX_BT709) ? BT709_MATRIX[colorRange - 1] : BT601_MATRIX[colorRange - 1];
232 
233     uint8_t zeroLvl = colorRange == RANGE_FULL ? 0 : 16;
234     uint8_t maxLvlLuma = colorRange == RANGE_FULL ? 255 : 235;
235     uint8_t maxLvlChroma = colorRange == RANGE_FULL ? 255 : 240;
236 
237     for (int32_t y = 0; y < height; ++y) {
238         for (int32_t x = 0; x < width; ++x) {
239             uint8_t r = *pRed;
240             uint8_t g = *pGreen;
241             uint8_t b = *pBlue;
242 
243             uint8_t luma = ((r * weights[0][0] + g * weights[0][1] + b * weights[0][2]) >> 8) + zeroLvl;
244             dstData[x] = Clip3(zeroLvl, luma, maxLvlLuma);
245 
246             if ((x & 1) == 0 && (y & 1) == 0) {
247                 uint8_t u = ((r * weights[1][0] + g * weights[1][1] + b * weights[1][2]) >> 8) + 128;
248                 uint8_t v = ((r * weights[2][0] + g * weights[2][1] + b * weights[2][2]) >> 8) + 128;
249                 dstU[x >> 1] = Clip3(zeroLvl, v, maxLvlChroma);
250                 dstV[x >> 1] = Clip3(zeroLvl, u, maxLvlChroma);
251             }
252             pRed += bytesPerPixel;
253             pGreen += bytesPerPixel;
254             pBlue += bytesPerPixel;
255         }
256 
257         if ((y & 1) == 0) {
258             dstU += dstStride >> 1;
259             dstV += dstStride >> 1;
260         }
261 
262         pRed -= bytesPerPixel * width;
263         pGreen -= bytesPerPixel * width;
264         pBlue -= bytesPerPixel * width;
265         pRed += bytesPerPixel * srcStride;
266         pGreen += bytesPerPixel * srcStride;
267         pBlue += bytesPerPixel * srcStride;
268 
269         dstData += dstStride;
270     }
271     return AVCS_ERR_OK;
272 }
273 
ConvertNv12ToYuv420(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,YuvImageData & yuvData)274 int32_t ConvertNv12ToYuv420(uint8_t *dstData, int32_t width, int32_t height,
275     int32_t bufferSize, YuvImageData &yuvData)
276 {
277     if (bufferSize < (width * height * 3 / 2)) {    // yuv bufferSize dstStride * height * 3 / 2
278         AVCODEC_LOGE("conversion buffer is too small for converting from NV12 to YUV");
279         return AVCS_ERR_NO_MEMORY;
280     }
281 
282     int32_t dstStride = width;
283     uint8_t *dstY = dstData;
284     uint8_t *dstU = dstData + dstStride * height;
285     uint8_t *dstV = dstU + (dstStride >> 1) * (height >> 1);
286 
287     int32_t srcStride = yuvData.stride;
288     const uint8_t *srcY = yuvData.data;
289     const uint8_t *srcU = srcY + yuvData.uvOffset;
290     const uint8_t *srcV = srcY + yuvData.uvOffset + 1;
291 
292     int32_t x;
293     int32_t y;
294     for (y = 0; y < height; y++) {
295         errno_t ret = memcpy_s(dstY, width, srcY, width);
296         CHECK_AND_RETURN_RET_LOG(ret == EOK, AVCS_ERR_UNKNOWN, "memcpy_s failed");
297         dstY += dstStride;
298         srcY += srcStride;
299     }
300 
301     height = height >> 1;
302     width = width >> 1;
303     dstStride = dstStride >> 1;
304     for (y = 0; y < height; y++) {
305         for (x = 0; x < width; x++) {
306             dstU[x] = srcU[x << 1];
307             dstV[x] = srcV[x << 1];
308         }
309 
310         dstU += dstStride;
311         dstV += dstStride;
312         srcU += srcStride;
313         srcV += srcStride;
314     }
315 
316     return AVCS_ERR_OK;
317 }
318 
ConvertNv21ToYuv420(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,YuvImageData & yuvData)319 int32_t ConvertNv21ToYuv420(uint8_t *dstData, int32_t width, int32_t height,
320     int32_t bufferSize, YuvImageData &yuvData)
321 {
322     if (bufferSize < (width * height * 3 / 2)) {    // yuv bufferSize dstStride * height * 3 / 2
323         AVCODEC_LOGE("conversion buffer is too small for converting from NV21 to YUV");
324         return AVCS_ERR_NO_MEMORY;
325     }
326 
327     int32_t dstStride = width;
328     uint8_t *dstY = dstData;
329     uint8_t *dstU = dstData + dstStride * height;
330     uint8_t *dstV = dstU + (dstStride >> 1) * (height >> 1);
331 
332     int32_t srcStride = yuvData.stride;
333     const uint8_t *srcY = yuvData.data;
334     const uint8_t *srcU = srcY + yuvData.uvOffset + 1;
335     const uint8_t *srcV = srcY + yuvData.uvOffset;
336 
337     int32_t x;
338     int32_t y;
339     for (y = 0; y < height; y++) {
340         errno_t ret = memcpy_s(dstY, width, srcY, width);
341         CHECK_AND_RETURN_RET_LOG(ret == EOK, AVCS_ERR_UNKNOWN, "memcpy_s failed");
342         dstY += dstStride;
343         srcY += srcStride;
344     }
345 
346     height = height >> 1;
347     width = width >> 1;
348     dstStride = dstStride >> 1;
349     for (y = 0; y < height; y++) {
350         for (x = 0; x < width; x++) {
351             dstU[x] = srcU[x << 1];
352             dstV[x] = srcV[x << 1];
353         }
354 
355         dstU += dstStride;
356         dstV += dstStride;
357         srcU += srcStride;
358         srcV += srcStride;
359     }
360 
361     return AVCS_ERR_OK;
362 }
363 
364 #ifdef __cplusplus
365 }
366 #endif
367 
368 } // namespace Codec
369 } // namespace MediaAVCodec
370 } // namespace OHOS