1 /*
2 * Copyright (C) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "securec.h"
17 #include "avcodec_errors.h"
18 #include "avcodec_log.h"
19 #include "avcodec_trace.h"
20 #include "avc_encoder_convert.h"
21
22 #if defined(ARMV8)
23 #include <arm_neon.h>
24 #endif
25
26 namespace OHOS {
27 namespace MediaAVCodec {
28 namespace Codec {
29
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33
34 namespace {
35 constexpr OHOS::HiviewDFX::HiLogLabel LABEL = {LOG_CORE, LOG_DOMAIN_FRAMEWORK, "AvcEncoder"};
36 } // namespace
37
38 // Matrix coefficient to convert RGB to planar YUV data
39 // Each sub-array represents to 3x3 coeff use with R G B
40 static const int16_t BT601_MATRIX[2][3][3] = {
41 {{76, 150, 29}, {-43, -85, 128}, {128, -107, -21}}, // RANGE_FULL
42 {{66, 129, 25}, {-38, -74, 112}, {112, -94, -18}}, // RANGE_LIMITED
43 };
44
45 static const int16_t BT709_MATRIX[2][3][3] = {
46 {{54, 183, 18}, {-29, -99, 128}, {128, -116, -12}}, // RANGE_FULL
47 {{47, 157, 16}, {-26, -86, 112}, {112, -102, -10}}, // RANGE_LIMITED
48 };
49
Clip3(uint8_t min,uint8_t value,uint8_t max)50 inline uint8_t Clip3(uint8_t min, uint8_t value, uint8_t max)
51 {
52 if (value < min) {
53 return min;
54 }
55 if (value > max) {
56 return max;
57 }
58 return value;
59 }
60
61 #if defined(ARMV8)
ConvertRgbToNv21Neon(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,RgbImageData & rgbData)62 int32_t ConvertRgbToNv21Neon(uint8_t *dstData, int32_t width, int32_t height,
63 int32_t bufferSize, RgbImageData &rgbData)
64 {
65 const uint8_t *srcData = rgbData.data;
66 int32_t srcStride = rgbData.stride;
67 int32_t bytesPerPixel = rgbData.bytesPerPixel;
68 COLOR_MATRIX colorMatrix = rgbData.matrix;
69 COLOR_RANGE colorRange = rgbData.range;
70 int32_t dstStride = width;
71
72 if (dstData == nullptr || srcData == nullptr) {
73 return AVCS_ERR_INVALID_VAL;
74 }
75 if ((dstStride * height * 3 / 2) > bufferSize) { // yuv bufferSize dstStride * height * 3 / 2
76 AVCODEC_LOGE("conversion buffer is too small for converting from RGB to YUV");
77 return AVCS_ERR_NO_MEMORY;
78 }
79 colorRange = (colorRange != RANGE_FULL && colorRange != RANGE_LIMITED) ? RANGE_LIMITED : colorRange;
80 const int16_t(*weights)[3] =
81 (colorMatrix == MATRIX_BT709) ? BT709_MATRIX[colorRange - 1] : BT601_MATRIX[colorRange - 1];
82
83 uint8_t zeroLvl = colorRange == RANGE_FULL ? 0 : 16;
84
85 const uint8x16_t u8_16 = vdupq_n_u8(zeroLvl);
86
87 const uint16x8_t mask = vdupq_n_s16(255);
88
89 const int8x8_t s8_rounding = vdup_n_s8(-128);
90
91 uint8x8_t scalar_Yr = vdup_n_u8((uint8_t)weights[0][0]); // 0: Y
92 uint8x8_t scalar_Yg = vdup_n_u8((uint8_t)weights[0][1]); // 1: U
93 uint8x8_t scalar_Yb = vdup_n_u8((uint8_t)weights[0][2]); // 2: V
94
95 int16x8_t u2_scalar = vdupq_n_s16(weights[1][1]); // u scalar = -74
96 int16x8_t v2_scalar = vdupq_n_s16(weights[2][1]); // v scalar = -94
97
98 int16x8_t u3_scalar = vdupq_n_s16(weights[1][2]); // u scalar = 112
99 int16x8_t v3_scalar = vdupq_n_s16(weights[2][2]); // v saclar = -18
100
101 int patch = width >> 4; // each 16 pixel for a patch.rigth shift 4 bits
102 int widthLess = width - (patch << 4); // left shift 4 bits to get patch width
103 const int isPad = (widthLess > 0);
104
105 const uint8_t *rgbaBuf = srcData;
106 int rgbaOffset = 0;
107
108 int stridePadding = srcStride - width;
109
110 int yStride = 0;
111 int uStride = yStride;
112
113 int lumaOffset = 0;
114 int chromaUOffset = dstStride * height;
115 const int batchOffset = 16; // each time process 16 pixels
116
117 for (int j = 0; j < height; ++j) {
118 for (int i = 0; i < patch + isPad; ++i) {
119 uint8x16x3_t pixel_rgb;
120 if (bytesPerPixel == RGBA_COLINC) {
121 uint8x16x4_t pixel_rgba = vld4q_u8(rgbaBuf + rgbaOffset);
122 pixel_rgb.val[0] = pixel_rgba.val[0]; // get val[0] from rgba buf
123 pixel_rgb.val[1] = pixel_rgba.val[1]; // get val[1] from rgba buf
124 pixel_rgb.val[2] = pixel_rgba.val[2]; // get val[2] from rgba buf
125 } else {
126 pixel_rgb = vld3q_u8(rgbaBuf + rgbaOffset);
127 }
128
129 uint8x8x2_t uint8_r;
130 uint8x8x2_t uint8_g;
131 uint8x8x2_t uint8_b;
132 uint8_r.val[0] = vget_low_u8(pixel_rgb.val[0]); // val[0]: r value
133 uint8_r.val[1] = vget_high_u8(pixel_rgb.val[0]); // val[0]: r value
134 uint8_g.val[0] = vget_low_u8(pixel_rgb.val[1]); // val[1]: g value
135 uint8_g.val[1] = vget_high_u8(pixel_rgb.val[1]); // val[1]: g value
136 uint8_b.val[0] = vget_low_u8(pixel_rgb.val[2]); // val[2]: b value
137 uint8_b.val[1] = vget_high_u8(pixel_rgb.val[2]); // val[2]: b value
138
139 uint16x8x2_t uint16_y;
140 uint8x16_t pixel_y;
141
142 uint16_y.val[0] = vmull_u8(uint8_r.val[0], scalar_Yr);
143 uint16_y.val[1] = vmull_u8(uint8_r.val[1], scalar_Yr); // Y = R * 66
144
145 uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_g.val[0], scalar_Yg);
146 uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_g.val[1], scalar_Yg);
147
148 uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_b.val[0], scalar_Yb);
149 uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_b.val[1], scalar_Yb);
150
151 pixel_y = vcombine_u8(vqshrn_n_u16(uint16_y.val[0], 8), vqshrn_n_u16(uint16_y.val[1], 8)); // 8 bits
152 pixel_y = vaddq_u8(pixel_y, u8_16);
153
154 vst1q_u8(dstData + lumaOffset, pixel_y);
155
156 if (j % 2 == 0) { // calc 2 cpmpoents: u, v
157 int16x8_t r = vreinterpretq_s16_u16(vandq_u16(vreinterpretq_u16_u8(pixel_rgb.val[0]), mask));
158 int16x8_t g = vreinterpretq_s16_u16(vandq_u16(vreinterpretq_u16_u8(pixel_rgb.val[1]), mask));
159 int16x8_t b = vreinterpretq_s16_u16(vandq_u16(vreinterpretq_u16_u8(pixel_rgb.val[2]), mask));
160
161 int16x8_t signed_u;
162 int16x8_t signed_v;
163 uint8x8x2_t result;
164 signed_u = vmulq_n_s16(r, weights[1][0]); // u coeff = -28
165 signed_v = vmulq_n_s16(r, weights[2][0]); // v coeff = 112
166
167 signed_u = vmlaq_s16(signed_u, g, u2_scalar);
168 signed_v = vmlaq_s16(signed_v, g, v2_scalar);
169
170 signed_u = vmlaq_s16(signed_u, b, u3_scalar);
171 signed_v = vmlaq_s16(signed_v, b, v3_scalar);
172
173 result.val[1] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(signed_u, 8), s8_rounding)); // 8 bits
174 result.val[0] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(signed_v, 8), s8_rounding)); // 8 bits
175
176 vst2_u8(dstData + chromaUOffset, result);
177
178 chromaUOffset += (i == 0 && isPad) ? widthLess : batchOffset;
179 }
180 if (i == 0 && isPad) {
181 rgbaOffset += bytesPerPixel * widthLess;
182 lumaOffset += widthLess;
183 } else {
184 rgbaOffset += bytesPerPixel * batchOffset;
185 lumaOffset += batchOffset;
186 }
187 }
188
189 if ((j & 1) == 0) {
190 chromaUOffset += uStride;
191 }
192
193 lumaOffset += yStride;
194 rgbaOffset += stridePadding * bytesPerPixel;
195 }
196 return AVCS_ERR_OK;
197 }
198 #endif
199
ConvertRgbToNv21(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,RgbImageData & rgbData)200 int32_t ConvertRgbToNv21(uint8_t *dstData, int32_t width, int32_t height,
201 int32_t bufferSize, RgbImageData &rgbData)
202 {
203 const uint8_t *srcData = rgbData.data;
204 int32_t srcStride = rgbData.stride;
205 int32_t bytesPerPixel = rgbData.bytesPerPixel;
206 COLOR_MATRIX colorMatrix = rgbData.matrix;
207 COLOR_RANGE colorRange = rgbData.range;
208 int32_t dstStride = width;
209
210 if (dstData == nullptr || srcData == nullptr) {
211 return AVCS_ERR_INVALID_VAL;
212 }
213 if ((dstStride * height * 3 / 2) > bufferSize) { // yuv bufferSize dstStride * height * 3 / 2
214 AVCODEC_LOGE("conversion buffer is too small for converting from RGB to YUV");
215 return AVCS_ERR_NO_MEMORY;
216 }
217
218 uint8_t *dstV = dstData + dstStride * height;
219
220 const uint8_t *pRed = srcData; // r data 0
221 const uint8_t *pGreen = srcData + 1; // g data 1
222 const uint8_t *pBlue = srcData + 2; // b data 2
223
224 colorRange = (colorRange != RANGE_FULL && colorRange != RANGE_LIMITED) ? RANGE_LIMITED : colorRange;
225 const int16_t(*weights)[3] =
226 (colorMatrix == MATRIX_BT709) ? BT709_MATRIX[colorRange - 1] : BT601_MATRIX[colorRange - 1];
227
228 uint8_t zeroLvl = colorRange == RANGE_FULL ? 0 : 16;
229 uint8_t maxLvlLuma = colorRange == RANGE_FULL ? 255 : 235;
230 uint8_t maxLvlChroma = colorRange == RANGE_FULL ? 255 : 240;
231
232 for (int32_t y = 0; y < height; ++y) {
233 for (int32_t x = 0; x < width; ++x) {
234 uint8_t r = *pRed;
235 uint8_t g = *pGreen;
236 uint8_t b = *pBlue;
237
238 uint8_t luma = ((r * weights[0][0] + g * weights[0][1] + b * weights[0][2]) >> 8) + zeroLvl;
239 dstData[x] = Clip3(zeroLvl, luma, maxLvlLuma);
240
241 if ((x & 1) == 0 && (y & 1) == 0) {
242 uint8_t u = ((r * weights[1][0] + g * weights[1][1] + b * weights[1][2]) >> 8) + 128; // nv21: U
243 uint8_t v = ((r * weights[2][0] + g * weights[2][1] + b * weights[2][2]) >> 8) + 128; // nv21: V
244 dstV[x] = Clip3(zeroLvl, v, maxLvlChroma);
245 dstV[x + 1] = Clip3(zeroLvl, u, maxLvlChroma);
246 }
247 pRed += bytesPerPixel;
248 pGreen += bytesPerPixel;
249 pBlue += bytesPerPixel;
250 }
251
252 if ((y & 1) == 0) {
253 dstV += dstStride;
254 }
255
256 pRed -= bytesPerPixel * width;
257 pGreen -= bytesPerPixel * width;
258 pBlue -= bytesPerPixel * width;
259 pRed += bytesPerPixel * srcStride;
260 pGreen += bytesPerPixel * srcStride;
261 pBlue += bytesPerPixel * srcStride;
262
263 dstData += dstStride;
264 }
265 return AVCS_ERR_OK;
266 }
267
ConvertNv12ToNv21(uint8_t * dstData,int32_t width,int32_t height,int32_t bufferSize,YuvImageData & yuvData)268 int32_t ConvertNv12ToNv21(uint8_t *dstData, int32_t width, int32_t height,
269 int32_t bufferSize, YuvImageData &yuvData)
270 {
271 if (bufferSize < (width * height * 3 / 2)) { // yuv bufferSize dstStride * height * 3 / 2
272 AVCODEC_LOGE("conversion buffer is too small for converting from RGB to YUV");
273 return AVCS_ERR_NO_MEMORY;
274 }
275
276 int32_t dstStride = width;
277 uint8_t *dstY = dstData;
278 uint8_t *dstU = dstData + dstStride * height;
279 uint8_t *dstV = dstData + dstStride * height + 1;
280
281 int32_t srcStride = yuvData.stride;
282 const uint8_t *srcY = yuvData.data;
283 const uint8_t *srcU = srcY + yuvData.uvOffset;
284 const uint8_t *srcV = srcU + 1;
285
286 for (int32_t y = 0; y < height; y++) {
287 errno_t ret = memcpy_s(dstY, width, srcY, width);
288 CHECK_AND_RETURN_RET_LOG(ret == EOK, AVCS_ERR_UNKNOWN, "memcpy_s failed");
289 dstY += dstStride;
290 srcY += srcStride;
291 }
292
293 height = height >> 1;
294 width = width >> 1;
295 for (int32_t y = 0; y < height; y++) {
296 for (int32_t x = 0; x < width; x++) {
297 dstU[x * 2] = srcV[x * 2]; // 2: addr + pos * 2 for u
298 dstV[x * 2] = srcU[x * 2]; // 2: (addr + 1) + pos * 2 for v
299 }
300
301 dstU += dstStride;
302 dstV += dstStride;
303 srcU += srcStride;
304 srcV += srcStride;
305 }
306
307 return AVCS_ERR_OK;
308 }
309
310 #ifdef __cplusplus
311 }
312 #endif
313
314 } // namespace Codec
315 } // namespace MediaAVCodec
316 } // namespace OHOS