1 /*
2 * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 #include "swscale_internal.h"
21
22 typedef struct VScalerContext
23 {
24 uint16_t *filter[2];
25 int32_t *filter_pos;
26 int filter_size;
27 int isMMX;
28 union {
29 yuv2planar1_fn yuv2planar1;
30 yuv2planarX_fn yuv2planarX;
31 yuv2interleavedX_fn yuv2interleavedX;
32 yuv2packed1_fn yuv2packed1;
33 yuv2packed2_fn yuv2packed2;
34 yuv2anyX_fn yuv2anyX;
35 } pfn;
36 yuv2packedX_fn yuv2packedX;
37 } VScalerContext;
38
39
/**
 * Vertically scale one output line of the luma plane and, when the
 * descriptor has alpha enabled, of the alpha plane as well.
 *
 * @param c      scaler context; supplies the luma dither table
 * @param desc   filter descriptor holding the VScalerContext and the slices
 * @param sliceY index of the output line to produce
 * @param sliceH unused
 * @return always 1
 */
static int lum_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    /* Plane 0 (luma) pairs with filter slot 0, plane 3 (alpha) with slot 1. */
    static const int plane_of[2] = { 0, 3 };

    VScalerContext *ctx = desc->instance;
    const int width     = desc->dst->width;
    const int taps      = ctx->filter_size;
    const int top       = FFMAX(1 - taps, ctx->filter_pos[sliceY]);
    /* The MMX tables are used from their base; otherwise pick this line's row. */
    const int coeff_off = ctx->isMMX ? 0 : sliceY * taps;
    const int nb_planes = desc->alpha ? 2 : 1;
    int i;

    for (i = 0; i < nb_planes; i++) {
        const int p      = plane_of[i];
        uint8_t **in     = desc->src->plane[p].line + (top    - desc->src->plane[p].sliceY);
        uint8_t **out    = desc->dst->plane[p].line + (sliceY - desc->dst->plane[p].sliceY);
        uint16_t *coeffs = ctx->filter[i] + coeff_off;

        /* Both planes are written with the luma dither table. */
        if (taps == 1)
            ctx->pfn.yuv2planar1((const int16_t *)in[0], out[0], width, c->lumDither8, 0);
        else
            ctx->pfn.yuv2planarX(coeffs, taps, (const int16_t **)in, out[0], width, c->lumDither8, 0);
    }

    return 1;
}
72
/**
 * Vertically scale one chroma output line into planes 1 and 2.
 *
 * Output lines that do not start a chroma row (according to the vertical
 * chroma subsampling of the destination) are skipped.
 *
 * @param c      scaler context; supplies the chroma dither table and format
 * @param desc   filter descriptor holding the VScalerContext and the slices
 * @param sliceY index of the (luma) output line being produced
 * @param sliceH unused
 * @return 0 when the line was skipped, 1 when chroma was written
 */
static int chr_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    const int vsub      = desc->dst->v_chr_sub_sample;
    VScalerContext *ctx = desc->instance;
    int width, chrY, taps, top;
    uint8_t **inU, **inV, **outU, **outV;
    uint16_t *coeffs;

    /* Only every (1 << vsub)-th luma line carries a chroma line. */
    if (sliceY & ((1 << vsub) - 1))
        return 0;

    width = AV_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample);
    chrY  = sliceY >> vsub;
    taps  = ctx->filter_size;
    top   = FFMAX(1 - taps, ctx->filter_pos[chrY]);

    inU  = desc->src->plane[1].line + (top  - desc->src->plane[1].sliceY);
    inV  = desc->src->plane[2].line + (top  - desc->src->plane[2].sliceY);
    outU = desc->dst->plane[1].line + (chrY - desc->dst->plane[1].sliceY);
    outV = desc->dst->plane[2].line + (chrY - desc->dst->plane[2].sliceY);

    /* The MMX table is used from its base; otherwise pick this line's row. */
    coeffs = ctx->filter[0] + (ctx->isMMX ? 0 : chrY * taps);

    if (c->yuv2nv12cX) {
        /* Interleaved chroma: both components go to plane 1. */
        ctx->pfn.yuv2interleavedX(c->dstFormat, c->chrDither8, coeffs, taps,
                                  (const int16_t **)inU, (const int16_t **)inV,
                                  outU[0], width);
        return 1;
    }

    if (taps == 1) {
        ctx->pfn.yuv2planar1((const int16_t *)inU[0], outU[0], width, c->chrDither8, 0);
        ctx->pfn.yuv2planar1((const int16_t *)inV[0], outV[0], width, c->chrDither8, 3);
        return 1;
    }

    ctx->pfn.yuv2planarX(coeffs, taps, (const int16_t **)inU, outU[0], width, c->chrDither8, 0);
    /* The last argument for V differs between the MMX and the C variant. */
    ctx->pfn.yuv2planarX(coeffs, taps, (const int16_t **)inV, outV[0], width, c->chrDither8,
                         ctx->isMMX ? (c->uv_offx2 >> 1) : 3);

    return 1;
}
107
/**
 * Vertically scale one output line for a packed destination.
 *
 * Tries the 1-tap and 2-tap (bilinear) shortcuts first and falls back to
 * the general N-tap routine. The 2-tap shortcuts are only taken when the
 * two coefficients sum to 4096 and the second one does not exceed 4096.
 *
 * @param c      scaler context
 * @param desc   filter descriptor; its instance is an array of two
 *               VScalerContext (index 0: luma, index 1: chroma)
 * @param sliceY index of the output line to produce
 * @param sliceH unused
 * @return always 1
 */
static int packed_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    VScalerContext *lum = desc->instance;
    VScalerContext *chr = lum + 1;
    const int width     = desc->dst->width;
    const int chrY      = sliceY >> desc->dst->v_chr_sub_sample;

    const int lumTaps   = lum->filter_size;
    const int chrTaps   = chr->filter_size;
    uint16_t *lumCoeffs = lum->filter[0];
    uint16_t *chrCoeffs = chr->filter[0];

    const int lumTop = FFMAX(1 - lumTaps, lum->filter_pos[sliceY]);
    const int chrTop = FFMAX(1 - chrTaps, chr->filter_pos[chrY]);

    uint8_t **inY = desc->src->plane[0].line + (lumTop - desc->src->plane[0].sliceY);
    uint8_t **inU = desc->src->plane[1].line + (chrTop - desc->src->plane[1].sliceY);
    uint8_t **inV = desc->src->plane[2].line + (chrTop - desc->src->plane[2].sliceY);
    uint8_t **inA = desc->alpha
                  ? desc->src->plane[3].line + (lumTop - desc->src->plane[3].sliceY)
                  : NULL;
    uint8_t **out = desc->dst->plane[0].line + (sliceY - desc->dst->plane[0].sliceY);

    /* Structural preconditions of the shortcuts (tap counts + routine present). */
    const int one_tap_luma = c->yuv2packed1 && lumTaps == 1;
    const int two_tap_both = c->yuv2packed2 && lumTaps == 2 && chrTaps == 2;

    if (one_tap_luma && chrTaps == 1) { // unscaled RGB
        lum->pfn.yuv2packed1(c, (const int16_t *)inY[0],
                             (const int16_t **)inU, (const int16_t **)inV,
                             (const int16_t *)(desc->alpha ? inA[0] : NULL),
                             out[0], width, 0, sliceY);
        return 1;
    }

    if (one_tap_luma && chrTaps == 2) { // unscaled RGB
        const int chr0 = chrCoeffs[2 * chrY];
        const int chr1 = chrCoeffs[2 * chrY + 1];

        if (chr1 + chr0 == 4096 && chr1 <= 4096U) {
            lum->pfn.yuv2packed1(c, (const int16_t *)inY[0],
                                 (const int16_t **)inU, (const int16_t **)inV,
                                 (const int16_t *)(desc->alpha ? inA[0] : NULL),
                                 out[0], width, chr1, sliceY);
            return 1;
        }
    }

    if (two_tap_both) { // bilinear upscale RGB
        const int lum0 = lumCoeffs[2 * sliceY];
        const int lum1 = lumCoeffs[2 * sliceY + 1];
        const int chr0 = chrCoeffs[2 * chrY];
        const int chr1 = chrCoeffs[2 * chrY + 1];

        if (lum1 + lum0 == 4096 && lum1 <= 4096U &&
            chr1 + chr0 == 4096 && chr1 <= 4096U) {
            /* Publish the first coefficient of each pair in the MMX tables. */
            c->lumMmxFilter[2] =
            c->lumMmxFilter[3] = lum0 * 0x10001;
            c->chrMmxFilter[2] =
            c->chrMmxFilter[3] = chr0 * 0x10001;
            lum->pfn.yuv2packed2(c, (const int16_t **)inY,
                                 (const int16_t **)inU, (const int16_t **)inV,
                                 (const int16_t **)inA,
                                 out[0], width, lum1, chr1, sliceY);
            return 1;
        }
    }

    /* general RGB */
    if ((one_tap_luma && chrTaps == 2) || two_tap_both) {
        /* A shortcut matched structurally but its coefficients ruled it out. */
        if (!c->warned_unuseable_bilinear)
            av_log(c, AV_LOG_INFO, "Optimized 2 tap filter code cannot be used\n");
        c->warned_unuseable_bilinear = 1;
    }

    lum->yuv2packedX(c, lumCoeffs + sliceY * lumTaps,
                     (const int16_t **)inY, lumTaps,
                     chrCoeffs + chrY * chrTaps,
                     (const int16_t **)inU, (const int16_t **)inV, chrTaps,
                     (const int16_t **)inA, out[0], width, sliceY);
    return 1;
}
171
/**
 * Vertically scale one output line through the generic yuv2anyX routine,
 * which writes up to four destination planes at once.
 *
 * @param c      scaler context
 * @param desc   filter descriptor; its instance is an array of two
 *               VScalerContext (index 0: luma, index 1: chroma)
 * @param sliceY index of the output line to produce
 * @param sliceH unused
 * @return always 1
 */
static int any_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
{
    VScalerContext *inst = desc->instance;
    int dstW = desc->dst->width;
    int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;

    int lum_fsize = inst[0].filter_size;
    int chr_fsize = inst[1].filter_size;
    uint16_t *lum_filter = inst[0].filter[0];
    uint16_t *chr_filter = inst[1].filter[0];

    int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[   sliceY]);
    int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]);

    int sp0 = firstLum - desc->src->plane[0].sliceY;
    int sp1 = firstChr - desc->src->plane[1].sliceY;
    int sp2 = firstChr - desc->src->plane[2].sliceY;
    int sp3 = firstLum - desc->src->plane[3].sliceY;
    int dp0 = sliceY - desc->dst->plane[0].sliceY;
    int dp1 = chrSliceY - desc->dst->plane[1].sliceY;
    int dp2 = chrSliceY - desc->dst->plane[2].sliceY;
    int dp3 = sliceY - desc->dst->plane[3].sliceY;

    uint8_t **src0 = desc->src->plane[0].line + sp0;
    uint8_t **src1 = desc->src->plane[1].line + sp1;
    uint8_t **src2 = desc->src->plane[2].line + sp2;
    uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL;
    uint8_t *dst[4] = { desc->dst->plane[0].line[dp0],
                        desc->dst->plane[1].line[dp1],
                        desc->dst->plane[2].line[dp2],
                        desc->alpha ? desc->dst->plane[3].line[dp3] : NULL };

    av_assert1(!c->yuv2packed1 && !c->yuv2packed2);
    /* The chroma coefficient row must be selected with the chroma line
     * index, the same index used for filter_pos and the chroma planes.
     * Using the luma index picks the wrong row (and can run past the
     * chroma table) whenever the destination is vertically subsampled. */
    inst->pfn.yuv2anyX(c, lum_filter + sliceY * lum_fsize,
             (const int16_t**)src0, lum_fsize, chr_filter + chrSliceY * chr_fsize,
             (const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, dst, dstW, sliceY);

    return 1;

}
212
/**
 * Allocate the vertical scaler context(s) and hook them into the filter
 * descriptor(s), then install the output routines currently set in c.
 *
 * For planar YUV and alpha-less gray destinations, desc[0] handles luma
 * (and alpha) and, unless the destination is gray, desc[1] handles chroma.
 * For every other destination a single descriptor owns an array of two
 * contexts (luma, chroma).
 *
 * @param c    scaler context
 * @param desc first descriptor to fill in
 * @param src  source slice of the step
 * @param dst  destination slice of the step
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
{
    const int planar_out = isPlanarYUV(c->dstFormat) ||
                           (isGray(c->dstFormat) && !isALPHA(c->dstFormat));
    VScalerContext *ctx;

    if (planar_out) {
        ctx = av_mallocz(sizeof(*ctx));
        if (!ctx)
            return AVERROR(ENOMEM);

        desc[0].process  = lum_planar_vscale;
        desc[0].instance = ctx;
        desc[0].src      = src;
        desc[0].dst      = dst;
        desc[0].alpha    = c->needAlpha;

        if (!isGray(c->dstFormat)) {
            /* NOTE(review): on failure desc[0].instance stays attached;
             * presumably the caller's descriptor teardown frees it - confirm. */
            ctx = av_mallocz(sizeof(*ctx));
            if (!ctx)
                return AVERROR(ENOMEM);

            desc[1].process  = chr_planar_vscale;
            desc[1].instance = ctx;
            desc[1].src      = src;
            desc[1].dst      = dst;
        }
    } else {
        /* One allocation holding the luma context followed by the chroma one. */
        ctx = av_mallocz_array(sizeof(*ctx), 2);
        if (!ctx)
            return AVERROR(ENOMEM);

        desc[0].process  = c->yuv2packedX ? packed_vscale : any_vscale;
        desc[0].instance = ctx;
        desc[0].src      = src;
        desc[0].dst      = dst;
        desc[0].alpha    = c->needAlpha;
    }

    ff_init_vscale_pfn(c, c->yuv2plane1, c->yuv2planeX, c->yuv2nv12cX,
                       c->yuv2packed1, c->yuv2packed2, c->yuv2packedX,
                       c->yuv2anyX, c->use_mmx_vfilter);
    return 0;
}
256
/**
 * Point the already allocated vertical scaler context(s) at the filter
 * tables of c and store the output routine each of them has to call.
 *
 * The contexts are located through c->desc, counting back from the last
 * descriptor (or the one before it when internal gamma is enabled).
 *
 * @param c           scaler context owning the descriptors and filter tables
 * @param yuv2plane1  1-tap planar routine
 * @param yuv2planeX  N-tap planar routine
 * @param yuv2nv12cX  interleaved chroma routine; when set it is used for the
 *                    planar chroma context
 * @param yuv2packed1 1-tap luma packed routine
 * @param yuv2packed2 2-tap packed routine
 * @param yuv2packedX N-tap packed routine; when NULL yuv2anyX is installed
 * @param yuv2anyX    generic routine
 * @param use_mmx     non-zero to use the MMX filter tables on the planar path
 */
void ff_init_vscale_pfn(SwsContext *c,
                        yuv2planar1_fn yuv2plane1,
                        yuv2planarX_fn yuv2planeX,
                        yuv2interleavedX_fn yuv2nv12cX,
                        yuv2packed1_fn yuv2packed1,
                        yuv2packed2_fn yuv2packed2,
                        yuv2packedX_fn yuv2packedX,
                        yuv2anyX_fn yuv2anyX, int use_mmx)
{
    const int planar_out = isPlanarYUV(c->dstFormat) ||
                           (isGray(c->dstFormat) && !isALPHA(c->dstFormat));
    VScalerContext *lum;
    VScalerContext *chr;
    int pos = c->numDesc - (c->is_internal_gamma ? 2 : 1); //FIXME avoid hardcoding indexes

    if (planar_out) {
        if (!isGray(c->dstFormat)) {
            /* The chroma descriptor comes last, the luma one right before it. */
            chr = c->desc[pos--].instance;

            chr->filter[0]   = (uint16_t *)(use_mmx ? (int16_t *)c->chrMmxFilter : c->vChrFilter);
            chr->filter_size = c->vChrFilterSize;
            chr->filter_pos  = c->vChrFilterPos;
            chr->isMMX       = use_mmx;

            if (yuv2nv12cX) {
                chr->pfn.yuv2interleavedX = yuv2nv12cX;
            } else if (c->vChrFilterSize == 1) {
                chr->pfn.yuv2planar1 = yuv2plane1;
            } else {
                chr->pfn.yuv2planarX = yuv2planeX;
            }
        }

        lum = c->desc[pos].instance;

        lum->filter[0]   = (uint16_t *)(use_mmx ? (int16_t *)c->lumMmxFilter : c->vLumFilter);
        /* Slot 1 is the table applied to the alpha plane. */
        lum->filter[1]   = (uint16_t *)(use_mmx ? (int16_t *)c->alpMmxFilter : c->vLumFilter);
        lum->filter_size = c->vLumFilterSize;
        lum->filter_pos  = c->vLumFilterPos;
        lum->isMMX       = use_mmx;

        if (c->vLumFilterSize == 1) {
            lum->pfn.yuv2planar1 = yuv2plane1;
        } else {
            lum->pfn.yuv2planarX = yuv2planeX;
        }
        return;
    }

    /* Packed / generic output: two contexts stored back to back. */
    lum = c->desc[pos].instance;
    chr = lum + 1;

    lum->filter[0]   = (uint16_t *)c->vLumFilter;
    lum->filter_size = c->vLumFilterSize;
    lum->filter_pos  = c->vLumFilterPos;
    lum->isMMX       = use_mmx;

    chr->filter[0]   = (uint16_t *)c->vChrFilter;
    chr->filter_size = c->vChrFilterSize;
    chr->filter_pos  = c->vChrFilterPos;
    chr->isMMX       = use_mmx;

    if (!yuv2packedX) {
        lum->pfn.yuv2anyX = yuv2anyX;
        return;
    }

    /* Store a shortcut routine only when the tap counts allow using it;
     * the general routine is always stored next to it. */
    if (c->yuv2packed1 && c->vLumFilterSize == 1 && c->vChrFilterSize <= 2) {
        lum->pfn.yuv2packed1 = yuv2packed1;
    } else if (c->yuv2packed2 && c->vLumFilterSize == 2 && c->vChrFilterSize == 2) {
        lum->pfn.yuv2packed2 = yuv2packed2;
    }
    lum->yuv2packedX = yuv2packedX;
}
321
322
323