• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer
2  * Copyright (C) <2014> Wim Taymans <wim.taymans@gmail.com>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #endif
23 
24 #include <string.h>
25 #include <stdio.h>
26 #include <math.h>
27 
28 /**
29  * SECTION:gstvideoscaler
30  * @title: GstVideoScaler
31  * @short_description: Utility object for rescaling video frames
32  *
33  * #GstVideoScaler is a utility object for rescaling and resampling
34  * video frames using various interpolation / sampling methods.
35  *
36  */
37 
38 #ifndef DISABLE_ORC
39 #include <orc/orcfunctions.h>
40 #else
41 #define orc_memcpy memcpy
42 #endif
43 
44 #include "video-orc.h"
45 #include "video-scaler.h"
46 
47 #ifndef GST_DISABLE_GST_DEBUG
48 #define GST_CAT_DEFAULT ensure_debug_category()
49 static GstDebugCategory *
ensure_debug_category(void)50 ensure_debug_category (void)
51 {
52   static gsize cat_gonce = 0;
53 
54   if (g_once_init_enter (&cat_gonce)) {
55     gsize cat_done;
56 
57     cat_done = (gsize) _gst_debug_category_new ("video-scaler", 0,
58         "video-scaler object");
59 
60     g_once_init_leave (&cat_gonce, cat_done);
61   }
62 
63   return (GstDebugCategory *) cat_gonce;
64 }
65 
66 #else
67 #define ensure_debug_category() /* NOOP */
68 #endif /* GST_DISABLE_GST_DEBUG */
69 
70 #define SCALE_U8          12
71 #define SCALE_U8_ROUND    (1 << (SCALE_U8 -1))
72 #define SCALE_U8_LQ       6
73 #define SCALE_U8_LQ_ROUND (1 << (SCALE_U8_LQ -1))
74 #define SCALE_U16         12
75 #define SCALE_U16_ROUND   (1 << (SCALE_U16 -1))
76 
77 #define LQ
78 
/* Function-pointer type whose signature matches the video_scale_h_*
 * functions in this file: one source line @src is scaled into @dest.
 * @dest_offset, @width and @n_elems are passed through to the
 * implementation. */
79 typedef void (*GstVideoScalerHFunc) (GstVideoScaler * scale,
80     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems);
/* Function-pointer type whose signature matches the video_scale_v_*
 * functions in this file: an array of source lines @srcs is combined into
 * one @dest line. */
81 typedef void (*GstVideoScalerVFunc) (GstVideoScaler * scale,
82     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
83     guint n_elems);
84 
/* Private state of a video scaler: the resampler description plus lazily
 * built integer coefficient tables and scratch lines. */
85 struct _GstVideoScaler
86 {
  /* method and flags exactly as passed to gst_video_scaler_new() */
87   GstVideoResamplerMethod method;
88   GstVideoScalerFlags flags;
89 
  /* offsets, phases and floating point taps; initialised in
   * gst_video_scaler_new() and released in gst_video_scaler_free() */
90   GstVideoResampler resampler;
91 
  /* merged and out_y_offset are read by make_s16_taps() to choose the
   * per-tap source offset step (2 or 4); where the three fields are set is
   * not visible in this part of the file */
92   gboolean merged;
93   gint in_y_offset;
94   gint out_y_offset;
95 
96   /* cached integer coefficients */
  /* taps_s16: n_phases * max_taps coefficients built by make_s16_taps() */
97   gint16 *taps_s16;
  /* taps_s16_4: per output element and tap, the coefficient repeated
   * n_elems times (allocated for up to 4 repeats) */
98   gint16 *taps_s16_4;
  /* offset_n: out_size * max_taps source offsets, one per output element
   * and tap */
99   guint32 *offset_n;
100   /* for ORC */
  /* step computed in gst_video_scaler_new() from a 16-bit left shift of
   * (in_size - 1); handed to the ORC 2-tap horizontal kernels */
101   gint inc;
102 
  /* scratch buffers and the width they were sized for, see
   * realloc_tmplines() */
103   gint tmpwidth;
104   gpointer tmpline1;
105   gpointer tmpline2;
106 };
107 
108 static void
resampler_zip(GstVideoResampler * resampler,const GstVideoResampler * r1,const GstVideoResampler * r2)109 resampler_zip (GstVideoResampler * resampler, const GstVideoResampler * r1,
110     const GstVideoResampler * r2)
111 {
112   guint i, out_size, max_taps, n_phases;
113   gdouble *taps;
114   guint32 *offset, *phase;
115 
116   g_return_if_fail (r1->max_taps == r2->max_taps);
117 
118   out_size = r1->out_size + r2->out_size;
119   max_taps = r1->max_taps;
120   n_phases = out_size;
121   offset = g_malloc (sizeof (guint32) * out_size);
122   phase = g_malloc (sizeof (guint32) * n_phases);
123   taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
124 
125   resampler->in_size = r1->in_size + r2->in_size;
126   resampler->out_size = out_size;
127   resampler->max_taps = max_taps;
128   resampler->n_phases = n_phases;
129   resampler->offset = offset;
130   resampler->phase = phase;
131   resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
132   resampler->taps = taps;
133 
134   for (i = 0; i < out_size; i++) {
135     guint idx = i / 2;
136     const GstVideoResampler *r;
137 
138     r = (i & 1) ? r2 : r1;
139 
140     offset[i] = r->offset[idx] * 2 + (i & 1);
141     phase[i] = i;
142 
143     memcpy (taps + i * max_taps, r->taps + r->phase[idx] * max_taps,
144         max_taps * sizeof (gdouble));
145   }
146 }
147 
148 static void
realloc_tmplines(GstVideoScaler * scale,gint n_elems,gint width)149 realloc_tmplines (GstVideoScaler * scale, gint n_elems, gint width)
150 {
151   gint n_taps = scale->resampler.max_taps;
152 
153   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
154     n_taps *= 2;
155 
156   scale->tmpline1 =
157       g_realloc (scale->tmpline1, sizeof (gint32) * width * n_elems * n_taps);
158   scale->tmpline2 =
159       g_realloc (scale->tmpline2, sizeof (gint32) * width * n_elems);
160   scale->tmpwidth = width;
161 }
162 
163 static void
scaler_dump(GstVideoScaler * scale)164 scaler_dump (GstVideoScaler * scale)
165 {
166 #if 0
167   gint i, j, in_size, out_size, max_taps;
168   guint32 *offset, *phase;
169   gdouble *taps;
170   GstVideoResampler *r = &scale->resampler;
171 
172   in_size = r->in_size;
173   out_size = r->out_size;
174   offset = r->offset;
175   phase = r->phase;
176   max_taps = r->max_taps;
177   taps = r->taps;
178 
179   g_print ("in %d, out %d, max_taps %d, n_phases %d\n", in_size, out_size,
180       max_taps, r->n_phases);
181 
182   for (i = 0; i < out_size; i++) {
183     g_print ("%d: \t%d \t%d:", i, offset[i], phase[i]);
184 
185     for (j = 0; j < max_taps; j++) {
186       g_print ("\t%f", taps[i * max_taps + j]);
187     }
188     g_print ("\n");
189   }
190 #endif
191 }
192 
193 #define INTERLACE_SHIFT 0.5
194 
195 /**
196  * gst_video_scaler_new: (skip)
197  * @method: a #GstVideoResamplerMethod
198  * @flags: #GstVideoScalerFlags
199  * @n_taps: number of taps to use
200  * @in_size: number of source elements
201  * @out_size: number of destination elements
202  * @options: (allow-none): extra options
203  *
204  * Make a new @method video scaler. @in_size source lines/pixels will
205  * be scaled to @out_size destination lines/pixels.
206  *
207  * @n_taps specifies the amount of pixels to use from the source for one output
208  * pixel. If n_taps is 0, this function chooses a good value automatically based
209  * on the @method and @in_size/@out_size.
210  *
211  * Returns: a #GstVideoScaler
212  */
213 GstVideoScaler *
gst_video_scaler_new(GstVideoResamplerMethod method,GstVideoScalerFlags flags,guint n_taps,guint in_size,guint out_size,GstStructure * options)214 gst_video_scaler_new (GstVideoResamplerMethod method, GstVideoScalerFlags flags,
215     guint n_taps, guint in_size, guint out_size, GstStructure * options)
216 {
217   GstVideoScaler *scale;
218 
219   g_return_val_if_fail (in_size != 0, NULL);
220   g_return_val_if_fail (out_size != 0, NULL);
221 
222   scale = g_slice_new0 (GstVideoScaler);
223 
224   GST_DEBUG ("%d %u  %u->%u", method, n_taps, in_size, out_size);
225 
226   scale->method = method;
227   scale->flags = flags;
228 
229   if (flags & GST_VIDEO_SCALER_FLAG_INTERLACED) {
230     GstVideoResampler tresamp, bresamp;
231     gdouble shift;
232 
233     shift = (INTERLACE_SHIFT * out_size) / in_size;
234 
235     gst_video_resampler_init (&tresamp, method,
236         GST_VIDEO_RESAMPLER_FLAG_HALF_TAPS, (out_size + 1) / 2, n_taps, shift,
237         (in_size + 1) / 2, (out_size + 1) / 2, options);
238 
239     n_taps = tresamp.max_taps;
240 
241     gst_video_resampler_init (&bresamp, method, 0, out_size - tresamp.out_size,
242         n_taps, -shift, in_size - tresamp.in_size,
243         out_size - tresamp.out_size, options);
244 
245     resampler_zip (&scale->resampler, &tresamp, &bresamp);
246     gst_video_resampler_clear (&tresamp);
247     gst_video_resampler_clear (&bresamp);
248   } else {
249     gst_video_resampler_init (&scale->resampler, method,
250         GST_VIDEO_RESAMPLER_FLAG_NONE, out_size, n_taps, 0.0, in_size, out_size,
251         options);
252   }
253 
254   if (out_size == 1)
255     scale->inc = 0;
256   else
257     scale->inc = ((in_size - 1) << 16) / (out_size - 1) - 1;
258 
259   scaler_dump (scale);
260   GST_DEBUG ("max_taps %d", scale->resampler.max_taps);
261 
262   return scale;
263 }
264 
265 /**
266  * gst_video_scaler_free:
267  * @scale: a #GstVideoScaler
268  *
269  * Free a previously allocated #GstVideoScaler @scale.
270  */
271 void
gst_video_scaler_free(GstVideoScaler * scale)272 gst_video_scaler_free (GstVideoScaler * scale)
273 {
274   g_return_if_fail (scale != NULL);
275 
276   gst_video_resampler_clear (&scale->resampler);
277   g_free (scale->taps_s16);
278   g_free (scale->taps_s16_4);
279   g_free (scale->offset_n);
280   g_free (scale->tmpline1);
281   g_free (scale->tmpline2);
282   g_slice_free (GstVideoScaler, scale);
283 }
284 
285 /**
286  * gst_video_scaler_get_max_taps:
287  * @scale: a #GstVideoScaler
288  *
289  * Get the maximum number of taps for @scale.
290  *
291  * Returns: the maximum number of taps
292  */
293 guint
gst_video_scaler_get_max_taps(GstVideoScaler * scale)294 gst_video_scaler_get_max_taps (GstVideoScaler * scale)
295 {
296   g_return_val_if_fail (scale != NULL, 0);
297 
298   return scale->resampler.max_taps;
299 }
300 
301 /**
302  * gst_video_scaler_get_coeff:
303  * @scale: a #GstVideoScaler
304  * @out_offset: an output offset
305  * @in_offset: result input offset
306  * @n_taps: result n_taps
307  *
308  * For a given pixel at @out_offset, get the first required input pixel at
309  * @in_offset and the @n_taps filter coefficients.
310  *
311  * Note that for interlaced content, @in_offset needs to be incremented with
312  * 2 to get the next input line.
313  *
314  * Returns: an array of @n_tap gdouble values with filter coefficients.
315  */
316 const gdouble *
gst_video_scaler_get_coeff(GstVideoScaler * scale,guint out_offset,guint * in_offset,guint * n_taps)317 gst_video_scaler_get_coeff (GstVideoScaler * scale,
318     guint out_offset, guint * in_offset, guint * n_taps)
319 {
320   guint offset, phase;
321 
322   g_return_val_if_fail (scale != NULL, NULL);
323   g_return_val_if_fail (out_offset < scale->resampler.out_size, NULL);
324 
325   offset = scale->resampler.offset[out_offset];
326   phase = scale->resampler.phase[out_offset];
327 
328   if (in_offset)
329     *in_offset = offset;
330   if (n_taps) {
331     *n_taps = scale->resampler.max_taps;
332     if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
333       *n_taps *= 2;
334   }
335   return scale->resampler.taps + phase * scale->resampler.max_taps;
336 }
337 
338 static gboolean
resampler_convert_coeff(const gdouble * src,gpointer dest,guint n,guint bits,guint precision)339 resampler_convert_coeff (const gdouble * src,
340     gpointer dest, guint n, guint bits, guint precision)
341 {
342   gdouble multiplier;
343   gint i, j;
344   gdouble offset, l_offset, h_offset;
345   gboolean exact = FALSE;
346 
347   multiplier = (1 << precision);
348 
349   /* Round to integer, but with an adjustable bias that we use to
350    * eliminate the DC error. */
351   l_offset = 0.0;
352   h_offset = 1.0;
353   offset = 0.5;
354 
355   for (i = 0; i < 64; i++) {
356     gint sum = 0;
357 
358     for (j = 0; j < n; j++) {
359       gint16 tap = floor (offset + src[j] * multiplier);
360 
361       ((gint16 *) dest)[j] = tap;
362 
363       sum += tap;
364     }
365     if (sum == (1 << precision)) {
366       exact = TRUE;
367       break;
368     }
369 
370     if (l_offset == h_offset)
371       break;
372 
373     if (sum < (1 << precision)) {
374       if (offset > l_offset)
375         l_offset = offset;
376       offset += (h_offset - l_offset) / 2;
377     } else {
378       if (offset < h_offset)
379         h_offset = offset;
380       offset -= (h_offset - l_offset) / 2;
381     }
382   }
383 
384   if (!exact)
385     GST_WARNING ("can't find exact taps");
386 
387   return exact;
388 }
389 
390 static void
make_s16_taps(GstVideoScaler * scale,gint n_elems,gint precision)391 make_s16_taps (GstVideoScaler * scale, gint n_elems, gint precision)
392 {
393   gint i, j, max_taps, n_phases, out_size, src_inc;
394   gint16 *taps_s16, *taps_s16_4;
395   gdouble *taps;
396   guint32 *phase, *offset, *offset_n;
397 
398   n_phases = scale->resampler.n_phases;
399   max_taps = scale->resampler.max_taps;
400 
401   taps = scale->resampler.taps;
402   taps_s16 = scale->taps_s16 = g_malloc (sizeof (gint16) * n_phases * max_taps);
403 
404   for (i = 0; i < n_phases; i++) {
405     resampler_convert_coeff (taps, taps_s16, max_taps, 16, precision);
406 
407     taps += max_taps;
408     taps_s16 += max_taps;
409   }
410 
411   out_size = scale->resampler.out_size;
412 
413   taps_s16 = scale->taps_s16;
414   phase = scale->resampler.phase;
415   offset = scale->resampler.offset;
416 
417   taps_s16_4 = scale->taps_s16_4 =
418       g_malloc (sizeof (gint16) * out_size * max_taps * 4);
419   offset_n = scale->offset_n =
420       g_malloc (sizeof (guint32) * out_size * max_taps);
421 
422   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
423     src_inc = 2;
424   else
425     src_inc = 1;
426 
427   for (j = 0; j < max_taps; j++) {
428     for (i = 0; i < out_size; i++) {
429       gint16 tap;
430 
431       if (scale->merged) {
432         if ((i & 1) == scale->out_y_offset)
433           offset_n[j * out_size + i] = offset[i] + (2 * j);
434         else
435           offset_n[j * out_size + i] = offset[i] + (4 * j);
436       } else {
437         offset_n[j * out_size + i] = offset[i] + j * src_inc;
438       }
439       tap = taps_s16[phase[i] * max_taps + j];
440       taps_s16_4[(j * out_size + i) * n_elems + 0] = tap;
441       if (n_elems > 1)
442         taps_s16_4[(j * out_size + i) * n_elems + 1] = tap;
443       if (n_elems > 2)
444         taps_s16_4[(j * out_size + i) * n_elems + 2] = tap;
445       if (n_elems > 3)
446         taps_s16_4[(j * out_size + i) * n_elems + 3] = tap;
447     }
448   }
449 }
450 
451 #undef ACC_SCALE
452 
453 static void
video_scale_h_near_u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)454 video_scale_h_near_u8 (GstVideoScaler * scale,
455     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
456 {
457   guint8 *s, *d;
458   gint i;
459 
460   d = (guint8 *) dest + dest_offset;
461   s = (guint8 *) src;
462 
463   {
464 #ifndef ACC_SCALE
465     guint32 *offset = scale->resampler.offset + dest_offset;
466 
467     for (i = 0; i < width; i++)
468       d[i] = s[offset[i]];
469 #else
470     gint acc = 0;
471 
472     for (i = 0; i < width; i++) {
473       gint j = (acc + 0x8000) >> 16;
474       d[i] = s[j];
475       acc += scale->inc;
476     }
477 #endif
478   }
479 }
480 
481 static void
video_scale_h_near_3u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)482 video_scale_h_near_3u8 (GstVideoScaler * scale,
483     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
484 {
485   guint8 *s, *d;
486   gint i;
487 
488   d = (guint8 *) dest + dest_offset;
489   s = (guint8 *) src;
490 
491   {
492 #ifndef ACC_SCALE
493     guint32 *offset = scale->resampler.offset + dest_offset;
494 
495     for (i = 0; i < width; i++) {
496       gint j = offset[i] * 3;
497 
498       d[i * 3 + 0] = s[j + 0];
499       d[i * 3 + 1] = s[j + 1];
500       d[i * 3 + 2] = s[j + 2];
501     }
502 #else
503     gint acc = 0;
504 
505     for (i = 0; i < width; i++) {
506       gint j = ((acc + 0x8000) >> 16) * 3;
507 
508       d[i * 3 + 0] = s[j + 0];
509       d[i * 3 + 1] = s[j + 1];
510       d[i * 3 + 2] = s[j + 2];
511       acc += scale->inc;
512     }
513 #endif
514   }
515 }
516 
517 static void
video_scale_h_near_u16(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)518 video_scale_h_near_u16 (GstVideoScaler * scale,
519     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
520 {
521   guint16 *s, *d;
522   gint i;
523 
524   d = (guint16 *) dest + dest_offset;
525   s = (guint16 *) src;
526 
527   {
528 #ifndef ACC_SCALE
529     guint32 *offset = scale->resampler.offset + dest_offset;
530 
531     for (i = 0; i < width; i++)
532       d[i] = s[offset[i]];
533 #else
534     gint acc = 0;
535 
536     for (i = 0; i < width; i++) {
537       gint j = (acc + 0x8000) >> 16;
538       d[i] = s[j];
539       acc += scale->inc;
540     }
541 #endif
542   }
543 }
544 
545 static void
video_scale_h_near_u32(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)546 video_scale_h_near_u32 (GstVideoScaler * scale,
547     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
548 {
549   guint32 *s, *d;
550 
551   d = (guint32 *) dest + dest_offset;
552   s = (guint32 *) src;
553 
554 #if 0
555   /* ORC is slower on this */
556   video_orc_resample_h_near_u32_lq (d, s, 0, scale->inc, width);
557 #elif 0
558   video_orc_resample_h_near_u32 (d, s, offset, width);
559 #else
560   {
561     gint i;
562 #ifndef ACC_SCALE
563     guint32 *offset = scale->resampler.offset + dest_offset;
564 
565     for (i = 0; i < width; i++)
566       d[i] = s[offset[i]];
567 #else
568     gint acc = 0;
569 
570     for (i = 0; i < width; i++) {
571       gint j = (acc + 0x8000) >> 16;
572       d[i] = s[j];
573       acc += scale->inc;
574     }
575 #endif
576   }
577 #endif
578 }
579 
580 static void
video_scale_h_near_u64(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)581 video_scale_h_near_u64 (GstVideoScaler * scale,
582     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
583 {
584   guint64 *s, *d;
585   gint i;
586   guint32 *offset;
587 
588   d = (guint64 *) dest + dest_offset;
589   s = (guint64 *) src;
590 
591   offset = scale->resampler.offset + dest_offset;
592   for (i = 0; i < width; i++)
593     d[i] = s[offset[i]];
594 }
595 
596 static void
video_scale_h_2tap_1u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)597 video_scale_h_2tap_1u8 (GstVideoScaler * scale,
598     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
599 {
600   guint8 *s, *d;
601 
602   d = (guint8 *) dest + dest_offset;
603   s = (guint8 *) src;
604 
605   video_orc_resample_h_2tap_1u8_lq (d, s, 0, scale->inc, width);
606 }
607 
608 static void
video_scale_h_2tap_4u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)609 video_scale_h_2tap_4u8 (GstVideoScaler * scale,
610     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
611 {
612   guint32 *s, *d;
613 
614   d = (guint32 *) dest + dest_offset;
615   s = (guint32 *) src;
616 
617   video_orc_resample_h_2tap_4u8_lq (d, s, 0, scale->inc, width);
618 }
619 
620 static void
video_scale_h_ntap_u8(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)621 video_scale_h_ntap_u8 (GstVideoScaler * scale,
622     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
623 {
624   gint16 *taps;
625   gint i, max_taps, count;
626   gpointer d;
627   guint32 *offset_n;
628   guint8 *pixels;
629   gint16 *temp;
630 
631   if (scale->taps_s16 == NULL)
632 #ifdef LQ
633     make_s16_taps (scale, n_elems, SCALE_U8_LQ);
634 #else
635     make_s16_taps (scale, n_elems, SCALE_U8);
636 #endif
637 
638   max_taps = scale->resampler.max_taps;
639   offset_n = scale->offset_n;
640 
641   pixels = (guint8 *) scale->tmpline1;
642 
643   /* prepare the arrays */
644   count = width * max_taps;
645   switch (n_elems) {
646     case 1:
647     {
648       guint8 *s = (guint8 *) src;
649 
650       for (i = 0; i < count; i++)
651         pixels[i] = s[offset_n[i]];
652 
653       d = (guint8 *) dest + dest_offset;
654       break;
655     }
656     case 2:
657     {
658       guint16 *p16 = (guint16 *) pixels;
659       guint16 *s = (guint16 *) src;
660 
661       for (i = 0; i < count; i++)
662         p16[i] = s[offset_n[i]];
663 
664       d = (guint16 *) dest + dest_offset;
665       break;
666     }
667     case 3:
668     {
669       guint8 *s = (guint8 *) src;
670 
671       for (i = 0; i < count; i++) {
672         gint j = offset_n[i] * 3;
673         pixels[i * 3 + 0] = s[j + 0];
674         pixels[i * 3 + 1] = s[j + 1];
675         pixels[i * 3 + 2] = s[j + 2];
676       }
677       d = (guint8 *) dest + dest_offset * 3;
678       break;
679     }
680     case 4:
681     {
682       guint32 *p32 = (guint32 *) pixels;
683       guint32 *s = (guint32 *) src;
684 #if 0
685       video_orc_resample_h_near_u32 (p32, s, offset_n, count);
686 #else
687       for (i = 0; i < count; i++)
688         p32[i] = s[offset_n[i]];
689 #endif
690       d = (guint32 *) dest + dest_offset;
691       break;
692     }
693     default:
694       return;
695   }
696   temp = (gint16 *) scale->tmpline2;
697   taps = scale->taps_s16_4;
698   count = width * n_elems;
699 
700 #ifdef LQ
701   if (max_taps == 2) {
702     video_orc_resample_h_2tap_u8_lq (d, pixels, pixels + count, taps,
703         taps + count, count);
704   } else {
705     /* first pixels with first tap to temp */
706     if (max_taps >= 3) {
707       video_orc_resample_h_multaps3_u8_lq (temp, pixels, pixels + count,
708           pixels + count * 2, taps, taps + count, taps + count * 2, count);
709       max_taps -= 3;
710       pixels += count * 3;
711       taps += count * 3;
712     } else {
713       gint first = max_taps % 3;
714 
715       video_orc_resample_h_multaps_u8_lq (temp, pixels, taps, count);
716       video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels + count, count,
717           taps + count, count * 2, count, first - 1);
718       max_taps -= first;
719       pixels += count * first;
720       taps += count * first;
721     }
722     while (max_taps > 3) {
723       if (max_taps >= 6) {
724         video_orc_resample_h_muladdtaps3_u8_lq (temp, pixels, pixels + count,
725             pixels + count * 2, taps, taps + count, taps + count * 2, count);
726         max_taps -= 3;
727         pixels += count * 3;
728         taps += count * 3;
729       } else {
730         video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
731             taps, count * 2, count, max_taps - 3);
732         pixels += count * (max_taps - 3);
733         taps += count * (max_taps - 3);
734         max_taps = 3;
735       }
736     }
737     if (max_taps == 3) {
738       video_orc_resample_h_muladdscaletaps3_u8_lq (d, pixels, pixels + count,
739           pixels + count * 2, taps, taps + count, taps + count * 2, temp,
740           count);
741     } else {
742       if (max_taps) {
743         /* add other pixels with other taps to t4 */
744         video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
745             taps, count * 2, count, max_taps);
746       }
747       /* scale and write final result */
748       video_orc_resample_scaletaps_u8_lq (d, temp, count);
749     }
750   }
751 #else
752   /* first pixels with first tap to t4 */
753   video_orc_resample_h_multaps_u8 (temp, pixels, taps, count);
754   /* add other pixels with other taps to t4 */
755   video_orc_resample_h_muladdtaps_u8 (temp, 0, pixels + count, count,
756       taps + count, count * 2, count, max_taps - 1);
757   /* scale and write final result */
758   video_orc_resample_scaletaps_u8 (d, temp, count);
759 #endif
760 }
761 
762 static void
video_scale_h_ntap_u16(GstVideoScaler * scale,gpointer src,gpointer dest,guint dest_offset,guint width,guint n_elems)763 video_scale_h_ntap_u16 (GstVideoScaler * scale,
764     gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
765 {
766   gint16 *taps;
767   gint i, max_taps, count;
768   gpointer d;
769   guint32 *offset_n;
770   guint16 *pixels;
771   gint32 *temp;
772 
773   if (scale->taps_s16 == NULL)
774     make_s16_taps (scale, n_elems, SCALE_U16);
775 
776   max_taps = scale->resampler.max_taps;
777   offset_n = scale->offset_n;
778 
779   pixels = (guint16 *) scale->tmpline1;
780   /* prepare the arrays FIXME, we can add this into ORC */
781   count = width * max_taps;
782   switch (n_elems) {
783     case 1:
784     {
785       guint16 *s = (guint16 *) src;
786 
787       for (i = 0; i < count; i++)
788         pixels[i] = s[offset_n[i]];
789 
790       d = (guint16 *) dest + dest_offset;
791       break;
792     }
793     case 4:
794     {
795       guint64 *p64 = (guint64 *) pixels;
796       guint64 *s = (guint64 *) src;
797 #if 0
798       video_orc_resample_h_near_u32 (p32, s, offset_n, count);
799 #else
800       for (i = 0; i < count; i++)
801         p64[i] = s[offset_n[i]];
802 #endif
803       d = (guint64 *) dest + dest_offset;
804       break;
805     }
806     default:
807       return;
808   }
809 
810   temp = (gint32 *) scale->tmpline2;
811   taps = scale->taps_s16_4;
812   count = width * n_elems;
813 
814   if (max_taps == 2) {
815     video_orc_resample_h_2tap_u16 (d, pixels, pixels + count, taps,
816         taps + count, count);
817   } else {
818     /* first pixels with first tap to t4 */
819     video_orc_resample_h_multaps_u16 (temp, pixels, taps, count);
820     /* add other pixels with other taps to t4 */
821     video_orc_resample_h_muladdtaps_u16 (temp, 0, pixels + count, count * 2,
822         taps + count, count * 2, count, max_taps - 1);
823     /* scale and write final result */
824     video_orc_resample_scaletaps_u16 (d, temp, count);
825   }
826 }
827 
828 static void
video_scale_v_near_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)829 video_scale_v_near_u8 (GstVideoScaler * scale,
830     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
831     guint n_elems)
832 {
833   if (dest != srcs[0])
834     memcpy (dest, srcs[0], n_elems * width);
835 }
836 
837 static void
video_scale_v_near_u16(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)838 video_scale_v_near_u16 (GstVideoScaler * scale,
839     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
840     guint n_elems)
841 {
842   if (dest != srcs[0])
843     memcpy (dest, srcs[0], n_elems * 2 * width);
844 }
845 
846 static void
video_scale_v_2tap_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)847 video_scale_v_2tap_u8 (GstVideoScaler * scale,
848     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
849     guint n_elems)
850 {
851   gint max_taps, src_inc;
852   guint8 *s1, *s2, *d;
853   gint16 p1;
854 
855   if (scale->taps_s16 == NULL)
856 #ifdef LQ
857     make_s16_taps (scale, n_elems, SCALE_U8_LQ + 2);
858 #else
859     make_s16_taps (scale, n_elems, SCALE_U8);
860 #endif
861 
862   max_taps = scale->resampler.max_taps;
863 
864   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
865     src_inc = 2;
866   else
867     src_inc = 1;
868 
869   d = (guint8 *) dest;
870   s1 = (guint8 *) srcs[0 * src_inc];
871   s2 = (guint8 *) srcs[1 * src_inc];
872   p1 = scale->taps_s16[dest_offset * max_taps + 1];
873 
874 #ifdef LQ
875   video_orc_resample_v_2tap_u8_lq (d, s1, s2, p1, width * n_elems);
876 #else
877   video_orc_resample_v_2tap_u8 (d, s1, s2, p1, width * n_elems);
878 #endif
879 }
880 
881 static void
video_scale_v_2tap_u16(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)882 video_scale_v_2tap_u16 (GstVideoScaler * scale,
883     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
884     guint n_elems)
885 {
886   gint max_taps, src_inc;
887   guint16 *s1, *s2, *d;
888   gint16 p1;
889 
890   if (scale->taps_s16 == NULL)
891     make_s16_taps (scale, n_elems, SCALE_U16);
892 
893   max_taps = scale->resampler.max_taps;
894 
895   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
896     src_inc = 2;
897   else
898     src_inc = 1;
899 
900   d = (guint16 *) dest;
901   s1 = (guint16 *) srcs[0 * src_inc];
902   s2 = (guint16 *) srcs[1 * src_inc];
903   p1 = scale->taps_s16[dest_offset * max_taps + 1];
904 
905   video_orc_resample_v_2tap_u16 (d, s1, s2, p1, width * n_elems);
906 }
907 
/* Disabled 4-tap horizontal scaler for 4-byte pixels built on the
 * video_orc_resample_h_4tap_8 kernel. It is excluded from compilation by the
 * #if 0 below. NOTE(review): as written it would not build, because n_elems
 * is not a parameter of this function and S16_SCALE is not defined in the
 * visible part of this file. */
908 #if 0
909 static void
910 video_scale_h_4tap_8888 (GstVideoScaler * scale,
911     gpointer src, gpointer dest, guint dest_offset, guint width)
912 {
913   gint16 *taps;
914   gint i, max_taps, count;
915   guint8 *d;
916   guint32 *offset_n;
917   guint32 *pixels;
918 
919   if (scale->taps_s16 == NULL)
920     make_s16_taps (scale, n_elems, S16_SCALE);
921 
922   max_taps = scale->resampler.max_taps;
923   offset_n = scale->offset_n;
924 
925   d = (guint8 *) dest + 4 * dest_offset;
926 
927   /* prepare the arrays FIXME, we can add this into ORC */
928   count = width * max_taps;
929   pixels = (guint32 *) scale->tmpline1;
930   for (i = 0; i < count; i++)
931     pixels[i] = ((guint32 *) src)[offset_n[i]];
932 
933   taps = scale->taps_s16_4;
934   count = width * 4;
935 
936   video_orc_resample_h_4tap_8 (d, pixels, pixels + width, pixels + 2 * width,
937       pixels + 3 * width, taps, taps + count, taps + 2 * count,
938       taps + 3 * count, count);
939 }
940 #endif
941 
942 static void
video_scale_v_4tap_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)943 video_scale_v_4tap_u8 (GstVideoScaler * scale,
944     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
945     guint n_elems)
946 {
947   gint max_taps;
948   guint8 *s1, *s2, *s3, *s4, *d;
949   gint p1, p2, p3, p4, src_inc;
950   gint16 *taps;
951 
952   if (scale->taps_s16 == NULL)
953 #ifdef LQ
954     make_s16_taps (scale, n_elems, SCALE_U8_LQ);
955 #else
956     make_s16_taps (scale, n_elems, SCALE_U8);
957 #endif
958 
959   max_taps = scale->resampler.max_taps;
960   taps = scale->taps_s16 + dest_offset * max_taps;
961 
962   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
963     src_inc = 2;
964   else
965     src_inc = 1;
966 
967   d = (guint8 *) dest;
968   s1 = (guint8 *) srcs[0 * src_inc];
969   s2 = (guint8 *) srcs[1 * src_inc];
970   s3 = (guint8 *) srcs[2 * src_inc];
971   s4 = (guint8 *) srcs[3 * src_inc];
972   p1 = taps[0];
973   p2 = taps[1];
974   p3 = taps[2];
975   p4 = taps[3];
976 
977 #ifdef LQ
978   video_orc_resample_v_4tap_u8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4,
979       width * n_elems);
980 #else
981   video_orc_resample_v_4tap_u8 (d, s1, s2, s3, s4, p1, p2, p3, p4,
982       width * n_elems);
983 #endif
984 }
985 
986 static void
video_scale_v_ntap_u8(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)987 video_scale_v_ntap_u8 (GstVideoScaler * scale,
988     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
989     guint n_elems)
990 {
991   gint16 *taps;
992   gint i, max_taps, count, src_inc;
993   gpointer d;
994   gint16 *temp;
995 
996   if (scale->taps_s16 == NULL)
997 #ifdef LQ
998     make_s16_taps (scale, n_elems, SCALE_U8_LQ);
999 #else
1000     make_s16_taps (scale, n_elems, SCALE_U8);
1001 #endif
1002 
1003   max_taps = scale->resampler.max_taps;
1004   taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
1005 
1006   d = (guint32 *) dest;
1007 
1008   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
1009     src_inc = 2;
1010   else
1011     src_inc = 1;
1012 
1013   temp = (gint16 *) scale->tmpline2;
1014   count = width * n_elems;
1015 
1016 #ifdef LQ
1017   if (max_taps >= 4) {
1018     video_orc_resample_v_multaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
1019         srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
1020         taps[3], count);
1021     max_taps -= 4;
1022     srcs += 4 * src_inc;
1023     taps += 4;
1024   } else {
1025     gint first = (max_taps % 4);
1026 
1027     video_orc_resample_v_multaps_u8_lq (temp, srcs[0], taps[0], count);
1028     for (i = 1; i < first; i++) {
1029       video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
1030           count);
1031     }
1032     max_taps -= first;
1033     srcs += first * src_inc;
1034     taps += first;
1035   }
1036   while (max_taps > 4) {
1037     if (max_taps >= 8) {
1038       video_orc_resample_v_muladdtaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
1039           srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
1040           taps[3], count);
1041       max_taps -= 4;
1042       srcs += 4 * src_inc;
1043       taps += 4;
1044     } else {
1045       for (i = 0; i < max_taps - 4; i++)
1046         video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
1047             count);
1048       srcs += (max_taps - 4) * src_inc;
1049       taps += (max_taps - 4);
1050       max_taps = 4;
1051     }
1052   }
1053   if (max_taps == 4) {
1054     video_orc_resample_v_muladdscaletaps4_u8_lq (d, srcs[0], srcs[1 * src_inc],
1055         srcs[2 * src_inc], srcs[3 * src_inc], temp, taps[0], taps[1], taps[2],
1056         taps[3], count);
1057   } else {
1058     for (i = 0; i < max_taps; i++)
1059       video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
1060           count);
1061     video_orc_resample_scaletaps_u8_lq (d, temp, count);
1062   }
1063 
1064 #else
1065   video_orc_resample_v_multaps_u8 (temp, srcs[0], taps[0], count);
1066   for (i = 1; i < max_taps; i++) {
1067     video_orc_resample_v_muladdtaps_u8 (temp, srcs[i * src_inc], taps[i],
1068         count);
1069   }
1070   video_orc_resample_scaletaps_u8 (d, temp, count);
1071 #endif
1072 }
1073 
1074 static void
video_scale_v_ntap_u16(GstVideoScaler * scale,gpointer srcs[],gpointer dest,guint dest_offset,guint width,guint n_elems)1075 video_scale_v_ntap_u16 (GstVideoScaler * scale,
1076     gpointer srcs[], gpointer dest, guint dest_offset, guint width,
1077     guint n_elems)
1078 {
1079   gint16 *taps;
1080   gint i, max_taps, count, src_inc;
1081   gpointer d;
1082   gint32 *temp;
1083 
1084   if (scale->taps_s16 == NULL)
1085     make_s16_taps (scale, n_elems, SCALE_U16);
1086 
1087   max_taps = scale->resampler.max_taps;
1088   taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
1089 
1090   d = (guint16 *) dest;
1091 
1092   if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
1093     src_inc = 2;
1094   else
1095     src_inc = 1;
1096 
1097   temp = (gint32 *) scale->tmpline2;
1098   count = width * n_elems;
1099 
1100   video_orc_resample_v_multaps_u16 (temp, srcs[0], taps[0], count);
1101   for (i = 1; i < max_taps; i++) {
1102     video_orc_resample_v_muladdtaps_u16 (temp, srcs[i * src_inc], taps[i],
1103         count);
1104   }
1105   video_orc_resample_scaletaps_u16 (d, temp, count);
1106 }
1107 
1108 static gint
get_y_offset(GstVideoFormat format)1109 get_y_offset (GstVideoFormat format)
1110 {
1111   switch (format) {
1112     case GST_VIDEO_FORMAT_YUY2:
1113     case GST_VIDEO_FORMAT_YVYU:
1114       return 0;
1115     default:
1116     case GST_VIDEO_FORMAT_UYVY:
1117       return 1;
1118   }
1119 }
1120 
1121 /**
1122  * gst_video_scaler_combine_packed_YUV: (skip)
1123  * @y_scale: a scaler for the Y component
1124  * @uv_scale: a scaler for the U and V components
1125  * @in_format: the input video format
1126  * @out_format: the output video format
1127  *
1128  * Combine a scaler for Y and UV into one scaler for the packed @format.
1129  *
1130  * Returns: a new horizontal videoscaler for @format.
1131  *
1132  * Since: 1.6
1133  */
1134 GstVideoScaler *
gst_video_scaler_combine_packed_YUV(GstVideoScaler * y_scale,GstVideoScaler * uv_scale,GstVideoFormat in_format,GstVideoFormat out_format)1135 gst_video_scaler_combine_packed_YUV (GstVideoScaler * y_scale,
1136     GstVideoScaler * uv_scale, GstVideoFormat in_format,
1137     GstVideoFormat out_format)
1138 {
1139   GstVideoScaler *scale;
1140   GstVideoResampler *resampler;
1141   guint i, out_size, max_taps, n_phases;
1142   gdouble *taps;
1143   guint32 *offset, *phase;
1144 
1145   g_return_val_if_fail (y_scale != NULL, NULL);
1146   g_return_val_if_fail (uv_scale != NULL, NULL);
1147   g_return_val_if_fail (uv_scale->resampler.max_taps ==
1148       y_scale->resampler.max_taps, NULL);
1149 
1150   scale = g_slice_new0 (GstVideoScaler);
1151 
1152   scale->method = y_scale->method;
1153   scale->flags = y_scale->flags;
1154   scale->merged = TRUE;
1155 
1156   resampler = &scale->resampler;
1157 
1158   out_size = GST_ROUND_UP_4 (y_scale->resampler.out_size * 2);
1159   max_taps = y_scale->resampler.max_taps;
1160   n_phases = out_size;
1161   offset = g_malloc (sizeof (guint32) * out_size);
1162   phase = g_malloc (sizeof (guint32) * n_phases);
1163   taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
1164 
1165   resampler->in_size = y_scale->resampler.in_size * 2;
1166   resampler->out_size = out_size;
1167   resampler->max_taps = max_taps;
1168   resampler->n_phases = n_phases;
1169   resampler->offset = offset;
1170   resampler->phase = phase;
1171   resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
1172   resampler->taps = taps;
1173 
1174   scale->in_y_offset = get_y_offset (in_format);
1175   scale->out_y_offset = get_y_offset (out_format);
1176   scale->inc = y_scale->inc;
1177 
1178   for (i = 0; i < out_size; i++) {
1179     gint ic;
1180 
1181     if ((i & 1) == scale->out_y_offset) {
1182       ic = MIN (i / 2, y_scale->resampler.out_size - 1);
1183       offset[i] = y_scale->resampler.offset[ic] * 2 + scale->in_y_offset;
1184       memcpy (taps + i * max_taps, y_scale->resampler.taps +
1185           y_scale->resampler.phase[ic] * max_taps, max_taps * sizeof (gdouble));
1186     } else {
1187       ic = MIN (i / 4, uv_scale->resampler.out_size - 1);
1188       offset[i] = uv_scale->resampler.offset[ic] * 4 + (i & 3);
1189       memcpy (taps + i * max_taps, uv_scale->resampler.taps +
1190           uv_scale->resampler.phase[ic] * max_taps,
1191           max_taps * sizeof (gdouble));
1192     }
1193     phase[i] = i;
1194   }
1195 
1196   scaler_dump (scale);
1197 
1198   return scale;
1199 }
1200 
1201 static gboolean
get_functions(GstVideoScaler * hscale,GstVideoScaler * vscale,GstVideoFormat format,GstVideoScalerHFunc * hfunc,GstVideoScalerVFunc * vfunc,gint * n_elems,guint * width,gint * bits)1202 get_functions (GstVideoScaler * hscale, GstVideoScaler * vscale,
1203     GstVideoFormat format,
1204     GstVideoScalerHFunc * hfunc, GstVideoScalerVFunc * vfunc,
1205     gint * n_elems, guint * width, gint * bits)
1206 {
1207   gboolean mono = FALSE;
1208 
1209   switch (format) {
1210     case GST_VIDEO_FORMAT_GRAY8:
1211       *bits = 8;
1212       *n_elems = 1;
1213       mono = TRUE;
1214       break;
1215     case GST_VIDEO_FORMAT_YUY2:
1216     case GST_VIDEO_FORMAT_YVYU:
1217     case GST_VIDEO_FORMAT_UYVY:
1218       *bits = 8;
1219       *n_elems = 1;
1220       *width = GST_ROUND_UP_4 (*width * 2);
1221       break;
1222     case GST_VIDEO_FORMAT_RGB:
1223     case GST_VIDEO_FORMAT_BGR:
1224     case GST_VIDEO_FORMAT_v308:
1225     case GST_VIDEO_FORMAT_IYU2:
1226       *bits = 8;
1227       *n_elems = 3;
1228       break;
1229     case GST_VIDEO_FORMAT_AYUV:
1230     case GST_VIDEO_FORMAT_RGBx:
1231     case GST_VIDEO_FORMAT_BGRx:
1232     case GST_VIDEO_FORMAT_xRGB:
1233     case GST_VIDEO_FORMAT_xBGR:
1234     case GST_VIDEO_FORMAT_RGBA:
1235     case GST_VIDEO_FORMAT_BGRA:
1236     case GST_VIDEO_FORMAT_ARGB:
1237     case GST_VIDEO_FORMAT_ABGR:
1238       *bits = 8;
1239       *n_elems = 4;
1240       break;
1241     case GST_VIDEO_FORMAT_ARGB64:
1242     case GST_VIDEO_FORMAT_ARGB64_LE:
1243     case GST_VIDEO_FORMAT_ARGB64_BE:
1244     case GST_VIDEO_FORMAT_RGBA64_BE:
1245     case GST_VIDEO_FORMAT_RGBA64_LE:
1246     case GST_VIDEO_FORMAT_BGRA64_BE:
1247     case GST_VIDEO_FORMAT_BGRA64_LE:
1248     case GST_VIDEO_FORMAT_ABGR64_BE:
1249     case GST_VIDEO_FORMAT_ABGR64_LE:
1250     case GST_VIDEO_FORMAT_AYUV64:
1251       *bits = 16;
1252       *n_elems = 4;
1253       break;
1254     case GST_VIDEO_FORMAT_GRAY16_LE:
1255     case GST_VIDEO_FORMAT_GRAY16_BE:
1256       *bits = 16;
1257       *n_elems = 1;
1258       mono = TRUE;
1259       break;
1260     case GST_VIDEO_FORMAT_NV12:
1261     case GST_VIDEO_FORMAT_NV16:
1262     case GST_VIDEO_FORMAT_NV21:
1263     case GST_VIDEO_FORMAT_NV24:
1264     case GST_VIDEO_FORMAT_NV61:
1265       *bits = 8;
1266       *n_elems = 2;
1267       break;
1268     default:
1269       return FALSE;
1270   }
1271   if (*bits == 8) {
1272     switch (hscale ? hscale->resampler.max_taps : 0) {
1273       case 0:
1274         break;
1275       case 1:
1276         if (*n_elems == 1)
1277           *hfunc = video_scale_h_near_u8;
1278         else if (*n_elems == 2)
1279           *hfunc = video_scale_h_near_u16;
1280         else if (*n_elems == 3)
1281           *hfunc = video_scale_h_near_3u8;
1282         else if (*n_elems == 4)
1283           *hfunc = video_scale_h_near_u32;
1284         break;
1285       case 2:
1286         if (*n_elems == 1 && mono)
1287           *hfunc = video_scale_h_2tap_1u8;
1288         else if (*n_elems == 4)
1289           *hfunc = video_scale_h_2tap_4u8;
1290         else
1291           *hfunc = video_scale_h_ntap_u8;
1292         break;
1293       default:
1294         *hfunc = video_scale_h_ntap_u8;
1295         break;
1296     }
1297     switch (vscale ? vscale->resampler.max_taps : 0) {
1298       case 0:
1299         break;
1300       case 1:
1301         *vfunc = video_scale_v_near_u8;
1302         break;
1303       case 2:
1304         *vfunc = video_scale_v_2tap_u8;
1305         break;
1306       case 4:
1307         *vfunc = video_scale_v_4tap_u8;
1308         break;
1309       default:
1310         *vfunc = video_scale_v_ntap_u8;
1311         break;
1312     }
1313   } else if (*bits == 16) {
1314     switch (hscale ? hscale->resampler.max_taps : 0) {
1315       case 0:
1316         break;
1317       case 1:
1318         if (*n_elems == 1)
1319           *hfunc = video_scale_h_near_u16;
1320         else
1321           *hfunc = video_scale_h_near_u64;
1322         break;
1323       default:
1324         *hfunc = video_scale_h_ntap_u16;
1325         break;
1326     }
1327     switch (vscale ? vscale->resampler.max_taps : 0) {
1328       case 0:
1329         break;
1330       case 1:
1331         *vfunc = video_scale_v_near_u16;
1332         break;
1333       case 2:
1334         *vfunc = video_scale_v_2tap_u16;
1335         break;
1336       default:
1337         *vfunc = video_scale_v_ntap_u16;
1338         break;
1339     }
1340   }
1341   return TRUE;
1342 }
1343 
1344 /**
1345  * gst_video_scaler_horizontal:
1346  * @scale: a #GstVideoScaler
1347  * @format: a #GstVideoFormat for @src and @dest
1348  * @src: source pixels
1349  * @dest: destination pixels
1350  * @dest_offset: the horizontal destination offset
1351  * @width: the number of pixels to scale
1352  *
1353  * Horizontally scale the pixels in @src to @dest, starting from @dest_offset
1354  * for @width samples.
1355  */
1356 void
gst_video_scaler_horizontal(GstVideoScaler * scale,GstVideoFormat format,gpointer src,gpointer dest,guint dest_offset,guint width)1357 gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format,
1358     gpointer src, gpointer dest, guint dest_offset, guint width)
1359 {
1360   gint n_elems, bits;
1361   GstVideoScalerHFunc func = NULL;
1362 
1363   g_return_if_fail (scale != NULL);
1364   g_return_if_fail (src != NULL);
1365   g_return_if_fail (dest != NULL);
1366   g_return_if_fail (dest_offset + width <= scale->resampler.out_size);
1367 
1368   if (!get_functions (scale, NULL, format, &func, NULL, &n_elems, &width, &bits)
1369       || func == NULL)
1370     goto no_func;
1371 
1372   if (scale->tmpwidth < width)
1373     realloc_tmplines (scale, n_elems, width);
1374 
1375   func (scale, src, dest, dest_offset, width, n_elems);
1376   return;
1377 
1378 no_func:
1379   {
1380     GST_WARNING ("no scaler function for format");
1381   }
1382 }
1383 
1384 /**
1385  * gst_video_scaler_vertical:
1386  * @scale: a #GstVideoScaler
1387  * @format: a #GstVideoFormat for @srcs and @dest
1388  * @src_lines: source pixels lines
1389  * @dest: destination pixels
1390  * @dest_offset: the vertical destination offset
1391  * @width: the number of pixels to scale
1392  *
1393  * Vertically combine @width pixels in the lines in @src_lines to @dest.
1394  * @dest is the location of the target line at @dest_offset and
1395  * @srcs are the input lines for @dest_offset.
1396  */
1397 void
gst_video_scaler_vertical(GstVideoScaler * scale,GstVideoFormat format,gpointer src_lines[],gpointer dest,guint dest_offset,guint width)1398 gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format,
1399     gpointer src_lines[], gpointer dest, guint dest_offset, guint width)
1400 {
1401   gint n_elems, bits;
1402   GstVideoScalerVFunc func = NULL;
1403 
1404   g_return_if_fail (scale != NULL);
1405   g_return_if_fail (src_lines != NULL);
1406   g_return_if_fail (dest != NULL);
1407   g_return_if_fail (dest_offset < scale->resampler.out_size);
1408 
1409   if (!get_functions (NULL, scale, format, NULL, &func, &n_elems, &width, &bits)
1410       || func == NULL)
1411     goto no_func;
1412 
1413   if (scale->tmpwidth < width)
1414     realloc_tmplines (scale, n_elems, width);
1415 
1416   func (scale, src_lines, dest, dest_offset, width, n_elems);
1417 
1418   return;
1419 
1420 no_func:
1421   {
1422     GST_WARNING ("no scaler function for format");
1423   }
1424 }
1425 
1426 
1427 /**
1428  * gst_video_scaler_2d:
1429  * @hscale: a horizontal #GstVideoScaler
1430  * @vscale: a vertical #GstVideoScaler
1431  * @format: a #GstVideoFormat for @srcs and @dest
1432  * @src: source pixels
1433  * @src_stride: source pixels stride
1434  * @dest: destination pixels
1435  * @dest_stride: destination pixels stride
1436  * @x: the horizontal destination offset
1437  * @y: the vertical destination offset
1438  * @width: the number of output pixels to scale
1439  * @height: the number of output lines to scale
1440  *
1441  * Scale a rectangle of pixels in @src with @src_stride to @dest with
1442  * @dest_stride using the horizontal scaler @hscaler and the vertical
1443  * scaler @vscale.
1444  *
1445  * One or both of @hscale and @vscale can be NULL to only perform scaling in
1446  * one dimension or do a copy without scaling.
1447  *
1448  * @x and @y are the coordinates in the destination image to process.
1449  */
1450 void
gst_video_scaler_2d(GstVideoScaler * hscale,GstVideoScaler * vscale,GstVideoFormat format,gpointer src,gint src_stride,gpointer dest,gint dest_stride,guint x,guint y,guint width,guint height)1451 gst_video_scaler_2d (GstVideoScaler * hscale, GstVideoScaler * vscale,
1452     GstVideoFormat format, gpointer src, gint src_stride,
1453     gpointer dest, gint dest_stride, guint x, guint y,
1454     guint width, guint height)
1455 {
1456   gint n_elems, bits;
1457   GstVideoScalerHFunc hfunc = NULL;
1458   GstVideoScalerVFunc vfunc = NULL;
1459   gint i;
1460   gboolean interlaced;
1461 
1462   g_return_if_fail (src != NULL);
1463   g_return_if_fail (dest != NULL);
1464 
1465   if (!get_functions (hscale, vscale, format, &hfunc, &vfunc, &n_elems, &width,
1466           &bits))
1467     goto no_func;
1468 
1469   interlaced = vscale && ! !(vscale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED);
1470 
1471 #define LINE(s,ss,i)  ((guint8 *)(s) + ((i) * (ss)))
1472 #define TMP_LINE(s,i) ((guint8 *)((s)->tmpline1) + (i) * (sizeof (gint32) * width * n_elems))
1473 
1474   if (vscale == NULL) {
1475     if (hscale == NULL) {
1476       guint xo, xw;
1477       guint8 *s, *d;
1478 
1479       xo = x * n_elems;
1480       xw = width * n_elems * (bits / 8);
1481 
1482       s = LINE (src, src_stride, y) + xo;
1483       d = LINE (dest, dest_stride, y) + xo;
1484 
1485       /* no scaling, do memcpy */
1486       for (i = y; i < height; i++) {
1487         memcpy (d, s, xw);
1488         d += dest_stride;
1489         s += src_stride;
1490       }
1491     } else {
1492       if (hscale->tmpwidth < width)
1493         realloc_tmplines (hscale, n_elems, width);
1494 
1495       /* only horizontal scaling */
1496       for (i = y; i < height; i++) {
1497         hfunc (hscale, LINE (src, src_stride, i), LINE (dest, dest_stride, i),
1498             x, width, n_elems);
1499       }
1500     }
1501   } else {
1502     guint v_taps;
1503     gpointer *lines;
1504 
1505     if (vscale->tmpwidth < width)
1506       realloc_tmplines (vscale, n_elems, width);
1507 
1508     v_taps = vscale->resampler.max_taps;
1509 
1510     lines = g_alloca ((interlaced ? 2 : 1) * v_taps * sizeof (gpointer));
1511     memset (lines, 0, (interlaced ? 2 : 1) * v_taps * sizeof (gpointer));
1512 
1513     if (hscale == NULL) {
1514       guint src_inc = interlaced ? 2 : 1;
1515 
1516       /* only vertical scaling */
1517       for (i = y; i < height; i++) {
1518         guint in, j;
1519 
1520         in = vscale->resampler.offset[i];
1521         for (j = 0; j < v_taps; j++) {
1522           guint l = in + j * src_inc;
1523 
1524           g_assert (l < vscale->resampler.in_size);
1525           lines[j * src_inc] = LINE (src, src_stride, l);
1526         }
1527 
1528         vfunc (vscale, lines, LINE (dest, dest_stride, i), i, width, n_elems);
1529       }
1530     } else {
1531       gint s1, s2;
1532       guint *tmpline_lines;
1533 
1534       tmpline_lines = g_newa (guint, (interlaced ? 2 : 1) * v_taps);
1535       /* initialize with -1 */
1536       memset (tmpline_lines, 0xff,
1537           (interlaced ? 2 : 1) * v_taps * sizeof (guint));
1538 
1539       if (hscale->tmpwidth < width)
1540         realloc_tmplines (hscale, n_elems, width);
1541 
1542       s1 = width * vscale->resampler.offset[height - 1];
1543       s2 = width * height;
1544 
1545       if (s1 <= s2) {
1546         for (i = y; i < height; i++) {
1547           guint in, j;
1548           guint src_inc = interlaced ? 2 : 1;
1549           guint f2_offset = (interlaced && (i % 2 == 1)) * v_taps;
1550 
1551           in = vscale->resampler.offset[i];
1552           for (j = 0; j < v_taps; j++) {
1553             guint k;
1554             guint l = in + j * src_inc;
1555 
1556             g_assert (l < vscale->resampler.in_size);
1557 
1558             /* First check if we already have this line in tmplines */
1559             for (k = f2_offset; k < v_taps + f2_offset; k++) {
1560               if (tmpline_lines[k] == l) {
1561                 lines[j * src_inc] = TMP_LINE (vscale, k);
1562                 break;
1563               }
1564             }
1565             /* Found */
1566             if (k < v_taps + f2_offset)
1567               continue;
1568 
1569             /* Otherwise find an empty line we can clear */
1570             for (k = f2_offset; k < v_taps + f2_offset; k++) {
1571               if (tmpline_lines[k] < in || tmpline_lines[k] == -1)
1572                 break;
1573             }
1574 
1575             /* Must not happen, that would mean we don't have enough space to
1576              * begin with */
1577             g_assert (k < v_taps + f2_offset);
1578 
1579             hfunc (hscale, LINE (src, src_stride, l), TMP_LINE (vscale, k), x,
1580                 width, n_elems);
1581             tmpline_lines[k] = l;
1582             lines[j * src_inc] = TMP_LINE (vscale, k);
1583           }
1584 
1585           vfunc (vscale, lines, LINE (dest, dest_stride, i), i, width, n_elems);
1586         }
1587       } else {
1588         guint vx, vw, w1, ws;
1589         guint h_taps;
1590 
1591         h_taps = hscale->resampler.max_taps;
1592         w1 = x + width - 1;
1593         ws = hscale->resampler.offset[w1];
1594 
1595         /* we need to estimate the area that we first need to scale in the
1596          * vertical direction. Scale x and width to find the lower bound and
1597          * overshoot the width to find the upper bound */
1598         vx = (hscale->inc * x) >> 16;
1599         vx = MIN (vx, hscale->resampler.offset[x]);
1600         vw = (hscale->inc * (x + width)) >> 16;
1601         if (hscale->merged) {
1602           if ((w1 & 1) == hscale->out_y_offset)
1603             vw = MAX (vw, ws + (2 * h_taps));
1604           else
1605             vw = MAX (vw, ws + (4 * h_taps));
1606         } else {
1607           vw = MAX (vw, ws + h_taps);
1608         }
1609         vw += 1;
1610         /* but clamp to max size */
1611         vw = MIN (vw, hscale->resampler.in_size);
1612 
1613         if (vscale->tmpwidth < vw)
1614           realloc_tmplines (vscale, n_elems, vw);
1615 
1616         for (i = y; i < height; i++) {
1617           guint in, j;
1618           guint src_inc = interlaced ? 2 : 1;
1619 
1620           in = vscale->resampler.offset[i];
1621           for (j = 0; j < v_taps; j++) {
1622             guint l = in + j * src_inc;
1623 
1624             g_assert (l < vscale->resampler.in_size);
1625             lines[j * src_inc] =
1626                 LINE (src, src_stride, in + j * src_inc) + vx * n_elems;
1627           }
1628 
1629           vfunc (vscale, lines, TMP_LINE (vscale, 0) + vx * n_elems, i,
1630               vw - vx, n_elems);
1631 
1632           hfunc (hscale, TMP_LINE (vscale, 0), LINE (dest, dest_stride,
1633                   i), x, width, n_elems);
1634         }
1635       }
1636     }
1637   }
1638   return;
1639 
1640 no_func:
1641   {
1642     GST_WARNING ("no scaler function for format");
1643   }
1644 }
1645 
1646 #undef LINE
1647 #undef TMP_LINE
1648