• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 /****************************************************************************
13  *
14  *   Module Title :     gen_scalers.c
15  *
16  *   Description  :     Generic image scaling functions.
17  *
18  ***************************************************************************/
19 
20 /****************************************************************************
21 *  Header Files
22 ****************************************************************************/
23 #include "vpx_scale/vpxscale.h"
24 
25 /****************************************************************************
26 *  Imports
27 ****************************************************************************/
28 
29 /****************************************************************************
30  *
31  *  ROUTINE       : horizontal_line_4_5_scale_c64
32  *
33  *  INPUTS        : const unsigned char *source : Pointer to source data.
34  *                  unsigned int source_width    : Stride of source.
35  *                  unsigned char *dest         : Pointer to destination data.
36  *                  unsigned int dest_width      : Stride of destination (NOT USED).
37  *
38  *  OUTPUTS       : None.
39  *
40  *  RETURNS       : void
41  *
42  *  FUNCTION      : Copies horizontal line of pixels from source to
43  *                  destination scaling up by 4 to 5.
44  *
45  *  SPECIAL NOTES : None.
46  *
47  ****************************************************************************/
static
void horizontal_line_4_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int ba, cb, dc, ed;
    unsigned char *restrict des = dest;
    unsigned int *restrict src = (unsigned int *)source;
    unsigned int src_current, src_next;

    // Constants that are used for the filtering.  For best speed the
    //  result is right shifted by 16.  In the generic version the shift
    //  is by 8, so an extra 8 is put into each weight here so that 16
    //  comes out later.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    (void) dest_width;

    // Nothing to scale.  Without this the tail below would still load
    //  one word and store five bytes.
    if (source_width == 0)
    {
        return;
    }

    // 5 points are needed to filter to give 5 output points.  A load
    //  can pull up 4 at a time, and one needs to be "borrowed" from the
    //  next set of data.  So instead of loading those 5 points each
    //  time, "steal" a point from the next set and only load up 4 each
    //  time through.
    src_current = _mem4(src);

    // Written as "i + 4 < source_width" because "source_width - 4" is an
    //  unsigned subtraction and wraps when source_width is below 4.
    for (i = 0; i + 4 < source_width; i += 4)
    {
        // src_current already holds the word at src, so step to the
        //  following word BEFORE loading.  A post-increment here would
        //  load the same word a second time.
        src_next = _mem4(++src);

        // Reorder the data so that it is ready for the dot product.
        //  Each value pairs two neighbouring pixels; "ed" takes its
        //  second pixel from the next set.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);
        ed = _unpkhu4(_shrmb(src_next, src_current));

        // First output is the first pixel unchanged; the other four use
        //  the dot product with round and shift.
        des [0] = src_current & 0xff;
        des [1] = _dotprsu2(ba, const_205_51);
        des [2] = _dotprsu2(cb, const_154_102);
        des [3] = _dotprsu2(dc, const_102_154);
        des [4] = _dotprsu2(ed, const_51_205);

        des += 5;

        // Reuse the loaded values next time around.
        src_current = src_next;
    }

    // Filter the last set of points.  Normally a point from the next set
    //  would be used, but there is no next set, so just fill with the
    //  last pixel of this set.
    ba = _unpklu4(src_current);
    cb = _unpkhu4(_rotl(src_current, 8));
    dc = _unpkhu4(src_current);

    des [0] = src_current & 0xff;
    des [1] = _dotprsu2(ba, const_205_51);
    des [2] = _dotprsu2(cb, const_154_102);
    des [3] = _dotprsu2(dc, const_102_154);
    // The first pixel sits in the low byte (see des [0]), so the fourth
    //  and last pixel of the set is the top byte.
    des [4] = src_current >> 24;
}
121 /****************************************************************************
122  *
123  *  ROUTINE       : vertical_band_4_5_scale_c64
124  *
125  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
126  *                  unsigned int dest_pitch : Stride of destination data.
127  *                  unsigned int dest_width : Width of destination data.
128  *
129  *  OUTPUTS       : None.
130  *
131  *  RETURNS       : void
132  *
133  *  FUNCTION      : Scales vertical band of pixels by scale 4 to 5. The
134  *                  height of the band scaled is 4-pixels.
135  *
136  *  SPECIAL NOTES : The routine uses the first line of the band below
137  *                  the current band.
138  *
139  ****************************************************************************/
static
void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    // Filter weight pairs, each weight pre-shifted left by 8.
    const unsigned int w_51_205  = 0x3300CD00; // _pack2 (51 << 8, 205 << 8)
    const unsigned int w_205_51  = 0xCD003300; // _pack2 (205 << 8, 51 << 8)
    const unsigned int w_102_154 = 0x66009A00; // _pack2 (102 << 8, 154 << 8)
    const unsigned int w_154_102 = 0x9A006600; // _pack2 (154 << 8, 102 << 8)

    // Offsets of the rows touched in each column.  Rows 0-3 hold the
    //  band's input, rows 1-4 receive output, and row 5 is the first
    //  line of the band below.
    const unsigned int row1 = dest_pitch;
    const unsigned int row2 = dest_pitch * 2;
    const unsigned int row3 = dest_pitch * 3;
    const unsigned int row4 = dest_pitch * 4;
    const unsigned int row5 = dest_pitch * 5;

    unsigned char *restrict in = dest;
    unsigned char *restrict out = dest;
    unsigned int p0, p1, p2, p3, p5;
    unsigned int pair_1_0, pair_2_1, pair_3_2, pair_5_3;
    unsigned int remaining;

    // Prime the loop with the first column so that each pass can fetch
    //  the following column before it stores the current one.
    p0 = in [0];
    p1 = in [row1];
    p2 = in [row2];
    p3 = in [row3];
    p5 = in [row5];
    in++;

    for (remaining = dest_width; remaining != 0; remaining--)
    {
        // Pair up vertically adjacent pixels of the current column.
        pair_1_0 = _pack2(p1, p0);
        pair_2_1 = _pack2(p2, p1);
        pair_3_2 = _pack2(p3, p2);
        pair_5_3 = _pack2(p5, p3);

        // Fetch the next column before this one is overwritten.
        // NOTE(review): on the final pass this reads the column at index
        //  dest_width, one past the requested width - confirm the
        //  buffer has room for it.
        p0 = in [0];
        p1 = in [row1];
        p2 = in [row2];
        p3 = in [row3];
        p5 = in [row5];
        in++;

        // Row 0 is left as is; rows 1-4 are filtered.
        out [row1] = _dotprsu2(pair_1_0, w_205_51);
        out [row2] = _dotprsu2(pair_2_1, w_154_102);
        out [row3] = _dotprsu2(pair_3_2, w_102_154);
        out [row4] = _dotprsu2(pair_5_3, w_51_205);

        out++;
    }
}
187 
188 /****************************************************************************
189  *
190  *  ROUTINE       : last_vertical_band_4_5_scale_c64
191  *
192  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
193  *                  unsigned int dest_pitch : Stride of destination data.
194  *                  unsigned int dest_width : Width of destination data.
195  *
196  *  OUTPUTS       : None.
197  *
198  *  RETURNS       : void
199  *
200  *  FUNCTION      : Scales last vertical band of pixels by scale 4 to 5. The
201  *                  height of the band scaled is 4-pixels.
202  *
203  *  SPECIAL NOTES : The routine does not have available the first line of
204  *                  the band below the current band, since this is the
205  *                  last band.
206  *
207  ****************************************************************************/
static
void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c, d;
    unsigned int ba, cb, dc;
    unsigned int last;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    // Filter weight pairs, each weight pre-shifted left by 8.
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Load the first column up front so that each pass of the loop can
    //  fetch the following column before storing the current one.
    a = src [0];
    b = src [dest_pitch];
    c = src [dest_pitch*2];
    d = src [dest_pitch*3];
    src ++;

    for (i = 0; i < dest_width; ++i)
    {
        // Pair up vertically adjacent pixels of the current column.
        ba = _pack2(b, a);
        cb = _pack2(c, b);
        dc = _pack2(d, c);

        // There is no band below, so row 4 repeats this column's row 3.
        //  Keep that value now: d is about to be reloaded for the next
        //  column, and using it after the reload would shift the last
        //  row by one pixel.
        last = d;

        // NOTE(review): on the final pass this reads the column at index
        //  dest_width, one past the requested width - confirm the
        //  buffer has room for it.
        a = src [0];
        b = src [dest_pitch];
        c = src [dest_pitch*2];
        d = src [dest_pitch*3];
        src ++;

        des [dest_pitch] = _dotprsu2(ba, const_205_51);
        des [dest_pitch*2] = _dotprsu2(cb, const_154_102);
        des [dest_pitch*3] = _dotprsu2(dc, const_102_154);
        des [dest_pitch*4] = (unsigned char) last;

        des++;
    }
}
248 
249 /****************************************************************************
250  *
251  *  ROUTINE       : horizontal_line_3_5_scale_c64
252  *
253  *  INPUTS        : const unsigned char *source : Pointer to source data.
254  *                  unsigned int source_width    : Stride of source.
255  *                  unsigned char *dest         : Pointer to destination data.
256  *                  unsigned int dest_width      : Stride of destination (NOT USED).
257  *
258  *  OUTPUTS       : None.
259  *
260  *  RETURNS       : void
261  *
262  *  FUNCTION      : Copies horizontal line of pixels from source to
263  *                  destination scaling up by 3 to 5.
264  *
265  *  SPECIAL NOTES : None.
266  *
267  *
268  ****************************************************************************/
static
void horizontal_line_3_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int ba, cb, dc;
    unsigned int src_current;
    unsigned char *restrict des = dest;
    unsigned char *restrict src = (unsigned char *)source;

    // Filter weight pairs, each weight pre-shifted left by 8.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    (void) dest_width;

    // Nothing to scale.  Without this the tail below would still load
    //  one word and store five bytes.
    if (source_width == 0)
    {
        return;
    }

    // Each pass consumes 3 source pixels and produces 5 outputs.  The
    //  bound is written as "i + 3 < source_width" because
    //  "source_width - 3" is an unsigned subtraction and wraps when
    //  source_width is below 3.
    for (i = 0; i + 3 < source_width; i += 3)
    {
        // One load brings in this set's 3 pixels plus the first pixel
        //  of the next set.
        src_current = _mem4(src);

        // Reorder the data so that it is ready for the dot product.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);

        des [0] = src_current & 0xff;
        des [1] = _dotprsu2(ba, const_154_102);
        des [2] = _dotprsu2(cb, const_51_205);
        des [3] = _dotprsu2(cb, const_205_51);
        des [4] = _dotprsu2(dc, const_102_154);

        src += 3;
        des += 5;
    }

    // Last set: there is no next set to borrow a pixel from, so the
    //  final output repeats the third pixel of this set.
    // NOTE(review): this still loads a full word although only 3 pixels
    //  of the line remain - confirm the byte after the line is readable.
    src_current = _mem4(src);

    ba = _unpklu4(src_current);
    cb = _unpkhu4(_rotl(src_current, 8));
    dc = _unpkhu4(src_current);

    des [0] = src_current & 0xff;
    des [1] = _dotprsu2(ba, const_154_102);
    des [2] = _dotprsu2(cb, const_51_205);
    des [3] = _dotprsu2(cb, const_205_51);
    des [4] = dc & 0xff;
}
327 
328 /****************************************************************************
329  *
330  *  ROUTINE       : vertical_band_3_5_scale_c64
331  *
332  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
333  *                  unsigned int dest_pitch : Stride of destination data.
334  *                  unsigned int dest_width : Width of destination data.
335  *
336  *  OUTPUTS       : None.
337  *
338  *  RETURNS       : void
339  *
340  *  FUNCTION      : Scales vertical band of pixels by scale 3 to 5. The
341  *                  height of the band scaled is 3-pixels.
342  *
343  *  SPECIAL NOTES : The routine uses the first line of the band below
344  *                  the current band.
345  *
346  ****************************************************************************/
static
void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    // Filter weight pairs, each weight pre-shifted left by 8.
    const unsigned int w_51_205  = 0x3300CD00; // _pack2 (51 << 8, 205 << 8)
    const unsigned int w_205_51  = 0xCD003300; // _pack2 (205 << 8, 51 << 8)
    const unsigned int w_102_154 = 0x66009A00; // _pack2 (102 << 8, 154 << 8)
    const unsigned int w_154_102 = 0x9A006600; // _pack2 (154 << 8, 102 << 8)

    // Offsets of the rows touched in each column.  Rows 0-2 hold the
    //  band's input, rows 1-4 receive output, and row 5 is the first
    //  line of the band below.
    const unsigned int row1 = dest_pitch;
    const unsigned int row2 = dest_pitch * 2;
    const unsigned int row3 = dest_pitch * 3;
    const unsigned int row4 = dest_pitch * 4;
    const unsigned int row5 = dest_pitch * 5;

    unsigned char *restrict in = dest;
    unsigned char *restrict out = dest;
    unsigned int p0, p1, p2, p5;
    unsigned int pair_1_0, pair_2_1, pair_5_2;
    unsigned int remaining;

    // Prime the loop with the first column so that each pass can fetch
    //  the following column before it stores the current one.
    p0 = in [0];
    p1 = in [row1];
    p2 = in [row2];
    p5 = in [row5];
    in++;

    for (remaining = dest_width; remaining != 0; remaining--)
    {
        // Pair up vertically adjacent pixels of the current column.
        pair_1_0 = _pack2(p1, p0);
        pair_2_1 = _pack2(p2, p1);
        pair_5_2 = _pack2(p5, p2);

        // Fetch the next column before this one is overwritten.
        // NOTE(review): on the final pass this reads the column at index
        //  dest_width, one past the requested width - confirm the
        //  buffer has room for it.
        p0 = in [0];
        p1 = in [row1];
        p2 = in [row2];
        p5 = in [row5];
        in++;

        // Row 0 is left as is; rows 2 and 3 both come from the same
        //  pixel pair with mirrored weights.
        out [row1] = _dotprsu2(pair_1_0, w_154_102);
        out [row2] = _dotprsu2(pair_2_1, w_51_205);
        out [row3] = _dotprsu2(pair_2_1, w_205_51);
        out [row4] = _dotprsu2(pair_5_2, w_102_154);

        out++;
    }
}
389 
390 /****************************************************************************
391  *
392  *  ROUTINE       : last_vertical_band_3_5_scale_c64
393  *
394  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
395  *                  unsigned int dest_pitch : Stride of destination data.
396  *                  unsigned int dest_width : Width of destination data.
397  *
398  *  OUTPUTS       : None.
399  *
400  *  RETURNS       : void
401  *
402  *  FUNCTION      : Scales last vertical band of pixels by scale 3 to 5. The
403  *                  height of the band scaled is 3-pixels.
404  *
405  *  SPECIAL NOTES : The routine does not have available the first line of
406  *                  the band below the current band, since this is the
407  *                  last band.
408  *
409  ****************************************************************************/
static
void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c;
    unsigned int ba, cb;
    unsigned int last;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    // Filter weight pairs, each weight pre-shifted left by 8.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Load the first column up front so that each pass of the loop can
    //  fetch the following column before storing the current one.
    a = src [0];
    b = src [dest_pitch];
    c = src [dest_pitch*2];
    src ++;

    for (i = 0; i < dest_width; ++i)
    {
        // Pair up vertically adjacent pixels of the current column.
        ba = _pack2(b, a);
        cb = _pack2(c, b);

        // There is no band below, so row 4 repeats this column's row 2.
        //  Keep that value now: c is about to be reloaded for the next
        //  column, and using it after the reload would shift the last
        //  row by one pixel.
        last = c;

        // NOTE(review): on the final pass this reads the column at index
        //  dest_width, one past the requested width - confirm the
        //  buffer has room for it.
        a = src [0];
        b = src [dest_pitch];
        c = src [dest_pitch*2];
        src ++;

        des [dest_pitch]   = _dotprsu2(ba, const_154_102);
        des [dest_pitch*2] = _dotprsu2(cb, const_51_205);
        des [dest_pitch*3] = _dotprsu2(cb, const_205_51);
        des [dest_pitch*4] = (unsigned char) last;

        des++;
    }
}
447 
448 /****************************************************************************
449  *
450  *  ROUTINE       : horizontal_line_1_2_scale_c64
451  *
452  *  INPUTS        : const unsigned char *source : Pointer to source data.
453  *                  unsigned int source_width    : Stride of source.
454  *                  unsigned char *dest         : Pointer to destination data.
455  *                  unsigned int dest_width      : Stride of destination (NOT USED).
456  *
457  *  OUTPUTS       : None.
458  *
459  *  RETURNS       : void
460  *
461  *  FUNCTION      : Copies horizontal line of pixels from source to
462  *                  destination scaling up by 1 to 2.
463  *
464  *  SPECIAL NOTES : source width must be a multiple of 4.
465  *
466  ****************************************************************************/
horizontal_line_1_2_scale_c64(const unsigned char * source,unsigned int source_width,unsigned char * dest,unsigned int dest_width)467 void horizontal_line_1_2_scale_c64
468 (
469     const unsigned char *source,
470     unsigned int source_width,
471     unsigned char *dest,
472     unsigned int dest_width
473 )
474 {
475     unsigned int i;
476     unsigned char *restrict des = dest;
477     unsigned char *restrict src = (unsigned char *)source;
478     unsigned int src7_4i, src4_1i, src3_0i;
479     unsigned int a4_0i, ahi, alo;
480     double src7_0d, src3_0d;
481     const unsigned int k01 = 0x01010101;
482 
483     for (i = 0; i < source_width / 4; i += 1)
484     {
485         // Load up the data from src.  Here a wide load is
486         //  used to get 8 bytes at once, only 5 will be used
487         //  for the actual computation.
488         src7_0d = _memd8(src);
489         src3_0i = _lo(src7_0d);
490         src7_4i = _hi(src7_0d);
491 
492         // Need to average between points.  Shift byte 5 into
493         //  the lower word.  This will result in bytes 5-1
494         //  averaged with 4-0.
495         src4_1i = _shrmb(src7_4i, src3_0i);
496         a4_0i = _avgu4(src4_1i, src3_0i);
497 
498         // Expand the data out. Could do an unpack, however
499         //  all but the multiply units are getting pretty hard
500         //  here the multiply unit can take some of the computations.
501         src3_0d = _mpyu4(src3_0i, k01);
502 
503         // The averages need to be unpacked so that they are in 16
504         //  bit form and will be able to be interleaved with the
505         //  original data
506         ahi = _unpkhu4(a4_0i);
507         alo = _unpklu4(a4_0i);
508 
509         ahi = _swap4(ahi);
510         alo = _swap4(alo);
511 
512         // Mix the average result in with the orginal data.
513         ahi = _hi(src3_0d) | ahi;
514         alo = _lo(src3_0d) | alo;
515 
516         _memd8(des) = _itod(ahi, alo);
517 
518         des += 8;
519         src += 4;
520     }
521 }
522 
523 
524 /****************************************************************************
525  *
526  *  ROUTINE       : vertical_band_1_2_scale_c64
527  *
528  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
529  *                  unsigned int dest_pitch : Stride of destination data.
530  *                  unsigned int dest_width : Width of destination data.
531  *
532  *  OUTPUTS       : None.
533  *
534  *  RETURNS       : void
535  *
536  *  FUNCTION      : Scales vertical band of pixels by scale 1 to 2. The
537  *                  height of the band scaled is 1-pixel.
538  *
539  *  SPECIAL NOTES : The routine uses the first line of the band below
540  *                  the current band.
541  *                  Destination width must be a multiple of 4: the input
542  *                  width must be, and therefore so must the output.
543  *
544  ****************************************************************************/
545 static
vertical_band_1_2_scale_c64(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width)546 void vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
547 {
548     unsigned int i;
549     unsigned int a, b;
550     unsigned int *restrict line_a = (unsigned int *)dest;
551     unsigned int *restrict line_b = (unsigned int *)(dest + (dest_pitch * 2));
552     unsigned int *restrict des = (unsigned int *)(dest + dest_pitch);
553 
554     for (i = 0; i < dest_width / 4; i++)
555     {
556         a = _mem4(line_a++);
557         b = _mem4(line_b++);
558 
559         _mem4(des++) = _avgu4(a, b);
560     }
561 }
562 
563 /****************************************************************************
564  *
565  *  ROUTINE       : last_vertical_band_1_2_scale_c64
566  *
567  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
568  *                  unsigned int dest_pitch : Stride of destination data.
569  *                  unsigned int dest_width : Width of destination data.
570  *
571  *  OUTPUTS       : None.
572  *
573  *  RETURNS       : void
574  *
575  *  FUNCTION      : Scales last vertical band of pixels by scale 1 to 2. The
576  *                  height of the band scaled is 1-pixel.
577  *
578  *  SPECIAL NOTES : The routine does not have available the first line of
579  *                  the band below the current band, since this is the
580  *                  last band.  Again, width must be a multiple of 4.
581  *
582  ****************************************************************************/
583 static
last_vertical_band_1_2_scale_c64(unsigned char * dest,unsigned int dest_pitch,unsigned int dest_width)584 void last_vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
585 {
586     unsigned int i;
587     unsigned int *restrict src = (unsigned int *)dest;
588     unsigned int *restrict des = (unsigned int *)(dest + dest_pitch);
589 
590     for (i = 0; i < dest_width / 4; ++i)
591     {
592         _mem4(des++) = _mem4(src++);
593     }
594 }
595 
596 void
register_generic_scalers(void)597 register_generic_scalers(void)
598 {
599     vp8_horizontal_line_1_2_scale        = horizontal_line_1_2_scale_c64;
600     vp8_vertical_band_1_2_scale          = vertical_band_1_2_scale_c64;
601     vp8_last_vertical_band_1_2_scale      = last_vertical_band_1_2_scale_c64;
602     vp8_horizontal_line_3_5_scale        = horizontal_line_3_5_scale_c64;
603     vp8_vertical_band_3_5_scale          = vertical_band_3_5_scale_c64;
604     vp8_last_vertical_band_3_5_scale      = last_vertical_band_3_5_scale_c64;
605     vp8_horizontal_line_4_5_scale        = horizontal_line_4_5_scale_c64;
606     vp8_vertical_band_4_5_scale          = vertical_band_4_5_scale_c64;
607     vp8_last_vertical_band_4_5_scale      = last_vertical_band_4_5_scale_c64;
608 }
609