• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include <float.h>
13 #include <math.h>
14 #include <stdio.h>
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpxscale_arbitrary.h"
17 
18 #define FIXED_POINT
19 
20 #define MAX_IN_WIDTH        800
21 #define MAX_IN_HEIGHT       600
22 #define MAX_OUT_WIDTH       800
23 #define MAX_OUT_HEIGHT      600
24 #define MAX_OUT_DIMENSION   ((MAX_OUT_WIDTH > MAX_OUT_HEIGHT) ? \
25                              MAX_OUT_WIDTH : MAX_OUT_HEIGHT)
26 
27 BICUBIC_SCALER_STRUCT g_b_scaler;
28 static int g_first_time = 1;
29 
30 #pragma DATA_SECTION(g_hbuf, "VP6_HEAP")
31 #pragma DATA_ALIGN (g_hbuf, 32);
32 unsigned char g_hbuf[MAX_OUT_DIMENSION];
33 
34 #pragma DATA_SECTION(g_hbuf_uv, "VP6_HEAP")
35 #pragma DATA_ALIGN (g_hbuf_uv, 32);
36 unsigned char g_hbuf_uv[MAX_OUT_DIMENSION];
37 
38 
39 #ifdef FIXED_POINT
40 static int a_i = 0.6 * 65536;
41 #else
42 static float a = -0.6;
43 #endif
44 
45 #ifdef FIXED_POINT
46 //         3     2
47 // C0 = a*t - a*t
48 //
c0_fixed(unsigned int t)49 static short c0_fixed(unsigned int t)
50 {
51     // put t in Q16 notation
52     unsigned short v1, v2;
53 
54     // Q16
55     v1 = (a_i * t) >> 16;
56     v1 = (v1 * t) >> 16;
57 
58     // Q16
59     v2 = (a_i * t) >> 16;
60     v2 = (v2 * t) >> 16;
61     v2 = (v2 * t) >> 16;
62 
63     // Q12
64     return -((v1 - v2) >> 4);
65 }
66 
67 //                     2          3
68 // C1 = a*t + (3-2*a)*t  - (2-a)*t
69 //
c1_fixed(unsigned int t)70 static short c1_fixed(unsigned int t)
71 {
72     unsigned short v1, v2, v3;
73     unsigned short two, three;
74 
75     // Q16
76     v1 = (a_i * t) >> 16;
77 
78     // Q13
79     two = 2 << 13;
80     v2 = two - (a_i >> 3);
81     v2 = (v2 * t) >> 16;
82     v2 = (v2 * t) >> 16;
83     v2 = (v2 * t) >> 16;
84 
85     // Q13
86     three = 3 << 13;
87     v3 = three - (2 * (a_i >> 3));
88     v3 = (v3 * t) >> 16;
89     v3 = (v3 * t) >> 16;
90 
91     // Q12
92     return (((v1 >> 3) - v2 + v3) >> 1);
93 
94 }
95 
96 //                 2          3
97 // C2 = 1 - (3-a)*t  + (2-a)*t
98 //
c2_fixed(unsigned int t)99 static short c2_fixed(unsigned int t)
100 {
101     unsigned short v1, v2, v3;
102     unsigned short two, three;
103 
104     // Q13
105     v1 = 1 << 13;
106 
107     // Q13
108     three = 3 << 13;
109     v2 = three - (a_i >> 3);
110     v2 = (v2 * t) >> 16;
111     v2 = (v2 * t) >> 16;
112 
113     // Q13
114     two = 2 << 13;
115     v3 = two - (a_i >> 3);
116     v3 = (v3 * t) >> 16;
117     v3 = (v3 * t) >> 16;
118     v3 = (v3 * t) >> 16;
119 
120     // Q12
121     return (v1 - v2 + v3) >> 1;
122 }
123 
124 //                 2      3
125 // C3 = a*t - 2*a*t  + a*t
126 //
c3_fixed(unsigned int t)127 static short c3_fixed(unsigned int t)
128 {
129     int v1, v2, v3;
130 
131     // Q16
132     v1 = (a_i * t) >> 16;
133 
134     // Q15
135     v2 = 2 * (a_i >> 1);
136     v2 = (v2 * t) >> 16;
137     v2 = (v2 * t) >> 16;
138 
139     // Q16
140     v3 = (a_i * t) >> 16;
141     v3 = (v3 * t) >> 16;
142     v3 = (v3 * t) >> 16;
143 
144     // Q12
145     return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
146 }
147 #else
148 //          3     2
149 // C0 = -a*t + a*t
150 //
C0(float t)151 float C0(float t)
152 {
153     return -a * t * t * t + a * t * t;
154 }
155 
156 //                      2          3
157 // C1 = -a*t + (2*a+3)*t  - (a+2)*t
158 //
C1(float t)159 float C1(float t)
160 {
161     return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
162 }
163 
164 //                 2          3
165 // C2 = 1 - (a+3)*t  + (a+2)*t
166 //
C2(float t)167 float C2(float t)
168 {
169     return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
170 }
171 
172 //                 2      3
173 // C3 = a*t - 2*a*t  + a*t
174 //
C3(float t)175 float C3(float t)
176 {
177     return a * t * t * t - 2.0f * a * t * t + a * t;
178 }
179 #endif
180 
181 #if 0
182 int compare_real_fixed()
183 {
184     int i, errors = 0;
185     float mult = 1.0 / 10000.0;
186     unsigned int fixed_mult = mult * 4294967296;//65536;
187     unsigned int phase_offset_int;
188     float phase_offset_real;
189 
190     for (i = 0; i < 10000; i++)
191     {
192         int fixed0, fixed1, fixed2, fixed3, fixed_total;
193         int real0, real1, real2, real3, real_total;
194 
195         phase_offset_real = (float)i * mult;
196         phase_offset_int = (fixed_mult * i) >> 16;
197 //      phase_offset_int = phase_offset_real * 65536;
198 
199         fixed0 = c0_fixed(phase_offset_int);
200         real0 = C0(phase_offset_real) * 4096.0;
201 
202         if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
203             errors++;
204 
205         fixed1 = c1_fixed(phase_offset_int);
206         real1 = C1(phase_offset_real) * 4096.0;
207 
208         if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
209             errors++;
210 
211         fixed2 = c2_fixed(phase_offset_int);
212         real2 = C2(phase_offset_real) * 4096.0;
213 
214         if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
215             errors++;
216 
217         fixed3 = c3_fixed(phase_offset_int);
218         real3 = C3(phase_offset_real) * 4096.0;
219 
220         if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
221             errors++;
222 
223         fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
224         real_total = real0 + real1 + real2 + real3;
225 
226         if ((fixed_total > 4097) || (fixed_total < 4094))
227             errors ++;
228 
229         if ((real_total > 4097) || (real_total < 4095))
230             errors ++;
231     }
232 
233     return errors;
234 }
235 #endif
236 
237 // Find greatest common denominator between two integers.  Method used here is
238 //  slow compared to Euclid's algorithm, but does not require any division.
gcd(int a,int b)239 int gcd(int a, int b)
240 {
241     // Problem with this algorithm is that if a or b = 0 this function
242     //  will never exit.  Don't want to return 0 because any computation
243     //  that was based on a common denoninator and tried to reduce by
244     //  dividing by 0 would fail.  Best solution that could be thought of
245     //  would to be fail by returing a 1;
246     if (a <= 0 || b <= 0)
247         return 1;
248 
249     while (a != b)
250     {
251         if (b > a)
252             b = b - a;
253         else
254         {
255             int tmp = a;//swap large and
256             a = b; //small
257             b = tmp;
258         }
259     }
260 
261     return b;
262 }
263 
bicubic_coefficient_init()264 void bicubic_coefficient_init()
265 {
266     vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
267     g_first_time = 0;
268 }
269 
bicubic_coefficient_destroy()270 void bicubic_coefficient_destroy()
271 {
272     if (!g_first_time)
273     {
274         vpx_free(g_b_scaler.l_w);
275 
276         vpx_free(g_b_scaler.l_h);
277 
278         vpx_free(g_b_scaler.l_h_uv);
279 
280         vpx_free(g_b_scaler.c_w);
281 
282         vpx_free(g_b_scaler.c_h);
283 
284         vpx_free(g_b_scaler.c_h_uv);
285 
286         vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
287     }
288 }
289 
290 // Create the coeffients that will be used for the cubic interpolation.
291 //  Because scaling does not have to be equal in the vertical and horizontal
292 //  regimes the phase offsets will be different.  There are 4 coefficents
293 //  for each point, two on each side.  The layout is that there are the
294 //  4 coefficents for each phase in the array and then the next phase.
bicubic_coefficient_setup(int in_width,int in_height,int out_width,int out_height)295 int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height)
296 {
297     int i;
298 #ifdef FIXED_POINT
299     int phase_offset_int;
300     unsigned int fixed_mult;
301     int product_val = 0;
302 #else
303     float phase_offset;
304 #endif
305     int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
306 
307     if (g_first_time)
308         bicubic_coefficient_init();
309 
310 
311     // check to see if the coefficents have already been set up correctly
312     if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
313         && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
314         return 0;
315 
316     g_b_scaler.in_width = in_width;
317     g_b_scaler.in_height = in_height;
318     g_b_scaler.out_width = out_width;
319     g_b_scaler.out_height = out_height;
320 
321     // Don't want to allow crazy scaling, just try and prevent a catastrophic
322     //  failure here.  Want to fail after setting the member functions so if
323     //  if the scaler is called the member functions will not scale.
324     if (out_width <= 0 || out_height <= 0)
325         return -1;
326 
327     // reduce in/out width and height ratios using the gcd
328     gcd_w = gcd(out_width, in_width);
329     gcd_h = gcd(out_height, in_height);
330     gcd_h_uv = gcd(out_height, in_height / 2);
331 
332     // the numerator width and height are to be saved in
333     //  globals so they can be used during the scaling process
334     //  without having to be recalculated.
335     g_b_scaler.nw = out_width / gcd_w;
336     d_w = in_width / gcd_w;
337 
338     g_b_scaler.nh = out_height / gcd_h;
339     d_h = in_height / gcd_h;
340 
341     g_b_scaler.nh_uv = out_height / gcd_h_uv;
342     d_h_uv = (in_height / 2) / gcd_h_uv;
343 
344     // allocate memory for the coefficents
345     vpx_free(g_b_scaler.l_w);
346 
347     vpx_free(g_b_scaler.l_h);
348 
349     vpx_free(g_b_scaler.l_h_uv);
350 
351     g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
352     g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
353     g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
354 
355     vpx_free(g_b_scaler.c_w);
356 
357     vpx_free(g_b_scaler.c_h);
358 
359     vpx_free(g_b_scaler.c_h_uv);
360 
361     g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
362     g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
363     g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
364 
365     g_b_scaler.hbuf = g_hbuf;
366     g_b_scaler.hbuf_uv = g_hbuf_uv;
367 
368     // Set up polyphase filter taps.  This needs to be done before
369     //  the scaling because of the floating point math required.  The
370     //  coefficients are multiplied by 2^12 so that fixed point math
371     //  can be used in the main scaling loop.
372 #ifdef FIXED_POINT
373     fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
374 
375     product_val = 0;
376 
377     for (i = 0; i < g_b_scaler.nw; i++)
378     {
379         if (product_val > g_b_scaler.nw)
380             product_val -= g_b_scaler.nw;
381 
382         phase_offset_int = (fixed_mult * product_val) >> 16;
383 
384         g_b_scaler.c_w[i*4]   = c3_fixed(phase_offset_int);
385         g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int);
386         g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int);
387         g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int);
388 
389         product_val += d_w;
390     }
391 
392 
393     fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
394 
395     product_val = 0;
396 
397     for (i = 0; i < g_b_scaler.nh; i++)
398     {
399         if (product_val > g_b_scaler.nh)
400             product_val -= g_b_scaler.nh;
401 
402         phase_offset_int = (fixed_mult * product_val) >> 16;
403 
404         g_b_scaler.c_h[i*4]   = c0_fixed(phase_offset_int);
405         g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int);
406         g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int);
407         g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int);
408 
409         product_val += d_h;
410     }
411 
412     fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
413 
414     product_val = 0;
415 
416     for (i = 0; i < g_b_scaler.nh_uv; i++)
417     {
418         if (product_val > g_b_scaler.nh_uv)
419             product_val -= g_b_scaler.nh_uv;
420 
421         phase_offset_int = (fixed_mult * product_val) >> 16;
422 
423         g_b_scaler.c_h_uv[i*4]   = c0_fixed(phase_offset_int);
424         g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int);
425         g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int);
426         g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int);
427 
428         product_val += d_h_uv;
429     }
430 
431 #else
432 
433     for (i = 0; i < g_nw; i++)
434     {
435         phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
436         g_c_w[i*4]   = (C3(phase_offset) * 4096.0);
437         g_c_w[i*4+1] = (C2(phase_offset) * 4096.0);
438         g_c_w[i*4+2] = (C1(phase_offset) * 4096.0);
439         g_c_w[i*4+3] = (C0(phase_offset) * 4096.0);
440     }
441 
442     for (i = 0; i < g_nh; i++)
443     {
444         phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
445         g_c_h[i*4]   = (C0(phase_offset) * 4096.0);
446         g_c_h[i*4+1] = (C1(phase_offset) * 4096.0);
447         g_c_h[i*4+2] = (C2(phase_offset) * 4096.0);
448         g_c_h[i*4+3] = (C3(phase_offset) * 4096.0);
449     }
450 
451     for (i = 0; i < g_nh_uv; i++)
452     {
453         phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
454         g_c_h_uv[i*4]   = (C0(phase_offset) * 4096.0);
455         g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0);
456         g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0);
457         g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0);
458     }
459 
460 #endif
461 
462     // Create an array that corresponds input lines to output lines.
463     //  This doesn't require floating point math, but it does require
464     //  a division and because hardware division is not present that
465     //  is a call.
466     for (i = 0; i < out_width; i++)
467     {
468         g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
469 
470         if ((g_b_scaler.l_w[i] + 2) <= in_width)
471             g_b_scaler.max_usable_out_width = i;
472 
473     }
474 
475     for (i = 0; i < out_height + 1; i++)
476     {
477         g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
478         g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
479     }
480 
481     return 0;
482 }
483 
bicubic_scale(int in_width,int in_height,int in_stride,int out_width,int out_height,int out_stride,unsigned char * input_image,unsigned char * output_image)484 int bicubic_scale(int in_width, int in_height, int in_stride,
485                   int out_width, int out_height, int out_stride,
486                   unsigned char *input_image, unsigned char *output_image)
487 {
488     short *RESTRICT l_w, * RESTRICT l_h;
489     short *RESTRICT c_w, * RESTRICT c_h;
490     unsigned char *RESTRICT ip, * RESTRICT op;
491     unsigned char *RESTRICT hbuf;
492     int h, w, lw, lh;
493     int temp_sum;
494     int phase_offset_w, phase_offset_h;
495 
496     c_w = g_b_scaler.c_w;
497     c_h = g_b_scaler.c_h;
498 
499     op = output_image;
500 
501     l_w = g_b_scaler.l_w;
502     l_h = g_b_scaler.l_h;
503 
504     phase_offset_h = 0;
505 
506     for (h = 0; h < out_height; h++)
507     {
508         // select the row to work on
509         lh = l_h[h];
510         ip = input_image + (in_stride * lh);
511 
512         // vp8_filter the row vertically into an temporary buffer.
513         //  If the phase offset == 0 then all the multiplication
514         //  is going to result in the output equalling the input.
515         //  So instead point the temporary buffer to the input.
516         //  Also handle the boundry condition of not being able to
517         //  filter that last lines.
518         if (phase_offset_h && (lh < in_height - 2))
519         {
520             hbuf = g_b_scaler.hbuf;
521 
522             for (w = 0; w < in_width; w++)
523             {
524                 temp_sum =  c_h[phase_offset_h*4+3] * ip[w - in_stride];
525                 temp_sum += c_h[phase_offset_h*4+2] * ip[w];
526                 temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride];
527                 temp_sum += c_h[phase_offset_h*4]   * ip[w + 2*in_stride];
528 
529                 hbuf[w] = temp_sum >> 12;
530             }
531         }
532         else
533             hbuf = ip;
534 
535         // increase the phase offset for the next time around.
536         if (++phase_offset_h >= g_b_scaler.nh)
537             phase_offset_h = 0;
538 
539         // now filter and expand it horizontally into the final
540         //  output buffer
541         phase_offset_w = 0;
542 
543         for (w = 0; w < out_width; w++)
544         {
545             // get the index to use to expand the image
546             lw = l_w[w];
547 
548             temp_sum =  c_w[phase_offset_w*4]   * hbuf[lw - 1];
549             temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw];
550             temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1];
551             temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2];
552             temp_sum = temp_sum >> 12;
553 
554             if (++phase_offset_w >= g_b_scaler.nw)
555                 phase_offset_w = 0;
556 
557             // boundry conditions
558             if ((lw + 2) >= in_width)
559                 temp_sum = hbuf[lw];
560 
561             if (lw == 0)
562                 temp_sum = hbuf[0];
563 
564             op[w] = temp_sum;
565         }
566 
567         op += out_stride;
568     }
569 
570     return 0;
571 }
572 
bicubic_scale_frame_reset()573 void bicubic_scale_frame_reset()
574 {
575     g_b_scaler.out_width = 0;
576     g_b_scaler.out_height = 0;
577 }
578 
bicubic_scale_frame(YV12_BUFFER_CONFIG * src,YV12_BUFFER_CONFIG * dst,int new_width,int new_height)579 void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
580                          int new_width, int new_height)
581 {
582 
583     dst->y_width = new_width;
584     dst->y_height = new_height;
585     dst->uv_width = new_width / 2;
586     dst->uv_height = new_height / 2;
587 
588     dst->y_stride = dst->y_width;
589     dst->uv_stride = dst->uv_width;
590 
591     bicubic_scale(src->y_width, src->y_height, src->y_stride,
592                   new_width, new_height, dst->y_stride,
593                   src->y_buffer, dst->y_buffer);
594 
595     bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
596                   new_width / 2, new_height / 2, dst->uv_stride,
597                   src->u_buffer, dst->u_buffer);
598 
599     bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
600                   new_width / 2, new_height / 2, dst->uv_stride,
601                   src->v_buffer, dst->v_buffer);
602 }
603