• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include <float.h>
13 #include <math.h>
14 #include <stdio.h>
15 #include "vpx_mem/vpx_mem.h"
16 #include "vpxscale_arbitrary.h"
17 
18 #define FIXED_POINT
19 
20 #define MAX_IN_WIDTH        800
21 #define MAX_IN_HEIGHT       600
22 #define MAX_OUT_WIDTH       800
23 #define MAX_OUT_HEIGHT      600
24 #define MAX_OUT_DIMENSION   ((MAX_OUT_WIDTH > MAX_OUT_HEIGHT) ? \
25                              MAX_OUT_WIDTH : MAX_OUT_HEIGHT)
26 
27 BICUBIC_SCALER_STRUCT g_b_scaler;
28 static int g_first_time = 1;
29 
30 #pragma DATA_SECTION(g_hbuf, "VP6_HEAP")
31 #pragma DATA_ALIGN (g_hbuf, 32);
32 unsigned char g_hbuf[MAX_OUT_DIMENSION];
33 
34 #pragma DATA_SECTION(g_hbuf_uv, "VP6_HEAP")
35 #pragma DATA_ALIGN (g_hbuf_uv, 32);
36 unsigned char g_hbuf_uv[MAX_OUT_DIMENSION];
37 
38 
39 #ifdef FIXED_POINT
40 static int a_i = 0.6 * 65536;
41 #else
42 static float a = -0.6;
43 #endif
44 
45 #ifdef FIXED_POINT
46 //         3     2
47 // C0 = a*t - a*t
48 //
c0_fixed(unsigned int t)49 static short c0_fixed(unsigned int t) {
50   // put t in Q16 notation
51   unsigned short v1, v2;
52 
53   // Q16
54   v1 = (a_i * t) >> 16;
55   v1 = (v1 * t) >> 16;
56 
57   // Q16
58   v2 = (a_i * t) >> 16;
59   v2 = (v2 * t) >> 16;
60   v2 = (v2 * t) >> 16;
61 
62   // Q12
63   return -((v1 - v2) >> 4);
64 }
65 
66 //                     2          3
67 // C1 = a*t + (3-2*a)*t  - (2-a)*t
68 //
c1_fixed(unsigned int t)69 static short c1_fixed(unsigned int t) {
70   unsigned short v1, v2, v3;
71   unsigned short two, three;
72 
73   // Q16
74   v1 = (a_i * t) >> 16;
75 
76   // Q13
77   two = 2 << 13;
78   v2 = two - (a_i >> 3);
79   v2 = (v2 * t) >> 16;
80   v2 = (v2 * t) >> 16;
81   v2 = (v2 * t) >> 16;
82 
83   // Q13
84   three = 3 << 13;
85   v3 = three - (2 * (a_i >> 3));
86   v3 = (v3 * t) >> 16;
87   v3 = (v3 * t) >> 16;
88 
89   // Q12
90   return (((v1 >> 3) - v2 + v3) >> 1);
91 
92 }
93 
94 //                 2          3
95 // C2 = 1 - (3-a)*t  + (2-a)*t
96 //
c2_fixed(unsigned int t)97 static short c2_fixed(unsigned int t) {
98   unsigned short v1, v2, v3;
99   unsigned short two, three;
100 
101   // Q13
102   v1 = 1 << 13;
103 
104   // Q13
105   three = 3 << 13;
106   v2 = three - (a_i >> 3);
107   v2 = (v2 * t) >> 16;
108   v2 = (v2 * t) >> 16;
109 
110   // Q13
111   two = 2 << 13;
112   v3 = two - (a_i >> 3);
113   v3 = (v3 * t) >> 16;
114   v3 = (v3 * t) >> 16;
115   v3 = (v3 * t) >> 16;
116 
117   // Q12
118   return (v1 - v2 + v3) >> 1;
119 }
120 
121 //                 2      3
122 // C3 = a*t - 2*a*t  + a*t
123 //
c3_fixed(unsigned int t)124 static short c3_fixed(unsigned int t) {
125   int v1, v2, v3;
126 
127   // Q16
128   v1 = (a_i * t) >> 16;
129 
130   // Q15
131   v2 = 2 * (a_i >> 1);
132   v2 = (v2 * t) >> 16;
133   v2 = (v2 * t) >> 16;
134 
135   // Q16
136   v3 = (a_i * t) >> 16;
137   v3 = (v3 * t) >> 16;
138   v3 = (v3 * t) >> 16;
139 
140   // Q12
141   return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3);
142 }
143 #else
144 //          3     2
145 // C0 = -a*t + a*t
146 //
C0(float t)147 float C0(float t) {
148   return -a * t * t * t + a * t * t;
149 }
150 
151 //                      2          3
152 // C1 = -a*t + (2*a+3)*t  - (a+2)*t
153 //
C1(float t)154 float C1(float t) {
155   return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t;
156 }
157 
158 //                 2          3
159 // C2 = 1 - (a+3)*t  + (a+2)*t
160 //
C2(float t)161 float C2(float t) {
162   return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f;
163 }
164 
165 //                 2      3
166 // C3 = a*t - 2*a*t  + a*t
167 //
C3(float t)168 float C3(float t) {
169   return a * t * t * t - 2.0f * a * t * t + a * t;
170 }
171 #endif
172 
173 #if 0
174 int compare_real_fixed() {
175   int i, errors = 0;
176   float mult = 1.0 / 10000.0;
177   unsigned int fixed_mult = mult * 4294967296;// 65536;
178   unsigned int phase_offset_int;
179   float phase_offset_real;
180 
181   for (i = 0; i < 10000; i++) {
182     int fixed0, fixed1, fixed2, fixed3, fixed_total;
183     int real0, real1, real2, real3, real_total;
184 
185     phase_offset_real = (float)i * mult;
186     phase_offset_int = (fixed_mult * i) >> 16;
187 //      phase_offset_int = phase_offset_real * 65536;
188 
189     fixed0 = c0_fixed(phase_offset_int);
190     real0 = C0(phase_offset_real) * 4096.0;
191 
192     if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1)))
193       errors++;
194 
195     fixed1 = c1_fixed(phase_offset_int);
196     real1 = C1(phase_offset_real) * 4096.0;
197 
198     if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1)))
199       errors++;
200 
201     fixed2 = c2_fixed(phase_offset_int);
202     real2 = C2(phase_offset_real) * 4096.0;
203 
204     if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1)))
205       errors++;
206 
207     fixed3 = c3_fixed(phase_offset_int);
208     real3 = C3(phase_offset_real) * 4096.0;
209 
210     if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1)))
211       errors++;
212 
213     fixed_total = fixed0 + fixed1 + fixed2 + fixed3;
214     real_total = real0 + real1 + real2 + real3;
215 
216     if ((fixed_total > 4097) || (fixed_total < 4094))
217       errors++;
218 
219     if ((real_total > 4097) || (real_total < 4095))
220       errors++;
221   }
222 
223   return errors;
224 }
225 #endif
226 
227 // Find greatest common denominator between two integers.  Method used here is
228 //  slow compared to Euclid's algorithm, but does not require any division.
gcd(int a,int b)229 int gcd(int a, int b) {
230   // Problem with this algorithm is that if a or b = 0 this function
231   //  will never exit.  Don't want to return 0 because any computation
232   //  that was based on a common denoninator and tried to reduce by
233   //  dividing by 0 would fail.  Best solution that could be thought of
234   //  would to be fail by returing a 1;
235   if (a <= 0 || b <= 0)
236     return 1;
237 
238   while (a != b) {
239     if (b > a)
240       b = b - a;
241     else {
242       int tmp = a;// swap large and
243       a = b; // small
244       b = tmp;
245     }
246   }
247 
248   return b;
249 }
250 
bicubic_coefficient_init()251 void bicubic_coefficient_init() {
252   vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
253   g_first_time = 0;
254 }
255 
bicubic_coefficient_destroy()256 void bicubic_coefficient_destroy() {
257   if (!g_first_time) {
258     vpx_free(g_b_scaler.l_w);
259 
260     vpx_free(g_b_scaler.l_h);
261 
262     vpx_free(g_b_scaler.l_h_uv);
263 
264     vpx_free(g_b_scaler.c_w);
265 
266     vpx_free(g_b_scaler.c_h);
267 
268     vpx_free(g_b_scaler.c_h_uv);
269 
270     vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT));
271   }
272 }
273 
274 // Create the coeffients that will be used for the cubic interpolation.
275 //  Because scaling does not have to be equal in the vertical and horizontal
276 //  regimes the phase offsets will be different.  There are 4 coefficents
277 //  for each point, two on each side.  The layout is that there are the
278 //  4 coefficents for each phase in the array and then the next phase.
bicubic_coefficient_setup(int in_width,int in_height,int out_width,int out_height)279 int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) {
280   int i;
281 #ifdef FIXED_POINT
282   int phase_offset_int;
283   unsigned int fixed_mult;
284   int product_val = 0;
285 #else
286   float phase_offset;
287 #endif
288   int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv;
289 
290   if (g_first_time)
291     bicubic_coefficient_init();
292 
293 
294   // check to see if the coefficents have already been set up correctly
295   if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height)
296       && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height))
297     return 0;
298 
299   g_b_scaler.in_width = in_width;
300   g_b_scaler.in_height = in_height;
301   g_b_scaler.out_width = out_width;
302   g_b_scaler.out_height = out_height;
303 
304   // Don't want to allow crazy scaling, just try and prevent a catastrophic
305   //  failure here.  Want to fail after setting the member functions so if
306   //  if the scaler is called the member functions will not scale.
307   if (out_width <= 0 || out_height <= 0)
308     return -1;
309 
310   // reduce in/out width and height ratios using the gcd
311   gcd_w = gcd(out_width, in_width);
312   gcd_h = gcd(out_height, in_height);
313   gcd_h_uv = gcd(out_height, in_height / 2);
314 
315   // the numerator width and height are to be saved in
316   //  globals so they can be used during the scaling process
317   //  without having to be recalculated.
318   g_b_scaler.nw = out_width / gcd_w;
319   d_w = in_width / gcd_w;
320 
321   g_b_scaler.nh = out_height / gcd_h;
322   d_h = in_height / gcd_h;
323 
324   g_b_scaler.nh_uv = out_height / gcd_h_uv;
325   d_h_uv = (in_height / 2) / gcd_h_uv;
326 
327   // allocate memory for the coefficents
328   vpx_free(g_b_scaler.l_w);
329 
330   vpx_free(g_b_scaler.l_h);
331 
332   vpx_free(g_b_scaler.l_h_uv);
333 
334   g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2);
335   g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2);
336   g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2);
337 
338   vpx_free(g_b_scaler.c_w);
339 
340   vpx_free(g_b_scaler.c_h);
341 
342   vpx_free(g_b_scaler.c_h_uv);
343 
344   g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2);
345   g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2);
346   g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2);
347 
348   g_b_scaler.hbuf = g_hbuf;
349   g_b_scaler.hbuf_uv = g_hbuf_uv;
350 
351   // Set up polyphase filter taps.  This needs to be done before
352   //  the scaling because of the floating point math required.  The
353   //  coefficients are multiplied by 2^12 so that fixed point math
354   //  can be used in the main scaling loop.
355 #ifdef FIXED_POINT
356   fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296;
357 
358   product_val = 0;
359 
360   for (i = 0; i < g_b_scaler.nw; i++) {
361     if (product_val > g_b_scaler.nw)
362       product_val -= g_b_scaler.nw;
363 
364     phase_offset_int = (fixed_mult * product_val) >> 16;
365 
366     g_b_scaler.c_w[i * 4]   = c3_fixed(phase_offset_int);
367     g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int);
368     g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int);
369     g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int);
370 
371     product_val += d_w;
372   }
373 
374 
375   fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296;
376 
377   product_val = 0;
378 
379   for (i = 0; i < g_b_scaler.nh; i++) {
380     if (product_val > g_b_scaler.nh)
381       product_val -= g_b_scaler.nh;
382 
383     phase_offset_int = (fixed_mult * product_val) >> 16;
384 
385     g_b_scaler.c_h[i * 4]   = c0_fixed(phase_offset_int);
386     g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int);
387     g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int);
388     g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int);
389 
390     product_val += d_h;
391   }
392 
393   fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296;
394 
395   product_val = 0;
396 
397   for (i = 0; i < g_b_scaler.nh_uv; i++) {
398     if (product_val > g_b_scaler.nh_uv)
399       product_val -= g_b_scaler.nh_uv;
400 
401     phase_offset_int = (fixed_mult * product_val) >> 16;
402 
403     g_b_scaler.c_h_uv[i * 4]   = c0_fixed(phase_offset_int);
404     g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int);
405     g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int);
406     g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int);
407 
408     product_val += d_h_uv;
409   }
410 
411 #else
412 
413   for (i = 0; i < g_nw; i++) {
414     phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw;
415     g_c_w[i * 4]   = (C3(phase_offset) * 4096.0);
416     g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0);
417     g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0);
418     g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0);
419   }
420 
421   for (i = 0; i < g_nh; i++) {
422     phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh;
423     g_c_h[i * 4]   = (C0(phase_offset) * 4096.0);
424     g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0);
425     g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0);
426     g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0);
427   }
428 
429   for (i = 0; i < g_nh_uv; i++) {
430     phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv;
431     g_c_h_uv[i * 4]   = (C0(phase_offset) * 4096.0);
432     g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0);
433     g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0);
434     g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0);
435   }
436 
437 #endif
438 
439   // Create an array that corresponds input lines to output lines.
440   //  This doesn't require floating point math, but it does require
441   //  a division and because hardware division is not present that
442   //  is a call.
443   for (i = 0; i < out_width; i++) {
444     g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw;
445 
446     if ((g_b_scaler.l_w[i] + 2) <= in_width)
447       g_b_scaler.max_usable_out_width = i;
448 
449   }
450 
451   for (i = 0; i < out_height + 1; i++) {
452     g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh;
453     g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv;
454   }
455 
456   return 0;
457 }
458 
bicubic_scale(int in_width,int in_height,int in_stride,int out_width,int out_height,int out_stride,unsigned char * input_image,unsigned char * output_image)459 int bicubic_scale(int in_width, int in_height, int in_stride,
460                   int out_width, int out_height, int out_stride,
461                   unsigned char *input_image, unsigned char *output_image) {
462   short *RESTRICT l_w, * RESTRICT l_h;
463   short *RESTRICT c_w, * RESTRICT c_h;
464   unsigned char *RESTRICT ip, * RESTRICT op;
465   unsigned char *RESTRICT hbuf;
466   int h, w, lw, lh;
467   int temp_sum;
468   int phase_offset_w, phase_offset_h;
469 
470   c_w = g_b_scaler.c_w;
471   c_h = g_b_scaler.c_h;
472 
473   op = output_image;
474 
475   l_w = g_b_scaler.l_w;
476   l_h = g_b_scaler.l_h;
477 
478   phase_offset_h = 0;
479 
480   for (h = 0; h < out_height; h++) {
481     // select the row to work on
482     lh = l_h[h];
483     ip = input_image + (in_stride * lh);
484 
485     // vp8_filter the row vertically into an temporary buffer.
486     //  If the phase offset == 0 then all the multiplication
487     //  is going to result in the output equalling the input.
488     //  So instead point the temporary buffer to the input.
489     //  Also handle the boundry condition of not being able to
490     //  filter that last lines.
491     if (phase_offset_h && (lh < in_height - 2)) {
492       hbuf = g_b_scaler.hbuf;
493 
494       for (w = 0; w < in_width; w++) {
495         temp_sum =  c_h[phase_offset_h * 4 + 3] * ip[w - in_stride];
496         temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w];
497         temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride];
498         temp_sum += c_h[phase_offset_h * 4]   * ip[w + 2 * in_stride];
499 
500         hbuf[w] = temp_sum >> 12;
501       }
502     } else
503       hbuf = ip;
504 
505     // increase the phase offset for the next time around.
506     if (++phase_offset_h >= g_b_scaler.nh)
507       phase_offset_h = 0;
508 
509     // now filter and expand it horizontally into the final
510     //  output buffer
511     phase_offset_w = 0;
512 
513     for (w = 0; w < out_width; w++) {
514       // get the index to use to expand the image
515       lw = l_w[w];
516 
517       temp_sum =  c_w[phase_offset_w * 4]   * hbuf[lw - 1];
518       temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw];
519       temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1];
520       temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2];
521       temp_sum = temp_sum >> 12;
522 
523       if (++phase_offset_w >= g_b_scaler.nw)
524         phase_offset_w = 0;
525 
526       // boundry conditions
527       if ((lw + 2) >= in_width)
528         temp_sum = hbuf[lw];
529 
530       if (lw == 0)
531         temp_sum = hbuf[0];
532 
533       op[w] = temp_sum;
534     }
535 
536     op += out_stride;
537   }
538 
539   return 0;
540 }
541 
bicubic_scale_frame_reset()542 void bicubic_scale_frame_reset() {
543   g_b_scaler.out_width = 0;
544   g_b_scaler.out_height = 0;
545 }
546 
bicubic_scale_frame(YV12_BUFFER_CONFIG * src,YV12_BUFFER_CONFIG * dst,int new_width,int new_height)547 void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
548                          int new_width, int new_height) {
549 
550   dst->y_width = new_width;
551   dst->y_height = new_height;
552   dst->uv_width = new_width / 2;
553   dst->uv_height = new_height / 2;
554 
555   dst->y_stride = dst->y_width;
556   dst->uv_stride = dst->uv_width;
557 
558   bicubic_scale(src->y_width, src->y_height, src->y_stride,
559                 new_width, new_height, dst->y_stride,
560                 src->y_buffer, dst->y_buffer);
561 
562   bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
563                 new_width / 2, new_height / 2, dst->uv_stride,
564                 src->u_buffer, dst->u_buffer);
565 
566   bicubic_scale(src->uv_width, src->uv_height, src->uv_stride,
567                 new_width / 2, new_height / 2, dst->uv_stride,
568                 src->v_buffer, dst->v_buffer);
569 }
570