• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  isvce_downscaler.c
25 *
26 * @brief
27 *  Contains downscaler functions required by the SVC encoder
28 *
29 * @author
30 *  ittiam
31 *
32 * @par List of Functions:
33 *  - isvce_get_downscaler_data_size()
34 *  - isvce_get_downscaler_padding_dims()
35 *  - isvce_get_downscaler_normalized_filtered_pixel()
36 *  - isvce_horizontal_downscale_and_transpose()
37 *  - isvce_process_downscaler()
38 *  - isvce_initialize_downscaler()
39 *
40 * @remarks
41 *  None
42 *
43 *******************************************************************************
44 */
45 
46 /*****************************************************************************/
47 /* File Includes                                                             */
48 /*****************************************************************************/
49 
50 /* system include files */
51 #include <stdio.h>
52 #include <stdlib.h>
53 
54 #include "ih264_typedefs.h"
55 #include "ih264_macros.h"
56 #include "isvc_macros.h"
57 #include "ih264_platform_macros.h"
58 #include "iv2.h"
59 #include "isvc_defs.h"
60 #include "isvce_defs.h"
61 #include "isvc_structs.h"
62 #include "isvc_structs.h"
63 #include "isvce_downscaler.h"
64 #include "isvce_downscaler_private_defs.h"
65 
/**
******************************************************************************
* @brief  lanczos filter coefficients for 2x downscaling
*
* @par Description:
*  One row per filter phase. The first half of a row holds the filter taps
*  for that phase and the second half repeats the same taps.
*  The taps of every row add up to 128.
*
* @remarks Though the length of the filter is 8, the
* same coefficients
* are replicated so that 2 rows can be processed at one
* go in SIMD
******************************************************************************
*/
static WORD8 gai1_lanczos_coefficients_2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] = {
    {-7, 0, 39, 64, 39, 0, -7, 0, -7, 0, 39, 64, 39, 0, -7, 0},
    {-6, 0, 33, 62, 41, 4, -6, 0, -6, 0, 33, 62, 41, 4, -6, 0},
    {-5, -1, 29, 57, 45, 9, -5, -1, -5, -1, 29, 57, 45, 9, -5, -1},
    {-4, -2, 23, 55, 48, 14, -4, -2, -4, -2, 23, 55, 48, 14, -4, -2},
    {-3, -3, 18, 52, 52, 18, -3, -3, -3, -3, 18, 52, 52, 18, -3, -3},
    {-2, -4, 13, 49, 54, 24, -2, -4, -2, -4, 13, 49, 54, 24, -2, -4},
    {-1, -5, 9, 44, 58, 29, -1, -5, -1, -5, 9, 44, 58, 29, -1, -5},
    {0, -6, 3, 42, 61, 34, 0, -6, 0, -6, 3, 42, 61, 34, 0, -6}};
84 
/**
******************************************************************************
* @brief  lanczos filter coefficients for 1.5x downscaling
*
* @par Description:
*  One row per filter phase. The first half of a row holds the filter taps
*  for that phase and the second half repeats the same taps.
*
* @remarks Though the length of the filter is 8, the same coefficients
* are replicated so that 2 rows can be processed at one go in SIMD.
*
* NOTE(review): unlike the 2x table, not every row adds up to 128 here
* (the per-row tap sums range from 127 to 129) - confirm this is intended.
******************************************************************************
*/
static WORD8 gai1_lanczos_coefficients_3by2x[NUM_SCALER_FILTER_PHASES][NUM_SCALER_FILTER_TAPS * 2] =
    {{0, -11, 32, 86, 32, -11, 0, 0, 0, -11, 32, 86, 32, -11, 0, 0},
     {0, -10, 26, 79, 39, -5, 0, 0, 0, -10, 26, 79, 39, -5, 0, 0},
     {0, -8, 21, 72, 46, 0, -2, 0, 0, -8, 21, 72, 46, 0, -2, 0},
     {0, -6, 15, 66, 52, 3, -3, 0, 0, -6, 15, 66, 52, 3, -3, 0},
     {0, -6, 10, 60, 60, 10, -6, 0, 0, -6, 10, 60, 60, 10, -6, 0},
     {0, -3, 3, 52, 66, 15, -6, 0, 0, -3, 3, 52, 66, 15, -6, 0},
     {0, -2, 0, 46, 72, 21, -8, 0, 0, -2, 0, 46, 72, 21, -8, 0},
     {0, 0, -5, 39, 79, 26, -10, 0, 0, 0, -5, 39, 79, 26, -10, 0}};
101 
102 /**
103 *******************************************************************************
104 *
105 * @brief
106 *   gets the memory size required for downscaler
107 *
108 * @par Description:
109 *   returns the memory required by the downscaler context and state structs
110 *   for allocation.
111 *
112 * @returns
113 *
114 * @remarks
115 *
116 *
117 *******************************************************************************
118 */
119 
isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers,DOUBLE d_scaling_factor,UWORD32 u4_width,UWORD32 u4_height)120 UWORD32 isvce_get_downscaler_data_size(UWORD8 u1_num_spatial_layers, DOUBLE d_scaling_factor,
121                                        UWORD32 u4_width, UWORD32 u4_height)
122 {
123     UWORD32 u4_size = 0;
124 
125     if(u1_num_spatial_layers > 1)
126     {
127         u4_size += sizeof(downscaler_state_t);
128 
129         u4_size +=
130             (u4_height + NUM_SCALER_FILTER_TAPS * 2) * ((UWORD32) (u4_width / d_scaling_factor));
131     }
132 
133     return u4_size;
134 }
135 
136 /**
137 *******************************************************************************
138 *
139 * @brief
140 *   gets the padding size required for filtering
141 *
142 * @par Description:
143 *   gets the padding size required for filtering
144 *
145 * @returns
146 *
147 * @remarks
148 *
149 *
150 *******************************************************************************
151 */
152 
isvce_get_downscaler_padding_dims(padding_dims_t * ps_pad_dims)153 void isvce_get_downscaler_padding_dims(padding_dims_t *ps_pad_dims)
154 {
155     ps_pad_dims->u1_left_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
156     ps_pad_dims->u1_right_pad_size = ALIGN8(NUM_SCALER_FILTER_TAPS / 2);
157     ps_pad_dims->u1_top_pad_size = NUM_SCALER_FILTER_TAPS / 2;
158     ps_pad_dims->u1_bottom_pad_size = NUM_SCALER_FILTER_TAPS / 2;
159 }
160 
161 /**
162 *******************************************************************************
163 *
164 * @brief
165 *   processes downscaler
166 *
167 * @par Description:
168 *   calls the function for padding and scaling
169 *
170 * @param[in] ps_scaler
171 *  pointer to downdownscaler context
172 *
173 * @param[in] ps_src_buf_props
174 *  pointer to source buffer props struct
175 *
176 * @param[in] u4_blk_wd
177 *  width of the block to be processed
178 *
179 * @param[in] u4_blk_ht
180 *  height of the block to be processed
181 *
182 * @returns
183 *
184 * @remarks
185 *
186 *
187 *******************************************************************************
188 */
189 
isvce_process_downscaler(downscaler_ctxt_t * ps_scaler,yuv_buf_props_t * ps_src_buf_props,yuv_buf_props_t * ps_dst_buf_props,UWORD32 u4_blk_wd,UWORD32 u4_blk_ht)190 void isvce_process_downscaler(downscaler_ctxt_t *ps_scaler, yuv_buf_props_t *ps_src_buf_props,
191                               yuv_buf_props_t *ps_dst_buf_props, UWORD32 u4_blk_wd,
192                               UWORD32 u4_blk_ht)
193 {
194     buffer_container_t s_src_buf;
195     buffer_container_t s_dst_buf;
196 
197     UWORD32 u4_scaled_block_size_x, u4_scaled_block_size_y;
198 
199     downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
200 
201     ASSERT(ps_src_buf_props->e_color_format == IV_YUV_420SP_UV);
202 
203     u4_scaled_block_size_x = (UWORD32) (u4_blk_wd / ps_scaler->d_scaling_factor);
204     u4_scaled_block_size_y = (UWORD32) (u4_blk_ht / ps_scaler->d_scaling_factor);
205 
206     /* luma */
207     s_src_buf = ps_src_buf_props->as_component_bufs[Y];
208     s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - (NUM_SCALER_FILTER_TAPS / 2) -
209                         (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
210 
211     s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
212     s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
213 
214     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
215                                    u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 0);
216 
217     s_src_buf = s_dst_buf;
218     s_dst_buf = ps_dst_buf_props->as_component_bufs[Y];
219 
220     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
221                                    u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
222 
223     /* chroma */
224     u4_blk_ht /= 2;
225     u4_scaled_block_size_y /= 2;
226 
227     s_src_buf = ps_src_buf_props->as_component_bufs[U];
228     s_src_buf.pv_data = ((UWORD8 *) s_src_buf.pv_data) - NUM_SCALER_FILTER_TAPS -
229                         (NUM_SCALER_FILTER_TAPS / 2) * s_src_buf.i4_data_stride;
230 
231     s_dst_buf.pv_data = ps_scaler_state->pv_scratch_buf;
232     s_dst_buf.i4_data_stride = u4_blk_ht + NUM_SCALER_FILTER_TAPS;
233 
234     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
235                                    u4_scaled_block_size_x, u4_blk_ht + NUM_SCALER_FILTER_TAPS, 1);
236 
237     s_src_buf = s_dst_buf;
238     s_dst_buf = ps_dst_buf_props->as_component_bufs[U];
239 
240     ps_scaler_state->pf_downscaler(ps_scaler, &s_src_buf, &s_dst_buf, ps_scaler_state->pai1_filters,
241                                    u4_scaled_block_size_y, u4_scaled_block_size_x, 0);
242 }
243 
244 /**
245 *******************************************************************************
246 *
247 * @brief
248 *   normalized dot product computer for downscaler
249 *
250 * @par Description:
251 *   Given the downscaler filter coefficients, source buffer, the function
252 *   calculates the dot product between them, adds an offset and normalizes it
253 *
254 * @param[in] ps_scaler
255 *  pointer to src buf
256 *
257 * @param[in] pi1_filter
258 *  pointer to filter coefficients
259 *
260 * @returns
261 *
262 * @remarks
263 *
264 *******************************************************************************
265 */
266 
isvce_get_downscaler_normalized_filtered_pixel(UWORD8 * pu1_src,WORD8 * pi1_filter)267 static UWORD8 isvce_get_downscaler_normalized_filtered_pixel(UWORD8 *pu1_src, WORD8 *pi1_filter)
268 {
269     WORD32 i;
270     WORD32 i4_norm_dot_product;
271     UWORD8 u1_out_pixel;
272     WORD32 i4_dot_product_sum = 0;
273     WORD32 i4_rounding_offset = 1 << (FILTER_COEFF_Q - 1);
274     WORD32 i4_normalizing_factor = 1 << FILTER_COEFF_Q;
275 
276     for(i = 0; i < NUM_SCALER_FILTER_TAPS; i++)
277     {
278         i4_dot_product_sum += (pu1_src[i] * pi1_filter[i]);
279     }
280 
281     i4_norm_dot_product = ((i4_dot_product_sum + i4_rounding_offset) / i4_normalizing_factor);
282     u1_out_pixel = (UWORD8) CLIP_U8(i4_norm_dot_product);
283 
284     return u1_out_pixel;
285 }
286 
287 /**
288 *******************************************************************************
289 *
290 * @brief
291 *   horizontal scaler function
292 *
293 * @par Description:
294 *   Does horizontal scaling for the given block
295 *
296 * @param[in] ps_scaler
297 *  pointer to downscaler context
298 *
299 * @param[in] ps_src
300 *  pointer to source buffer container
301 *
302 * @param[in] ps_dst
303 *  pointer to destination buffer container
304 *
305 * @param[in] pai1_filters
306 *  pointer to array of downscaler filters
307 *
308 * @param[in] u4_blk_wd
309 *  width of the block after horizontal scaling (output block width)
310 *
311 * @param[in] u4_blk_ht
312 *  height of the current block (input block height)
313 *
314 * @param[in] u1_is_chroma
315 *  flag suggesting whether the buffer is luma or chroma
316 *
317 *
318 * @returns
319 *
320 * @remarks
321 *  The same function is used for vertical scaling too as
322 *  the horizontally scaled input in stored in transpose fashion.
323 *
324 *******************************************************************************
325 */
326 
isvce_horizontal_downscale_and_transpose(downscaler_ctxt_t * ps_scaler,buffer_container_t * ps_src,buffer_container_t * ps_dst,FILTER_COEFF_ARRAY pai1_filters,UWORD32 u4_blk_wd,UWORD32 u4_blk_ht,UWORD8 u1_is_chroma)327 static void isvce_horizontal_downscale_and_transpose(
328     downscaler_ctxt_t *ps_scaler, buffer_container_t *ps_src, buffer_container_t *ps_dst,
329     FILTER_COEFF_ARRAY pai1_filters, UWORD32 u4_blk_wd, UWORD32 u4_blk_ht, UWORD8 u1_is_chroma)
330 {
331     WORD32 i, j, k;
332     UWORD8 u1_phase;
333     UWORD8 u1_filtered_out_pixel;
334     UWORD8 *pu1_src_j, *pu1_dst_j;
335     UWORD8 u1_filtered_out_u_pixel, u1_filtered_out_v_pixel;
336     UWORD8 *pu1_in_pixel;
337     UWORD8 *pu1_out_pixel;
338     WORD8 *pi1_filter_grid;
339     UWORD16 u2_full_pixel_inc;
340     UWORD8 au1_temp_u_buff[NUM_SCALER_FILTER_TAPS];
341     UWORD8 au1_temp_v_buff[NUM_SCALER_FILTER_TAPS];
342 
343     downscaler_state_t *ps_scaler_state = (downscaler_state_t *) ps_scaler->pv_scaler_state;
344 
345     UWORD32 u4_center_pixel_pos = ps_scaler_state->i4_init_offset;
346     UWORD32 u4_src_horz_increments = ps_scaler_state->u4_horz_increment;
347     UWORD8 *pu1_src = ps_src->pv_data;
348     UWORD32 u4_in_stride = ps_src->i4_data_stride;
349     UWORD8 *pu1_dst = ps_dst->pv_data;
350     UWORD32 u4_out_stride = ps_dst->i4_data_stride;
351     UWORD32 u4_center_pixel_pos_src = u4_center_pixel_pos;
352 
353     /* Offset the input so that the input pixel to be processed
354     co-incides with the centre of filter (4th coefficient)*/
355     pu1_src += (1 + u1_is_chroma);
356 
357     ASSERT((1 << DOWNSCALER_Q) == ps_scaler_state->u4_vert_increment);
358 
359     if(!u1_is_chroma)
360     {
361         for(j = 0; j < (WORD32) u4_blk_ht; j++)
362         {
363             pu1_src_j = pu1_src + (j * u4_in_stride);
364             pu1_dst_j = pu1_dst + j;
365 
366             u4_center_pixel_pos = u4_center_pixel_pos_src;
367 
368             for(i = 0; i < (WORD32) u4_blk_wd; i++)
369             {
370                 u1_phase = get_filter_phase(u4_center_pixel_pos);
371                 pi1_filter_grid = pai1_filters[u1_phase];
372 
373                 /* Doing the Calculation for current Loop Count  */
374                 u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
375                 pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
376                 pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
377 
378                 u1_filtered_out_pixel =
379                     isvce_get_downscaler_normalized_filtered_pixel(pu1_in_pixel, pi1_filter_grid);
380                 *pu1_out_pixel = u1_filtered_out_pixel;
381 
382                 /* Update the context for next Loop Count */
383                 u4_center_pixel_pos += u4_src_horz_increments;
384             }
385         }
386     }
387     else
388     {
389         for(j = 0; j < (WORD32) u4_blk_ht; j++)
390         {
391             pu1_src_j = pu1_src + (j * u4_in_stride);
392             pu1_dst_j = pu1_dst + j;
393 
394             u4_center_pixel_pos = u4_center_pixel_pos_src;
395 
396             for(i = 0; i < (WORD32) u4_blk_wd; i++)
397             {
398                 u1_phase = get_filter_phase(u4_center_pixel_pos);
399                 pi1_filter_grid = pai1_filters[u1_phase];
400 
401                 /*Doing the Calculation for current Loop Count  */
402                 u2_full_pixel_inc = u4_center_pixel_pos >> DOWNSCALER_Q;
403                 pu1_in_pixel = pu1_src_j + (u2_full_pixel_inc << u1_is_chroma);
404                 pu1_out_pixel = pu1_dst_j + ((i << u1_is_chroma) * u4_out_stride);
405 
406                 for(k = 0; k < NUM_SCALER_FILTER_TAPS; k++)
407                 {
408                     au1_temp_u_buff[k] = *(pu1_in_pixel + (2 * k));
409                     au1_temp_v_buff[k] = *(pu1_in_pixel + ((2 * k) + 1));
410                 }
411 
412                 u1_filtered_out_u_pixel = isvce_get_downscaler_normalized_filtered_pixel(
413                     au1_temp_u_buff, pi1_filter_grid);
414                 u1_filtered_out_v_pixel = isvce_get_downscaler_normalized_filtered_pixel(
415                     au1_temp_v_buff, pi1_filter_grid);
416                 *pu1_out_pixel = u1_filtered_out_u_pixel;
417                 *(pu1_out_pixel + u4_out_stride) = u1_filtered_out_v_pixel;
418 
419                 /* Update the context for next Loop Count */
420                 u4_center_pixel_pos += u4_src_horz_increments;
421             }
422         }
423     }
424 }
425 
/**
*******************************************************************************
*
* @brief
*   selects the downscaler kernel for the given architecture
*
* @par Description:
*   Stores the kernel to be used in ps_scaler_state->pf_downscaler. The C
*   implementation is used unless the build target (X86, ARMV8, or ARM
*   without DISABLE_NEON) and e_arch together select a SIMD implementation.
*
* @param[in,out] ps_scaler_state
*   downscaler state whose pf_downscaler member is written
*
* @param[in] e_arch
*   architecture type
*
* @returns
*   none
*
* @remarks
*
*******************************************************************************
*/

void isvce_downscaler_function_selector(downscaler_state_t *ps_scaler_state, IV_ARCH_T e_arch)
{
    /* C kernel, unless one of the cases below overrides it */
    ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose;

    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_sse42;
            break;
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
            break;
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
            ps_scaler_state->pf_downscaler = isvce_horizontal_downscale_and_transpose_neon;
            break;
#endif
        default:
            break;
    }
}
467 
468 /**
469 *******************************************************************************
470 *
471 * @brief
472 *   initializes the downscaler context
473 *
474 * @par Description:
475 *   initializes the downscaler context for the given scaling factor
476 *   with padding size, filter size, etc.
477 *
478 * @param[in] ps_scaler
479 *   pointer downscaler context
480 *
481 * @param[in] ps_mem_rec
482 *   pointer to memory allocated to downscaler process
483 *
484 * @param[in] d_scaling_factor
485 *   scaling reatio of width/ height between two consecutive SVC layers
486 *
487 * @param[in] u1_num_spatial_layers
488 *   scaling reatio of width/ height between two consecutive SVC layers
489 *
490 * @param[in] u4_wd
491 *   width of the input
492 *
493 * @param[in] u4_ht
494 *   height of the input
495 *
496 * @param[in] e_arch
497 *   architecure type
498 *
499 * @returns
500 *
501 * @remarks
502 *  when ARM intrinsics are added, update should be done here
503 *
504 *******************************************************************************
505 */
506 
isvce_initialize_downscaler(downscaler_ctxt_t * ps_scaler,iv_mem_rec_t * ps_mem_rec,DOUBLE d_scaling_factor,UWORD8 u1_num_spatial_layers,UWORD32 u4_in_width,UWORD32 u4_in_height,IV_ARCH_T e_arch)507 void isvce_initialize_downscaler(downscaler_ctxt_t *ps_scaler, iv_mem_rec_t *ps_mem_rec,
508                                  DOUBLE d_scaling_factor, UWORD8 u1_num_spatial_layers,
509                                  UWORD32 u4_in_width, UWORD32 u4_in_height, IV_ARCH_T e_arch)
510 {
511     if(u1_num_spatial_layers > 1)
512     {
513         downscaler_state_t *ps_scaler_state;
514 
515         UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
516 
517         ps_scaler_state = (downscaler_state_t *) pu1_buf;
518         pu1_buf += sizeof(ps_scaler_state[0]);
519 
520         ps_scaler_state->pv_scratch_buf = pu1_buf;
521         ps_scaler_state->u4_in_wd = u4_in_width;
522         ps_scaler_state->u4_in_ht = u4_in_height;
523 
524         ps_scaler->pv_scaler_state = ps_scaler_state;
525         ps_scaler->d_scaling_factor = d_scaling_factor;
526         ps_scaler->u1_num_spatial_layers = u1_num_spatial_layers;
527 
528         isvce_downscaler_function_selector(ps_scaler_state, e_arch);
529 
530         ps_scaler_state->u4_horz_increment = (UWORD32) (d_scaling_factor * (1 << DOWNSCALER_Q));
531 
532         ps_scaler_state->u4_vert_increment = (1 << DOWNSCALER_Q);
533         ps_scaler_state->i4_init_offset = 0;
534         ps_scaler_state->pai1_filters = (d_scaling_factor == 2.0) ? gai1_lanczos_coefficients_2x
535                                                                   : gai1_lanczos_coefficients_3by2x;
536     }
537 }
538