1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*****************************************************************************/
22 /* File Includes */
23 /*****************************************************************************/
24 /* System include files */
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <stdarg.h>
30 #include <math.h>
31 #include <limits.h>
32
33 /* User include files */
34 #include "ihevc_typedefs.h"
35 #include "itt_video_api.h"
36 #include "ihevce_api.h"
37
38 #include "rc_cntrl_param.h"
39 #include "rc_frame_info_collector.h"
40 #include "rc_look_ahead_params.h"
41
42 #include "ihevc_defs.h"
43 #include "ihevc_structs.h"
44 #include "ihevc_platform_macros.h"
45 #include "ihevc_deblk.h"
46 #include "ihevc_itrans_recon.h"
47 #include "ihevc_chroma_itrans_recon.h"
48 #include "ihevc_chroma_intra_pred.h"
49 #include "ihevc_intra_pred.h"
50 #include "ihevc_inter_pred.h"
51 #include "ihevc_mem_fns.h"
52 #include "ihevc_padding.h"
53 #include "ihevc_weighted_pred.h"
54 #include "ihevc_sao.h"
55 #include "ihevc_resi_trans.h"
56 #include "ihevc_quant_iquant_ssd.h"
57 #include "ihevc_cabac_tables.h"
58
59 #include "ihevce_defs.h"
60 #include "ihevce_lap_enc_structs.h"
61 #include "ihevce_multi_thrd_structs.h"
62 #include "ihevce_multi_thrd_funcs.h"
63 #include "ihevce_me_common_defs.h"
64 #include "ihevce_had_satd.h"
65 #include "ihevce_error_codes.h"
66 #include "ihevce_bitstream.h"
67 #include "ihevce_cabac.h"
68 #include "ihevce_rdoq_macros.h"
69 #include "ihevce_function_selector.h"
70 #include "ihevce_enc_structs.h"
71 #include "ihevce_entropy_structs.h"
72 #include "ihevce_cmn_utils_instr_set_router.h"
73 #include "ihevce_enc_loop_structs.h"
74 #include "ihevce_bs_compute_ctb.h"
75 #include "ihevce_global_tables.h"
76 #include "ihevce_dep_mngr_interface.h"
77 #include "hme_datatype.h"
78 #include "hme_interface.h"
79 #include "hme_common_defs.h"
80 #include "hme_defs.h"
81 #include "ihevce_me_instr_set_router.h"
82 #include "hme_globals.h"
83 #include "hme_utils.h"
84 #include "hme_coarse.h"
85 #include "hme_refine.h"
86 #include "hme_err_compute.h"
87 #include "hme_common_utils.h"
88 #include "hme_search_algo.h"
89 #include "ihevce_profile.h"
90
91 /*****************************************************************************/
92 /* Function Definitions */
93 /*****************************************************************************/
94
/**
********************************************************************************
* @fn hme_init_globals()
*
* @brief One-time initialisation of every global lookup table used by the
*        HME (hierarchical motion estimation) module: grid-search point
*        offsets and optimised refinement masks, partition-id <-> block-size
*        mappings for each CU size, partition attributes (position/extent in
*        units of CU/8), per-preset best-result counts, neighbour (top-right /
*        bottom-left) availability per partition, and bit costs per
*        partition id.
*
*        Must be called before any HME search; writes only to module globals,
*        takes no inputs and returns nothing.
*
* @return none
********************************************************************************
*/
void hme_init_globals()
{
    GRID_PT_T id;
    S32 i, j;
    /*************************************************************************/
    /* Initialize the lookup table for x offset, y offset, optimized mask    */
    /* based on grid id. The design is as follows:                           */
    /*                                                                       */
    /*     a  b  c  d                                                        */
    /*     TL T  TR e                                                        */
    /*     L  C  R  f                                                        */
    /*     BL B  BR                                                          */
    /*                                                                       */
    /*  IF a non corner pt, like T is the new minima, then we need to        */
    /*  evaluate only 3 new pts, in this case, a, b, c. So the optimal       */
    /*  grid mask would reflect this. If a corner pt like TR is the new      */
    /*  minima, then we need to evaluate 5 new pts, in this case, b, c, d,   */
    /*  e and f. So the grid mask will have 5 pts enabled.                   */
    /*************************************************************************/

    /* Centre: if the centre stays the minimum, every point except the     */
    /* centre itself still needs evaluation on the next (shrunk) grid.     */
    id = PT_C;
    gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
    gai1_grid_id_to_x[id] = 0;
    gai1_grid_id_to_y[id] = 0;
    gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
    gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));

    /* Edge points (L/R/T/B): 3 new points in the square grid, and the    */
    /* 3 non-overlapping diamond points for the diamond/conventional masks. */
    id = PT_L;
    gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
    gai1_grid_id_to_x[id] = -1;
    gai1_grid_id_to_y[id] = 0;
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);

    id = PT_R;
    gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
    gai1_grid_id_to_x[id] = 1;
    gai1_grid_id_to_y[id] = 0;
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);

    id = PT_T;
    gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
    gai1_grid_id_to_x[id] = 0;
    gai1_grid_id_to_y[id] = -1;
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);

    id = PT_B;
    gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
    gai1_grid_id_to_x[id] = 0;
    gai1_grid_id_to_y[id] = 1;
    gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);

    /* Corner points: square-grid mask is the union of the two adjacent   */
    /* edge masks (5 points). No diamond mask is set for corners — the    */
    /* diamond masks above only ever enable C/T/L/R/B, so a diamond       */
    /* search presumably never lands on a corner (NOTE(review): inferred  */
    /* from the masks; confirm against the search code).                  */
    id = PT_TL;
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
    gai1_grid_id_to_x[id] = -1;
    gai1_grid_id_to_y[id] = -1;
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);

    id = PT_TR;
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
    gai1_grid_id_to_x[id] = 1;
    gai1_grid_id_to_y[id] = -1;
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);

    id = PT_BL;
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
    gai1_grid_id_to_x[id] = -1;
    gai1_grid_id_to_y[id] = 1;
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);

    id = PT_BR;
    gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
    gai1_grid_id_to_x[id] = 1;
    gai1_grid_id_to_y[id] = 1;
    gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);

    /*************************************************************************/
    /* Partition id -> block size per CU size. AMP (asymmetric) partitions   */
    /* are invalid for 8x8 CUs; for all other CU sizes every partition id    */
    /* maps to its natural sub-block size.                                   */
    /*************************************************************************/
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
    ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;

    ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
    ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;

    ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
    ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;

    ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
    ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;

    /* Number of partitions making up each partition type */
    gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
    gau1_num_parts_in_part_type[PRT_2NxN] = 2;
    gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
    gau1_num_parts_in_part_type[PRT_NxN] = 4;
    gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
    gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
    gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
    gau1_num_parts_in_part_type[PRT_nRx2N] = 2;

    /* Default every (type, part-num) slot to invalid before filling the  */
    /* valid entries below.                                               */
    for(i = 0; i < MAX_PART_TYPES; i++)
        for(j = 0; j < MAX_NUM_PARTS; j++)
            ge_part_type_to_part_id[i][j] = PART_ID_INVALID;

    /* 2Nx2N only one partition */
    ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;

    /* 2NxN 2 partitions */
    ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
    ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;

    /* Nx2N 2 partitions */
    ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
    ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;

    /* NxN 4 partitions */
    ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
    ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
    ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
    ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;

    /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
    ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
    ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;

    /* AMP 2Nx (3N/2 + N/2) 2 partitions */
    ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
    ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;

    /* AMP (N/2 + 3N/2) x 2N 2 partitions */
    ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
    ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;

    /* AMP (3N/2 + N/2) x 2N 2 partitions */
    ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
    ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;

    /*************************************************************************/
    /* initialize attributes for each partition id within the cu.            */
    /* Positions/extents are in eighths of the CU dimension (the CU is an    */
    /* 8x8 grid of units), e.g. 2NxnU_T is 8 wide x 2 tall starting at (0,0).*/
    /*************************************************************************/
    {
        part_attr_t *ps_part_attr;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 8;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 4;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 4;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 4;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 4;
        ps_part_attr->u1_y_count = 8;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
        ps_part_attr->u1_x_start = 4;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 4;
        ps_part_attr->u1_y_count = 8;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 4;
        ps_part_attr->u1_y_count = 4;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
        ps_part_attr->u1_x_start = 4;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 4;
        ps_part_attr->u1_y_count = 4;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 4;
        ps_part_attr->u1_x_count = 4;
        ps_part_attr->u1_y_count = 4;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
        ps_part_attr->u1_x_start = 4;
        ps_part_attr->u1_y_start = 4;
        ps_part_attr->u1_x_count = 4;
        ps_part_attr->u1_y_count = 4;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 2;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 2;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 6;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 6;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 6;
        ps_part_attr->u1_x_count = 8;
        ps_part_attr->u1_y_count = 2;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 2;
        ps_part_attr->u1_y_count = 8;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
        ps_part_attr->u1_x_start = 2;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 6;
        ps_part_attr->u1_y_count = 8;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
        ps_part_attr->u1_x_start = 0;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 6;
        ps_part_attr->u1_y_count = 8;

        ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
        ps_part_attr->u1_x_start = 6;
        ps_part_attr->u1_y_start = 0;
        ps_part_attr->u1_x_count = 2;
        ps_part_attr->u1_y_count = 8;
    }
    /* Block size -> CU size: only the 4 square sizes map to CUs */
    for(i = 0; i < NUM_BLK_SIZES; i++)
        ge_blk_size_to_cu_size[i] = CU_INVALID;

    ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
    ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
    ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
    ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;

    /* This is the reverse, given cU size, get blk size */
    ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
    ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
    ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
    ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;

    /* 1 => the partition type splits the CU vertically (side-by-side   */
    /* partitions); 0 => horizontal split. NxN is tagged vertical here. */
    gau1_is_vert_part[PRT_2Nx2N] = 0;
    gau1_is_vert_part[PRT_2NxN] = 0;
    gau1_is_vert_part[PRT_Nx2N] = 1;
    gau1_is_vert_part[PRT_NxN] = 1;
    gau1_is_vert_part[PRT_2NxnU] = 0;
    gau1_is_vert_part[PRT_2NxnD] = 0;
    gau1_is_vert_part[PRT_nLx2N] = 1;
    gau1_is_vert_part[PRT_nRx2N] = 1;

    /* Initialise the number of best results for the full pell refinement */
    /* per partition id, one table per encoder preset (PQ/HQ/MS/HS/XS/XS25). */
    gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
    gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
    gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
    gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
    gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
    gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
    gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
    gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
    gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
    gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
    gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
    gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
    gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
    gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
    gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
    gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
    gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;

    gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
    gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
    gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
    gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
    gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
    gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
    gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
    gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
    gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
    gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
    gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
    gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
    gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
    gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
    gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
    gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
    gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;

    gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
    gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
    gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
    gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
    gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
    gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
    gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
    gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
    gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
    gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
    gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
    gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
    gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
    gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
    gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
    gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
    gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;

    /* Faster presets (HS/XS) refine only the 2Nx2N partition */
    gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
    gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
    gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
    gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
    gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
    gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
    gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
    gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
    gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
    gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
    gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
    gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
    gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
    gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
    gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
    gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
    gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;

    gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
    gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
    gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
    gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
    gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
    gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
    gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
    gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
    gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
    gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
    gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
    gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
    gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
    gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
    gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
    gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
    gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;

    gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
    gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
    gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
    gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
    gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
    gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
    gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
    gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
    gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
    gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
    gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
    gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
    gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
    gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
    gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
    gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
    gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;

    /* Top right validity for each part id */
    gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
    gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
    gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
    gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
    gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
    gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
    gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
    gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
    gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
    gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
    gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
    gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
    gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
    gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
    gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
    gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
    gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;

    /* Bot Left validity for each part id */
    gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
    gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
    gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
    gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
    gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
    gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
    gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
    gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
    gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
    gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
    gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
    gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
    gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
    gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
    gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
    gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
    gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;

    /*Part id to part num of this partition id in the CU */
    gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
    gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
    gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
    gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
    gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
    gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
    gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
    gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
    gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
    gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
    gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
    gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
    gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
    gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
    gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
    gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
    gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;

    /*Which partition type does this partition id belong to */
    ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
    ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
    ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
    ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
    ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
    ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
    ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
    ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
    ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
    ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
    ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
    ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
    ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
    ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
    ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
    ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
    ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;

    /*************************************************************************/
    /* Set up the bits to be taken up for the part type. This is equally     */
    /* divided up between the various partitions in the part-type.           */
    /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as         */
    /* partition 2Nx2N.                                                      */
    /* Values are in Q1 (half-bit) units, hence the _q1 suffix.              */
    /*************************************************************************/
    /* 1 bit for 2Nx2N partition */
    gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;

    /* 3 bits for symmetric part types, so 1.5 bits per partition */
    gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
    gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
    gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
    gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;

    /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
    gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
    gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
    gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
    gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;

    /* 4 bits for AMP so 2 bits per partition */
    gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
    gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
    gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
    gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
    gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
    gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
    gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
    gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
}
639
640 /**
641 ********************************************************************************
642 * @fn hme_enc_num_alloc()
643 *
644 * @brief returns number of memtabs that is required by hme module
645 *
646 * @return Number of memtabs required
647 ********************************************************************************
648 */
hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)649 S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
650 {
651 if(i4_num_me_frm_pllel > 1)
652 {
653 return ((S32)MAX_HME_ENC_TOT_MEMTABS);
654 }
655 else
656 {
657 return ((S32)MIN_HME_ENC_TOT_MEMTABS);
658 }
659 }
660
661 /**
662 ********************************************************************************
663 * @fn hme_coarse_num_alloc()
664 *
665 * @brief returns number of memtabs that is required by hme module
666 *
667 * @return Number of memtabs required
668 ********************************************************************************
669 */
hme_coarse_num_alloc()670 S32 hme_coarse_num_alloc()
671 {
672 return ((S32)HME_COARSE_TOT_MEMTABS);
673 }
674
675 /**
676 ********************************************************************************
677 * @fn hme_coarse_dep_mngr_num_alloc()
678 *
679 * @brief returns number of memtabs that is required by Dep Mngr for hme module
680 *
681 * @return Number of memtabs required
682 ********************************************************************************
683 */
hme_coarse_dep_mngr_num_alloc()684 WORD32 hme_coarse_dep_mngr_num_alloc()
685 {
686 return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
687 }
688
hme_validate_init_prms(hme_init_prms_t * ps_prms)689 S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
690 {
691 S32 n_layers = ps_prms->num_simulcast_layers;
692
693 /* The final layer has got to be a non encode coarse layer */
694 if(n_layers > (MAX_NUM_LAYERS - 1))
695 return (-1);
696
697 if(n_layers < 1)
698 return (-1);
699
700 /* Width of the coarsest encode layer got to be >= 2*min_wd where min_Wd */
701 /* represents the min allowed width in any layer. Ditto with ht */
702 if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE))
703 return (-1);
704 if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE))
705 return (-1);
706 if(ps_prms->max_num_ref > MAX_NUM_REF)
707 return (-1);
708 if(ps_prms->max_num_ref < 0)
709 return (-1);
710
711 return (0);
712 }
/**
********************************************************************************
* @fn hme_set_layer_res_attrs()
*
* @brief Stores the resolution attributes of one HME layer, and for
*        non-encode layers also sets up the padded input buffer geometry.
*
* @param[in,out] ps_layer layer context to update
* @param[in] wd       layer width
* @param[in] ht       layer height
* @param[in] disp_wd  display width
* @param[in] disp_ht  display height
* @param[in] u1_enc   non-zero if this is an encode layer
*
* @return none
********************************************************************************
*/
void hme_set_layer_res_attrs(
    layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
{
    ps_layer->i4_wd = wd;
    ps_layer->i4_ht = ht;
    ps_layer->i4_disp_wd = disp_wd;
    ps_layer->i4_disp_ht = disp_ht;

    /* Encode layers get only the bare resolution attributes */
    if(u1_enc != 0)
    {
        return;
    }

    /* Non-encode layers carry their own padded input: 16 pels of padding */
    /* on each side (32 total) plus 4 extra stride columns. NOTE(review): */
    /* the +4 looks like an alignment margin — confirm against the        */
    /* allocation code.                                                   */
    ps_layer->i4_pad_x_inp = 16;
    ps_layer->i4_pad_y_inp = 16;
    ps_layer->i4_inp_stride = wd + 32 + 4;
    /* Offset skips 16 padding rows and 16 padding columns */
    ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16;
    ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset;
}
729
730 /**
731 ********************************************************************************
732 * @fn hme_coarse_get_layer1_mv_bank_ref_idx_size()
733 *
734 * @brief returns the MV bank and ref idx size of Layer 1 (penultimate)
735 *
736 * @return none
737 ********************************************************************************
738 */
hme_coarse_get_layer1_mv_bank_ref_idx_size(S32 n_tot_layers,S32 * a_wd,S32 * a_ht,S32 max_num_ref,S32 * pi4_mv_bank_size,S32 * pi4_ref_idx_size)739 void hme_coarse_get_layer1_mv_bank_ref_idx_size(
740 S32 n_tot_layers,
741 S32 *a_wd,
742 S32 *a_ht,
743 S32 max_num_ref,
744 S32 *pi4_mv_bank_size,
745 S32 *pi4_ref_idx_size)
746 {
747 S32 num_blks, num_mvs_per_blk, num_ref;
748 S32 num_cols, num_rows, num_mvs_per_row;
749 S32 is_explicit_store = 1;
750 S32 wd, ht, num_layers_explicit_search;
751 S32 num_results, use_4x4;
752 wd = a_wd[1];
753 ht = a_ht[1];
754
755 /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
756 /* frames in all but final layer In final layer, it could be 1/2 */
757 //ps_hme_init_prms->num_layers_explicit_search = 3;
758 num_layers_explicit_search = 3;
759
760 if(num_layers_explicit_search <= 0)
761 num_layers_explicit_search = n_tot_layers - 1;
762
763 num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
764
765 /* Possibly implicit search for lower (finer) layers */
766 if(n_tot_layers - 1 > num_layers_explicit_search)
767 is_explicit_store = 0;
768
769 /* coarsest layer alwasy uses 4x4 blks to store results */
770 if(1 == (n_tot_layers - 1))
771 {
772 /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
773 //ps_hme_init_prms->max_num_results_coarse = 4;
774 //vijay : with new algo in coarseset layer this has to be revisited
775 num_results = 4;
776 }
777 else
778 {
779 /* Every refinement layer stores a max of 2 results per partition */
780 //ps_hme_init_prms->max_num_results = 2;
781 num_results = 2;
782 }
783 use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0);
784
785 num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
786 num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
787
788 if(is_explicit_store)
789 num_ref = max_num_ref;
790 else
791 num_ref = 2;
792
793 num_blks = num_cols * num_rows;
794 num_mvs_per_blk = num_ref * num_results;
795 num_mvs_per_row = num_mvs_per_blk * num_cols;
796
797 /* stroe the sizes */
798 *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
799 *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
800
801 return;
802 }
803 /**
804 ********************************************************************************
805 * @fn hme_alloc_init_layer_mv_bank()
806 *
807 * @brief memory alloc and init function for MV bank
808 *
809 * @return Number of memtabs required
810 ********************************************************************************
811 */
hme_alloc_init_layer_mv_bank(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 is_explicit_store,hme_mv_t ** pps_mv_base,S08 ** pi1_ref_idx_base,S32 * pi4_num_mvs_per_row)812 S32 hme_alloc_init_layer_mv_bank(
813 hme_memtab_t *ps_memtab,
814 S32 max_num_results,
815 S32 max_num_ref,
816 S32 use_4x4,
817 S32 mem_avail,
818 S32 u1_enc,
819 S32 wd,
820 S32 ht,
821 S32 is_explicit_store,
822 hme_mv_t **pps_mv_base,
823 S08 **pi1_ref_idx_base,
824 S32 *pi4_num_mvs_per_row)
825 {
826 S32 count = 0;
827 S32 size;
828 S32 num_blks, num_mvs_per_blk;
829 S32 num_ref;
830 S32 num_cols, num_rows, num_mvs_per_row;
831
832 if(is_explicit_store)
833 num_ref = max_num_ref;
834 else
835 num_ref = 2;
836
837 /* MV Bank allocation takes into consideration following */
838 /* number of results per reference x max num refrences is the amount */
839 /* bufffered up per blk. Numbero f blks in pic deps on the blk size, */
840 /* which could be either 4x4 or 8x8. */
841 num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
842 num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
843
844 if(u1_enc)
845 {
846 /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
847 WORD32 num_ctb_cols = ((wd + 63) >> 6);
848 WORD32 num_ctb_rows = ((ht + 63) >> 6);
849
850 num_cols = (num_ctb_cols << 3) + 2;
851 num_rows = (num_ctb_rows << 3) + 2;
852 }
853 num_blks = num_cols * num_rows;
854 num_mvs_per_blk = num_ref * max_num_results;
855 num_mvs_per_row = num_mvs_per_blk * num_cols;
856
857 size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
858 if(mem_avail)
859 {
860 /* store this for run time verifications */
861 *pi4_num_mvs_per_row = num_mvs_per_row;
862 ASSERT(ps_memtab[count].size == size);
863 *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
864 }
865 else
866 {
867 ps_memtab[count].size = size;
868 ps_memtab[count].align = 4;
869 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
870 }
871
872 count++;
873 /* Ref idx takes the same route as mvbase */
874
875 size = num_blks * num_mvs_per_blk * sizeof(S08);
876 if(mem_avail)
877 {
878 ASSERT(ps_memtab[count].size == size);
879 *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
880 }
881 else
882 {
883 ps_memtab[count].size = size;
884 ps_memtab[count].align = 4;
885 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
886 }
887 count++;
888
889 return (count);
890 }
891 /**
892 ********************************************************************************
893 * @fn hme_alloc_init_layer()
894 *
895 * @brief memory alloc and init function
896 *
897 * @return Number of memtabs required
898 ********************************************************************************
899 */
hme_alloc_init_layer(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 disp_wd,S32 disp_ht,S32 segment_layer,S32 is_explicit_store,layer_ctxt_t ** pps_layer)900 S32 hme_alloc_init_layer(
901 hme_memtab_t *ps_memtab,
902 S32 max_num_results,
903 S32 max_num_ref,
904 S32 use_4x4,
905 S32 mem_avail,
906 S32 u1_enc,
907 S32 wd,
908 S32 ht,
909 S32 disp_wd,
910 S32 disp_ht,
911 S32 segment_layer,
912 S32 is_explicit_store,
913 layer_ctxt_t **pps_layer)
914 {
915 S32 count = 0;
916 layer_ctxt_t *ps_layer = NULL;
917 S32 size;
918 S32 num_ref;
919
920 ARG_NOT_USED(segment_layer);
921
922 if(is_explicit_store)
923 num_ref = max_num_ref;
924 else
925 num_ref = 2;
926
927 /* We do not store 4x4 results for encoding layers */
928 if(u1_enc)
929 use_4x4 = 0;
930
931 size = sizeof(layer_ctxt_t);
932 if(mem_avail)
933 {
934 ASSERT(ps_memtab[count].size == size);
935 ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
936 *pps_layer = ps_layer;
937 }
938 else
939 {
940 ps_memtab[count].size = size;
941 ps_memtab[count].align = 8;
942 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
943 }
944
945 count++;
946
947 /* Input luma buffer allocated only for non encode case */
948 if(0 == u1_enc)
949 {
950 /* Allocate input with padding of 16 pixels */
951 size = (wd + 32 + 4) * (ht + 32 + 4);
952 if(mem_avail)
953 {
954 ASSERT(ps_memtab[count].size == size);
955 ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
956 }
957 else
958 {
959 ps_memtab[count].size = size;
960 ps_memtab[count].align = 16;
961 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
962 }
963 count++;
964 }
965
966 /* Allocate memory or just the layer mvbank strcture. */
967 /* TODO : see if this can be removed by moving it to layer_ctxt */
968 size = sizeof(layer_mv_t);
969
970 if(mem_avail)
971 {
972 ASSERT(ps_memtab[count].size == size);
973 ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
974 }
975 else
976 {
977 ps_memtab[count].size = size;
978 ps_memtab[count].align = 8;
979 ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
980 }
981
982 count++;
983
984 if(mem_avail)
985 {
986 hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
987 }
988
989 return (count);
990 }
991
/**
********************************************************************************
* @fn hme_alloc_init_search_nodes()
*
* @brief Requests (or, when mem_avail is set, carves up) a single scratch
*        allocation of search nodes: max_num_results nodes for each
*        (reference, partition) pair of a CU.
*
* @return Number of memtabs required (always 1)
********************************************************************************
*/
S32 hme_alloc_init_search_nodes(
    search_results_t *ps_search_results,
    hme_memtab_t *ps_memtabs,
    S32 mem_avail,
    S32 max_num_ref,
    S32 max_num_results)
{
    S32 i4_size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;
    S32 i4_ref, i4_part;
    search_node_t *ps_node;

    /* Request phase: publish size/alignment and exit */
    if(0 == mem_avail)
    {
        ps_memtabs->size = i4_size;
        ps_memtabs->align = 4;
        ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
        return (1);
    }

    ps_node = (search_node_t *)ps_memtabs->pu1_mem;
    ASSERT(ps_memtabs->size == i4_size);
    /****************************************************************************/
    /* For each CU, we search and store N best results, per partition, per ref  */
    /* So the single allocation is carved into num_refs * num_parts arrays of   */
    /* max_num_results nodes each.                                              */
    /****************************************************************************/
    for(i4_ref = 0; i4_ref < max_num_ref; i4_ref++)
    {
        for(i4_part = 0; i4_part < TOT_NUM_PARTS; i4_part++)
        {
            ps_search_results->aps_part_results[i4_ref][i4_part] = ps_node;
            ps_node += max_num_results;
        }
    }
    return (1);
}
1027
/**
********************************************************************************
* @fn hme_derive_num_layers()
*
* @brief Derives the total number of HME layers and fills in the width,
*        height and display width/height of every non-encode layer by
*        repeated downscaling of the finest encode layer.
*
* @param[in] n_enc_layers : number of simulcast encode layers
* @param[in,out] p_wd, p_ht : layer working dimensions (encode layers given,
*                             non-encode layers derived here)
* @param[out] p_disp_wd, p_disp_ht : layer display dimensions
*
* @return total number of layers (encode + non-encode)
********************************************************************************
*/
S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
{
    S32 layer_id;

    /* We keep downscaling by 2 till we hit one of the conditions:         */
    /* 1. MAX_NUM_LAYERS reached.                                          */
    /* 2. Width or ht goes below min width and ht allowed at coarsest layer*/
    ASSERT(n_enc_layers < MAX_NUM_LAYERS);
    ASSERT(n_enc_layers > 0);
    ASSERT(p_wd[0] <= HME_MAX_WIDTH);
    ASSERT(p_ht[0] <= HME_MAX_HEIGHT);

    p_disp_wd[0] = p_wd[0];
    p_disp_ht[0] = p_ht[0];

    /*************************************************************************/
    /* Verify that for simulcast, lower layer to higher layer ratio is bet   */
    /* 2 (dyadic) and 1.33. Typically it should be 1.5.                      */
    /* TODO : for interlace, we may choose to have additional downscaling for*/
    /* width alone in coarsest layer to next layer.                          */
    /*************************************************************************/
    for(layer_id = 1; layer_id < n_enc_layers; layer_id++)
    {
        S32 min_wd = FLOOR16(p_wd[layer_id - 1] >> 1);
        S32 max_wd = CEIL16((p_wd[layer_id - 1] * 3) >> 2);

        ASSERT(p_wd[layer_id] >= min_wd);
        ASSERT(p_wd[layer_id] <= max_wd);

        {
            S32 min_ht = FLOOR16(p_ht[layer_id - 1] >> 1);
            S32 max_ht = CEIL16((p_ht[layer_id - 1] * 3) >> 2);

            ASSERT(p_ht[layer_id] >= min_ht);
            ASSERT(p_ht[layer_id] <= max_ht);
        }
    }
    ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
    ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);

    /* Append non-encode layers by halving until either dimension would    */
    /* drop below the coarsest-layer minimum or MAX_NUM_LAYERS is reached  */
    for(layer_id = n_enc_layers; layer_id < MAX_NUM_LAYERS; layer_id++)
    {
        if((p_wd[layer_id - 1] < 2 * MIN_WD_COARSE) || (p_ht[layer_id - 1] < 2 * MIN_HT_COARSE))
        {
            return (layer_id);
        }
        /* Use CEIL16 to facilitate 16x16 searches in future, or to do     */
        /* segmentation study in future                                    */
        p_wd[layer_id] = CEIL16(p_wd[layer_id - 1] >> 1);
        p_ht[layer_id] = CEIL16(p_ht[layer_id - 1] >> 1);

        p_disp_wd[layer_id] = p_disp_wd[layer_id - 1] >> 1;
        p_disp_ht[layer_id] = p_disp_ht[layer_id - 1] >> 1;
    }
    return (layer_id);
}
1078
1079 /**
1080 ********************************************************************************
1081 * @fn hme_get_mv_blk_size()
1082 *
1083 * @brief returns whether blk uses 4x4 size or something else.
1084 *
1085 * @param[in] enable_4x4 : input param from application to enable 4x4
1086 *
1087 * @param[in] layer_id : id of current layer (0 finest)
1088 *
1089 * @param[in] num_layeers : total num layers
1090 *
1091 * @param[in] is_enc : Whether encoding enabled for layer
1092 *
1093 * @return 1 for 4x4 blks, 0 for 8x8
1094 ********************************************************************************
1095 */
hme_get_mv_blk_size(S32 enable_4x4,S32 layer_id,S32 num_layers,S32 is_enc)1096 S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
1097 {
1098 S32 use_4x4 = enable_4x4;
1099
1100 if((layer_id <= 1) && (num_layers >= 4))
1101 use_4x4 = USE_4x4_IN_L1;
1102 if(layer_id == num_layers - 1)
1103 use_4x4 = 1;
1104 if(is_enc)
1105 use_4x4 = 0;
1106
1107 return (use_4x4);
1108 }
1109
/**
********************************************************************************
* @fn hme_enc_alloc_init_mem()
*
* @brief Requests/ assigns memory based on mem avail for the encode-layer ME:
*        master ctxt, per-thread ctxts, frame ctxts, encode-layer MV banks
*        and layer ctxts, working memory, bidir input buffer, MV histograms,
*        search nodes, weighted input buffers, clustering memory and the
*        optimised function list.
*
* @param[in] ps_memtabs : memtab array
*
* @param[in] ps_prms : init prms
*
* @param[in] pv_ctxt : ME ctxt (me_master_ctxt_t)
*
* @param[in] mem_avail : request/assign flag; 0 => fill sizes/alignments into
*                        ps_memtabs, non-zero => consume the allocated memory
*
* @param[in] i4_num_me_frm_pllel : number of ME frames processed in parallel;
*                        when 1, the parallel frame ctxts share memory and the
*                        memtab count is rewound/reused accordingly
*
* @return number of memtabs used/requested
********************************************************************************
*/
S32 hme_enc_alloc_init_mem(
    hme_memtab_t *ps_memtabs,
    hme_init_prms_t *ps_prms,
    void *pv_ctxt,
    S32 mem_avail,
    S32 i4_num_me_frm_pllel)
{
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
    me_ctxt_t *ps_ctxt;
    S32 count = 0, size, i, j, use_4x4;
    S32 n_tot_layers, n_enc_layers;
    S32 num_layers_explicit_search;
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
    S32 num_results;
    S32 num_thrds;
    S32 ctb_wd = 1 << ps_prms->log_ctb_size;

    /* MV bank pointers collected here and hooked into the layer ctxts below */
    hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
    S32 i4_num_mvs_per_row = 0;
    S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };

    n_enc_layers = ps_prms->num_simulcast_layers;

    /* Memtab 0: handle (master context) */
    size = sizeof(me_master_ctxt_t);
    if(mem_avail)
    {
        /* store the number of processing threads */
        ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    /* Memtab 1: ME threads ctxt */
    size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
    if(mem_avail)
    {
        me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;

        /* store the individual thread ctxt pointers */
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    /* Memtab 2: ME frame ctxts (one per parallel frame per thread) */
    size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
    if(mem_avail)
    {
        me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;

        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            /* store the individual thread ctxt pointers */
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;

                ps_me_frm_tmp_ctxt++;
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
    memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
    /*************************************************************************/
    /* Derive the number of HME layers, including both encoded and non encode*/
    /* This function also derives the width and ht of each layer.            */
    /*************************************************************************/
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
    num_layers_explicit_search = ps_prms->num_layers_explicit_search;
    if(num_layers_explicit_search <= 0)
        num_layers_explicit_search = n_tot_layers - 1;

    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);

    if(mem_avail)
    {
        /* Initialise the per-frame contexts with the derived layer info */
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
                memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);

                /* only one encode layer is used */
                ps_frm_ctxt->num_layers = 1;

                ps_frm_ctxt->i4_wd = a_wd[0];
                ps_frm_ctxt->i4_ht = a_ht[0];
                /*
                memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
                memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
                */
                ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
                ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
                ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
                ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
            }
        }
    }

    /* Memtabs : Layers MV bank for encode layer */
    /* Each ref_descr in master ctxt will have separate layer ctxt */

    for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
    {
        /* j loops only over the encode layer (layer 0) */
        for(j = 0; j < 1; j++)
        {
            S32 is_explicit_store = 1;
            S32 wd, ht;
            U08 u1_enc = 1;
            wd = a_wd[j];
            ht = a_ht[j];

            /* Possibly implicit search for lower (finer) layers */
            if(n_tot_layers - j > num_layers_explicit_search)
                is_explicit_store = 0;

            /* Even if explicit search, we store only 2 results (L0 and L1) */
            /* in finest layer */
            if(j == 0)
            {
                is_explicit_store = 0;
            }

            /* coarsest layer always uses 4x4 blks to store results */
            if(j == n_tot_layers - 1)
            {
                num_results = ps_prms->max_num_results_coarse;
            }
            else
            {
                num_results = ps_prms->max_num_results;
                if(j == 0)
                    num_results = 1;
            }
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);

            count += hme_alloc_init_layer_mv_bank(
                &ps_memtabs[count],
                num_results,
                ps_prms->max_num_ref,
                use_4x4,
                mem_avail,
                u1_enc,
                wd,
                ht,
                is_explicit_store,
                &aps_mv_bank[i],
                &api1_ref_idx[i],
                &i4_num_mvs_per_row);
        }
    }

    /* Memtabs : Layers * num-ref + 1 */
    for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
    {
        /* layer memory allocated only for encode layer */
        for(j = 0; j < 1; j++)
        {
            layer_ctxt_t *ps_layer;
            S32 is_explicit_store = 1;
            S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
            S32 wd, ht;
            U08 u1_enc = 1;
            wd = a_wd[j];
            ht = a_ht[j];

            /* Possibly implicit search for lower (finer) layers */
            if(n_tot_layers - j > num_layers_explicit_search)
                is_explicit_store = 0;

            /* Even if explicit search, we store only 2 results (L0 and L1) */
            /* in finest layer */
            if(j == 0)
            {
                is_explicit_store = 0;
            }

            /* coarsest layer always uses 4x4 blks to store results */
            if(j == n_tot_layers - 1)
            {
                num_results = ps_prms->max_num_results_coarse;
            }
            else
            {
                num_results = ps_prms->max_num_results;
                if(j == 0)
                    num_results = 1;
            }
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);

            count += hme_alloc_init_layer(
                &ps_memtabs[count],
                num_results,
                ps_prms->max_num_ref,
                use_4x4,
                mem_avail,
                u1_enc,
                wd,
                ht,
                a_disp_wd[j],
                a_disp_ht[j],
                segment_this_layer,
                is_explicit_store,
                &ps_layer);
            if(mem_avail)
            {
                /* same ps_layer memory pointer is stored in all the threads */
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                    ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
                }

                /* store the MV bank pointers (allocated in the loop above) */
                ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
                ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
                ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
            }
        }
    }

    /* Memtabs : Buf Mgr for predictor bufs and working mem */
    /* TODO : Parameterise this appropriately */
    size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;

    if(mem_avail)
    {
        U08 *pu1_mem = ps_memtabs[count].pu1_mem;

        ASSERT(ps_memtabs[count].size == size);

        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);

                if(i4_num_me_frm_pllel != 1)
                {
                    /* update the memory buffer pointer */
                    pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
                }
            }
            if(i4_num_me_frm_pllel == 1)
            {
                /* parallel frame ctxts share the same working memory; */
                /* advance only once per thread */
                pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 4;
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }
    count++;

    /*************************************************************************/
    /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
    /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred  */
    /*************************************************************************/
    size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
           i4_num_me_frm_pllel;

    if(mem_avail)
    {
        S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;

        ASSERT(ps_memtabs[count].size == size);

        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                ps_frm_ctxt->pi2_inp_bck = pi2_mem;
                /* If no ME frames run in parallel, all aps_me_frm_prms indices share the same memory */
                if(i4_num_me_frm_pllel != 1)
                {
                    pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
                }
            }
            if(i4_num_me_frm_pllel == 1)
            {
                pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 16;
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    count++;

    /* Allocate a memtab for each histogram. As many as num ref and number of threads */
    /* Loop across for each ME_FRM in PARALLEL */
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
    {
        for(i = 0; i < ps_prms->max_num_ref; i++)
        {
            size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
            if(mem_avail)
            {
                mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;

                ASSERT(size == ps_memtabs[count].size);

                /* divide the memory across the threads */
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                    ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
                    ps_mv_hist++;
                }
            }
            else
            {
                ps_memtabs[count].size = size;
                ps_memtabs[count].align = 8;
                ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
            }
            count++;
        }
        if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
        {
            /* If no ME frames run in parallel, the other aps_me_frm_prms indices */
            /* reuse the same memory: rewind the count so the next parallel-frame */
            /* iteration maps onto the same memtabs; don't rewind on the last loop */
            count -= ps_prms->max_num_ref;
        }
    }

    /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
    {
        /* remembered so the count can be rewound when frame ctxts share memory */
        S32 count_cpy = count;
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            if(mem_avail)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
            }

            /* i = 0..15 : 16x16 CUs, 16..19 : 32x32 CUs, 20 : the 64x64 CU */
            for(i = 0; i < 21; i++)
            {
                search_results_t *ps_search_results = NULL;
                if(mem_avail)
                {
                    if(i < 16)
                    {
                        ps_search_results =
                            &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
                    }
                    else if(i < 20)
                    {
                        ps_search_results =
                            &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
                        ps_search_results->ps_cu_results =
                            &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
                    }
                    else if(i == 20)
                    {
                        ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
                        ps_search_results->ps_cu_results =
                            &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
                    }
                    else
                    {
                        /* 8x8 search results are not required in L0 ME */
                        ASSERT(0);
                    }
                }
                count += hme_alloc_init_search_nodes(
                    ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
            }
        }

        if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
        {
            count = count_cpy;
        }
    }

    /* Weighted inputs, one for each ref + one non weighted */
    for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
    {
        size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
        if(mem_avail)
        {
            U08 *pu1_mem;
            ASSERT(ps_memtabs[count].size == size);
            pu1_mem = ps_memtabs[count].pu1_mem;

            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                for(i = 0; i < ps_prms->max_num_ref + 1; i++)
                {
                    ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
                    pu1_mem += (ctb_wd * ctb_wd);
                }
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 16;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }
        /* advance only when the memtab is not shared across parallel frames */
        if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
        {
            count++;
        }
    }

    /* if memory is allocated then initialise the frm prms ptr of each thrd */
    if(mem_avail)
    {
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            me_frm_ctxt_t *ps_frm_ctxt;
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
            {
                ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

                ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
                ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
            }
        }
    }

    /* Memory allocation for use in Clustering (pristine quality only) */
    if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
    {
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
                   sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
            size *= ps_prms->i4_num_proc_thrds;

            if(mem_avail)
            {
                U08 *pu1_mem;

                ASSERT(ps_memtabs[count].size == size);
                pu1_mem = ps_memtabs[count].pu1_mem;

                /* carve the per-thread slice into 16x16 / 32x32 / 64x64 / info */
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
                    pu1_mem += (16 * sizeof(cluster_16x16_blk_t));

                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
                    pu1_mem += (4 * sizeof(cluster_32x32_blk_t));

                    ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
                    pu1_mem += (sizeof(cluster_64x64_blk_t));

                    ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
                        (ctb_cluster_info_t *)pu1_mem;
                    pu1_mem += (sizeof(ctb_cluster_info_t));
                }
            }
            else
            {
                ps_memtabs[count].size = size;
                ps_memtabs[count].align = 16;
                ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
            }

            /* advance only when the memtab is not shared across parallel frames */
            if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
            {
                count++;
            }
        }
    }
    else if(mem_avail)
    {
        /* clustering disabled: make the dangling pointers explicit */
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;

                ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;

                ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;

                ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
            }
        }
    }

    /* Fullpel refinement context, one per thread per parallel frame */
    for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
    {
        size = sizeof(fullpel_refine_ctxt_t);
        size *= ps_prms->i4_num_proc_thrds;

        if(mem_avail)
        {
            U08 *pu1_mem;

            ASSERT(ps_memtabs[count].size == size);
            pu1_mem = ps_memtabs[count].pu1_mem;

            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

                ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
                    (fullpel_refine_ctxt_t *)pu1_mem;
                pu1_mem += (sizeof(fullpel_refine_ctxt_t));
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 16;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }

        /* advance only when the memtab is not shared across parallel frames */
        if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
        {
            count++;
        }
    }

    /* Memory for ihevce_me_optimised_function_list_t struct */
    if(mem_avail)
    {
        ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
    }
    else
    {
        ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
        ps_memtabs[count].align = 16;
        ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    /* sanity: the request/assign phases must agree on the memtab budget */
    ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
    return (count);
}
1721
1722 /**
1723 ********************************************************************************
1724 * @fn hme_coarse_alloc_init_mem()
1725 *
1726 * @brief Requests/ assign memory based on mem avail
1727 *
1728 * @param[in] ps_memtabs : memtab array
1729 *
1730 * @param[in] ps_prms : init prms
1731 *
1732 * @param[in] pv_ctxt : ME ctxt
1733 *
1734 * @param[in] mem_avail : request/assign flag
1735 *
1736 * @return number of memtabs
1737 ********************************************************************************
1738 */
/*!
******************************************************************************
* \brief Two-pass memory manager for the coarse ME module.
*
* When mem_avail == 0: only fills ps_memtabs[] with the size / alignment /
* memory-attribute requests for each memtab (request pass).
* When mem_avail != 0: consumes the previously allocated
* ps_memtabs[].pu1_mem pointers and wires them into the master context
* (pv_ctxt) and into each processing thread's context (init pass).
*
* Both passes must walk memtabs in exactly the same order so that `count`
* indexes the same entry in each pass.
*
* \param[in,out] ps_memtabs : array of memory descriptors
* \param[in]     ps_prms    : create-time init parameters
* \param[in,out] pv_ctxt    : coarse_me_master_ctxt_t* (used when mem_avail)
* \param[in]     mem_avail  : 0 = request sizes, non-zero = init pointers
*
* \return number of memtabs walked
******************************************************************************
*/
S32 hme_coarse_alloc_init_mem(
    hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    S32 count = 0, size, i, j, use_4x4, wd;
    S32 n_tot_layers;
    S32 num_layers_explicit_search;
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
    S32 num_results;
    S32 num_thrds;
    //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
    S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
    S32 layer1_blk_width = 8; // 8x8 search
    S32 blk_shift;

    /* MV bank changes: per-layer MV storage descriptors, filled either by */
    /* hme_alloc_init_layer_mv_bank() or (layer 1) computed locally below  */
    hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
    S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
    S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };

    /* Memtab 0: handle (master context itself) */
    size = sizeof(coarse_me_master_ctxt_t);
    if(mem_avail)
    {
        /* store the number of processing threads */
        ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    /* Memtab 1: ME threads ctxt (one coarse_me_ctxt_t per processing thread) */
    size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
    if(mem_avail)
    {
        coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;

        /* store the individual thread ctxt pointers */
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 8;
        ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
    }

    count++;

    memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
    memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
    /*************************************************************************/
    /* Derive the number of HME layers, including both encoded and non encode*/
    /* This function also derives the width and ht of each layer.            */
    /*************************************************************************/
    n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);

    num_layers_explicit_search = ps_prms->num_layers_explicit_search;

    /* <= 0 means "auto": explicit search in every non-encode layer */
    if(num_layers_explicit_search <= 0)
        num_layers_explicit_search = n_tot_layers - 1;

    /* clamp to the number of non-encode layers */
    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);

    if(mem_avail)
    {
        /* replicate the derived layer geometry into every thread context */
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
            memset(ps_ctxt->u1_encode, 0, n_tot_layers);

            /* encode layer should be excluded during processing */
            ps_ctxt->num_layers = n_tot_layers;

            memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
            memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);

            ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
            ps_ctxt->max_num_results = ps_prms->max_num_results;
            ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
            ps_ctxt->max_num_ref = ps_prms->max_num_ref;
        }
    }

    /* Memtabs : Layers MV bank for total layers - 2                        */
    /* for penultimate layer MV bank will be initialised at every frame level */
    for(j = 1; j < n_tot_layers; j++)
    {
        S32 is_explicit_store = 1;
        S32 wd, ht;
        U08 u1_enc = 0;
        wd = a_wd[j];
        ht = a_ht[j];

        /* Possibly implicit search for lower (finer) layers */
        if(n_tot_layers - j > num_layers_explicit_search)
            is_explicit_store = 0;

        /* Even if explicit search, we store only 2 results (L0 and L1) */
        /* in finest layer */
        if(j == 0)
        {
            is_explicit_store = 0;
        }

        /* coarsest layer always uses 4x4 blks to store results */
        if(j == n_tot_layers - 1)
        {
            num_results = ps_prms->max_num_results_coarse;
        }
        else
        {
            num_results = ps_prms->max_num_results;
            if(j == 0)
                num_results = 1;
        }
        use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);

        /* for penultimate layer: compute the bank geometry here and store it; */
        /* the actual bank memory is set up per-frame, so pointers stay NULL   */
        if(j == 1)
        {
            S32 num_blks, num_mvs_per_blk, num_ref;
            S32 num_cols, num_rows, num_mvs_per_row;

            /* +2 columns/rows of padding blocks around the picture */
            num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
            num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);

            if(is_explicit_store)
                num_ref = ps_prms->max_num_ref;
            else
                num_ref = 2;

            num_blks = num_cols * num_rows;
            num_mvs_per_blk = num_ref * num_results;
            num_mvs_per_row = num_mvs_per_blk * num_cols;

            ai4_num_mvs_per_row[j] = num_mvs_per_row;
            aps_mv_bank[j] = NULL;
            api1_ref_idx[j] = NULL;
        }
        else
        {
            count += hme_alloc_init_layer_mv_bank(
                &ps_memtabs[count],
                num_results,
                ps_prms->max_num_ref,
                use_4x4,
                mem_avail,
                u1_enc,
                wd,
                ht,
                is_explicit_store,
                &aps_mv_bank[j],
                &api1_ref_idx[j],
                &ai4_num_mvs_per_row[j]);
        }
    }

    /* Memtabs : Layers * num-ref + 1 (layer descriptors per reference slot) */
    for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
    {
        /* for all layers except encode layer */
        for(j = 1; j < n_tot_layers; j++)
        {
            layer_ctxt_t *ps_layer;
            S32 is_explicit_store = 1;
            S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
            S32 wd, ht;
            U08 u1_enc = 0;
            wd = a_wd[j];
            ht = a_ht[j];

            /* Possibly implicit search for lower (finer) layers */
            if(n_tot_layers - j > num_layers_explicit_search)
                is_explicit_store = 0;

            /* Even if explicit search, we store only 2 results (L0 and L1) */
            /* in finest layer */
            if(j == 0)
            {
                is_explicit_store = 0;
            }

            /* coarsest layer always uses 4x4 blks to store results */
            if(j == n_tot_layers - 1)
            {
                num_results = ps_prms->max_num_results_coarse;
            }
            else
            {
                num_results = ps_prms->max_num_results;
                if(j == 0)
                    num_results = 1;
            }
            use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);

            count += hme_alloc_init_layer(
                &ps_memtabs[count],
                num_results,
                ps_prms->max_num_ref,
                use_4x4,
                mem_avail,
                u1_enc,
                wd,
                ht,
                a_disp_wd[j],
                a_disp_ht[j],
                segment_this_layer,
                is_explicit_store,
                &ps_layer);
            if(mem_avail)
            {
                /* same ps_layer memory pointer is stored in all the threads */
                for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
                {
                    ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                    ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
                }

                /* store the MV bank pointers (NULL for layer 1 — set per-frame) */
                ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
                ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
                ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
            }
        }
    }

    /* Memtabs : Prev Row search node at coarsest layer */
    wd = a_wd[n_tot_layers - 1];

    /* Allocate a memtab for storing 4x4 SADs for n rows. As many as num ref and number of threads */
    num_rows = ps_prms->i4_num_proc_thrds + 1;
    if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
        search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
    else
        search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;

    /*shift factor*/
    blk_shift = 2; /*4x4*/
    /* NOTE(review): search_step is used below as a shift amount, not a step */
    /* in pels — presumably the HME_COARSE_STEP_SIZE_* constants encode     */
    /* log2 values; confirm against their definitions.                      */
    search_step >>= 1;

    /* number of candidate positions inside the +/- MVX x +/- MVY range */
    sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
                         ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
    sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;

    size = num_rows * sad_4x4_block_stride * sizeof(S16);
    for(i = 0; i < ps_prms->max_num_ref; i++)
    {
        if(mem_avail)
        {
            ASSERT(size == ps_memtabs[count].size);

            /* same row memory pointer is stored in all the threads */
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 4;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }
        count++;
    }

    /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
    size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
    for(i = 0; i < ps_prms->max_num_ref; i++)
    {
        if(mem_avail)
        {
            ASSERT(size == ps_memtabs[count].size);

            /* same row memory pointer is stored in all the threads */
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
                    (search_node_t *)ps_memtabs[count].pu1_mem;
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 4;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }
        count++;
    }
    /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
    size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
    for(i = 0; i < ps_prms->max_num_ref; i++)
    {
        if(mem_avail)
        {
            ASSERT(size == ps_memtabs[count].size);

            /* same row memory pointer is stored in all the threads */
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
                    (search_node_t *)ps_memtabs[count].pu1_mem;
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 4;
            ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
        }
        count++;
    }

    /* Allocate a memtab for each histogram. As many as num ref and number of threads */
    for(i = 0; i < ps_prms->max_num_ref; i++)
    {
        size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
        if(mem_avail)
        {
            mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;

            ASSERT(size == ps_memtabs[count].size);

            /* divide the memory across the threads (one mv_hist_t each) */
            for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
            {
                ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
                ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
                ps_mv_hist++;
            }
        }
        else
        {
            ps_memtabs[count].size = size;
            ps_memtabs[count].align = 8;
            ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
        }
        count++;
    }

    /* Memtabs : Search nodes for 8x8 blks (one search_results_t per thread) */
    for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
    {
        search_results_t *ps_search_results = NULL;

        if(mem_avail)
        {
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        }

        if(mem_avail)
        {
            ps_search_results = &ps_ctxt->s_search_results_8x8;
        }
        count += hme_alloc_init_search_nodes(
            ps_search_results,
            &ps_memtabs[count],
            mem_avail,
            ps_prms->max_num_ref,
            ps_prms->max_num_results);
    }

    /* Weighted inputs, one (blk_width x blk_width) buffer per ref, per thread */
    size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
           ps_prms->i4_num_proc_thrds;
    if(mem_avail)
    {
        U08 *pu1_mem;
        ASSERT(ps_memtabs[count].size == size);
        pu1_mem = ps_memtabs[count].pu1_mem;

        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            for(i = 0; i < ps_prms->max_num_ref + 1; i++)
            {
                ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
                pu1_mem += (layer1_blk_width * layer1_blk_width);
            }
        }
    }
    else
    {
        ps_memtabs[count].size = size;
        ps_memtabs[count].align = 16;
        ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }
    count++;

    /* if memory is allocated then initialise the frm prms ptr in each thread */
    if(mem_avail)
    {
        for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
        {
            ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

            ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
            ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
        }
    }

    /* Memory for ihevce_me_optimised_function_list_t struct */
    if(mem_avail)
    {
        ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
    }
    else
    {
        ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
        ps_memtabs[count].align = 16;
        ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    //ASSERT(count < hme_enc_num_alloc());
    ASSERT(count < hme_coarse_num_alloc());
    return (count);
}
2171
2172 /*!
2173 ******************************************************************************
2174 * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
2175 *
2176 * \brief Returns to the caller key attributes relevant for dependency manager,
2177 * ie, the number of vertical units in each layer
2178 *
2179 * \par Description:
2180 * This function requires the precondition that the width and ht of encode
2181 * layer is known.
2182 * The number of layers, number of vertical units in each layer, and for
2183 * each vertial unit in each layer, its dependency on previous layer's units
2184 * From ME's perspective, a vertical unit is one which is smallest min size
2185 * vertically (and spans the entire row horizontally). This is CTB for encode
2186 * layer, and 8x8 / 4x4 for non encode layers.
2187 *
2188 * \param[in] num_layers : Number of ME Layers
2189 * \param[in] pai4_ht : Array storing ht at each layer
2190 * \param[in] pai4_wd : Array storing wd at each layer
2191 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
2192 * entry has num vertical units in that particular layer
2193 *
2194 * \return
2195 * None
2196 *
2197 * \author
2198 * Ittiam
2199 *
2200 *****************************************************************************
2201 */
void ihevce_coarse_me_get_lyr_prms_dep_mngr(
    WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
{
    WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1;
    /* Height of one vertical unit in the layer being processed */
    WORD32 vert_unit_ht;
    /* Vertical-unit count carried from coarser to finer layer */
    WORD32 num_vert_units;
    WORD32 lyr_idx;
    UWORD8 au1_is_enc_lyr[MAX_NUM_LAYERS];

    /* Mark encode layers: layers [0, num_enc_layers) encode, the rest do not */
    memset(au1_is_enc_lyr, 0, num_layers);
    memset(au1_is_enc_lyr, 1, num_enc_layers);

    /* Coarsest layer: the vertical unit is the search block height */
    /* (a full CTB if, exceptionally, the coarsest layer encodes)   */
    if(au1_is_enc_lyr[num_layers - 1])
    {
        vert_unit_ht = ctb_size;
    }
    else
    {
        vert_unit_ht = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
    }

    /* ceil(layer height / unit height) */
    num_vert_units = (pai4_ht[num_layers - 1] + vert_unit_ht - 1) / vert_unit_ht;

    /* For new design in Coarsest HME layer we need  */
    /* one additional row extra at the end of frame  */
    num_vert_units += 1;

    /*************************************************************************/
    /* Walk from the coarsest layer down to layer 1: publish the unit count  */
    /* of the current layer, then derive the count for the next finer layer. */
    /*************************************************************************/
    for(lyr_idx = num_layers - 1; lyr_idx > 0; lyr_idx--)
    {
        pai4_num_vert_units_in_lyr[lyr_idx] = num_vert_units;

        vert_unit_ht = hme_get_blk_size(use_4x4, lyr_idx - 1, num_layers, 0);
        if(au1_is_enc_lyr[lyr_idx - 1])
        {
            /* encode layer works in CTB rows */
            vert_unit_ht = ctb_size;
        }

        num_vert_units = (pai4_ht[lyr_idx - 1] + vert_unit_ht - 1) / vert_unit_ht;
    }

    /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
    pai4_num_vert_units_in_lyr[0] = num_vert_units;
}
2268
2269 /**
2270 ********************************************************************************
2271 * @fn hme_coarse_dep_mngr_alloc_mem()
2272 *
2273 * @brief Requests memory for HME Dep Mngr
2274 *
2275 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
2276 * \param[in] ps_init_prms : Create time static parameters
2277 * \param[in] i4_mem_space : memspace in whihc memory request should be done
2278 *
2279 * @return number of memtabs
2280 ********************************************************************************
2281 */
WORD32 hme_coarse_dep_mngr_alloc_mem(
    iv_mem_rec_t *ps_mem_tab,
    ihevce_static_cfg_params_t *ps_init_prms,
    WORD32 i4_mem_space,
    WORD32 i4_num_proc_thrds,
    WORD32 i4_resolution_id)
{
    WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
    WORD32 ai4_lyr_wd[MAX_NUM_HME_LAYERS], ai4_lyr_ht[MAX_NUM_HME_LAYERS];
    WORD32 ai4_disp_wd[MAX_NUM_HME_LAYERS], ai4_disp_ht[MAX_NUM_HME_LAYERS];
    WORD32 i4_num_enc_layers = 1;
    WORD32 i4_num_layers;
    WORD32 i4_num_memtabs = 0;
    WORD32 i4_min_cu_size;
    WORD32 i4_pic_wd, i4_pic_ht;
    WORD32 i4_lyr;

    /* min CU size in pixels, from the configured log2 value */
    i4_min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;

    /* Layer 0 dimensions: target resolution aligned up to min CU size */
    i4_pic_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
    i4_pic_ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;

    ai4_lyr_wd[0] = i4_pic_wd + SET_CTB_ALIGN(i4_pic_wd, i4_min_cu_size);
    ai4_lyr_ht[0] = i4_pic_ht + SET_CTB_ALIGN(i4_pic_ht, i4_min_cu_size);

    /* Derive the per-layer decomposition widths/heights */
    i4_num_layers =
        hme_derive_num_layers(i4_num_enc_layers, ai4_lyr_wd, ai4_lyr_ht, ai4_disp_wd, ai4_disp_ht);
    ASSERT(i4_num_layers >= 3);

    /* Vertical-unit counts per layer, as needed by the dependency manager */
    ihevce_coarse_me_get_lyr_prms_dep_mngr(
        i4_num_layers, &ai4_lyr_ht[0], &ai4_lyr_wd[0], &ai4_num_vert_units_in_lyr[0]);

    /* One row-row sync dep manager per HME layer, excluding L0 */
    for(i4_lyr = 1; i4_lyr < i4_num_layers; i4_lyr++)
    {
        i4_num_memtabs += ihevce_dmgr_get_mem_recs(
            &ps_mem_tab[i4_num_memtabs],
            DEP_MNGR_ROW_ROW_SYNC,
            ai4_num_vert_units_in_lyr[i4_lyr],
            1, /* Number of Col Tiles : Not supported in PreEnc */
            i4_num_proc_thrds,
            i4_mem_space);
    }

    ASSERT(i4_num_memtabs <= hme_coarse_dep_mngr_num_alloc());

    return (i4_num_memtabs);
}
2333
2334 /**
2335 ********************************************************************************
2336 * @fn hme_coarse_dep_mngr_init()
2337 *
2338 * @brief Assign memory for HME Dep Mngr
2339 *
2340 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
2341 * \param[in] ps_init_prms : Create time static parameters
2342 * @param[in] pv_ctxt : ME ctxt
2343 * \param[in] pv_osal_handle : Osal handle
2344 *
2345 * @return number of memtabs
2346 ********************************************************************************
2347 */
WORD32 hme_coarse_dep_mngr_init(
    iv_mem_rec_t *ps_mem_tab,
    ihevce_static_cfg_params_t *ps_init_prms,
    void *pv_ctxt,
    void *pv_osal_handle,
    WORD32 i4_num_proc_thrds,
    WORD32 i4_resolution_id)
{
    WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
    WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
    WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
    WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
    WORD32 min_cu_size;

    coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;

    /* get the min cu size from config params (log2 -> pixels) */
    min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;

    min_cu_size = 1 << min_cu_size;

    /* Get the width and heights of different decomp layers.               */
    /* Layer 0 = target resolution aligned up to min CU size; the same     */
    /* derivation is repeated in hme_coarse_dep_mngr_alloc_mem() so the    */
    /* memtab layout of both passes matches.                               */
    *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
            SET_CTB_ALIGN(
                ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
    *a_ht =
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
        SET_CTB_ALIGN(
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);

    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
    ASSERT(n_tot_layers >= 3);

    /* --- Get the number of vertical units in each layer for dep. mngr -- */
    ihevce_coarse_me_get_lyr_prms_dep_mngr(
        n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);

    /* --- HME sync Dep Mngr Mem init: one manager per non-L0 layer ------ */
    for(i = 1; i < n_tot_layers; i++)
    {
        WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift;

        if(i == (n_tot_layers - 1)) /* coarsest layer: 4x4 blocks */
            blk_size_shift = 2;
        else
            blk_size_shift = 3; /* refine layers: 8x8 blocks */

        /* macro outputs: num_blks_in_row, num_blks_in_pic */
        GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic);

        /* Coarsest layer : 1 block extra, since the last block */
        if(i == (n_tot_layers - 1)) /* in a row needs East block */
            num_blks_in_row += 1;

        /* Note : i-1, only for HME layers, L0 is separate */
        ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init(
            &ps_mem_tab[n_dep_tabs],
            pv_osal_handle,
            DEP_MNGR_ROW_ROW_SYNC,
            ai4_num_vert_units_in_lyr[i],
            num_blks_in_row,
            1, /* Number of Col Tiles : Not supported in PreEnc */
            i4_num_proc_thrds,
            1 /*Sem disabled*/
        );

        n_dep_tabs += ihevce_dmgr_get_num_mem_recs();
    }

    return n_dep_tabs;
}
2418
2419 /**
2420 ********************************************************************************
2421 * @fn hme_coarse_dep_mngr_reg_sem()
2422 *
2423 * @brief Assign semaphores for HME Dep Mngr
2424 *
2425 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2426 * \param[in] ppv_sem_hdls : Arry of semaphore handles
2427 * \param[in] i4_num_proc_thrds : Number of processing threads
2428 *
2429 * @return number of memtabs
2430 ********************************************************************************
2431 */
void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
{
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
    /* layer count is replicated in every thread ctxt; thread 0 is used here */
    coarse_me_ctxt_t *ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
    WORD32 lyr_idx;

    /* Register the semaphore handles with each HME layer's dep manager */
    /* (index lyr_idx - 1: only non-L0 layers have managers)            */
    for(lyr_idx = 1; lyr_idx < ps_thrd0_ctxt->num_layers; lyr_idx++)
    {
        ihevce_dmgr_reg_sem_hdls(
            ps_master_ctxt->apv_dep_mngr_hme_sync[lyr_idx - 1], ppv_sem_hdls, i4_num_proc_thrds);
    }
}
2447
2448 /**
2449 ********************************************************************************
2450 * @fn hme_coarse_dep_mngr_delete()
2451 *
2452 * Destroy Coarse ME Dep Mngr module
2453 * Note : Only Destroys the resources allocated in the module like
2454 * semaphore,etc. Memory free is done Separately using memtabs
2455 *
2456 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2457 * \param[in] ps_init_prms : Create time static parameters
2458 *
2459 * @return none
2460 ********************************************************************************
2461 */
void hme_coarse_dep_mngr_delete(
    void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
{
    WORD32 ai4_lyr_wd[MAX_NUM_HME_LAYERS], ai4_lyr_ht[MAX_NUM_HME_LAYERS];
    WORD32 ai4_disp_wd[MAX_NUM_HME_LAYERS], ai4_disp_ht[MAX_NUM_HME_LAYERS];
    WORD32 i4_num_enc_layers = 1;
    WORD32 i4_num_layers;
    WORD32 i4_min_cu_size;
    WORD32 i4_pic_wd, i4_pic_ht;
    WORD32 i4_lyr;

    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;

    /* min CU size in pixels, from the configured log2 value */
    i4_min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;

    /* Re-derive the layer count exactly as done at init time */
    i4_pic_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
    i4_pic_ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;

    ai4_lyr_wd[0] = i4_pic_wd + SET_CTB_ALIGN(i4_pic_wd, i4_min_cu_size);
    ai4_lyr_ht[0] = i4_pic_ht + SET_CTB_ALIGN(i4_pic_ht, i4_min_cu_size);

    i4_num_layers =
        hme_derive_num_layers(i4_num_enc_layers, ai4_lyr_wd, ai4_lyr_ht, ai4_disp_wd, ai4_disp_ht);
    ASSERT(i4_num_layers >= 3);

    /* Destroy each HME layer's dep manager resources (memory is freed */
    /* separately through memtabs); index i4_lyr - 1 skips L0          */
    for(i4_lyr = 1; i4_lyr < i4_num_layers; i4_lyr++)
    {
        ihevce_dmgr_del(ps_master_ctxt->apv_dep_mngr_hme_sync[i4_lyr - 1]);
    }
}
2495
2496 /**
2497 *******************************************************************************
2498 * @fn S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2499 *
2500 * @brief Fills up memtabs with memory information details required by HME
2501 *
2502 * @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2503 * up its requirements of memory
2504 *
2505 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2506 * amt of memory
2507 *
2508 * @return Number of memtabs required
2509 *******************************************************************************
2510 */
hme_enc_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,WORD32 i4_num_me_frm_pllel)2511 S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
2512 {
2513 S32 num, tot, i;
2514
2515 /* Validation of init params */
2516 if(-1 == hme_validate_init_prms(ps_prms))
2517 return (-1);
2518
2519 num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
2520 tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2521 for(i = num; i < tot; i++)
2522 {
2523 ps_memtabs[i].size = 4;
2524 ps_memtabs[i].align = 4;
2525 ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2526 }
2527 return (tot);
2528 }
2529
2530 /**
2531 *******************************************************************************
2532 * @fn S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2533 *
2534 * @brief Fills up memtabs with memory information details required by Coarse HME
2535 *
2536 * @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2537 * up its requirements of memory
2538 *
2539 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2540 * amt of memory
2541 *
2542 * @return Number of memtabs required
2543 *******************************************************************************
2544 */
hme_coarse_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2545 S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2546 {
2547 S32 num, tot, i;
2548
2549 /* Validation of init params */
2550 if(-1 == hme_validate_init_prms(ps_prms))
2551 return (-1);
2552
2553 num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
2554 tot = hme_coarse_num_alloc();
2555 for(i = num; i < tot; i++)
2556 {
2557 ps_memtabs[i].size = 4;
2558 ps_memtabs[i].align = 4;
2559 ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2560 }
2561 return (tot);
2562 }
2563
2564 /**
2565 *******************************************************************************
2566 * @fn hme_coarse_dep_mngr_alloc
2567 *
2568 * @brief Fills up memtabs with memory information details required by Coarse HME
2569 *
2570 * \param[in,out] ps_mem_tab : pointer to memory descriptors table
2571 * \param[in] ps_init_prms : Create time static parameters
2572 * \param[in] i4_mem_space : memspace in whihc memory request should be done
2573 *
2574 * @return Number of memtabs required
2575 *******************************************************************************
2576 */
WORD32 hme_coarse_dep_mngr_alloc(
    iv_mem_rec_t *ps_mem_tab,
    ihevce_static_cfg_params_t *ps_init_prms,
    WORD32 i4_mem_space,
    WORD32 i4_num_proc_thrds,
    WORD32 i4_resolution_id)
{
    S32 i4_filled, i4_total, idx;

    /* Request pass: fill in the real dep-manager memtab requirements */
    i4_filled = hme_coarse_dep_mngr_alloc_mem(
        ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
    i4_total = hme_coarse_dep_mngr_num_alloc();

    /* Pad unused slots with minimal 4-byte requests so the caller can */
    /* always allocate the fixed total number of memtabs               */
    for(idx = i4_filled; idx < i4_total; idx++)
    {
        ps_mem_tab[idx].i4_mem_size = 4;
        ps_mem_tab[idx].i4_mem_alignment = 4;
        ps_mem_tab[idx].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
    }

    return (i4_total);
}
2597
2598 /**
2599 ********************************************************************************
2600 * @fn hme_coarse_init_ctxt()
2601 *
2602 * @brief initialise context memory
2603 *
2604 * @param[in] ps_prms : init prms
2605 *
2606 * @param[in] pv_ctxt : ME ctxt
2607 *
2608 * @return number of memtabs
2609 ********************************************************************************
2610 */
void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
{
    coarse_me_ctxt_t *ps_thrd_ctxt;
    S32 thrd_idx, ref_idx, lyr_idx, row;
    S32 num_row_bufs;

    /* Seed every thread's context from the init params */
    for(thrd_idx = 0; thrd_idx < ps_master_ctxt->i4_num_proc_thrds; thrd_idx++)
    {
        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_idx];

        /* Copy the init prms to context */
        ps_thrd_ctxt->s_init_prms = *ps_prms;

        /* -1 marks "no previous frame processed yet" */
        ps_thrd_ctxt->i4_prev_poc = -1;

        ps_thrd_ctxt->num_b_frms = ps_prms->num_b_frms;

        ps_thrd_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_thrd_ctxt->au1_ref_bits_tlu_lc[0][0];
        ps_thrd_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_thrd_ctxt->au1_ref_bits_tlu_lc[1][0];

        /* Row-index lookup: maps each coarse row to one of the row buffers */
        ps_thrd_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
        num_row_bufs = ps_thrd_ctxt->i4_num_row_bufs;
        for(row = 0; row < ((HEVCE_MAX_HEIGHT >> 1) >> 2); row++)
        {
            ps_thrd_ctxt->ai4_row_index[row] = row % num_row_bufs;
        }
    }

    /* Layer descriptors are shared across threads, so they are */
    /* initialized once through thread 0's context              */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* poc = -1 marks every layer descriptor as unfilled */
    for(ref_idx = 0; ref_idx < ps_thrd_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; ref_idx++)
    {
        for(lyr_idx = 1; lyr_idx < ps_thrd_ctxt->num_layers; lyr_idx++)
        {
            layer_ctxt_t *ps_layer = ps_thrd_ctxt->as_ref_descr[ref_idx].aps_layers[lyr_idx];

            ps_layer->i4_poc = -1;
            ps_layer->ppu1_list_inp = &ps_thrd_ctxt->apu1_list_inp[lyr_idx][0];
            memset(
                ps_layer->s_global_mv,
                0,
                sizeof(hme_mv_t) * ps_thrd_ctxt->max_num_ref * NUM_GMV_LOBES);
        }
    }
}
2660
2661 /**
2662 ********************************************************************************
2663 * @fn hme_enc_init_ctxt()
2664 *
2665 * @brief initialise context memory
2666 *
2667 * @param[in] ps_prms : init prms
2668 *
2669 * @param[in] pv_ctxt : ME ctxt
2670 *
2671 * @return number of memtabs
2672 ********************************************************************************
2673 */
void hme_enc_init_ctxt(
    me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
{
    S32 i, j, num_thrds;
    me_ctxt_t *ps_ctxt;
    me_frm_ctxt_t *ps_frm_ctxt;

    /* initialise the parameters in context of all threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];

        /* Store Tile params base into ME context */
        ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;

        /* Copy the init prms to context. This is invariant across the      */
        /* ME-frame-parallel contexts, hence hoisted out of the loop below. */
        ps_ctxt->s_init_prms = *ps_prms;

        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
        {
            ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];

            /* Initialize some other variables in ctxt */
            ps_frm_ctxt->i4_prev_poc = INVALID_POC;

            ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;

            ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;

            ps_frm_ctxt->i4_is_prev_frame_reference = 0;

            ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;

            /* Initialize mv grids for L0 and L1 used in final refinement layer */
            {
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
                hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
            }

            /* Hook up the per-list reference-bits lookup tables */
            ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
            ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
        }
    }

    /* since same layer desc pointer is stored in all the threads ctxt */
    /* layer init is done only using 0th thread ctxt */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];

    /* Initialize all layers descriptors to have INVALID_POC, meaning unfilled */
    for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
    {
        /* only encode layer (layer 0) is processed in enc-layer ME */
        for(j = 0; j < 1; j++)
        {
            layer_ctxt_t *ps_layer;
            ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
            ps_layer->i4_poc = INVALID_POC;
            ps_layer->i4_is_free = 1;
            ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
            ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
            ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
            ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
            ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
            ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];

            /* Clear the global motion vectors for all refs and lobes */
            memset(
                ps_layer->s_global_mv,
                0,
                sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
        }
    }
}
2751
2752 /**
2753 *******************************************************************************
2754 * @fn S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt)
2755 *
2756 * @brief Initialises the Encode Layer HME ctxt
2757 *
2758 * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2759 * up its requirements of memory
2760 *
2761 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2762 * amt of memory
2763 *
 * @return 0 on success, -1 on failure (invalid init prms or insufficient memtabs)
2765 *******************************************************************************
2766 */
hme_enc_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt,WORD32 i4_num_me_frm_pllel)2767 S32 hme_enc_init(
2768 void *pv_ctxt,
2769 hme_memtab_t *ps_memtabs,
2770 hme_init_prms_t *ps_prms,
2771 rc_quant_t *ps_rc_quant_ctxt,
2772 WORD32 i4_num_me_frm_pllel)
2773 {
2774 S32 num, tot;
2775 me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt;
2776
2777 tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2778 /* Validation of init params */
2779 if(-1 == hme_validate_init_prms(ps_prms))
2780 return (-1);
2781
2782 num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
2783 if(num > tot)
2784 return (-1);
2785
2786 /* Initialize all enumerations based globals */
2787 //hme_init_globals(); /* done as part of coarse me */
2788
2789 /* Copy the memtabs into the context for returning during free */
2790 memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2791
2792 /* initialize the context and related buffers */
2793 hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt);
2794 return (0);
2795 }
2796
2797 /**
2798 *******************************************************************************
2799 * @fn S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2800 *
2801 * @brief Initialises the Coarse HME ctxt
2802 *
2803 * @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2804 * up its requirements of memory
2805 *
2806 * @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2807 * amt of memory
2808 *
 * @return 0 on success, -1 on failure (invalid init prms or insufficient memtabs)
2810 *******************************************************************************
2811 */
hme_coarse_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2812 S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2813 {
2814 S32 num, tot;
2815 coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2816
2817 tot = hme_coarse_num_alloc();
2818 /* Validation of init params */
2819 if(-1 == hme_validate_init_prms(ps_prms))
2820 return (-1);
2821
2822 num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
2823 if(num > tot)
2824 return (-1);
2825
2826 /* Initialize all enumerations based globals */
2827 hme_init_globals();
2828
2829 /* Copy the memtabs into the context for returning during free */
2830 memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2831
2832 /* initialize the context and related buffers */
2833 hme_coarse_init_ctxt(ps_ctxt, ps_prms);
2834
2835 return (0);
2836 }
2837
2838 /**
2839 *******************************************************************************
2840 * @fn S32 hme_set_resolution(void *pv_me_ctxt,
2841 * S32 n_enc_layers,
2842 * S32 *p_wd,
2843 * S32 *p_ht
2844 *
2845 * @brief Sets up the layers based on resolution information.
2846 *
2847 * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2848 *
2849 * @param[in] n_enc_layers : Number of layers encoded
2850 *
2851 * @param[in] p_wd : Pointer to an array having widths for each encode layer
2852 *
2853 * @param[in] p_ht : Pointer to an array having heights for each encode layer
2854 *
2855 * @return void
2856 *******************************************************************************
2857 */
2858
void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
{
    S32 n_tot_layers, num_layers_explicit_search, i, j;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_ctxt;

    /* Local copies of per-layer dimensions; entries beyond n_enc_layers are
     * filled in by hme_derive_num_layers() below */
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
    memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
    memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));

    ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;

    /* Frame-level context for the ME frame being configured */
    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /*************************************************************************/
    /* Derive the number of HME layers, including both encoded and non encode*/
    /* This function also derives the width and ht of each layer.            */
    /*************************************************************************/
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
    num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
    /* Non-positive config value means: explicit search in all non-encode layers */
    if(num_layers_explicit_search <= 0)
        num_layers_explicit_search = n_tot_layers - 1;

    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
    ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
    /* Mark first n_enc_layers layers as "encode" layers, rest as non-encode */
    memset(ps_ctxt->u1_encode, 0, n_tot_layers);
    memset(ps_ctxt->u1_encode, 1, n_enc_layers);

    /* only encode layer should be processed */
    ps_ctxt->num_layers = n_tot_layers;

    /* Layer 0 (encode layer) dimensions */
    ps_ctxt->i4_wd = a_wd[0];
    ps_ctxt->i4_ht = a_ht[0];

    /* Memtabs : Layers * num-ref + 1 */
    /* Only layer 0 is owned by enc-layer ME, hence the inner loop runs once */
    for(i = 0; i < ps_ctxt->max_num_ref + 1; i++)
    {
        for(j = 0; j < 1; j++)
        {
            S32 wd, ht;
            layer_ctxt_t *ps_layer;
            U08 u1_enc = ps_ctxt->u1_encode[j];
            wd = a_wd[j];
            ht = a_ht[j];
            ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j];
            hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
        }
    }
}
2909
2910 /**
2911 *******************************************************************************
2912 * @fn S32 hme_coarse_set_resolution(void *pv_me_ctxt,
2913 * S32 n_enc_layers,
2914 * S32 *p_wd,
2915 * S32 *p_ht
2916 *
2917 * @brief Sets up the layers based on resolution information.
2918 *
2919 * @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2920 *
2921 * @param[in] n_enc_layers : Number of layers encoded
2922 *
2923 * @param[in] p_wd : Pointer to an array having widths for each encode layer
2924 *
2925 * @param[in] p_ht : Pointer to an array having heights for each encode layer
2926 *
2927 * @return void
2928 *******************************************************************************
2929 */
2930
void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
{
    S32 n_tot_layers, num_layers_explicit_search, i, j;
    coarse_me_ctxt_t *ps_ctxt;
    /* Local copies of per-layer dimensions; entries beyond n_enc_layers are
     * filled in by hme_derive_num_layers() below */
    S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
    S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
    memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
    memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));

    ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    /*************************************************************************/
    /* Derive the number of HME layers, including both encoded and non encode*/
    /* This function also derives the width and ht of each layer.            */
    /*************************************************************************/
    n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
    num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search;
    /* Non-positive config value means: explicit search in all non-encode layers */
    if(num_layers_explicit_search <= 0)
        num_layers_explicit_search = n_tot_layers - 1;

    num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
    ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
    /* Mark first n_enc_layers layers as "encode" layers, rest as non-encode */
    memset(ps_ctxt->u1_encode, 0, n_tot_layers);
    memset(ps_ctxt->u1_encode, 1, n_enc_layers);

    /* encode layer should be excluded */
    ps_ctxt->num_layers = n_tot_layers;

    memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
    memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);

    /* Memtabs : Layers * num-ref + 1 */
    /* Coarse ME owns only non-encode layers, hence j starts at 1 */
    for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
    {
        for(j = 1; j < n_tot_layers; j++)
        {
            S32 wd, ht;
            layer_ctxt_t *ps_layer;
            U08 u1_enc = ps_ctxt->u1_encode[j];
            wd = a_wd[j];
            ht = a_ht[j];
            ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
            hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
        }
    }
}
2976
/* Returns the index of the layers descriptor whose encode layer matches the
 * given poc and idr gop number. The descriptor is expected to exist; an
 * assertion fires (and -1 is returned) if it does not. */
S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    S32 i4_num_descrs = (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1;
    S32 i4_idx;

    for(i4_idx = 0; i4_idx < i4_num_descrs; i4_idx++)
    {
        layer_ctxt_t *ps_enc_layer = ps_ctxt->as_ref_descr[i4_idx].aps_layers[0];

        if((ps_enc_layer->i4_poc == i4_poc) && (ps_enc_layer->i4_idr_gop_num == i4_idr_gop_num))
        {
            return i4_idx;
        }
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
2991
/* Returns the index of the coarse-ME layers descriptor whose layer 1 carries
 * the given poc. The descriptor is expected to exist; an assertion fires
 * (and -1 is returned) if it does not. */
S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
{
    S32 i4_num_descrs = ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME;
    S32 i4_idx;

    for(i4_idx = 0; i4_idx < i4_num_descrs; i4_idx++)
    {
        if(i4_poc == ps_ctxt->as_ref_descr[i4_idx].aps_layers[1]->i4_poc)
        {
            return i4_idx;
        }
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3005
/* Finds a free layers descriptor, claims it (clears i4_is_free on its encode
 * layer) and returns its index. Pool sizing guarantees a free entry exists;
 * an assertion fires (and -1 is returned) otherwise. */
S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
{
    S32 i4_num_descrs = (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1;
    S32 i4_idx;

    for(i4_idx = 0; i4_idx < i4_num_descrs; i4_idx++)
    {
        layer_ctxt_t *ps_enc_layer = ps_ctxt->as_ref_descr[i4_idx].aps_layers[0];

        if(1 == ps_enc_layer->i4_is_free)
        {
            /* Claim the descriptor before handing out its index */
            ps_enc_layer->i4_is_free = 0;
            return i4_idx;
        }
    }

    /* Should not come here */
    ASSERT(0);
    return (-1);
}
3022
hme_coarse_find_free_descr_idx(void * pv_ctxt)3023 S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
3024 {
3025 S32 i;
3026
3027 coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
3028
3029 for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
3030 {
3031 if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1)
3032 return i;
3033 }
3034 /* Should not come here */
3035 ASSERT(0);
3036 return (-1);
3037 }
3038
/* Releases the enc-layer ME descriptors of the pictures listed (terminated by
 * INVALID_POC) so they can be reused for fresh pictures. */
void hme_discard_frm(
    void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
    S32 count = 0, idx, i;
    layers_descr_t *ps_descr;

    /* Search for the id of the layer descriptor that has this poc */
    while(p_pocs_to_remove[count] != INVALID_POC)
    {
        /* NOTE(review): asserts the list holds at most one poc per call —
         * presumably callers never batch removals here; confirm at call sites */
        ASSERT(count == 0);
        idx = hme_find_descr_idx(
            ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);
        ps_descr = &ps_ctxt->as_ref_descr[idx];
        /*********************************************************************/
        /* Setting i4_is_free = 1 in all layers invalidates this layer ctxt  */
        /* Now this can be used for a fresh picture.                         */
        /*********************************************************************/
        /* only layer 0 (encode layer) is owned by enc-layer ME */
        for(i = 0; i < 1; i++)
        {
            ps_descr->aps_layers[i]->i4_is_free = 1;
        }
        count++;
    }
}
3064
/* Releases the coarse-ME descriptors of the pictures listed (terminated by
 * -1) by resetting the poc of all non-encode layers, making them reusable. */
void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 i4_count;

    /* Locate and invalidate the descriptor of each listed poc */
    for(i4_count = 0; p_pocs_to_remove[i4_count] != -1; i4_count++)
    {
        S32 i4_lyr;
        S32 i4_idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[i4_count]);
        layers_descr_t *ps_descr = &ps_ctxt->as_ref_descr[i4_idx];

        /*********************************************************************/
        /* Setting poc = -1 in all layers invalidates this layer ctxt        */
        /* Now this can be used for a fresh picture.                         */
        /*********************************************************************/
        for(i4_lyr = 1; i4_lyr < ps_ctxt->num_layers; i4_lyr++)
        {
            ps_descr->aps_layers[i4_lyr]->i4_poc = -1;
        }
    }
}
3087
/* Copies a reference picture's identity (poc, idr gop num), recon plane
 * pointers and recon geometry into its layer descriptor, and mirrors the
 * geometry into the current picture's descriptor as well. */
void hme_update_layer_desc(
    layers_descr_t *ps_layers_desc,
    hme_ref_desc_t *ps_ref_desc,
    S32 start_lyr_id,
    S32 num_layers,
    layers_descr_t *ps_curr_desc)
{
    S32 i4_lyr;

    for(i4_lyr = start_lyr_id; i4_lyr < num_layers; i4_lyr++)
    {
        layer_ctxt_t *ps_ref_layer = ps_layers_desc->aps_layers[i4_lyr];
        layer_ctxt_t *ps_cur_layer = ps_curr_desc->aps_layers[i4_lyr];

        ps_ref_layer->i4_poc = ps_ref_desc->i4_poc;
        ps_ref_layer->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;

        /* Recon planes (fpel and the 3 hpel phases) for this ref at this layer */
        ps_ref_layer->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxfy;
        ps_ref_layer->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxfy;
        ps_ref_layer->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxhy;
        ps_ref_layer->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxhy;

        /*********************************************************************/
        /* reconstruction strides, offsets and padding info are copied for   */
        /* this reference pic. It is assumed that these will be same across  */
        /* pics, so even the current pic has this info updated, though the   */
        /* current pic still does not have valid recon pointers.             */
        /*********************************************************************/
        ps_ref_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_ref_layer->i4_rec_offset = ps_ref_desc->as_ref_info[i4_lyr].luma_offset;
        ps_ref_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_ref_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;

        ps_cur_layer->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_cur_layer->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_cur_layer->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;
    }
}
3127
/* Registers a new input picture with enc-layer ME: grabs a free layers
 * descriptor, publishes it (and the prev/curr poc) to every processing
 * thread's frame context, then fills the encode layer's input attributes. */
void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_ctxt;

    hme_inp_buf_attr_t *ps_attr;
    S32 i4_poc, idx, i, i4_prev_poc;
    S32 num_thrds, prev_me_frm_id;
    S32 i4_idr_gop_num, i4_is_reference;

    /* since same layer desc pointer is stored in all thread ctxt */
    /* a free idx is obtained using 0th thread ctxt pointer */

    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];

    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /* Deriving the previous poc from previous frames context */
    /* me_frm_id cycles through [0, MAX_NUM_ME_PARALLEL), so wrap backwards */
    if(me_frm_id == 0)
        prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
    else
        prev_me_frm_id = me_frm_id - 1;

    i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;

    /* Obtain an empty layer descriptor (also marks it as in-use) */
    idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
    ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        me_frm_ctxt_t *ps_tmp_frm_ctxt;

        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

        /* Same descriptor index is valid in every thread's context */
        ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx];

        /* Do the initialization for the first thread alone */
        i4_poc = ps_inp_desc->i4_poc;
        i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
        i4_is_reference = ps_inp_desc->i4_is_reference;
        /*Update poc id of previously encoded frm and curr frm */
        ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
        ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
    }

    /* since same layer desc pointer is stored in all thread ctxt */
    /* following processing is done using 0th thread ctxt pointer */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* only encode layer (layer 0) is handled by enc-layer ME */
    for(i = 0; i < 1; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
        ps_layer_ctxt->i4_is_reference = i4_is_reference;
        ps_layer_ctxt->i4_non_ref_free = 0;

        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.*/
            S32 wd, dst_stride;

            /* layer 0 is always an encode layer, so this branch is for i > 0 */
            ASSERT(i != 0);

            wd = ps_ctxt->i4_wd;

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }

    return;
}
3219
/* Registers a new input picture with coarse ME: publishes the descriptor at
 * i4_curr_idx (and the prev/curr poc) to every processing thread's context,
 * then sets the poc and input attributes of all non-encode layers. */
void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    hme_inp_buf_attr_t *ps_attr;
    S32 i4_poc, i;
    S32 num_thrds;

    /* since same layer desc pointer is stored in all thread ctxt */
    /* a free idx is obtained using 0th thread ctxt pointer */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        /* Same descriptor index is valid in every thread's context */
        ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx];
        i4_poc = ps_inp_desc->i4_poc;

        /*Update poc id of previously encoded frm and curr frm */
        ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc;
        ps_ctxt->i4_curr_poc = i4_poc;
    }

    /* since same layer desc pointer is stored in all thread ctxt */
    /* following processing is done using 0th thread ctxt pointer */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* only non encode layer */
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer. */
            /* decomp of lower layers happens on a seperate pass */
            /* Coarse Me should export the pointers to the caller */
            S32 wd, dst_stride;

            ASSERT(i != 0);

            /* width of the next-larger layer; this layer is its 2x decimation */
            wd = ps_ctxt->a_wd[i - 1];

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }
}
3284
/* Returns the cap on the number of ME results stored per partition type.
 * Defaults to MAX_RESULTS_PER_PART; clamped to 1 for certain layer/preset
 * combinations when the corresponding compile-time RESTRICT_* switch is on. */
static __inline U08 hme_determine_num_results_per_part(
    U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
{
    if((0 == u1_layer_id) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
    {
        switch(e_quality_preset)
        {
            case ME_XTREME_SPEED_25:
            case ME_XTREME_SPEED:
            case ME_HIGH_SPEED:
            case ME_MEDIUM_SPEED:
            case ME_HIGH_QUALITY:
            case ME_PRISTINE_QUALITY:
            {
                return 1;
            }
            default:
            {
                break;
            }
        }
    }
    else if((1 == u1_layer_id) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
    {
        switch(e_quality_preset)
        {
            case ME_XTREME_SPEED_25:
            case ME_HIGH_QUALITY:
            case ME_PRISTINE_QUALITY:
            {
                return 1;
            }
            default:
            {
                break;
            }
        }
    }
    else if(
        (2 == u1_layer_id) && (u1_num_layers > 3) &&
        !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
    {
        switch(e_quality_preset)
        {
            case ME_XTREME_SPEED_25:
            case ME_XTREME_SPEED:
            case ME_HIGH_SPEED:
            case ME_MEDIUM_SPEED:
            {
                return 1;
            }
            default:
            {
                break;
            }
        }
    }

    return MAX_RESULTS_PER_PART;
}
3357
/* Fills, for the encode layer only, the per-location cap on fpel search
 * candidates. Fast presets (when the compile-time switch is on) allow just 1
 * per location; otherwise the cap scales with the number of active refs,
 * doubled for the COLOCATED location. Non-zero layers are left untouched. */
static __inline void hme_max_search_cands_per_search_cand_loc_populator(
    hme_frm_prms_t *ps_frm_prms,
    U08 *pu1_num_fpel_search_cands,
    U08 u1_layer_id,
    ME_QUALITY_PRESETS_T e_quality_preset)
{
    if(0 == u1_layer_id)
    {
        S32 i;

        for(i = 0; i < NUM_SEARCH_CAND_LOCATIONS; i++)
        {
            switch(e_quality_preset)
            {
#if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
                case ME_XTREME_SPEED_25:
                case ME_XTREME_SPEED:
                case ME_HIGH_SPEED:
                case ME_MEDIUM_SPEED:
                {
                    pu1_num_fpel_search_cands[i] = 1;

                    break;
                }
#endif
                default:
                {
                    /* at least 2; COLOCATED gets twice the active-ref count */
                    pu1_num_fpel_search_cands[i] =
                        MAX(2,
                            MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) *
                                ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i) + 1));

                    break;
                }
            }
        }
    }
}
3396
3397 static __inline U08
hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3398 hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3399 {
3400 U08 u1_num_cands = 2;
3401
3402 if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
3403 {
3404 switch(e_quality_preset)
3405 {
3406 case ME_XTREME_SPEED_25:
3407 case ME_XTREME_SPEED:
3408 case ME_HIGH_SPEED:
3409 case ME_MEDIUM_SPEED:
3410 {
3411 u1_num_cands = 1;
3412
3413 break;
3414 }
3415 default:
3416 {
3417 u1_num_cands = 2;
3418
3419 break;
3420 }
3421 }
3422 }
3423
3424 return u1_num_cands;
3425 }
3426
3427 static __inline U08
hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3428 hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3429 {
3430 U08 i;
3431
3432 U08 u1_num_centers = 0;
3433
3434 if(0 == u1_layer_id)
3435 {
3436 switch(e_quality_preset)
3437 {
3438 case ME_XTREME_SPEED_25:
3439 {
3440 for(i = 0; i < TOT_NUM_PARTS; i++)
3441 {
3442 u1_num_centers += gau1_num_best_results_XS25[i];
3443 }
3444
3445 break;
3446 }
3447 case ME_XTREME_SPEED:
3448 {
3449 for(i = 0; i < TOT_NUM_PARTS; i++)
3450 {
3451 u1_num_centers += gau1_num_best_results_XS[i];
3452 }
3453
3454 break;
3455 }
3456 case ME_HIGH_SPEED:
3457 {
3458 for(i = 0; i < TOT_NUM_PARTS; i++)
3459 {
3460 u1_num_centers += gau1_num_best_results_HS[i];
3461 }
3462
3463 break;
3464 }
3465 case ME_MEDIUM_SPEED:
3466 {
3467 for(i = 0; i < TOT_NUM_PARTS; i++)
3468 {
3469 u1_num_centers += gau1_num_best_results_MS[i];
3470 }
3471
3472 break;
3473 }
3474 case ME_HIGH_QUALITY:
3475 {
3476 for(i = 0; i < TOT_NUM_PARTS; i++)
3477 {
3478 u1_num_centers += gau1_num_best_results_HQ[i];
3479 }
3480
3481 break;
3482 }
3483 case ME_PRISTINE_QUALITY:
3484 {
3485 for(i = 0; i < TOT_NUM_PARTS; i++)
3486 {
3487 u1_num_centers += gau1_num_best_results_PQ[i];
3488 }
3489
3490 break;
3491 }
3492 }
3493 }
3494
3495 return u1_num_centers;
3496 }
3497
/* Returns the number of subpel refinement centres for the encode layer: one
 * set of 2Nx2N candidates plus one set per NxN quadrant. 0 for other layers. */
static __inline U08 hme_determine_max_num_subpel_refine_centers(
    U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
{
    if(0 != u1_layer_id)
    {
        return 0;
    }

    return (U08)(u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands);
}
3510
hme_set_refine_prms(void * pv_refine_prms,U08 u1_encode,S32 num_ref,S32 layer_id,S32 num_layers,S32 num_layers_explicit_search,S32 use_4x4,hme_frm_prms_t * ps_frm_prms,double ** ppd_intra_costs,me_coding_params_t * ps_me_coding_tools)3511 void hme_set_refine_prms(
3512 void *pv_refine_prms,
3513 U08 u1_encode,
3514 S32 num_ref,
3515 S32 layer_id,
3516 S32 num_layers,
3517 S32 num_layers_explicit_search,
3518 S32 use_4x4,
3519 hme_frm_prms_t *ps_frm_prms,
3520 double **ppd_intra_costs,
3521 me_coding_params_t *ps_me_coding_tools)
3522 {
3523 refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms;
3524
3525 ps_refine_prms->i4_encode = u1_encode;
3526 ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled;
3527 ps_refine_prms->i4_layer_id = layer_id;
3528 /*************************************************************************/
3529 /* Refinement layers have two lambdas, one for closed loop, another for */
3530 /* open loop. Non encode layers use only open loop lambda. */
3531 /*************************************************************************/
3532 ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
3533 ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
3534 ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;
3535 ps_refine_prms->lambda_inp =
3536 ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
3537 ps_refine_prms->lambda_recon =
3538 ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
3539
3540 if((u1_encode) && (NULL != ppd_intra_costs))
3541 {
3542 ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id];
3543 }
3544
3545 /* Explicit or implicit depends on number of layers having eplicit search */
3546 if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search))
3547 {
3548 ps_refine_prms->explicit_ref = 0;
3549 ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref);
3550 }
3551 else
3552 {
3553 ps_refine_prms->explicit_ref = 1;
3554 ps_refine_prms->i4_num_ref_fpel = num_ref;
3555 }
3556
3557 ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH;
3558
3559 ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
3560 ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;
3561
3562 if(u1_encode)
3563 {
3564 ps_refine_prms->i4_num_mvbank_results = 1;
3565 ps_refine_prms->i4_use_rec_in_fpel = 1;
3566 ps_refine_prms->i4_num_steps_fpel_refine = 1;
3567
3568 if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
3569 {
3570 ps_refine_prms->i4_num_fpel_results = 4;
3571 ps_refine_prms->i4_num_32x32_merge_results = 4;
3572 ps_refine_prms->i4_num_64x64_merge_results = 4;
3573 ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
3574 ps_refine_prms->i4_use_satd_subpel = 1;
3575 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3576 ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3577 ps_refine_prms->u1_subpel_candt_threshold = 1;
3578 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3579 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
3580 ps_refine_prms->limit_active_partitions = 0;
3581 }
3582 else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
3583 {
3584 ps_refine_prms->i4_num_fpel_results = 4;
3585 ps_refine_prms->i4_num_32x32_merge_results = 4;
3586 ps_refine_prms->i4_num_64x64_merge_results = 4;
3587 ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
3588 ps_refine_prms->i4_use_satd_subpel = 1;
3589 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3590 ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3591 ps_refine_prms->u1_subpel_candt_threshold = 2;
3592 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3593 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
3594 ps_refine_prms->limit_active_partitions = 0;
3595 }
3596 else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
3597 {
3598 ps_refine_prms->i4_num_fpel_results = 1;
3599 ps_refine_prms->i4_num_32x32_merge_results = 2;
3600 ps_refine_prms->i4_num_64x64_merge_results = 2;
3601 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3602 ps_refine_prms->i4_use_satd_subpel = 1;
3603 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
3604 ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3605 ps_refine_prms->u1_subpel_candt_threshold = 3;
3606 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3607 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
3608 ps_refine_prms->limit_active_partitions = 1;
3609 }
3610 else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
3611 {
3612 ps_refine_prms->i4_num_fpel_results = 1;
3613 ps_refine_prms->i4_num_32x32_merge_results = 2;
3614 ps_refine_prms->i4_num_64x64_merge_results = 2;
3615 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3616 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3617 ps_refine_prms->u1_max_subpel_candts_NxN = 1;
3618 ps_refine_prms->i4_use_satd_subpel = 0;
3619 ps_refine_prms->u1_subpel_candt_threshold = 0;
3620 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3621 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
3622 ps_refine_prms->limit_active_partitions = 1;
3623 }
3624 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
3625 {
3626 ps_refine_prms->i4_num_fpel_results = 1;
3627 ps_refine_prms->i4_num_32x32_merge_results = 2;
3628 ps_refine_prms->i4_num_64x64_merge_results = 2;
3629 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3630 ps_refine_prms->i4_use_satd_subpel = 0;
3631 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3632 ps_refine_prms->u1_max_subpel_candts_NxN = 0;
3633 ps_refine_prms->u1_subpel_candt_threshold = 0;
3634 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3635 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
3636 ps_refine_prms->limit_active_partitions = 1;
3637 }
3638 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
3639 {
3640 ps_refine_prms->i4_num_fpel_results = 1;
3641 ps_refine_prms->i4_num_32x32_merge_results = 2;
3642 ps_refine_prms->i4_num_64x64_merge_results = 2;
3643 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3644 ps_refine_prms->i4_use_satd_subpel = 0;
3645 ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
3646 ps_refine_prms->u1_max_subpel_candts_NxN = 0;
3647 ps_refine_prms->u1_subpel_candt_threshold = 0;
3648 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3649 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
3650 ps_refine_prms->limit_active_partitions = 1;
3651 }
3652 }
3653 else
3654 {
3655 ps_refine_prms->i4_num_fpel_results = 2;
3656 ps_refine_prms->i4_use_rec_in_fpel = 0;
3657 ps_refine_prms->i4_num_steps_fpel_refine = 1;
3658 ps_refine_prms->i4_num_steps_hpel_refine = 0;
3659 ps_refine_prms->i4_num_steps_qpel_refine = 0;
3660
3661 if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
3662 {
3663 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3664 ps_refine_prms->i4_use_satd_subpel = 1;
3665 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3666 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
3667 }
3668 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
3669 {
3670 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3671 ps_refine_prms->i4_use_satd_subpel = 0;
3672 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3673 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
3674 }
3675 else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
3676 {
3677 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3678 ps_refine_prms->i4_use_satd_subpel = 0;
3679 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3680 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
3681 }
3682 else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
3683 {
3684 ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
3685 ps_refine_prms->i4_use_satd_subpel = 1;
3686 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3687 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
3688 }
3689 else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
3690 {
3691 ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
3692 ps_refine_prms->i4_use_satd_subpel = 1;
3693 ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
3694 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
3695 }
3696 else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
3697 {
3698 ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
3699 ps_refine_prms->i4_use_satd_subpel = 1;
3700 ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
3701 ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
3702 }
3703
3704 /* Following fields unused in the non-encode layers */
3705 /* But setting the same to default values */
3706 ps_refine_prms->i4_num_32x32_merge_results = 4;
3707 ps_refine_prms->i4_num_64x64_merge_results = 4;
3708
3709 if(!ps_frm_prms->bidir_enabled)
3710 {
3711 ps_refine_prms->limit_active_partitions = 0;
3712 }
3713 else
3714 {
3715 ps_refine_prms->limit_active_partitions = 1;
3716 }
3717 }
3718
3719 ps_refine_prms->i4_enable_4x4_part =
3720 hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);
3721
3722 if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
3723 {
3724 ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
3725 layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
3726
3727 hme_max_search_cands_per_search_cand_loc_populator(
3728 ps_frm_prms,
3729 ps_refine_prms->au1_num_fpel_search_cands,
3730 layer_id,
3731 ps_me_coding_tools->e_me_quality_presets);
3732
3733 ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands(
3734 layer_id, ps_me_coding_tools->e_me_quality_presets);
3735
3736 ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers(
3737 layer_id, ps_me_coding_tools->e_me_quality_presets);
3738
3739 ps_refine_prms->u1_max_num_subpel_refine_centers =
3740 hme_determine_max_num_subpel_refine_centers(
3741 layer_id,
3742 ps_refine_prms->u1_max_subpel_candts_2Nx2N,
3743 ps_refine_prms->u1_max_subpel_candts_NxN);
3744 }
3745 else
3746 {
3747 if(0 == layer_id)
3748 {
3749 ps_refine_prms->i4_num_results_per_part =
3750 ps_me_coding_tools->u1_num_results_per_part_in_l0me;
3751 }
3752 else if(1 == layer_id)
3753 {
3754 ps_refine_prms->i4_num_results_per_part =
3755 ps_me_coding_tools->u1_num_results_per_part_in_l1me;
3756 }
3757 else if((2 == layer_id) && (num_layers > 3))
3758 {
3759 ps_refine_prms->i4_num_results_per_part =
3760 ps_me_coding_tools->u1_num_results_per_part_in_l2me;
3761 }
3762 else
3763 {
3764 ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
3765 layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
3766 }
3767
3768 memset(
3769 ps_refine_prms->au1_num_fpel_search_cands,
3770 ps_me_coding_tools->u1_max_num_coloc_cands,
3771 sizeof(ps_refine_prms->au1_num_fpel_search_cands));
3772
3773 ps_refine_prms->u1_max_2nx2n_tu_recur_cands =
3774 ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;
3775
3776 ps_refine_prms->u1_max_num_fpel_refine_centers =
3777 ps_me_coding_tools->u1_max_num_fpel_refine_centers;
3778
3779 ps_refine_prms->u1_max_num_subpel_refine_centers =
3780 ps_me_coding_tools->u1_max_num_subpel_refine_centers;
3781 }
3782
3783 if(layer_id != 0)
3784 {
3785 ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part;
3786 }
3787
3788 /* 4 * lambda */
3789 ps_refine_prms->sdi_threshold =
3790 (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
3791 (ps_frm_prms->lambda_q_shift - 2);
3792
3793 ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
3794 MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
3795 }
3796
void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
{
    blk_ctb_attrs_t *ps_cur_blk_attrs = &ps_attrs->as_blk_attrs[0];
    S32 valid_cu_map = 0;
    S32 horz_merge_bits, vert_merge_bits;
    S32 num_16x16_horz, num_16x16_vert;
    S32 enc_blk;

    /* Number of (possibly partial) 16x16 CUs covering the CTB area */
    num_16x16_horz = (num_8x8_horz + 1) >> 1;
    num_16x16_vert = (num_8x8_vert + 1) >> 1;
    ps_attrs->u1_num_blks_in_ctb = (U08)(num_16x16_horz * num_16x16_vert);

    /*************************************************************************/
    /* Walk the 16 CU positions in encode (z-scan) order:                    */
    /*          0  1  4  5                                                   */
    /*          2  3  6  7                                                   */
    /*          8  9 12 13                                                   */
    /*         10 11 14 15                                                   */
    /* Positions lying beyond the cropped width/height are skipped, so the   */
    /* surviving CUs pack into consecutive entries of as_blk_attrs. E.g. for */
    /* num_16x16_horz = 2 and num_16x16_vert = 4, the right two columns drop */
    /* out and valid_cu_map becomes 1111 0000 1111 0000, with                */
    /* u1_merge_to_32x32_flag 1010 and u1_merge_to_64x64_flag 0.             */
    /*************************************************************************/
    for(enc_blk = 0; enc_blk < 16; enc_blk++)
    {
        U08 u1_mask_8x8 = 0xF;
        S32 raster_x = gau1_encode_to_raster_x[enc_blk];
        S32 raster_y = gau1_encode_to_raster_y[enc_blk];

        if((raster_x >= num_16x16_horz) || (raster_y >= num_16x16_vert))
        {
            continue;
        }

        /* The CU at this encode position lies inside the picture: record it */
        valid_cu_map |= (1 << enc_blk);
        ps_cur_blk_attrs->u1_blk_id_in_full_ctb = enc_blk;
        ps_cur_blk_attrs->u1_blk_x = raster_x;
        ps_cur_blk_attrs->u1_blk_y = raster_y;

        /* Knock out 8x8 sub-blocks 1/3 (right column) when the 16x16 CU */
        /* straddles the right picture edge                              */
        if(((raster_x << 1) + 2) > num_8x8_horz)
        {
            u1_mask_8x8 &= 0x5;
        }
        /* Knock out 8x8 sub-blocks 2/3 (bottom row) when the 16x16 CU */
        /* straddles the bottom picture edge                           */
        if(((raster_y << 1) + 2) > num_8x8_vert)
        {
            u1_mask_8x8 &= 0x3;
        }
        ps_cur_blk_attrs->u1_blk_8x8_mask = u1_mask_8x8;
        ps_cur_blk_attrs++;
    }

    ps_attrs->cu_16x16_valid_flag = valid_cu_map;

    /* A 32x32 merge is permitted only where both the horizontal and the */
    /* vertical extents allow it                                         */
    horz_merge_bits = (num_8x8_horz >= 8) ? 0xF : ((num_8x8_horz >= 4) ? 0x5 : 0x0);
    vert_merge_bits = (num_8x8_vert >= 8) ? 0xF : ((num_8x8_vert >= 4) ? 0x3 : 0x0);

    ps_attrs->u1_merge_to_32x32_flag = (U08)(horz_merge_bits & vert_merge_bits);

    /* 64x64 merge requires all four 32x32 merges to be possible */
    ps_attrs->u1_merge_to_64x64_flag = (ps_attrs->u1_merge_to_32x32_flag == 0xF) ? 1 : 0;
}
3872
/**
********************************************************************************
* @fn hme_set_ctb_attrs
*
* @brief Initialises CTB boundary attributes for the four CTB categories:
*        interior (full 64x64), right-edge, bottom-edge and bottom-right
*        corner. Edge categories are set up only when the corresponding
*        picture dimension is not a multiple of 64.
*
* @param[out] ps_attrs : array of boundary attribute structs, indexed by the
*                        CTB_* category enums
*
* @param[in] wd : picture width in pixels
*
* @param[in] ht : picture height in pixels
*
* @return None
********************************************************************************
*/
void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
{
    S32 is_cropped_rt, is_cropped_bot;

    /* A dimension not a multiple of 64 leaves partial CTBs on that border */
    is_cropped_rt = ((wd & 63) != 0) ? 1 : 0;
    is_cropped_bot = ((ht & 63) != 0) ? 1 : 0;

    if(is_cropped_rt)
    {
        /* Right-edge CTBs: cropped width, full height (in 8x8 units) */
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], (wd & 63) >> 3, 8);
    }
    if(is_cropped_bot)
    {
        /* Bottom-edge CTBs: full width, cropped height (in 8x8 units) */
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, (ht & 63) >> 3);
    }
    /* Logical && (was bitwise &): both flags are 0/1 so the result is the */
    /* same, but && states the intended logical conjunction                */
    if(is_cropped_rt && is_cropped_bot)
    {
        /* Bottom-right corner CTB: cropped in both directions */
        hme_set_ctb_boundary_attrs(
            &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], (wd & 63) >> 3, (ht & 63) >> 3);
    }
    /* Interior CTBs are always the full 8x8 grid of 8x8 units */
    hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
}
3895
3896 /**
3897 ********************************************************************************
3898 * @fn hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3899 *
3900 * @brief When we have an mv with ref id "poc_to" for which predictor to be
 *         computed, and predictor is ref id "poc_from", this function returns
3902 * scale factor in Q8 for such a purpose
3903 *
3904 * @param[in] curr_poc : input picture poc
3905 *
3906 * @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
3907 *
 * @param[in] poc_to : POC of the pic, pointed to by ref id to be scaled to
3909 *
3910 * @return Scale factor in Q8 format
3911 ********************************************************************************
3912 */
hme_scale_for_ref_idx(S32 curr_poc,S32 poc_from,S32 poc_to)3913 S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3914 {
3915 S32 td, tx, tb;
3916 S16 i2_scf;
3917 /*************************************************************************/
3918 /* Approximate scale factor: 256 * num / denom */
3919 /* num = curr_poc - poc_to, denom = curr_poc - poc_from */
3920 /* Exact implementation as per standard. */
3921 /*************************************************************************/
3922
3923 tb = HME_CLIP((curr_poc - poc_to), -128, 127);
3924 td = HME_CLIP((curr_poc - poc_from), -128, 127);
3925
3926 tx = (16384 + (ABS(td) >> 1)) / td;
3927 //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127);
3928 i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
3929
3930 return (i2_scf);
3931 }
3932
3933 /**
3934 ********************************************************************************
3935 * @fn hme_process_frm_init
3936 *
 * @brief HME frame level initialisation processing function
3938 *
3939 * @param[in] pv_me_ctxt : ME ctxt pointer
3940 *
3941 * @param[in] ps_ref_map : Reference map prms pointer
3942 *
3943 * @param[in] ps_frm_prms :Pointer to frame params
3944 *
3945 * called only for encode layer
3946 *
 * @return None
3948 ********************************************************************************
3949 */
void hme_process_frm_init(
    void *pv_me_ctxt,
    hme_ref_map_t *ps_ref_map,
    hme_frm_prms_t *ps_frm_prms,
    WORD32 i4_me_frm_id,
    WORD32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
    me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];

    S32 i, j, desc_idx;
    S16 i2_max_x = 0, i2_max_y = 0;

    /* Set the Qp of current frm passed by caller. Required for intra cost */
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;
    ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;

    /* Bidir enabled or not */
    ps_ctxt->s_frm_prms = *ps_frm_prms;

    /*************************************************************************/
    /* Set up the ref pic parameters across all layers. For this, we do the */
    /* following: the application has given us a ref pic list, we go index  */
    /* by index and pick up the picture. A picture can be uniquely be mapped */
    /* to a POC. So we search all layer descriptor array to find the POC    */
    /* Once found, we update all attributes in this descriptor.             */
    /* During this updation process we also create an index of descriptor id */
    /* to ref id mapping. It is important to find the same POC in the layers */
    /* descr strcture since it holds the pyramid inputs for non encode layers*/
    /* Apart from this, e also update array containing the index of the descr*/
    /* During processing for ease of access, each layer has a pointer to aray*/
    /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
    /* we update this too.                                                  */
    /*************************************************************************/
    ps_ctxt->num_ref_past = 0;
    ps_ctxt->num_ref_future = 0;
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        S32 ref_id_lc, idx;
        hme_ref_desc_t *ps_ref_desc;

        ps_ref_desc = &ps_ref_map->as_ref_desc[i];
        ref_id_lc = ps_ref_desc->i1_ref_id_lc;
        /* Obtain the id of descriptor that contains this POC */
        idx = hme_find_descr_idx(
            ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);

        /* Update all layers in this descr with the reference attributes */
        /* Only layer 0 is updated here: this init runs for the encode layer */
        hme_update_layer_desc(
            &ps_thrd_ctxt->as_ref_descr[idx],
            ps_ref_desc,
            0,
            1, //ps_ctxt->num_layers,
            ps_ctxt->ps_curr_descr);

        /* Update the pointer holder for the recon planes */
        /* NOTE(review): these assignments do not depend on the loop index i */
        /* and could be hoisted outside the loop; harmless as written        */
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
            &ps_ctxt->apu1_list_rec_fxfy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
            &ps_ctxt->apu1_list_rec_hxfy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
            &ps_ctxt->apu1_list_rec_fxhy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
            &ps_ctxt->apu1_list_rec_hxhy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
            &ps_ctxt->apv_list_dep_mngr[0][0];

        /* Update the array having ref id lc to descr id mapping */
        ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;

        /* From ref id lc we need to work out the POC, So update this array */
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;

        /* When computing costs in L0 and L1 directions, we need the */
        /* respective ref id L0 and L1, so update this mapping       */
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
        /* Classify the ref as past or future of the current picture; when */
        /* curr POC is 0 every reference is treated as past                 */
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
            ps_ctxt->num_ref_past++;
        }
        else
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
            ps_ctxt->num_ref_future++;
        }

        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
        {
            /* copy the weight and offsets from current ref desc */
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;

            /* inv weight is stored in Q15 format */
            /* NOTE(review): assumes i2_weight != 0 — confirm upstream validation */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
        }
        else
        {
            /* store default wt and offset*/
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;

            /* inv weight is stored in Q15 format */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
        }
    }

    /* Terminate both direction lists with a -1 sentinel */
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;

    /*************************************************************************/
    /* Preparation of the TLU for bits for reference indices.               */
    /* Special case is that of numref = 2. (TEV)                            */
    /* Other cases uses UEV                                                 */
    /*************************************************************************/
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
    }

    if(ps_ref_map->i4_num_ref == 2)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
    }
    else if(ps_ref_map->i4_num_ref > 2)
    {
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
        {
            S32 l0, l1;
            l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
            l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
        }
    }

    /*************************************************************************/
    /* Preparation of the scaling factors for reference indices. The scale  */
    /* factor depends on distance of the two ref indices from current input */
    /* in terms of poc delta.                                               */
    /*************************************************************************/
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            S16 i2_scf_q8;
            S32 poc_from, poc_to;

            poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
            poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];

            i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
            /* Row-major table: entry [i][j] scales a j-ref mv to ref i */
            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
        }
    }

    /*************************************************************************/
    /* We store simplified look ups for 4 hpel planes and inp y plane for   */
    /* every layer and for every ref id in the layer. So update these lookups*/
    /*************************************************************************/
    /* Loop bound of 1: only the encode layer (layer 0) is handled here */
    for(i = 0; i < 1; i++)
    {
        U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
        U08 **ppu1_inp;
        void **ppvlist_dep_mngr;
        layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];

        ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
        ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
        ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
        ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
        ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
        ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            hme_ref_desc_t *ps_ref_desc;
            hme_ref_buf_info_t *ps_buf_info;
            layer_ctxt_t *ps_layer;
            S32 ref_id_lc;

            ps_ref_desc = &ps_ref_map->as_ref_desc[j];
            ps_buf_info = &ps_ref_desc->as_ref_info[i];
            ref_id_lc = ps_ref_desc->i1_ref_id_lc;

            desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
            ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i];

            ppu1_inp[j] = ps_buf_info->pu1_ref_src;
            ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
            ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
            ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
            ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
            ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;

            /* Update the curr descriptors reference pointers here */
            ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
            ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
            ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
            ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
            ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
        }
    }
    /*************************************************************************/
    /* The mv range for each layer is computed. For dyadic layers it will   */
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
    /*************************************************************************/
    /* Loop bound of 1: only layer 0 gets its range set in this function */
    for(i = 0; i < 1; i++)
    {
        layer_ctxt_t *ps_layer_ctxt;
        if(i == 0)
        {
            i2_max_x = ps_frm_prms->i2_mv_range_x;
            i2_max_y = ps_frm_prms->i2_mv_range_y;
        }
        else
        {
            /* Unreachable with the single-iteration loop above; retained */
            /* scaling formula from the multi-layer variant               */
            i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
            i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
        }
        ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;

        /*********************************************************************/
        /* Every layer maintains a reference id lc to POC mapping. This is  */
        /* because the mapping is unique for every frm. Also, in next frm,  */
        /* we require colocated mvs which means scaling according to temporal*/
        /*distance. Hence this mapping needs to be maintained in every      */
        /* layer ctxt                                                       */
        /*********************************************************************/
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
        if(ps_ref_map->i4_num_ref)
        {
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
                ps_ctxt->ai4_ref_idx_to_poc_lc,
                ps_ref_map->i4_num_ref * sizeof(S32));
        }
    }

    return;
}
4204
4205 /**
4206 ********************************************************************************
4207 * @fn hme_coarse_process_frm_init
4208 *
 * @brief HME frame level initialisation processing function
4210 *
4211 * @param[in] pv_me_ctxt : ME ctxt pointer
4212 *
4213 * @param[in] ps_ref_map : Reference map prms pointer
4214 *
4215 * @param[in] ps_frm_prms :Pointer to frame params
4216 *
 * @return None
4218 ********************************************************************************
4219 */
void hme_coarse_process_frm_init(
    void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 i, j, desc_idx;
    S16 i2_max_x = 0, i2_max_y = 0;

    /* Set the Qp of current frm passed by caller. Required for intra cost */
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;

    /* Bidir enabled or not */
    ps_ctxt->s_frm_prms = *ps_frm_prms;

    /*************************************************************************/
    /* Set up the ref pic parameters across all layers. For this, we do the */
    /* following: the application has given us a ref pic list, we go index  */
    /* by index and pick up the picture. A picture can be uniquely be mapped */
    /* to a POC. So we search all layer descriptor array to find the POC    */
    /* Once found, we update all attributes in this descriptor.             */
    /* During this updation process we also create an index of descriptor id */
    /* to ref id mapping. It is important to find the same POC in the layers */
    /* descr strcture since it holds the pyramid inputs for non encode layers*/
    /* Apart from this, e also update array containing the index of the descr*/
    /* During processing for ease of access, each layer has a pointer to aray*/
    /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
    /* we update this too.                                                  */
    /*************************************************************************/
    ps_ctxt->num_ref_past = 0;
    ps_ctxt->num_ref_future = 0;
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        S32 ref_id_lc, idx;
        hme_ref_desc_t *ps_ref_desc;

        ps_ref_desc = &ps_ref_map->as_ref_desc[i];
        ref_id_lc = ps_ref_desc->i1_ref_id_lc;
        /* Obtain the id of descriptor that contains this POC */
        idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);

        /* Update all layers in this descr with the reference attributes */
        /* Layers 1 .. num_layers-1 are updated: the coarse (non-encode) layers */
        hme_update_layer_desc(
            &ps_ctxt->as_ref_descr[idx],
            ps_ref_desc,
            1,
            ps_ctxt->num_layers - 1,
            ps_ctxt->ps_curr_descr);

        /* Update the array having ref id lc to descr id mapping */
        ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;

        /* From ref id lc we need to work out the POC, So update this array */
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;

        /* From ref id lc we need to work out the display num, So update this array */
        ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;

        /* When computing costs in L0 and L1 directions, we need the */
        /* respective ref id L0 and L1, so update this mapping       */
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
        /* Classify the ref as past or future of the current picture; when */
        /* curr POC is 0 every reference is treated as past                 */
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
            ps_ctxt->num_ref_past++;
        }
        else
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
            ps_ctxt->num_ref_future++;
        }
        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
        {
            /* copy the weight and offsets from current ref desc */
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;

            /* inv weight is stored in Q15 format */
            /* NOTE(review): assumes i2_weight != 0 — confirm upstream validation */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
        }
        else
        {
            /* store default wt and offset*/
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;

            /* inv weight is stored in Q15 format */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
        }
    }

    /* Terminate both direction lists with a -1 sentinel */
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;

    /*************************************************************************/
    /* Preparation of the TLU for bits for reference indices.               */
    /* Special case is that of numref = 2. (TEV)                            */
    /* Other cases uses UEV                                                 */
    /*************************************************************************/
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
    }

    if(ps_ref_map->i4_num_ref == 2)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
    }
    else if(ps_ref_map->i4_num_ref > 2)
    {
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
        {
            S32 l0, l1;
            l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
            l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
        }
    }

    /*************************************************************************/
    /* Preparation of the scaling factors for reference indices. The scale  */
    /* factor depends on distance of the two ref indices from current input */
    /* in terms of poc delta.                                               */
    /*************************************************************************/
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            S16 i2_scf_q8;
            S32 poc_from, poc_to;

            poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
            poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];

            i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
            /* Row-major table: entry [i][j] scales a j-ref mv to ref i */
            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
        }
    }

    /*************************************************************************/
    /* We store simplified look ups for inp y plane for                     */
    /* every layer and for every ref id in the layer.                       */
    /*************************************************************************/
    /* Starts at layer 1: the coarse layers only carry the input plane */
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        U08 **ppu1_inp;

        ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            hme_ref_desc_t *ps_ref_desc;
            hme_ref_buf_info_t *ps_buf_info;
            layer_ctxt_t *ps_layer;
            S32 ref_id_lc;

            ps_ref_desc = &ps_ref_map->as_ref_desc[j];
            ps_buf_info = &ps_ref_desc->as_ref_info[i];
            ref_id_lc = ps_ref_desc->i1_ref_id_lc;

            desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
            ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];

            ppu1_inp[j] = ps_layer->pu1_inp;
        }
    }
    /*************************************************************************/
    /* The mv range for each layer is computed. For dyadic layers it will   */
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
    /*************************************************************************/

    /* set to layer 0 search range params */
    i2_max_x = ps_frm_prms->i2_mv_range_x;
    i2_max_y = ps_frm_prms->i2_mv_range_y;

    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        layer_ctxt_t *ps_layer_ctxt;

        {
            /* Shrink the range by the layer-to-layer dimension ratio, */
            /* rounded down to a multiple of 8                         */
            i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
            i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));
        }
        ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;

        /*********************************************************************/
        /* Every layer maintains a reference id lc to POC mapping. This is  */
        /* because the mapping is unique for every frm. Also, in next frm,  */
        /* we require colocated mvs which means scaling according to temporal*/
        /*distance. Hence this mapping needs to be maintained in every      */
        /* layer ctxt                                                       */
        /*********************************************************************/
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
        if(ps_ref_map->i4_num_ref)
        {
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
                ps_ctxt->ai4_ref_idx_to_poc_lc,
                ps_ref_map->i4_num_ref * sizeof(S32));
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_disp_num,
                ps_ctxt->ai4_ref_idx_to_disp_num,
                ps_ref_map->i4_num_ref * sizeof(S32));
        }
    }

    return;
}
4440
/**
********************************************************************************
*  @fn     hme_process_frm
*
*  @brief  HME frame level processing function for the encode (L0) layer
*
*  @param[in] pv_me_ctxt : ME ctxt pointer
*
*  @param[in] ps_l0_ipe_input : L0 IPE input for the current frame
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ppd_intra_costs : pointer to array of intra cost buffers for each layer
*
*  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
*
*  @param[in] pf_ext_update_fxn : function pointer to update CTB results
*
*  @param[in] pv_coarse_layer : pointer to the coarsest layer ctxt
*
*  @param[in] pv_multi_thrd_ctxt : pointer to multi thread ctxt
*
*  @param[in] i4_frame_parallelism_level : degree of frame parallelism
*
*  @param[in] thrd_id : thread id
*
*  @param[in] i4_me_frm_id : index of the ME frame ctxt within the thread ctxt
*
*  @return None
********************************************************************************
*/
4464
void hme_process_frm(
    void *pv_me_ctxt,
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
    hme_ref_map_t *ps_ref_map,
    double **ppd_intra_costs,
    hme_frm_prms_t *ps_frm_prms,
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
    void *pv_coarse_layer,
    void *pv_multi_thrd_ctxt,
    S32 i4_frame_parallelism_level,
    S32 thrd_id,
    S32 i4_me_frm_id)
{
    me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
    me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
    layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
    refine_prms_t s_l0_refine_prms;

    /*************************************************************************/
    /* Final L0 (encode) layer ME call */
    /*************************************************************************/

    /* CTB attributes depend on position: corner / right edge / bot edge / center */
    hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht);

    /* Populate the refinement parameters for layer 0 */
    hme_set_refine_prms(
        &s_l0_refine_prms,
        ps_ctxt->u1_encode[0],
        ps_ref_map->i4_num_ref,
        0,
        ps_ctxt->num_layers,
        ps_ctxt->num_layers_explicit_search,
        ps_thrd_ctxt->s_init_prms.use_4x4,
        ps_frm_prms,
        ppd_intra_costs,
        &ps_thrd_ctxt->s_init_prms.s_me_coding_tools);

    hme_refine(
        ps_thrd_ctxt,
        &s_l0_refine_prms,
        pf_ext_update_fxn,
        ps_coarse_layer,
        ps_multi_thrd_ctxt,
        ME_JOB_ENC_LYR,
        thrd_id,
        i4_me_frm_id,
        ps_l0_ipe_input);

    /* Record whether the current picture is a reference; used as the */
    /* "previous frame is reference" status for the next frame. With   */
    /* frame parallelism enabled, the previous frame is never reused.  */
    ps_ctxt->i4_is_prev_frame_reference =
        (i4_frame_parallelism_level)
            ? 0
            : ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
                  ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
}
4534
/**
********************************************************************************
*  @fn     hme_coarse_process_frm
*
*  @brief  HME frame level processing function (coarse + refine layers)
*
*  @param[in] pv_me_ctxt : ME ctxt pointer
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
*
*  @param[in] pv_multi_thrd_ctxt : Multi thread related ctxt
*
*  @param[in] i4_ping_pong : ping-pong buffer index
*
*  @param[in] ppv_dep_mngr_hme_sync : dependency manager handles for HME sync
*
*  @return None
********************************************************************************
*/
4552
void hme_coarse_process_frm(
    void *pv_me_ctxt,
    hme_ref_map_t *ps_ref_map,
    hme_frm_prms_t *ps_frm_prms,
    void *pv_multi_thrd_ctxt,
    WORD32 i4_ping_pong,
    void **ppv_dep_mngr_hme_sync)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
    coarse_prms_t s_coarse_prms;
    refine_prms_t s_refine_prms;
    layer_ctxt_t *ps_coarsest_layer;
    S16 i2_mv_range;
    S32 layer_id;
    S32 lyr_job_type;
    S32 log_start_step;

    /*************************************************************************/
    /* Fire processing of all layers, starting with the coarsest layer. */
    /*************************************************************************/
    layer_id = ps_ctxt->num_layers - 1;
    ps_coarsest_layer = ps_ctxt->ps_curr_descr->aps_layers[layer_id];

    /* Max of x and y mv range governs the coarse search iteration count */
    i2_mv_range = ps_coarsest_layer->i2_max_mv_x;
    i2_mv_range = MAX(i2_mv_range, ps_coarsest_layer->i2_max_mv_y);

    /* Starting step size for refinement is preset dependent: */
    /* faster presets start at a coarser (bigger) step.        */
    log_start_step =
        (ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED) ? 0 : 1;

    s_coarse_prms.i4_layer_id = layer_id;
    s_coarse_prms.i4_max_iters = i2_mv_range >> log_start_step;
    s_coarse_prms.i4_start_step = 1 << log_start_step;
    s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
    s_coarse_prms.do_full_search = 1;

    /* Full search step: 2 for high-quality presets, 4 for high-speed */
    s_coarse_prms.full_search_step =
        (ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
            ? HME_COARSE_STEP_SIZE_HIGH_QUALITY
            : HME_COARSE_STEP_SIZE_HIGH_SPEED;

    s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;

    /* Coarse layer uses only one lambda: the open loop ME lambda, discounted */
    s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
    s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
    s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);

    hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);

    /*************************************************************************/
    /* All remaining refine layers (non-encode layers) processed below, */
    /* moving from coarser towards finer layers. */
    /*************************************************************************/
    for(layer_id = layer_id - 1, lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1;
        layer_id > 0;
        layer_id--, lyr_job_type++)
    {
        hme_set_refine_prms(
            &s_refine_prms,
            ps_ctxt->u1_encode[layer_id],
            ps_ref_map->i4_num_ref,
            layer_id,
            ps_ctxt->num_layers,
            ps_ctxt->num_layers_explicit_search,
            ps_ctxt->s_init_prms.use_4x4,
            ps_frm_prms,
            NULL,
            &ps_ctxt->s_init_prms.s_me_coding_tools);

        hme_refine_no_encode(
            ps_ctxt,
            &s_refine_prms,
            ps_multi_thrd_ctxt,
            lyr_job_type,
            i4_ping_pong,
            ppv_dep_mngr_hme_sync);
    }
}
/**
********************************************************************************
*  @fn     hme_fill_neighbour_mvs
*
*  @brief  HME neighbour MV population function
*
*  @param[in] pps_mv_grid : MV grid array pointer
*
*  @param[in] i4_ctb_x : CTB pos X
*
*  @param[in] i4_ctb_y : CTB pos Y
*
*  @param[in] i4_num_ref : number of reference frames
*
*  @param[in] pv_ctxt : ME context pointer
*
*  @remarks : Stub; needs to be populated for proper implementation of cost fxn
*
*  @return None
********************************************************************************
*/
void hme_fill_neighbour_mvs(
    mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
{
    /* Intentionally empty stub: neighbour MV population is pending a proper */
    /* cost function implementation. Suppress unused-parameter warnings.     */
    ARG_NOT_USED(pv_ctxt);
    ARG_NOT_USED(i4_num_ref);
    ARG_NOT_USED(i4_ctb_y);
    ARG_NOT_USED(i4_ctb_x);
    ARG_NOT_USED(pps_mv_grid);
}
4671
/**
*******************************************************************************
*  @fn WORD32 hme_get_active_pocs_list(void *pv_me_ctxt,
*                                      S32 i4_num_me_frm_pllel)
*
*  @brief  Checks whether every reference descriptor buffer tracked by the
*          ME ctxt is currently in use with a valid POC
*
*  @param[in] pv_me_ctxt : handle to ME context
*
*  @param[in] i4_num_me_frm_pllel : number of ME frames processed in parallel
*
*  @return 1 if all reference descriptor buffers are active, else 0
*******************************************************************************
*/
hme_get_active_pocs_list(void * pv_me_ctxt,S32 i4_num_me_frm_pllel)4687 WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
4688 {
4689 me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
4690 S32 i, count = 0;
4691
4692 for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
4693 {
4694 S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
4695 S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
4696
4697 if((i4_is_free == 0) && (poc != INVALID_POC))
4698 {
4699 count++;
4700 }
4701 }
4702 if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
4703 {
4704 return 1;
4705 }
4706 else
4707 {
4708 return 0;
4709 }
4710 }
4711
4712 /**
4713 *******************************************************************************
4714 * @fn void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
4715 * S32 *p_pocs_buffered_in_me)
4716 *
4717 * @brief Returns the list of active POCs in ME ctxt
4718 *
4719 * @param[in] pv_me_ctxt : handle to ME context
4720 *
4721 * @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4722 * populates with pocs active
4723 *
4724 * @return void
4725 *******************************************************************************
4726 */
void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 i4_buf;
    S32 i4_out = 0;

    /* Number of reference descriptor buffers held by the coarse ME ctxt */
    S32 i4_num_bufs = ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME;

    for(i4_buf = 0; i4_buf < i4_num_bufs; i4_buf++)
    {
        S32 i4_poc = ps_ctxt->as_ref_descr[i4_buf].aps_layers[1]->i4_poc;

        /* A POC of -1 marks an unused descriptor; skip those */
        if(-1 != i4_poc)
        {
            p_pocs_buffered_in_me[i4_out++] = i4_poc;
        }
    }

    /* Terminate the output list with -1 */
    p_pocs_buffered_in_me[i4_out] = -1;
}
4744
/* Returns the block size (in pels) used for ME in a given layer:        */
/* 4 for the coarsest layer, 16 for layer 0 / any encode layer, and 8    */
/* for intermediate non-encode layers.                                   */
/* NOTE: use_4x4 is currently not consulted by this function.            */
S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
{
    S32 i4_blk_size;

    if(layer_id == (n_layers - 1))
    {
        /* Coarsest layer works on 4x4 blocks */
        i4_blk_size = 4;
    }
    else if((0 == layer_id) || (0 != encode))
    {
        /* Lowermost layer / encode layer uses 16x16 */
        i4_blk_size = 16;
    }
    else
    {
        /* Intermediate non-encode layers use 8x8 */
        i4_blk_size = 8;
    }

    return i4_blk_size;
}
4756