• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro VPU HEVC codec driver
4  *
5  * Copyright (C) 2020 Safran Passenger Innovations LLC
6  */
7 
8 #include <linux/types.h>
9 #include <media/v4l2-mem2mem.h>
10 
11 #include "hantro.h"
12 #include "hantro_hw.h"
13 
14 #define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
15 /*
16  * BSD control data of current picture at tile border
17  * 128 bits per 4x4 tile = 128/(8*4) bytes per row
18  */
19 #define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
20 /* tile border coefficients of filter */
21 #define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */
22 
23 #define MAX_TILE_COLS 20
24 #define MAX_TILE_ROWS 22
25 
26 #define UNUSED_REF	-1
27 
28 #define G2_ALIGN		16
29 
hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps * sps)30 size_t hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps *sps)
31 {
32 	int bytes_per_pixel = sps->bit_depth_luma_minus8 == 0 ? 1 : 2;
33 
34 	return sps->pic_width_in_luma_samples *
35 	       sps->pic_height_in_luma_samples * bytes_per_pixel;
36 }
37 
hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps * sps)38 size_t hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps *sps)
39 {
40 	size_t cr_offset = hantro_hevc_chroma_offset(sps);
41 
42 	return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
43 }
44 
/*
 * Compute the size of the auxiliary motion-vector buffer for one picture.
 *
 * The CTB geometry is derived from the SPS log2 fields (H.265 spec,
 * section 7.4.3.2.1 semantics): CtbLog2SizeY = MinCbLog2SizeY +
 * log2_diff_max_min_luma_coding_block_size.
 */
static size_t hantro_hevc_mv_size(const struct v4l2_ctrl_hevc_sps *sps)
{
	u32 min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3;
	u32 ctb_log2_size_y = min_cb_log2_size_y + sps->log2_diff_max_min_luma_coding_block_size;
	/* Picture dimensions in CTB units, partial CTBs rounded up. */
	u32 pic_width_in_ctbs_y = (sps->pic_width_in_luma_samples + (1 << ctb_log2_size_y) - 1)
				  >> ctb_log2_size_y;
	u32 pic_height_in_ctbs_y = (sps->pic_height_in_luma_samples + (1 << ctb_log2_size_y) - 1)
				   >> ctb_log2_size_y;
	size_t mv_size;

	/*
	 * 1 << (2 * (ctb_log2_size_y - 4)) is the number of 16x16 blocks in
	 * one CTB ((ctb_size / 16)^2); 16 bytes of MV data are reserved per
	 * 16x16 block — presumably a HW requirement of the G2 core.
	 */
	mv_size = pic_width_in_ctbs_y * pic_height_in_ctbs_y *
		  (1 << (2 * (ctb_log2_size_y - 4))) * 16;

	vpu_debug(4, "%dx%d (CTBs) %zu MV bytes\n",
		  pic_width_in_ctbs_y, pic_height_in_ctbs_y, mv_size);

	return mv_size;
}
63 
hantro_hevc_ref_size(struct hantro_ctx * ctx)64 static size_t hantro_hevc_ref_size(struct hantro_ctx *ctx)
65 {
66 	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
67 	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
68 
69 	return hantro_hevc_motion_vectors_offset(sps) + hantro_hevc_mv_size(sps);
70 }
71 
hantro_hevc_ref_free(struct hantro_ctx * ctx)72 static void hantro_hevc_ref_free(struct hantro_ctx *ctx)
73 {
74 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
75 	struct hantro_dev *vpu = ctx->dev;
76 	int i;
77 
78 	for (i = 0;  i < NUM_REF_PICTURES; i++) {
79 		if (hevc_dec->ref_bufs[i].cpu)
80 			dma_free_coherent(vpu->dev, hevc_dec->ref_bufs[i].size,
81 					  hevc_dec->ref_bufs[i].cpu,
82 					  hevc_dec->ref_bufs[i].dma);
83 	}
84 }
85 
hantro_hevc_ref_init(struct hantro_ctx * ctx)86 static void hantro_hevc_ref_init(struct hantro_ctx *ctx)
87 {
88 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
89 	int i;
90 
91 	for (i = 0;  i < NUM_REF_PICTURES; i++)
92 		hevc_dec->ref_bufs_poc[i] = UNUSED_REF;
93 }
94 
hantro_hevc_get_ref_buf(struct hantro_ctx * ctx,int poc)95 dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
96 				   int poc)
97 {
98 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
99 	int i;
100 
101 	/* Find the reference buffer in already known ones */
102 	for (i = 0;  i < NUM_REF_PICTURES; i++) {
103 		if (hevc_dec->ref_bufs_poc[i] == poc) {
104 			hevc_dec->ref_bufs_used |= 1 << i;
105 			return hevc_dec->ref_bufs[i].dma;
106 		}
107 	}
108 
109 	/* Allocate a new reference buffer */
110 	for (i = 0; i < NUM_REF_PICTURES; i++) {
111 		if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF) {
112 			if (!hevc_dec->ref_bufs[i].cpu) {
113 				struct hantro_dev *vpu = ctx->dev;
114 
115 				/*
116 				 * Allocate the space needed for the raw data +
117 				 * motion vector data. Optimizations could be to
118 				 * allocate raw data in non coherent memory and only
119 				 * clear the motion vector data.
120 				 */
121 				hevc_dec->ref_bufs[i].cpu =
122 					dma_alloc_coherent(vpu->dev,
123 							   hantro_hevc_ref_size(ctx),
124 							   &hevc_dec->ref_bufs[i].dma,
125 							   GFP_KERNEL);
126 				if (!hevc_dec->ref_bufs[i].cpu)
127 					return 0;
128 
129 				hevc_dec->ref_bufs[i].size = hantro_hevc_ref_size(ctx);
130 			}
131 			hevc_dec->ref_bufs_used |= 1 << i;
132 			memset(hevc_dec->ref_bufs[i].cpu, 0, hantro_hevc_ref_size(ctx));
133 			hevc_dec->ref_bufs_poc[i] = poc;
134 
135 			return hevc_dec->ref_bufs[i].dma;
136 		}
137 	}
138 
139 	return 0;
140 }
141 
hantro_hevc_ref_remove_unused(struct hantro_ctx * ctx)142 void hantro_hevc_ref_remove_unused(struct hantro_ctx *ctx)
143 {
144 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
145 	int i;
146 
147 	/* Just tag buffer as unused, do not free them */
148 	for (i = 0;  i < NUM_REF_PICTURES; i++) {
149 		if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF)
150 			continue;
151 
152 		if (hevc_dec->ref_bufs_used & (1 << i))
153 			continue;
154 
155 		hevc_dec->ref_bufs_poc[i] = UNUSED_REF;
156 	}
157 }
158 
tile_buffer_reallocate(struct hantro_ctx * ctx)159 static int tile_buffer_reallocate(struct hantro_ctx *ctx)
160 {
161 	struct hantro_dev *vpu = ctx->dev;
162 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
163 	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
164 	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
165 	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
166 	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
167 	unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
168 	unsigned int size;
169 
170 	if (num_tile_cols <= 1 ||
171 	    num_tile_cols <= hevc_dec->num_tile_cols_allocated)
172 		return 0;
173 
174 	/* Need to reallocate due to tiles passed via PPS */
175 	if (hevc_dec->tile_filter.cpu) {
176 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
177 				  hevc_dec->tile_filter.cpu,
178 				  hevc_dec->tile_filter.dma);
179 		hevc_dec->tile_filter.cpu = NULL;
180 	}
181 
182 	if (hevc_dec->tile_sao.cpu) {
183 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
184 				  hevc_dec->tile_sao.cpu,
185 				  hevc_dec->tile_sao.dma);
186 		hevc_dec->tile_sao.cpu = NULL;
187 	}
188 
189 	if (hevc_dec->tile_bsd.cpu) {
190 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
191 				  hevc_dec->tile_bsd.cpu,
192 				  hevc_dec->tile_bsd.dma);
193 		hevc_dec->tile_bsd.cpu = NULL;
194 	}
195 
196 	size = VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1);
197 	hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
198 						       &hevc_dec->tile_filter.dma,
199 						       GFP_KERNEL);
200 	if (!hevc_dec->tile_filter.cpu)
201 		goto err_free_tile_buffers;
202 	hevc_dec->tile_filter.size = size;
203 
204 	size = VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1);
205 	hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
206 						    &hevc_dec->tile_sao.dma,
207 						    GFP_KERNEL);
208 	if (!hevc_dec->tile_sao.cpu)
209 		goto err_free_tile_buffers;
210 	hevc_dec->tile_sao.size = size;
211 
212 	size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
213 	hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
214 						    &hevc_dec->tile_bsd.dma,
215 						    GFP_KERNEL);
216 	if (!hevc_dec->tile_bsd.cpu)
217 		goto err_free_tile_buffers;
218 	hevc_dec->tile_bsd.size = size;
219 
220 	hevc_dec->num_tile_cols_allocated = num_tile_cols;
221 
222 	return 0;
223 
224 err_free_tile_buffers:
225 	if (hevc_dec->tile_filter.cpu)
226 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
227 				  hevc_dec->tile_filter.cpu,
228 				  hevc_dec->tile_filter.dma);
229 	hevc_dec->tile_filter.cpu = NULL;
230 
231 	if (hevc_dec->tile_sao.cpu)
232 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
233 				  hevc_dec->tile_sao.cpu,
234 				  hevc_dec->tile_sao.dma);
235 	hevc_dec->tile_sao.cpu = NULL;
236 
237 	if (hevc_dec->tile_bsd.cpu)
238 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
239 				  hevc_dec->tile_bsd.cpu,
240 				  hevc_dec->tile_bsd.dma);
241 	hevc_dec->tile_bsd.cpu = NULL;
242 
243 	return -ENOMEM;
244 }
245 
hantro_hevc_dec_prepare_run(struct hantro_ctx * ctx)246 int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
247 {
248 	struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
249 	struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
250 	int ret;
251 
252 	hantro_start_prepare_run(ctx);
253 
254 	ctrls->decode_params =
255 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
256 	if (WARN_ON(!ctrls->decode_params))
257 		return -EINVAL;
258 
259 	ctrls->sps =
260 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
261 	if (WARN_ON(!ctrls->sps))
262 		return -EINVAL;
263 
264 	ctrls->pps =
265 		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_PPS);
266 	if (WARN_ON(!ctrls->pps))
267 		return -EINVAL;
268 
269 	ret = tile_buffer_reallocate(ctx);
270 	if (ret)
271 		return ret;
272 
273 	return 0;
274 }
275 
hantro_hevc_dec_exit(struct hantro_ctx * ctx)276 void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
277 {
278 	struct hantro_dev *vpu = ctx->dev;
279 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
280 
281 	if (hevc_dec->tile_sizes.cpu)
282 		dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
283 				  hevc_dec->tile_sizes.cpu,
284 				  hevc_dec->tile_sizes.dma);
285 	hevc_dec->tile_sizes.cpu = NULL;
286 
287 	if (hevc_dec->tile_filter.cpu)
288 		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
289 				  hevc_dec->tile_filter.cpu,
290 				  hevc_dec->tile_filter.dma);
291 	hevc_dec->tile_filter.cpu = NULL;
292 
293 	if (hevc_dec->tile_sao.cpu)
294 		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
295 				  hevc_dec->tile_sao.cpu,
296 				  hevc_dec->tile_sao.dma);
297 	hevc_dec->tile_sao.cpu = NULL;
298 
299 	if (hevc_dec->tile_bsd.cpu)
300 		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
301 				  hevc_dec->tile_bsd.cpu,
302 				  hevc_dec->tile_bsd.dma);
303 	hevc_dec->tile_bsd.cpu = NULL;
304 
305 	hantro_hevc_ref_free(ctx);
306 }
307 
hantro_hevc_dec_init(struct hantro_ctx * ctx)308 int hantro_hevc_dec_init(struct hantro_ctx *ctx)
309 {
310 	struct hantro_dev *vpu = ctx->dev;
311 	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
312 	unsigned int size;
313 
314 	memset(hevc_dec, 0, sizeof(*hevc_dec));
315 
316 	/*
317 	 * Maximum number of tiles times width and height (2 bytes each),
318 	 * rounding up to next 16 bytes boundary + one extra 16 byte
319 	 * chunk (HW guys wanted to have this).
320 	 */
321 	size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
322 	hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
323 						      &hevc_dec->tile_sizes.dma,
324 						      GFP_KERNEL);
325 	if (!hevc_dec->tile_sizes.cpu)
326 		return -ENOMEM;
327 
328 	hevc_dec->tile_sizes.size = size;
329 
330 	hantro_hevc_ref_init(ctx);
331 
332 	return 0;
333 }
334