1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Hantro VPU HEVC codec driver
4 *
5 * Copyright (C) 2020 Safran Passenger Innovations LLC
6 */
7
8 #include <linux/types.h>
9 #include <media/v4l2-mem2mem.h>
10
11 #include "hantro.h"
12 #include "hantro_hw.h"
13
/* Vertical filter intermediate data at a tile border */
#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
/*
 * BSD control data of current picture at tile border
 * 128 bits per 4x4 tile = 128/(8*4) bytes per row
 */
#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
/* tile border coefficients of filter */
#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */

/* Hardware limits on the tile grid a single picture may use */
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22

/* Sentinel POC value marking a reference-buffer slot as free */
#define UNUSED_REF -1

/* Alignment (bytes) required by the G2 core for buffer offsets */
#define G2_ALIGN 16
29
hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps * sps)30 size_t hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps *sps)
31 {
32 int bytes_per_pixel = sps->bit_depth_luma_minus8 == 0 ? 1 : 2;
33
34 return sps->pic_width_in_luma_samples *
35 sps->pic_height_in_luma_samples * bytes_per_pixel;
36 }
37
hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps * sps)38 size_t hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps *sps)
39 {
40 size_t cr_offset = hantro_hevc_chroma_offset(sps);
41
42 return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
43 }
44
hantro_hevc_mv_size(const struct v4l2_ctrl_hevc_sps * sps)45 static size_t hantro_hevc_mv_size(const struct v4l2_ctrl_hevc_sps *sps)
46 {
47 u32 min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3;
48 u32 ctb_log2_size_y = min_cb_log2_size_y + sps->log2_diff_max_min_luma_coding_block_size;
49 u32 pic_width_in_ctbs_y = (sps->pic_width_in_luma_samples + (1 << ctb_log2_size_y) - 1)
50 >> ctb_log2_size_y;
51 u32 pic_height_in_ctbs_y = (sps->pic_height_in_luma_samples + (1 << ctb_log2_size_y) - 1)
52 >> ctb_log2_size_y;
53 size_t mv_size;
54
55 mv_size = pic_width_in_ctbs_y * pic_height_in_ctbs_y *
56 (1 << (2 * (ctb_log2_size_y - 4))) * 16;
57
58 vpu_debug(4, "%dx%d (CTBs) %zu MV bytes\n",
59 pic_width_in_ctbs_y, pic_height_in_ctbs_y, mv_size);
60
61 return mv_size;
62 }
63
hantro_hevc_ref_size(struct hantro_ctx * ctx)64 static size_t hantro_hevc_ref_size(struct hantro_ctx *ctx)
65 {
66 const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
67 const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
68
69 return hantro_hevc_motion_vectors_offset(sps) + hantro_hevc_mv_size(sps);
70 }
71
hantro_hevc_ref_free(struct hantro_ctx * ctx)72 static void hantro_hevc_ref_free(struct hantro_ctx *ctx)
73 {
74 struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
75 struct hantro_dev *vpu = ctx->dev;
76 int i;
77
78 for (i = 0; i < NUM_REF_PICTURES; i++) {
79 if (hevc_dec->ref_bufs[i].cpu)
80 dma_free_coherent(vpu->dev, hevc_dec->ref_bufs[i].size,
81 hevc_dec->ref_bufs[i].cpu,
82 hevc_dec->ref_bufs[i].dma);
83 }
84 }
85
hantro_hevc_ref_init(struct hantro_ctx * ctx)86 static void hantro_hevc_ref_init(struct hantro_ctx *ctx)
87 {
88 struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
89 int i;
90
91 for (i = 0; i < NUM_REF_PICTURES; i++)
92 hevc_dec->ref_bufs_poc[i] = UNUSED_REF;
93 }
94
hantro_hevc_get_ref_buf(struct hantro_ctx * ctx,int poc)95 dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
96 int poc)
97 {
98 struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
99 int i;
100
101 /* Find the reference buffer in already known ones */
102 for (i = 0; i < NUM_REF_PICTURES; i++) {
103 if (hevc_dec->ref_bufs_poc[i] == poc) {
104 hevc_dec->ref_bufs_used |= 1 << i;
105 return hevc_dec->ref_bufs[i].dma;
106 }
107 }
108
109 /* Allocate a new reference buffer */
110 for (i = 0; i < NUM_REF_PICTURES; i++) {
111 if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF) {
112 if (!hevc_dec->ref_bufs[i].cpu) {
113 struct hantro_dev *vpu = ctx->dev;
114
115 /*
116 * Allocate the space needed for the raw data +
117 * motion vector data. Optimizations could be to
118 * allocate raw data in non coherent memory and only
119 * clear the motion vector data.
120 */
121 hevc_dec->ref_bufs[i].cpu =
122 dma_alloc_coherent(vpu->dev,
123 hantro_hevc_ref_size(ctx),
124 &hevc_dec->ref_bufs[i].dma,
125 GFP_KERNEL);
126 if (!hevc_dec->ref_bufs[i].cpu)
127 return 0;
128
129 hevc_dec->ref_bufs[i].size = hantro_hevc_ref_size(ctx);
130 }
131 hevc_dec->ref_bufs_used |= 1 << i;
132 memset(hevc_dec->ref_bufs[i].cpu, 0, hantro_hevc_ref_size(ctx));
133 hevc_dec->ref_bufs_poc[i] = poc;
134
135 return hevc_dec->ref_bufs[i].dma;
136 }
137 }
138
139 return 0;
140 }
141
hantro_hevc_ref_remove_unused(struct hantro_ctx * ctx)142 void hantro_hevc_ref_remove_unused(struct hantro_ctx *ctx)
143 {
144 struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
145 int i;
146
147 /* Just tag buffer as unused, do not free them */
148 for (i = 0; i < NUM_REF_PICTURES; i++) {
149 if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF)
150 continue;
151
152 if (hevc_dec->ref_bufs_used & (1 << i))
153 continue;
154
155 hevc_dec->ref_bufs_poc[i] = UNUSED_REF;
156 }
157 }
158
/*
 * tile_buffer_reallocate - size the per-tile-border work buffers to the PPS
 * @ctx: codec context (current SPS/PPS are read from its controls)
 *
 * The G2 core needs three scratch buffers (vertical filter, SAO, BSD
 * control) whose size scales with the picture height and the number of
 * interior tile-column borders (num_tile_cols - 1). Buffers are only
 * grown: if the new column count fits in what is already allocated,
 * nothing is done. On any allocation failure all three buffers are
 * released and -ENOMEM is returned.
 */
static int tile_buffer_reallocate(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
	/* Picture height rounded up to a multiple of 64 luma samples */
	unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
	unsigned int size;

	/* Single-column pictures need no border buffers; never shrink */
	if (num_tile_cols <= 1 ||
	    num_tile_cols <= hevc_dec->num_tile_cols_allocated)
		return 0;

	/* Need to reallocate due to tiles passed via PPS */
	if (hevc_dec->tile_filter.cpu) {
		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
				  hevc_dec->tile_filter.cpu,
				  hevc_dec->tile_filter.dma);
		hevc_dec->tile_filter.cpu = NULL;
	}

	if (hevc_dec->tile_sao.cpu) {
		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
				  hevc_dec->tile_sao.cpu,
				  hevc_dec->tile_sao.dma);
		hevc_dec->tile_sao.cpu = NULL;
	}

	if (hevc_dec->tile_bsd.cpu) {
		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
				  hevc_dec->tile_bsd.cpu,
				  hevc_dec->tile_bsd.dma);
		hevc_dec->tile_bsd.cpu = NULL;
	}

	/* One stripe of filter data per interior tile-column border */
	size = VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1);
	hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
						       &hevc_dec->tile_filter.dma,
						       GFP_KERNEL);
	if (!hevc_dec->tile_filter.cpu)
		goto err_free_tile_buffers;
	hevc_dec->tile_filter.size = size;

	size = VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1);
	hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
						    &hevc_dec->tile_sao.dma,
						    GFP_KERNEL);
	if (!hevc_dec->tile_sao.cpu)
		goto err_free_tile_buffers;
	hevc_dec->tile_sao.size = size;

	size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
	hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
						    &hevc_dec->tile_bsd.dma,
						    GFP_KERNEL);
	if (!hevc_dec->tile_bsd.cpu)
		goto err_free_tile_buffers;
	hevc_dec->tile_bsd.size = size;

	hevc_dec->num_tile_cols_allocated = num_tile_cols;

	return 0;

err_free_tile_buffers:
	/* Release whichever of the three buffers did get allocated */
	if (hevc_dec->tile_filter.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
				  hevc_dec->tile_filter.cpu,
				  hevc_dec->tile_filter.dma);
	hevc_dec->tile_filter.cpu = NULL;

	if (hevc_dec->tile_sao.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
				  hevc_dec->tile_sao.cpu,
				  hevc_dec->tile_sao.dma);
	hevc_dec->tile_sao.cpu = NULL;

	if (hevc_dec->tile_bsd.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
				  hevc_dec->tile_bsd.cpu,
				  hevc_dec->tile_bsd.dma);
	hevc_dec->tile_bsd.cpu = NULL;

	return -ENOMEM;
}
245
hantro_hevc_dec_prepare_run(struct hantro_ctx * ctx)246 int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
247 {
248 struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
249 struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
250 int ret;
251
252 hantro_start_prepare_run(ctx);
253
254 ctrls->decode_params =
255 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
256 if (WARN_ON(!ctrls->decode_params))
257 return -EINVAL;
258
259 ctrls->sps =
260 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
261 if (WARN_ON(!ctrls->sps))
262 return -EINVAL;
263
264 ctrls->pps =
265 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_PPS);
266 if (WARN_ON(!ctrls->pps))
267 return -EINVAL;
268
269 ret = tile_buffer_reallocate(ctx);
270 if (ret)
271 return ret;
272
273 return 0;
274 }
275
/*
 * hantro_hevc_dec_exit - release all HEVC decoder DMA resources
 * @ctx: codec context being torn down
 *
 * Frees the tile-size table, the three tile-border work buffers and
 * every lazily allocated reference buffer. CPU pointers are cleared so
 * the context is left in a consistent, re-initializable state.
 */
void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;

	if (hevc_dec->tile_sizes.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
				  hevc_dec->tile_sizes.cpu,
				  hevc_dec->tile_sizes.dma);
	hevc_dec->tile_sizes.cpu = NULL;

	if (hevc_dec->tile_filter.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
				  hevc_dec->tile_filter.cpu,
				  hevc_dec->tile_filter.dma);
	hevc_dec->tile_filter.cpu = NULL;

	if (hevc_dec->tile_sao.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
				  hevc_dec->tile_sao.cpu,
				  hevc_dec->tile_sao.dma);
	hevc_dec->tile_sao.cpu = NULL;

	if (hevc_dec->tile_bsd.cpu)
		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
				  hevc_dec->tile_bsd.cpu,
				  hevc_dec->tile_bsd.dma);
	hevc_dec->tile_bsd.cpu = NULL;

	hantro_hevc_ref_free(ctx);
}
307
hantro_hevc_dec_init(struct hantro_ctx * ctx)308 int hantro_hevc_dec_init(struct hantro_ctx *ctx)
309 {
310 struct hantro_dev *vpu = ctx->dev;
311 struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
312 unsigned int size;
313
314 memset(hevc_dec, 0, sizeof(*hevc_dec));
315
316 /*
317 * Maximum number of tiles times width and height (2 bytes each),
318 * rounding up to next 16 bytes boundary + one extra 16 byte
319 * chunk (HW guys wanted to have this).
320 */
321 size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
322 hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
323 &hevc_dec->tile_sizes.dma,
324 GFP_KERNEL);
325 if (!hevc_dec->tile_sizes.cpu)
326 return -ENOMEM;
327
328 hevc_dec->tile_sizes.size = size;
329
330 hantro_hevc_ref_init(ctx);
331
332 return 0;
333 }
334