/*
 * TI VPE mem2mem driver, based on the virtual v4l2-mem2mem example driver
 *
 * Copyright (c) 2013 Texas Instruments Inc.
 * David Griego, <dagriego@biglakesoftware.com>
 * Dale Farnsworth, <dale@farnsworth.org>
 * Archit Taneja, <archit@ti.com>
 *
 * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd.
 * Pawel Osciak, <pawel@osciak.com>
 * Marek Szyprowski, <m.szyprowski@samsung.com>
 *
 * Based on the virtual v4l2-mem2mem example device
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation
 */

#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/ioctl.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/videodev2.h>
#include <linux/log2.h>
#include <linux/sizes.h>

#include <media/v4l2-common.h>
#include <media/v4l2-ctrls.h>
#include <media/v4l2-device.h>
#include <media/v4l2-event.h>
#include <media/v4l2-ioctl.h>
#include <media/v4l2-mem2mem.h>
#include <media/videobuf2-v4l2.h>
#include <media/videobuf2-dma-contig.h>

#include "vpdma.h"
#include "vpdma_priv.h"
#include "vpe_regs.h"
#include "sc.h"
#include "csc.h"

#define VPE_MODULE_NAME "vpe"

/* minimum and maximum frame sizes */
#define MIN_W		32
#define MIN_H		32
#define MAX_W		2048
#define MAX_H		1184

/* required alignments */
#define S_ALIGN		0	/* multiple of 1 */
#define H_ALIGN		1	/* multiple of 2 */

/* flags that indicate a format can be used for capture/output */
#define VPE_FMT_TYPE_CAPTURE	(1 << 0)
#define VPE_FMT_TYPE_OUTPUT	(1 << 1)

/* used as plane indices */
#define VPE_MAX_PLANES	2
#define VPE_LUMA	0
#define VPE_CHROMA	1

/* per m2m context info */
#define VPE_MAX_SRC_BUFS	3	/* need 3 src fields to de-interlace */

#define VPE_DEF_BUFS_PER_JOB	1	/* default one buffer per batch job */

/*
 * each VPE context can need up to 3 config descriptors, 7 input descriptors,
 * 3 output descriptors, and 10 control descriptors
 */
#define VPE_DESC_LIST_SIZE	(10 * VPDMA_DTD_DESC_SIZE +	\
					13 * VPDMA_CFD_CTD_DESC_SIZE)
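
/*
 * A sketch of the arithmetic above: the 7 input + 3 output data descriptors
 * account for the 10 * VPDMA_DTD_DESC_SIZE term, while the 3 config + 10
 * control descriptors account for the 13 * VPDMA_CFD_CTD_DESC_SIZE term
 * (the macro name suggests config and control descriptors share one size).
 */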

#define vpe_dbg(vpedev, fmt, arg...)	\
		dev_dbg((vpedev)->v4l2_dev.dev, fmt, ##arg)
#define vpe_err(vpedev, fmt, arg...)	\
		dev_err((vpedev)->v4l2_dev.dev, fmt, ##arg)

struct vpe_us_coeffs {
	unsigned short	anchor_fid0_c0;
	unsigned short	anchor_fid0_c1;
	unsigned short	anchor_fid0_c2;
	unsigned short	anchor_fid0_c3;
	unsigned short	interp_fid0_c0;
	unsigned short	interp_fid0_c1;
	unsigned short	interp_fid0_c2;
	unsigned short	interp_fid0_c3;
	unsigned short	anchor_fid1_c0;
	unsigned short	anchor_fid1_c1;
	unsigned short	anchor_fid1_c2;
	unsigned short	anchor_fid1_c3;
	unsigned short	interp_fid1_c0;
	unsigned short	interp_fid1_c1;
	unsigned short	interp_fid1_c2;
	unsigned short	interp_fid1_c3;
};

/*
 * Default upsampler coefficients
 */
static const struct vpe_us_coeffs us_coeffs[] = {
	{
		/* Coefficients for progressive input */
		0x00C8, 0x0348, 0x0018, 0x3FD8, 0x3FB8, 0x0378, 0x00E8, 0x3FE8,
		0x00C8, 0x0348, 0x0018, 0x3FD8, 0x3FB8, 0x0378, 0x00E8, 0x3FE8,
	},
	{
		/* Coefficients for Top Field Interlaced input */
		0x0051, 0x03D5, 0x3FE3, 0x3FF7, 0x3FB5, 0x02E9, 0x018F, 0x3FD3,
		/* Coefficients for Bottom Field Interlaced input */
		0x016B, 0x0247, 0x00B1, 0x3F9D, 0x3FCF, 0x03DB, 0x005D, 0x3FF9,
	},
};
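
/*
 * Reading the table above: each entry holds two 4-tap filter phases per
 * field -- an "anchor" phase and an "interpolated" phase, coefficients
 * c0..c3 each, matching the struct vpe_us_coeffs layout. The 0x3xxx values
 * look like small negative taps in a two's-complement fixed-point encoding,
 * though the exact hardware encoding is an assumption here; the driver
 * only copies the values verbatim into the US1/US2/US3 shadow registers
 * in set_us_coefficients().
 */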

/*
 * The following registers configure some of the parameters of the motion
 * and edge detection blocks inside the DEI. They generally remain the same;
 * they could later be exposed via userspace if someone needs to tweak them.
 */
struct vpe_dei_regs {
	unsigned long mdt_spacial_freq_thr_reg;		/* VPE_DEI_REG2 */
	unsigned long edi_config_reg;			/* VPE_DEI_REG3 */
	unsigned long edi_lut_reg0;			/* VPE_DEI_REG4 */
	unsigned long edi_lut_reg1;			/* VPE_DEI_REG5 */
	unsigned long edi_lut_reg2;			/* VPE_DEI_REG6 */
	unsigned long edi_lut_reg3;			/* VPE_DEI_REG7 */
};

/*
 * default expert DEI register values, unlikely to be modified.
 */
static const struct vpe_dei_regs dei_regs = {
	.mdt_spacial_freq_thr_reg = 0x020C0804u,
	.edi_config_reg = 0x0118100Cu,
	.edi_lut_reg0 = 0x08040200u,
	.edi_lut_reg1 = 0x1010100Cu,
	.edi_lut_reg2 = 0x10101010u,
	.edi_lut_reg3 = 0x10101010u,
};

/*
 * The port_data structure contains per-port data.
 */
struct vpe_port_data {
	enum vpdma_channel channel;	/* VPDMA channel */
	u8	vb_index;		/* input frame f, f-1, f-2 index */
	u8	vb_part;		/* plane index for co-planar formats */
};

/*
 * Define indices into the port_data tables
 */
#define VPE_PORT_LUMA1_IN	0
#define VPE_PORT_CHROMA1_IN	1
#define VPE_PORT_LUMA2_IN	2
#define VPE_PORT_CHROMA2_IN	3
#define VPE_PORT_LUMA3_IN	4
#define VPE_PORT_CHROMA3_IN	5
#define VPE_PORT_MV_IN		6
#define VPE_PORT_MV_OUT		7
#define VPE_PORT_LUMA_OUT	8
#define VPE_PORT_CHROMA_OUT	9
#define VPE_PORT_RGB_OUT	10

static const struct vpe_port_data port_data[11] = {
	[VPE_PORT_LUMA1_IN] = {
		.channel	= VPE_CHAN_LUMA1_IN,
		.vb_index	= 0,
		.vb_part	= VPE_LUMA,
	},
	[VPE_PORT_CHROMA1_IN] = {
		.channel	= VPE_CHAN_CHROMA1_IN,
		.vb_index	= 0,
		.vb_part	= VPE_CHROMA,
	},
	[VPE_PORT_LUMA2_IN] = {
		.channel	= VPE_CHAN_LUMA2_IN,
		.vb_index	= 1,
		.vb_part	= VPE_LUMA,
	},
	[VPE_PORT_CHROMA2_IN] = {
		.channel	= VPE_CHAN_CHROMA2_IN,
		.vb_index	= 1,
		.vb_part	= VPE_CHROMA,
	},
	[VPE_PORT_LUMA3_IN] = {
		.channel	= VPE_CHAN_LUMA3_IN,
		.vb_index	= 2,
		.vb_part	= VPE_LUMA,
	},
	[VPE_PORT_CHROMA3_IN] = {
		.channel	= VPE_CHAN_CHROMA3_IN,
		.vb_index	= 2,
		.vb_part	= VPE_CHROMA,
	},
	[VPE_PORT_MV_IN] = {
		.channel	= VPE_CHAN_MV_IN,
	},
	[VPE_PORT_MV_OUT] = {
		.channel	= VPE_CHAN_MV_OUT,
	},
	[VPE_PORT_LUMA_OUT] = {
		.channel	= VPE_CHAN_LUMA_OUT,
		.vb_part	= VPE_LUMA,
	},
	[VPE_PORT_CHROMA_OUT] = {
		.channel	= VPE_CHAN_CHROMA_OUT,
		.vb_part	= VPE_CHROMA,
	},
	[VPE_PORT_RGB_OUT] = {
		.channel	= VPE_CHAN_RGB_OUT,
		.vb_part	= VPE_LUMA,
	},
};
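
/*
 * Note on the table above: the three luma/chroma input port pairs carry the
 * current field (f, vb_index 0) and the two previous fields (f-1, f-2) that
 * the de-interlacer needs, which is why VPE_MAX_SRC_BUFS is 3. The MV ports
 * carry no vb_index/vb_part because they read/write the driver-allocated
 * motion vector buffers rather than a videobuf plane.
 */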


/* driver info for each of the supported video formats */
struct vpe_fmt {
	char	*name;			/* human-readable name */
	u32	fourcc;			/* standard format identifier */
	u8	types;			/* CAPTURE and/or OUTPUT */
	u8	coplanar;		/* set for unpacked Luma and Chroma */
	/* vpdma format info for each plane */
	struct vpdma_data_format const *vpdma_fmt[VPE_MAX_PLANES];
};

static struct vpe_fmt vpe_formats[] = {
	{
		.name		= "NV16 YUV 422 co-planar",
		.fourcc		= V4L2_PIX_FMT_NV16,
		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
		.coplanar	= 1,
		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_Y444],
				    &vpdma_yuv_fmts[VPDMA_DATA_FMT_C444],
				  },
	},
	{
		.name		= "NV12 YUV 420 co-planar",
		.fourcc		= V4L2_PIX_FMT_NV12,
		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
		.coplanar	= 1,
		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_Y420],
				    &vpdma_yuv_fmts[VPDMA_DATA_FMT_C420],
				  },
	},
	{
		.name		= "YUYV 422 packed",
		.fourcc		= V4L2_PIX_FMT_YUYV,
		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_YCB422],
				  },
	},
	{
		.name		= "UYVY 422 packed",
		.fourcc		= V4L2_PIX_FMT_UYVY,
		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_CBY422],
				  },
	},
	{
		.name		= "RGB888 packed",
		.fourcc		= V4L2_PIX_FMT_RGB24,
		.types		= VPE_FMT_TYPE_CAPTURE,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGB24],
				  },
	},
	{
		.name		= "ARGB32",
		.fourcc		= V4L2_PIX_FMT_RGB32,
		.types		= VPE_FMT_TYPE_CAPTURE,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_ARGB32],
				  },
	},
	{
		.name		= "BGR888 packed",
		.fourcc		= V4L2_PIX_FMT_BGR24,
		.types		= VPE_FMT_TYPE_CAPTURE,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_BGR24],
				  },
	},
	{
		.name		= "ABGR32",
		.fourcc		= V4L2_PIX_FMT_BGR32,
		.types		= VPE_FMT_TYPE_CAPTURE,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_ABGR32],
				  },
	},
	{
		.name		= "RGB565",
		.fourcc		= V4L2_PIX_FMT_RGB565,
		.types		= VPE_FMT_TYPE_CAPTURE,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGB565],
				  },
	},
	{
		.name		= "RGB5551",
		.fourcc		= V4L2_PIX_FMT_RGB555,
		.types		= VPE_FMT_TYPE_CAPTURE,
		.coplanar	= 0,
		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGBA16_5551],
				  },
	},
};

/*
 * per-queue, driver-specific private data.
 * there is one source queue and one destination queue for each m2m context.
 */
struct vpe_q_data {
	unsigned int		width;				/* frame width */
	unsigned int		height;				/* frame height */
	unsigned int		nplanes;			/* Current number of planes */
	unsigned int		bytesperline[VPE_MAX_PLANES];	/* bytes per line in memory */
	enum v4l2_colorspace	colorspace;
	enum v4l2_field		field;				/* supported field value */
	unsigned int		flags;
	unsigned int		sizeimage[VPE_MAX_PLANES];	/* image size in memory */
	struct v4l2_rect	c_rect;				/* crop/compose rectangle */
	struct vpe_fmt		*fmt;				/* format info */
};

/* vpe_q_data flag bits */
#define	Q_DATA_FRAME_1D			BIT(0)
#define	Q_DATA_MODE_TILED		BIT(1)
#define	Q_DATA_INTERLACED_ALTERNATE	BIT(2)
#define	Q_DATA_INTERLACED_SEQ_TB	BIT(3)

#define Q_IS_INTERLACED		(Q_DATA_INTERLACED_ALTERNATE | \
				Q_DATA_INTERLACED_SEQ_TB)

enum {
	Q_DATA_SRC = 0,
	Q_DATA_DST = 1,
};

/* find our format description corresponding to the passed v4l2_format */
static struct vpe_fmt *__find_format(u32 fourcc)
{
	struct vpe_fmt *fmt;
	unsigned int k;

	for (k = 0; k < ARRAY_SIZE(vpe_formats); k++) {
		fmt = &vpe_formats[k];
		if (fmt->fourcc == fourcc)
			return fmt;
	}

	return NULL;
}

static struct vpe_fmt *find_format(struct v4l2_format *f)
{
	return __find_format(f->fmt.pix.pixelformat);
}

/*
 * there is one vpe_dev structure in the driver, it is shared by
 * all instances.
 */
struct vpe_dev {
	struct v4l2_device	v4l2_dev;
	struct video_device	vfd;
	struct v4l2_m2m_dev	*m2m_dev;

	atomic_t		num_instances;	/* count of driver instances */
	dma_addr_t		loaded_mmrs;	/* shadow mmrs in device */
	struct mutex		dev_mutex;
	spinlock_t		lock;

	int			irq;
	void __iomem		*base;
	struct resource		*res;

	struct vpdma_data	vpdma_data;
	struct vpdma_data	*vpdma;		/* vpdma data handle */
	struct sc_data		*sc;		/* scaler data handle */
	struct csc_data		*csc;		/* csc data handle */
};

/*
 * There is one vpe_ctx structure for each m2m context.
 */
struct vpe_ctx {
	struct v4l2_fh		fh;
	struct vpe_dev		*dev;
	struct v4l2_ctrl_handler hdl;

	unsigned int		field;			/* current field */
	unsigned int		sequence;		/* current frame/field seq */
	unsigned int		aborting;		/* abort after next irq */

	unsigned int		bufs_per_job;		/* input buffers per batch */
	unsigned int		bufs_completed;		/* bufs done in this batch */

	struct vpe_q_data	q_data[2];		/* src & dst queue data */
	struct vb2_v4l2_buffer	*src_vbs[VPE_MAX_SRC_BUFS];
	struct vb2_v4l2_buffer	*dst_vb;

	dma_addr_t		mv_buf_dma[2];		/* dma addrs of motion vector in/out bufs */
	void			*mv_buf[2];		/* virtual addrs of motion vector bufs */
	size_t			mv_buf_size;		/* current motion vector buffer size */
	struct vpdma_buf	mmr_adb;		/* shadow reg addr/data block */
	struct vpdma_buf	sc_coeff_h;		/* h coeff buffer */
	struct vpdma_buf	sc_coeff_v;		/* v coeff buffer */
	struct vpdma_desc_list	desc_list;		/* DMA descriptor list */

	bool			deinterlacing;		/* using de-interlacer */
	bool			load_mmrs;		/* have new shadow reg values */

	unsigned int		src_mv_buf_selector;
};


/*
 * M2M devices get 2 queues.
 * Return the queue given the type.
 */
static struct vpe_q_data *get_q_data(struct vpe_ctx *ctx,
				     enum v4l2_buf_type type)
{
	switch (type) {
	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
		return &ctx->q_data[Q_DATA_SRC];
	case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
		return &ctx->q_data[Q_DATA_DST];
	default:
		return NULL;
	}
}

static u32 read_reg(struct vpe_dev *dev, int offset)
{
	return ioread32(dev->base + offset);
}

static void write_reg(struct vpe_dev *dev, int offset, u32 value)
{
	iowrite32(value, dev->base + offset);
}

/* register field read/write helpers */
static int get_field(u32 value, u32 mask, int shift)
{
	return (value & (mask << shift)) >> shift;
}

static int read_field_reg(struct vpe_dev *dev, int offset, u32 mask, int shift)
{
	return get_field(read_reg(dev, offset), mask, shift);
}

static void write_field(u32 *valp, u32 field, u32 mask, int shift)
{
	u32 val = *valp;

	val &= ~(mask << shift);
	val |= (field & mask) << shift;
	*valp = val;
}

static void write_field_reg(struct vpe_dev *dev, int offset, u32 field,
		u32 mask, int shift)
{
	u32 val = read_reg(dev, offset);

	write_field(&val, field, mask, shift);

	write_reg(dev, offset, val);
}
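
/*
 * Usage example for the helpers above, with mask 0x7 and shift 4:
 *
 *	u32 val = 0xffffffff;
 *	write_field(&val, 5, 0x7, 4);	// val is now 0xffffffdf
 *	get_field(val, 0x7, 4);		// returns 5
 *
 * i.e. the mask is expressed in field-relative terms and shifted into
 * place internally, and write_field_reg() is simply a read-modify-write
 * of the same operation against a VPE register.
 */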

/*
 * DMA address/data block for the shadow registers
 */
struct vpe_mmr_adb {
	struct vpdma_adb_hdr	out_fmt_hdr;
	u32			out_fmt_reg[1];
	u32			out_fmt_pad[3];
	struct vpdma_adb_hdr	us1_hdr;
	u32			us1_regs[8];
	struct vpdma_adb_hdr	us2_hdr;
	u32			us2_regs[8];
	struct vpdma_adb_hdr	us3_hdr;
	u32			us3_regs[8];
	struct vpdma_adb_hdr	dei_hdr;
	u32			dei_regs[8];
	struct vpdma_adb_hdr	sc_hdr0;
	u32			sc_regs0[7];
	u32			sc_pad0[1];
	struct vpdma_adb_hdr	sc_hdr8;
	u32			sc_regs8[6];
	u32			sc_pad8[2];
	struct vpdma_adb_hdr	sc_hdr17;
	u32			sc_regs17[9];
	u32			sc_pad17[3];
	struct vpdma_adb_hdr	csc_hdr;
	u32			csc_regs[6];
	u32			csc_pad[2];
};
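
/*
 * Layout note: each vpdma_adb_hdr is followed by the register payload it
 * describes, and each payload is padded out to a multiple of four words
 * (1 + 3, 7 + 1, 6 + 2, 9 + 3, ...). The four-word granularity appears to
 * be what the VPDMA address/data block format requires -- an assumption
 * based on the padding pattern here, not on the TRM.
 */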

#define GET_OFFSET_TOP(ctx, obj, reg)	\
	((obj)->res->start - ctx->dev->res->start + reg)

#define VPE_SET_MMR_ADB_HDR(ctx, hdr, regs, offset_a)	\
	VPDMA_SET_MMR_ADB_HDR(ctx->mmr_adb, vpe_mmr_adb, hdr, regs, offset_a)
/*
 * Set the headers for all of the address/data block structures.
 */
static void init_adb_hdrs(struct vpe_ctx *ctx)
{
	VPE_SET_MMR_ADB_HDR(ctx, out_fmt_hdr, out_fmt_reg, VPE_CLK_FORMAT_SELECT);
	VPE_SET_MMR_ADB_HDR(ctx, us1_hdr, us1_regs, VPE_US1_R0);
	VPE_SET_MMR_ADB_HDR(ctx, us2_hdr, us2_regs, VPE_US2_R0);
	VPE_SET_MMR_ADB_HDR(ctx, us3_hdr, us3_regs, VPE_US3_R0);
	VPE_SET_MMR_ADB_HDR(ctx, dei_hdr, dei_regs, VPE_DEI_FRAME_SIZE);
	VPE_SET_MMR_ADB_HDR(ctx, sc_hdr0, sc_regs0,
		GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC0));
	VPE_SET_MMR_ADB_HDR(ctx, sc_hdr8, sc_regs8,
		GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC8));
	VPE_SET_MMR_ADB_HDR(ctx, sc_hdr17, sc_regs17,
		GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC17));
	VPE_SET_MMR_ADB_HDR(ctx, csc_hdr, csc_regs,
		GET_OFFSET_TOP(ctx, ctx->dev->csc, CSC_CSC00));
}

/*
 * Allocate or re-allocate the motion vector DMA buffers
 * There are two buffers, one for input and one for output.
 * However, the roles are reversed after each field is processed.
 * In other words, after each field is processed, the previous
 * output (dst) MV buffer becomes the new input (src) MV buffer.
 */
static int realloc_mv_buffers(struct vpe_ctx *ctx, size_t size)
{
	struct device *dev = ctx->dev->v4l2_dev.dev;

	if (ctx->mv_buf_size == size)
		return 0;

	if (ctx->mv_buf[0])
		dma_free_coherent(dev, ctx->mv_buf_size, ctx->mv_buf[0],
			ctx->mv_buf_dma[0]);

	if (ctx->mv_buf[1])
		dma_free_coherent(dev, ctx->mv_buf_size, ctx->mv_buf[1],
			ctx->mv_buf_dma[1]);

	if (size == 0)
		return 0;

	ctx->mv_buf[0] = dma_alloc_coherent(dev, size, &ctx->mv_buf_dma[0],
				GFP_KERNEL);
	if (!ctx->mv_buf[0]) {
		vpe_err(ctx->dev, "failed to allocate motion vector buffer\n");
		return -ENOMEM;
	}

	ctx->mv_buf[1] = dma_alloc_coherent(dev, size, &ctx->mv_buf_dma[1],
				GFP_KERNEL);
	if (!ctx->mv_buf[1]) {
		vpe_err(ctx->dev, "failed to allocate motion vector buffer\n");
		dma_free_coherent(dev, size, ctx->mv_buf[0],
			ctx->mv_buf_dma[0]);

		return -ENOMEM;
	}

	ctx->mv_buf_size = size;
	ctx->src_mv_buf_selector = 0;

	return 0;
}

static void free_mv_buffers(struct vpe_ctx *ctx)
{
	realloc_mv_buffers(ctx, 0);
}

/*
 * While de-interlacing, we keep the two most recent input buffers
 * around.  This function frees those two buffers when we have
 * finished processing the current stream.
 */
static void free_vbs(struct vpe_ctx *ctx)
{
	struct vpe_dev *dev = ctx->dev;
	unsigned long flags;

	if (ctx->src_vbs[2] == NULL)
		return;

	spin_lock_irqsave(&dev->lock, flags);
	if (ctx->src_vbs[2]) {
		v4l2_m2m_buf_done(ctx->src_vbs[2], VB2_BUF_STATE_DONE);
		if (ctx->src_vbs[1] && (ctx->src_vbs[1] != ctx->src_vbs[2]))
			v4l2_m2m_buf_done(ctx->src_vbs[1], VB2_BUF_STATE_DONE);
		ctx->src_vbs[2] = NULL;
		ctx->src_vbs[1] = NULL;
	}
	spin_unlock_irqrestore(&dev->lock, flags);
}

/*
 * Enable or disable the VPE clocks
 */
static void vpe_set_clock_enable(struct vpe_dev *dev, bool on)
{
	u32 val = 0;

	if (on)
		val = VPE_DATA_PATH_CLK_ENABLE | VPE_VPEDMA_CLK_ENABLE;
	write_reg(dev, VPE_CLK_ENABLE, val);
}

static void vpe_top_reset(struct vpe_dev *dev)
{
	write_field_reg(dev, VPE_CLK_RESET, 1, VPE_DATA_PATH_CLK_RESET_MASK,
		VPE_DATA_PATH_CLK_RESET_SHIFT);

	usleep_range(100, 150);

	write_field_reg(dev, VPE_CLK_RESET, 0, VPE_DATA_PATH_CLK_RESET_MASK,
		VPE_DATA_PATH_CLK_RESET_SHIFT);
}

static void vpe_top_vpdma_reset(struct vpe_dev *dev)
{
	write_field_reg(dev, VPE_CLK_RESET, 1, VPE_VPDMA_CLK_RESET_MASK,
		VPE_VPDMA_CLK_RESET_SHIFT);

	usleep_range(100, 150);

	write_field_reg(dev, VPE_CLK_RESET, 0, VPE_VPDMA_CLK_RESET_MASK,
		VPE_VPDMA_CLK_RESET_SHIFT);
}

/*
 * Load the correct set of upsampler coefficients into the shadow MMRs
 */
static void set_us_coefficients(struct vpe_ctx *ctx)
{
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
	u32 *us1_reg = &mmr_adb->us1_regs[0];
	u32 *us2_reg = &mmr_adb->us2_regs[0];
	u32 *us3_reg = &mmr_adb->us3_regs[0];
	const unsigned short *cp, *end_cp;

	cp = &us_coeffs[0].anchor_fid0_c0;

	if (s_q_data->flags & Q_IS_INTERLACED)		/* interlaced */
		cp += sizeof(us_coeffs[0]) / sizeof(*cp);

	end_cp = cp + sizeof(us_coeffs[0]) / sizeof(*cp);

	while (cp < end_cp) {
		write_field(us1_reg, *cp++, VPE_US_C0_MASK, VPE_US_C0_SHIFT);
		write_field(us1_reg, *cp++, VPE_US_C1_MASK, VPE_US_C1_SHIFT);
		*us2_reg++ = *us1_reg;
		*us3_reg++ = *us1_reg++;
	}
	ctx->load_mmrs = true;
}
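
/*
 * How the loop above works: advancing cp by sizeof(us_coeffs[0]) shorts
 * steps from us_coeffs[0] (progressive) to us_coeffs[1] (interlaced), so
 * the flag check simply selects a table entry. Each shadow register then
 * packs two consecutive coefficients into its C0/C1 fields, and the
 * US2/US3 upsamplers receive verbatim copies of the US1 values.
 */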

/*
 * Set the upsampler config mode and the VPDMA line mode in the shadow MMRs.
 */
static void set_cfg_modes(struct vpe_ctx *ctx)
{
	struct vpe_fmt *fmt = ctx->q_data[Q_DATA_SRC].fmt;
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	u32 *us1_reg0 = &mmr_adb->us1_regs[0];
	u32 *us2_reg0 = &mmr_adb->us2_regs[0];
	u32 *us3_reg0 = &mmr_adb->us3_regs[0];
	int cfg_mode = 1;

	/*
	 * Cfg Mode 0: YUV420 source, enable upsampler, DEI is de-interlacing.
	 * Cfg Mode 1: YUV422 source, disable upsampler, DEI is de-interlacing.
	 */

	if (fmt->fourcc == V4L2_PIX_FMT_NV12)
		cfg_mode = 0;

	write_field(us1_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
	write_field(us2_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
	write_field(us3_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);

	ctx->load_mmrs = true;
}

static void set_line_modes(struct vpe_ctx *ctx)
{
	struct vpe_fmt *fmt = ctx->q_data[Q_DATA_SRC].fmt;
	int line_mode = 1;

	if (fmt->fourcc == V4L2_PIX_FMT_NV12)
		line_mode = 0;		/* double lines to line buffer */

	/* regs for now */
	vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA1_IN);
	vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA2_IN);
	vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA3_IN);

	/* frame start for input luma */
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_LUMA1_IN);
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_LUMA2_IN);
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_LUMA3_IN);

	/* frame start for input chroma */
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_CHROMA1_IN);
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_CHROMA2_IN);
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_CHROMA3_IN);

	/* frame start for MV in client */
	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
		VPE_CHAN_MV_IN);
}

/*
 * Set the shadow registers that are modified when the source
 * format changes.
 */
static void set_src_registers(struct vpe_ctx *ctx)
{
	set_us_coefficients(ctx);
}

/*
 * Set the shadow registers that are modified when the destination
 * format changes.
 */
static void set_dst_registers(struct vpe_ctx *ctx)
{
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	enum v4l2_colorspace clrspc = ctx->q_data[Q_DATA_DST].colorspace;
	struct vpe_fmt *fmt = ctx->q_data[Q_DATA_DST].fmt;
	u32 val = 0;

	if (clrspc == V4L2_COLORSPACE_SRGB) {
		val |= VPE_RGB_OUT_SELECT;
		vpdma_set_bg_color(ctx->dev->vpdma,
			(struct vpdma_data_format *)fmt->vpdma_fmt[0], 0xff);
	} else if (fmt->fourcc == V4L2_PIX_FMT_NV16)
		val |= VPE_COLOR_SEPARATE_422;

	/*
	 * the source of CHR_DS and CSC is always the scaler, irrespective of
	 * whether it's used or not
	 */
	val |= VPE_DS_SRC_DEI_SCALER | VPE_CSC_SRC_DEI_SCALER;

	if (fmt->fourcc != V4L2_PIX_FMT_NV12)
		val |= VPE_DS_BYPASS;

	mmr_adb->out_fmt_reg[0] = val;

	ctx->load_mmrs = true;
}

/*
 * Set the de-interlacer shadow register values
 */
static void set_dei_regs(struct vpe_ctx *ctx)
{
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
	unsigned int src_h = s_q_data->c_rect.height;
	unsigned int src_w = s_q_data->c_rect.width;
	u32 *dei_mmr0 = &mmr_adb->dei_regs[0];
	bool deinterlace = true;
	u32 val = 0;

	/*
	 * According to the TRM, we should set the DEI to progressive bypass
	 * mode when the input content is progressive. However, the DEI is
	 * bypassed correctly for both progressive and interlaced content in
	 * interlace bypass mode, and it has been recommended not to use
	 * progressive bypass mode.
	 */
	if (!(s_q_data->flags & Q_IS_INTERLACED) || !ctx->deinterlacing) {
		deinterlace = false;
		val = VPE_DEI_INTERLACE_BYPASS;
	}

	src_h = deinterlace ? src_h * 2 : src_h;

	val |= (src_h << VPE_DEI_HEIGHT_SHIFT) |
		(src_w << VPE_DEI_WIDTH_SHIFT) |
		VPE_DEI_FIELD_FLUSH;

	*dei_mmr0 = val;

	ctx->load_mmrs = true;
}

static void set_dei_shadow_registers(struct vpe_ctx *ctx)
{
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	u32 *dei_mmr = &mmr_adb->dei_regs[0];
	const struct vpe_dei_regs *cur = &dei_regs;

	dei_mmr[2]  = cur->mdt_spacial_freq_thr_reg;
	dei_mmr[3]  = cur->edi_config_reg;
	dei_mmr[4]  = cur->edi_lut_reg0;
	dei_mmr[5]  = cur->edi_lut_reg1;
	dei_mmr[6]  = cur->edi_lut_reg2;
	dei_mmr[7]  = cur->edi_lut_reg3;

	ctx->load_mmrs = true;
}

static void config_edi_input_mode(struct vpe_ctx *ctx, int mode)
{
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	u32 *edi_config_reg = &mmr_adb->dei_regs[3];

	if (mode & 0x2)
		write_field(edi_config_reg, 1, 1, 2);	/* EDI_ENABLE_3D */

	if (mode & 0x3)
		write_field(edi_config_reg, 1, 1, 3);	/* EDI_CHROMA_3D  */

	write_field(edi_config_reg, mode, VPE_EDI_INP_MODE_MASK,
		VPE_EDI_INP_MODE_SHIFT);

	ctx->load_mmrs = true;
}
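
/*
 * On the mode values passed to config_edi_input_mode(): the driver itself
 * only ever uses the default of 0 (which device_run()'s comments describe
 * as line average, used for the first two frames) and 0x3, described there
 * as "EDI (Y + UV)", i.e. full edge-directed interpolation with both the
 * 3D and chroma-3D bits set. Other encodings presumably exist in hardware
 * but are not exercised here.
 */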

/*
 * Set the shadow registers whose values are modified when either the
 * source or destination format is changed.
 */
static int set_srcdst_params(struct vpe_ctx *ctx)
{
	struct vpe_q_data *s_q_data =  &ctx->q_data[Q_DATA_SRC];
	struct vpe_q_data *d_q_data =  &ctx->q_data[Q_DATA_DST];
	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
	unsigned int src_w = s_q_data->c_rect.width;
	unsigned int src_h = s_q_data->c_rect.height;
	unsigned int dst_w = d_q_data->c_rect.width;
	unsigned int dst_h = d_q_data->c_rect.height;
	size_t mv_buf_size;
	int ret;

	ctx->sequence = 0;
	ctx->field = V4L2_FIELD_TOP;

	if ((s_q_data->flags & Q_IS_INTERLACED) &&
			!(d_q_data->flags & Q_IS_INTERLACED)) {
		int bytes_per_line;
		const struct vpdma_data_format *mv =
			&vpdma_misc_fmts[VPDMA_DATA_FMT_MV];

		/*
		 * we make sure that the source image has a 16 byte aligned
		 * stride, so we need to do the same for the motion vector
		 * buffer by aligning its stride to the next 16 byte boundary.
		 * this extra space will not be used by the de-interlacer, but
		 * will ensure that vpdma operates correctly
		 */
		bytes_per_line = ALIGN((s_q_data->width * mv->depth) >> 3,
					VPDMA_STRIDE_ALIGN);
		mv_buf_size = bytes_per_line * s_q_data->height;

		ctx->deinterlacing = true;
		src_h <<= 1;
	} else {
		ctx->deinterlacing = false;
		mv_buf_size = 0;
	}

	free_vbs(ctx);
	ctx->src_vbs[2] = ctx->src_vbs[1] = ctx->src_vbs[0] = NULL;

	ret = realloc_mv_buffers(ctx, mv_buf_size);
	if (ret)
		return ret;

	set_cfg_modes(ctx);
	set_dei_regs(ctx);

	csc_set_coeff(ctx->dev->csc, &mmr_adb->csc_regs[0],
		s_q_data->colorspace, d_q_data->colorspace);

	sc_set_hs_coeffs(ctx->dev->sc, ctx->sc_coeff_h.addr, src_w, dst_w);
	sc_set_vs_coeffs(ctx->dev->sc, ctx->sc_coeff_v.addr, src_h, dst_h);

	sc_config_scaler(ctx->dev->sc, &mmr_adb->sc_regs0[0],
		&mmr_adb->sc_regs8[0], &mmr_adb->sc_regs17[0],
		src_w, src_h, dst_w, dst_h);

	return 0;
}
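
/*
 * Worked example of the MV buffer sizing above, assuming the MV format's
 * depth is 4 bits per pixel (see vpdma_misc_fmts in the vpdma code): for a
 * 1920x1080 interlaced source being de-interlaced,
 *
 *	bytes_per_line = ALIGN((1920 * 4) >> 3, 16) = 960
 *	mv_buf_size    = 960 * 1080 = 1036800 bytes (~1 MiB)
 *
 * and realloc_mv_buffers() allocates two such buffers so the in/out roles
 * can ping-pong between fields.
 */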

/*
 * Return the vpe_ctx structure for a given struct file
 */
static struct vpe_ctx *file2ctx(struct file *file)
{
	return container_of(file->private_data, struct vpe_ctx, fh);
}

/*
 * mem2mem callbacks
 */

/**
 * job_ready() - check whether an instance is ready to be scheduled to run
 */
static int job_ready(void *priv)
{
	struct vpe_ctx *ctx = priv;

	/*
	 * This check is needed as this might be called directly from the
	 * driver: when called by the m2m framework it is always satisfied,
	 * but when called from vpe_irq it might fail (e.g. a source stream
	 * with zero buffers ready).
	 */
	if (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) <= 0 ||
		v4l2_m2m_num_dst_bufs_ready(ctx->fh.m2m_ctx) <= 0)
		return 0;

	return 1;
}

static void job_abort(void *priv)
{
	struct vpe_ctx *ctx = priv;

	/* Will cancel the transaction in the next interrupt handler */
	ctx->aborting = 1;
}

/*
 * Lock access to the device
 */
static void vpe_lock(void *priv)
{
	struct vpe_ctx *ctx = priv;
	struct vpe_dev *dev = ctx->dev;

	mutex_lock(&dev->dev_mutex);
}

static void vpe_unlock(void *priv)
{
	struct vpe_ctx *ctx = priv;
	struct vpe_dev *dev = ctx->dev;

	mutex_unlock(&dev->dev_mutex);
}

static void vpe_dump_regs(struct vpe_dev *dev)
{
#define DUMPREG(r) vpe_dbg(dev, "%-35s %08x\n", #r, read_reg(dev, VPE_##r))

	vpe_dbg(dev, "VPE Registers:\n");

	DUMPREG(PID);
	DUMPREG(SYSCONFIG);
	DUMPREG(INT0_STATUS0_RAW);
	DUMPREG(INT0_STATUS0);
	DUMPREG(INT0_ENABLE0);
	DUMPREG(INT0_STATUS1_RAW);
	DUMPREG(INT0_STATUS1);
	DUMPREG(INT0_ENABLE1);
	DUMPREG(CLK_ENABLE);
	DUMPREG(CLK_RESET);
	DUMPREG(CLK_FORMAT_SELECT);
	DUMPREG(CLK_RANGE_MAP);
	DUMPREG(US1_R0);
	DUMPREG(US1_R1);
	DUMPREG(US1_R2);
	DUMPREG(US1_R3);
	DUMPREG(US1_R4);
	DUMPREG(US1_R5);
	DUMPREG(US1_R6);
	DUMPREG(US1_R7);
	DUMPREG(US2_R0);
	DUMPREG(US2_R1);
	DUMPREG(US2_R2);
	DUMPREG(US2_R3);
	DUMPREG(US2_R4);
	DUMPREG(US2_R5);
	DUMPREG(US2_R6);
	DUMPREG(US2_R7);
	DUMPREG(US3_R0);
	DUMPREG(US3_R1);
	DUMPREG(US3_R2);
	DUMPREG(US3_R3);
	DUMPREG(US3_R4);
	DUMPREG(US3_R5);
	DUMPREG(US3_R6);
	DUMPREG(US3_R7);
	DUMPREG(DEI_FRAME_SIZE);
	DUMPREG(MDT_BYPASS);
	DUMPREG(MDT_SF_THRESHOLD);
	DUMPREG(EDI_CONFIG);
	DUMPREG(DEI_EDI_LUT_R0);
	DUMPREG(DEI_EDI_LUT_R1);
	DUMPREG(DEI_EDI_LUT_R2);
	DUMPREG(DEI_EDI_LUT_R3);
	DUMPREG(DEI_FMD_WINDOW_R0);
	DUMPREG(DEI_FMD_WINDOW_R1);
	DUMPREG(DEI_FMD_CONTROL_R0);
	DUMPREG(DEI_FMD_CONTROL_R1);
	DUMPREG(DEI_FMD_STATUS_R0);
	DUMPREG(DEI_FMD_STATUS_R1);
	DUMPREG(DEI_FMD_STATUS_R2);
#undef DUMPREG

	sc_dump_regs(dev->sc);
	csc_dump_regs(dev->csc);
}

static void add_out_dtd(struct vpe_ctx *ctx, int port)
{
	struct vpe_q_data *q_data = &ctx->q_data[Q_DATA_DST];
	const struct vpe_port_data *p_data = &port_data[port];
	struct vb2_buffer *vb = &ctx->dst_vb->vb2_buf;
	struct vpe_fmt *fmt = q_data->fmt;
	const struct vpdma_data_format *vpdma_fmt;
	int mv_buf_selector = !ctx->src_mv_buf_selector;
	dma_addr_t dma_addr;
	u32 flags = 0;
	u32 offset = 0;
	u32 stride;

	if (port == VPE_PORT_MV_OUT) {
		vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
		dma_addr = ctx->mv_buf_dma[mv_buf_selector];
		q_data = &ctx->q_data[Q_DATA_SRC];
		stride = ALIGN((q_data->width * vpdma_fmt->depth) >> 3,
			       VPDMA_STRIDE_ALIGN);
	} else {
		/* to incorporate interleaved formats */
		int plane = fmt->coplanar ? p_data->vb_part : 0;

		vpdma_fmt = fmt->vpdma_fmt[plane];
		/*
		 * If we are using a single-plane buffer and need to feed
		 * a separate vpdma chroma channel, the chroma data follows
		 * the luma data in the same buffer.
		 */
		if (q_data->nplanes == 1 && plane) {
			dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
			/* Compute required offset */
			offset = q_data->bytesperline[0] * q_data->height;
		} else {
			dma_addr = vb2_dma_contig_plane_dma_addr(vb, plane);
			/* Use address as is, no offset */
			offset = 0;
		}
		if (!dma_addr) {
			vpe_err(ctx->dev,
				"acquiring output buffer(%d) dma_addr failed\n",
				port);
			return;
		}
		/* Apply the offset */
		dma_addr += offset;
		stride = q_data->bytesperline[VPE_LUMA];
	}

	if (q_data->flags & Q_DATA_FRAME_1D)
		flags |= VPDMA_DATA_FRAME_1D;
	if (q_data->flags & Q_DATA_MODE_TILED)
		flags |= VPDMA_DATA_MODE_TILED;

	vpdma_set_max_size(ctx->dev->vpdma, VPDMA_MAX_SIZE1,
			   MAX_W, MAX_H);

	vpdma_add_out_dtd(&ctx->desc_list, q_data->width,
			  stride, &q_data->c_rect,
			  vpdma_fmt, dma_addr, MAX_OUT_WIDTH_REG1,
			  MAX_OUT_HEIGHT_REG1, p_data->channel, flags);
}

static void add_in_dtd(struct vpe_ctx *ctx, int port)
{
	struct vpe_q_data *q_data = &ctx->q_data[Q_DATA_SRC];
	const struct vpe_port_data *p_data = &port_data[port];
	struct vb2_buffer *vb = &ctx->src_vbs[p_data->vb_index]->vb2_buf;
	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
	struct vpe_fmt *fmt = q_data->fmt;
	const struct vpdma_data_format *vpdma_fmt;
	int mv_buf_selector = ctx->src_mv_buf_selector;
	int field = vbuf->field == V4L2_FIELD_BOTTOM;
	int frame_width, frame_height;
	dma_addr_t dma_addr;
	u32 flags = 0;
	u32 offset = 0;
	u32 stride;

	if (port == VPE_PORT_MV_IN) {
		vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
		dma_addr = ctx->mv_buf_dma[mv_buf_selector];
		stride = ALIGN((q_data->width * vpdma_fmt->depth) >> 3,
			       VPDMA_STRIDE_ALIGN);
	} else {
		/* to incorporate interleaved formats */
		int plane = fmt->coplanar ? p_data->vb_part : 0;

		vpdma_fmt = fmt->vpdma_fmt[plane];
		/*
		 * If we are using a single-plane buffer and need to feed
		 * a separate vpdma chroma channel, the chroma data follows
		 * the luma data in the same buffer.
		 */
		if (q_data->nplanes == 1 && plane) {
			dma_addr = vb2_dma_contig_plane_dma_addr(vb, 0);
			/* Compute required offset */
			offset = q_data->bytesperline[0] * q_data->height;
		} else {
			dma_addr = vb2_dma_contig_plane_dma_addr(vb, plane);
			/* Use address as is, no offset */
			offset = 0;
		}
		if (!dma_addr) {
			vpe_err(ctx->dev,
				"acquiring input buffer(%d) dma_addr failed\n",
				port);
			return;
		}
		/* Apply the offset */
		dma_addr += offset;
		stride = q_data->bytesperline[VPE_LUMA];

		if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB) {
			/*
			 * Use the top or bottom field of the same vb
			 * alternately:
			 * f, f-1, f-2 = TBT when seq is even
			 * f, f-1, f-2 = BTB when seq is odd
			 */
			field = (p_data->vb_index + (ctx->sequence % 2)) % 2;

			if (field) {
				/*
				 * bottom field of a SEQ_TB buffer:
				 * skip the top field data by advancing the
				 * address by one field's worth of lines
				 */
				int height = q_data->height / 2;
				int bpp = fmt->fourcc == V4L2_PIX_FMT_NV12 ?
						1 : (vpdma_fmt->depth >> 3);
				if (plane)
					height /= 2;
				dma_addr += q_data->width * height * bpp;
			}
		}
	}

	if (q_data->flags & Q_DATA_FRAME_1D)
		flags |= VPDMA_DATA_FRAME_1D;
	if (q_data->flags & Q_DATA_MODE_TILED)
		flags |= VPDMA_DATA_MODE_TILED;

	frame_width = q_data->c_rect.width;
	frame_height = q_data->c_rect.height;

	if (p_data->vb_part && fmt->fourcc == V4L2_PIX_FMT_NV12)
		frame_height /= 2;

	vpdma_add_in_dtd(&ctx->desc_list, q_data->width, stride,
			 &q_data->c_rect, vpdma_fmt, dma_addr,
			 p_data->channel, field, flags, frame_width,
			 frame_height, 0, 0);
}
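
/*
 * Worked example of the SEQ_TB bottom-field offset above, for a hypothetical
 * 1920x1080 NV12 SEQ_TB buffer: the top field occupies the first
 * height/2 = 540 lines of each plane, so the bottom-field luma starts
 * 1920 * 540 * 1 bytes into the luma plane, and the bottom-field chroma
 * 1920 * 270 * 1 bytes into the chroma plane (the chroma plane has half the
 * lines, hence the extra "height /= 2"). bpp is forced to 1 for NV12 since
 * both of its planes store one byte per pixel column per line, whereas the
 * vpdma depth field averages bits over full-resolution pixels.
 */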

/*
 * Enable the expected IRQ sources
 */
static void enable_irqs(struct vpe_ctx *ctx)
{
	write_reg(ctx->dev, VPE_INT0_ENABLE0_SET, VPE_INT0_LIST0_COMPLETE);
	write_reg(ctx->dev, VPE_INT0_ENABLE1_SET, VPE_DEI_ERROR_INT |
				VPE_DS1_UV_ERROR_INT);

	vpdma_enable_list_complete_irq(ctx->dev->vpdma, 0, 0, true);
}

static void disable_irqs(struct vpe_ctx *ctx)
{
	write_reg(ctx->dev, VPE_INT0_ENABLE0_CLR, 0xffffffff);
	write_reg(ctx->dev, VPE_INT0_ENABLE1_CLR, 0xffffffff);

	vpdma_enable_list_complete_irq(ctx->dev->vpdma, 0, 0, false);
}

/* device_run() - prepares and starts the device
 *
 * This function is only called when both the source and destination
 * buffers are in place.
 */
static void device_run(void *priv)
{
	struct vpe_ctx *ctx = priv;
	struct sc_data *sc = ctx->dev->sc;
	struct vpe_q_data *d_q_data = &ctx->q_data[Q_DATA_DST];
	struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];

	if (ctx->deinterlacing && s_q_data->flags & Q_DATA_INTERLACED_SEQ_TB &&
		ctx->sequence % 2 == 0) {
		/*
		 * When using a SEQ_TB buffer for the first (top) field, don't
		 * remove it from the queue: the next (bottom) field is in the
		 * same buffer, and keeping it queued also keeps job_ready()
		 * from failing. It is removed when the bottom field is used.
		 */
		ctx->src_vbs[0] = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
		WARN_ON(ctx->src_vbs[0] == NULL);
	} else {
		ctx->src_vbs[0] = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
		WARN_ON(ctx->src_vbs[0] == NULL);
	}

	ctx->dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
	WARN_ON(ctx->dst_vb == NULL);

	if (ctx->deinterlacing) {

		if (ctx->src_vbs[2] == NULL) {
			ctx->src_vbs[2] = ctx->src_vbs[0];
			WARN_ON(ctx->src_vbs[2] == NULL);
			ctx->src_vbs[1] = ctx->src_vbs[0];
			WARN_ON(ctx->src_vbs[1] == NULL);
		}

		/*
		 * we have output the first 2 frames using line average; we
		 * now switch to the EDI de-interlacer
		 */
		if (ctx->sequence == 2)
			config_edi_input_mode(ctx, 0x3); /* EDI (Y + UV) */
	}

	/* config descriptors */
	if (ctx->dev->loaded_mmrs != ctx->mmr_adb.dma_addr || ctx->load_mmrs) {
		vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->mmr_adb);
		vpdma_add_cfd_adb(&ctx->desc_list, CFD_MMR_CLIENT, &ctx->mmr_adb);

		set_line_modes(ctx);

		ctx->dev->loaded_mmrs = ctx->mmr_adb.dma_addr;
		ctx->load_mmrs = false;
	}

	if (sc->loaded_coeff_h != ctx->sc_coeff_h.dma_addr ||
			sc->load_coeff_h) {
		vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->sc_coeff_h);
		vpdma_add_cfd_block(&ctx->desc_list, CFD_SC_CLIENT,
			&ctx->sc_coeff_h, 0);

		sc->loaded_coeff_h = ctx->sc_coeff_h.dma_addr;
		sc->load_coeff_h = false;
	}

	if (sc->loaded_coeff_v != ctx->sc_coeff_v.dma_addr ||
			sc->load_coeff_v) {
		vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->sc_coeff_v);
		vpdma_add_cfd_block(&ctx->desc_list, CFD_SC_CLIENT,
			&ctx->sc_coeff_v, SC_COEF_SRAM_SIZE >> 4);

		sc->loaded_coeff_v = ctx->sc_coeff_v.dma_addr;
		sc->load_coeff_v = false;
	}

	/* output data descriptors */
	if (ctx->deinterlacing)
		add_out_dtd(ctx, VPE_PORT_MV_OUT);

	if (d_q_data->colorspace == V4L2_COLORSPACE_SRGB) {
		add_out_dtd(ctx, VPE_PORT_RGB_OUT);
	} else {
		add_out_dtd(ctx, VPE_PORT_LUMA_OUT);
		if (d_q_data->fmt->coplanar)
			add_out_dtd(ctx, VPE_PORT_CHROMA_OUT);
	}

	/* input data descriptors */
	if (ctx->deinterlacing) {
		add_in_dtd(ctx, VPE_PORT_LUMA3_IN);
		add_in_dtd(ctx, VPE_PORT_CHROMA3_IN);

		add_in_dtd(ctx, VPE_PORT_LUMA2_IN);
		add_in_dtd(ctx, VPE_PORT_CHROMA2_IN);
	}

	add_in_dtd(ctx, VPE_PORT_LUMA1_IN);
	add_in_dtd(ctx, VPE_PORT_CHROMA1_IN);

	if (ctx->deinterlacing)
		add_in_dtd(ctx, VPE_PORT_MV_IN);

	/* sync on channel control descriptors for input ports */
	vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_LUMA1_IN);
	vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_CHROMA1_IN);

	if (ctx->deinterlacing) {
		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
			VPE_CHAN_LUMA2_IN);
		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
			VPE_CHAN_CHROMA2_IN);

		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
			VPE_CHAN_LUMA3_IN);
		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
			VPE_CHAN_CHROMA3_IN);

		vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_MV_IN);
	}

	/* sync on channel control descriptors for output ports */
	if (d_q_data->colorspace == V4L2_COLORSPACE_SRGB) {
		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
			VPE_CHAN_RGB_OUT);
	} else {
		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
			VPE_CHAN_LUMA_OUT);
		if (d_q_data->fmt->coplanar)
			vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
				VPE_CHAN_CHROMA_OUT);
	}

	if (ctx->deinterlacing)
		vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_MV_OUT);

	enable_irqs(ctx);

	vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->desc_list.buf);
	vpdma_submit_descs(ctx->dev->vpdma, &ctx->desc_list, 0);
}

static void dei_error(struct vpe_ctx *ctx)
{
	dev_warn(ctx->dev->v4l2_dev.dev,
		"received DEI error interrupt\n");
}

static void ds1_uv_error(struct vpe_ctx *ctx)
{
	dev_warn(ctx->dev->v4l2_dev.dev,
		"received downsampler error interrupt\n");
}

static irqreturn_t vpe_irq(int irq_vpe, void *data)
{
	struct vpe_dev *dev = (struct vpe_dev *)data;
	struct vpe_ctx *ctx;
	struct vpe_q_data *d_q_data;
	struct vb2_v4l2_buffer *s_vb, *d_vb;
	unsigned long flags;
	u32 irqst0, irqst1;
	bool list_complete = false;

	irqst0 = read_reg(dev, VPE_INT0_STATUS0);
	if (irqst0) {
		write_reg(dev, VPE_INT0_STATUS0_CLR, irqst0);
		vpe_dbg(dev, "INT0_STATUS0 = 0x%08x\n", irqst0);
	}

	irqst1 = read_reg(dev, VPE_INT0_STATUS1);
	if (irqst1) {
		write_reg(dev, VPE_INT0_STATUS1_CLR, irqst1);
		vpe_dbg(dev, "INT0_STATUS1 = 0x%08x\n", irqst1);
	}

	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
	if (!ctx) {
		vpe_err(dev, "instance released before end of transaction\n");
		goto handled;
	}

	if (irqst1) {
		if (irqst1 & VPE_DEI_ERROR_INT) {
			irqst1 &= ~VPE_DEI_ERROR_INT;
			dei_error(ctx);
		}
		if (irqst1 & VPE_DS1_UV_ERROR_INT) {
			irqst1 &= ~VPE_DS1_UV_ERROR_INT;
			ds1_uv_error(ctx);
		}
	}

	if (irqst0) {
		if (irqst0 & VPE_INT0_LIST0_COMPLETE)
			vpdma_clear_list_stat(ctx->dev->vpdma, 0, 0);

		irqst0 &= ~(VPE_INT0_LIST0_COMPLETE);
		list_complete = true;
	}

	if (irqst0 | irqst1) {
		dev_warn(dev->v4l2_dev.dev, "Unexpected interrupt: INT0_STATUS0 = 0x%08x, INT0_STATUS1 = 0x%08x\n",
			irqst0, irqst1);
	}

	/*
	 * Setup next operation only when list complete IRQ occurs
	 * otherwise, skip the following code
	 */
	if (!list_complete)
		goto handled;

	disable_irqs(ctx);

	vpdma_unmap_desc_buf(dev->vpdma, &ctx->desc_list.buf);
	vpdma_unmap_desc_buf(dev->vpdma, &ctx->mmr_adb);
	vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_h);
	vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_v);

	vpdma_reset_desc_list(&ctx->desc_list);

	/* the previous dst mv buffer becomes the next src mv buffer */
	ctx->src_mv_buf_selector = !ctx->src_mv_buf_selector;

	s_vb = ctx->src_vbs[0];
	d_vb = ctx->dst_vb;

	d_vb->flags = s_vb->flags;
	d_vb->vb2_buf.timestamp = s_vb->vb2_buf.timestamp;

	if (s_vb->flags & V4L2_BUF_FLAG_TIMECODE)
		d_vb->timecode = s_vb->timecode;

	d_vb->sequence = ctx->sequence;
	s_vb->sequence = ctx->sequence;

	d_q_data = &ctx->q_data[Q_DATA_DST];
	if (d_q_data->flags & Q_IS_INTERLACED) {
		d_vb->field = ctx->field;
		if (ctx->field == V4L2_FIELD_BOTTOM) {
			ctx->sequence++;
			ctx->field = V4L2_FIELD_TOP;
		} else {
			WARN_ON(ctx->field != V4L2_FIELD_TOP);
			ctx->field = V4L2_FIELD_BOTTOM;
		}
	} else {
		d_vb->field = V4L2_FIELD_NONE;
		ctx->sequence++;
	}

	if (ctx->deinterlacing) {
		/*
		 * Allow source buffer to be dequeued only if it won't be used
		 * in the next iteration. All vbs are initialized to first
		 * buffer and we are shifting buffers every iteration, for the
		 * first two iterations, no buffer will be dequeued.
		 * This ensures that driver will keep (n-2)th (n-1)th and (n)th
		 * field when deinterlacing is enabled
		 */
		if (ctx->src_vbs[2] != ctx->src_vbs[1])
			s_vb = ctx->src_vbs[2];
		else
			s_vb = NULL;
	}

	spin_lock_irqsave(&dev->lock, flags);

	if (s_vb)
		v4l2_m2m_buf_done(s_vb, VB2_BUF_STATE_DONE);

	v4l2_m2m_buf_done(d_vb, VB2_BUF_STATE_DONE);

	spin_unlock_irqrestore(&dev->lock, flags);

	if (ctx->deinterlacing) {
		ctx->src_vbs[2] = ctx->src_vbs[1];
		ctx->src_vbs[1] = ctx->src_vbs[0];
	}

	/*
	 * Since vb2_buf_done() has already been called for these buffers,
	 * we can now NULL them out so that we won't try to clean up stray
	 * pointers later on.
	 */
	ctx->src_vbs[0] = NULL;
	ctx->dst_vb = NULL;

	if (ctx->aborting)
		goto finished;

	ctx->bufs_completed++;
	if (ctx->bufs_completed < ctx->bufs_per_job && job_ready(ctx)) {
		device_run(ctx);
		goto handled;
	}

finished:
	vpe_dbg(ctx->dev, "finishing transaction\n");
	ctx->bufs_completed = 0;
	v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);
handled:
	return IRQ_HANDLED;
}

/*
 * video ioctls
 */
static int vpe_querycap(struct file *file, void *priv,
			struct v4l2_capability *cap)
{
	strncpy(cap->driver, VPE_MODULE_NAME, sizeof(cap->driver) - 1);
	strncpy(cap->card, VPE_MODULE_NAME, sizeof(cap->card) - 1);
	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
		VPE_MODULE_NAME);
	cap->device_caps  = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
	return 0;
}

static int __enum_fmt(struct v4l2_fmtdesc *f, u32 type)
{
	int i, index;
	struct vpe_fmt *fmt = NULL;

	index = 0;
	for (i = 0; i < ARRAY_SIZE(vpe_formats); ++i) {
		if (vpe_formats[i].types & type) {
			if (index == f->index) {
				fmt = &vpe_formats[i];
				break;
			}
			index++;
		}
	}

	if (!fmt)
		return -EINVAL;

	strncpy(f->description, fmt->name, sizeof(f->description) - 1);
	f->pixelformat = fmt->fourcc;
	return 0;
}

static int vpe_enum_fmt(struct file *file, void *priv,
				struct v4l2_fmtdesc *f)
{
	if (V4L2_TYPE_IS_OUTPUT(f->type))
		return __enum_fmt(f, VPE_FMT_TYPE_OUTPUT);

	return __enum_fmt(f, VPE_FMT_TYPE_CAPTURE);
}

static int vpe_g_fmt(struct file *file, void *priv, struct v4l2_format *f)
{
	struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
	struct vpe_ctx *ctx = file2ctx(file);
	struct vb2_queue *vq;
	struct vpe_q_data *q_data;
	int i;

	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
	if (!vq)
		return -EINVAL;

	q_data = get_q_data(ctx, f->type);

	pix->width = q_data->width;
	pix->height = q_data->height;
	pix->pixelformat = q_data->fmt->fourcc;
	pix->field = q_data->field;

	if (V4L2_TYPE_IS_OUTPUT(f->type)) {
		pix->colorspace = q_data->colorspace;
	} else {
		struct vpe_q_data *s_q_data;

		/* get colorspace from the source queue */
		s_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);

		pix->colorspace = s_q_data->colorspace;
	}

	pix->num_planes = q_data->nplanes;

	for (i = 0; i < pix->num_planes; i++) {
		pix->plane_fmt[i].bytesperline = q_data->bytesperline[i];
		pix->plane_fmt[i].sizeimage = q_data->sizeimage[i];
	}

	return 0;
}

static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f,
		       struct vpe_fmt *fmt, int type)
{
	struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
	struct v4l2_plane_pix_format *plane_fmt;
	unsigned int w_align;
	int i, depth, depth_bytes, height;
	unsigned int stride = 0;

	if (!fmt || !(fmt->types & type)) {
		vpe_dbg(ctx->dev, "Fourcc format (0x%08x) invalid.\n",
			pix->pixelformat);
		fmt = __find_format(V4L2_PIX_FMT_YUYV);
	}

	if (pix->field != V4L2_FIELD_NONE && pix->field != V4L2_FIELD_ALTERNATE
			&& pix->field != V4L2_FIELD_SEQ_TB)
		pix->field = V4L2_FIELD_NONE;

	depth = fmt->vpdma_fmt[VPE_LUMA]->depth;

	/*
	 * the line stride should be 16 byte aligned for VPDMA to work; based
	 * on the bytes per pixel, figure out how much the width should be
	 * aligned to make sure the line stride is 16 byte aligned
	 */
	depth_bytes = depth >> 3;

	if (depth_bytes == 3) {
		/*
		 * if bpp is 3 (as in some RGB formats), aligning the pixel
		 * width doesn't really help in ensuring the line stride is
		 * 16 byte aligned
		 */
		w_align = 4;
	} else {
		/*
		 * for the remaining bpp values (4, 2 and 1), pixel width
		 * alignment can ensure a line stride alignment of 16 bytes.
		 * For example, if bpp is 2, then the line stride can be
		 * 16 byte aligned if the width is 8 byte aligned
		 */

		/*
		 * HACK: using order_base_2() here causes lots of asm output
		 * errors with smatch, on i386:
		 * ./arch/x86/include/asm/bitops.h:457:22:
		 *		 warning: asm output is not an lvalue
		 * Perhaps some gcc optimization is doing the wrong thing
		 * there.
		 * Let's get rid of them by doing the calculation in two steps
		 */
		w_align = roundup_pow_of_two(VPDMA_DESC_ALIGN / depth_bytes);
		w_align = ilog2(w_align);
	}
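
	/*
	 * A sketch of the w_align computation, assuming VPDMA_DESC_ALIGN is
	 * 16 bytes: for a 2-byte/pixel format such as YUYV this gives
	 * roundup_pow_of_two(16 / 2) = 8 and w_align = ilog2(8) = 3. Since
	 * v4l_bound_align_image() below takes alignments as powers of two,
	 * the width is then rounded to a multiple of 2^3 = 8 pixels, i.e. a
	 * 16-byte-aligned stride.
	 */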

	v4l_bound_align_image(&pix->width, MIN_W, MAX_W, w_align,
			      &pix->height, MIN_H, MAX_H, H_ALIGN,
			      S_ALIGN);

	if (!pix->num_planes || pix->num_planes > 2)
		pix->num_planes = fmt->coplanar ? 2 : 1;
	else if (pix->num_planes > 1 && !fmt->coplanar)
		pix->num_planes = 1;

	pix->pixelformat = fmt->fourcc;

	/*
	 * For the actual image parameters, we need to consider the field
	 * height of the image for SEQ_TB buffers.
	 */
	if (pix->field == V4L2_FIELD_SEQ_TB)
		height = pix->height / 2;
	else
		height = pix->height;

	if (!pix->colorspace) {
		if (fmt->fourcc == V4L2_PIX_FMT_RGB24 ||
				fmt->fourcc == V4L2_PIX_FMT_BGR24 ||
				fmt->fourcc == V4L2_PIX_FMT_RGB32 ||
				fmt->fourcc == V4L2_PIX_FMT_BGR32) {
			pix->colorspace = V4L2_COLORSPACE_SRGB;
		} else {
			if (height > 1280)	/* HD */
				pix->colorspace = V4L2_COLORSPACE_REC709;
			else			/* SD */
				pix->colorspace = V4L2_COLORSPACE_SMPTE170M;
		}
	}

	memset(pix->reserved, 0, sizeof(pix->reserved));
	for (i = 0; i < pix->num_planes; i++) {
		plane_fmt = &pix->plane_fmt[i];
		depth = fmt->vpdma_fmt[i]->depth;

		stride = (pix->width * fmt->vpdma_fmt[VPE_LUMA]->depth) >> 3;
		if (stride > plane_fmt->bytesperline)
			plane_fmt->bytesperline = stride;

		plane_fmt->bytesperline = clamp_t(u32, plane_fmt->bytesperline,
						  stride,
						  VPDMA_MAX_STRIDE);

		plane_fmt->bytesperline = ALIGN(plane_fmt->bytesperline,
						VPDMA_STRIDE_ALIGN);

		if (i == VPE_LUMA) {
			plane_fmt->sizeimage = pix->height *
					       plane_fmt->bytesperline;

			if (pix->num_planes == 1 && fmt->coplanar)
				plane_fmt->sizeimage += pix->height *
					plane_fmt->bytesperline *
					fmt->vpdma_fmt[VPE_CHROMA]->depth >> 3;

		} else { /* i == VPE_CHROMA */
			plane_fmt->sizeimage = (pix->height *
					       plane_fmt->bytesperline *
					       depth) >> 3;
		}
		memset(plane_fmt->reserved, 0, sizeof(plane_fmt->reserved));
	}

	return 0;
}

static int vpe_try_fmt(struct file *file, void *priv, struct v4l2_format *f)
{
	struct vpe_ctx *ctx = file2ctx(file);
	struct vpe_fmt *fmt = find_format(f);

	if (V4L2_TYPE_IS_OUTPUT(f->type))
		return __vpe_try_fmt(ctx, f, fmt, VPE_FMT_TYPE_OUTPUT);
	else
		return __vpe_try_fmt(ctx, f, fmt, VPE_FMT_TYPE_CAPTURE);
}

static int __vpe_s_fmt(struct vpe_ctx *ctx, struct v4l2_format *f)
{
	struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
	struct v4l2_plane_pix_format *plane_fmt;
	struct vpe_q_data *q_data;
	struct vb2_queue *vq;
	int i;

	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
	if (!vq)
		return -EINVAL;

	if (vb2_is_busy(vq)) {
		vpe_err(ctx->dev, "queue busy\n");
		return -EBUSY;
	}

	q_data = get_q_data(ctx, f->type);
	if (!q_data)
		return -EINVAL;

	q_data->fmt		= find_format(f);
	q_data->width		= pix->width;
	q_data->height		= pix->height;
	q_data->colorspace	= pix->colorspace;
	q_data->field		= pix->field;
	q_data->nplanes		= pix->num_planes;

	for (i = 0; i < pix->num_planes; i++) {
		plane_fmt = &pix->plane_fmt[i];

		q_data->bytesperline[i]	= plane_fmt->bytesperline;
		q_data->sizeimage[i]	= plane_fmt->sizeimage;
	}

	q_data->c_rect.left	= 0;
	q_data->c_rect.top	= 0;
	q_data->c_rect.width	= q_data->width;
	q_data->c_rect.height	= q_data->height;

	if (q_data->field == V4L2_FIELD_ALTERNATE)
		q_data->flags |= Q_DATA_INTERLACED_ALTERNATE;
	else if (q_data->field == V4L2_FIELD_SEQ_TB)
		q_data->flags |= Q_DATA_INTERLACED_SEQ_TB;
	else
		q_data->flags &= ~Q_IS_INTERLACED;

	/* the crop height is halved for the case of SEQ_TB buffers */
	if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB)
		q_data->c_rect.height /= 2;

	vpe_dbg(ctx->dev, "Setting format for type %d, wxh: %dx%d, fmt: %d bpl_y %d",
		f->type, q_data->width, q_data->height, q_data->fmt->fourcc,
		q_data->bytesperline[VPE_LUMA]);
	if (q_data->nplanes == 2)
		vpe_dbg(ctx->dev, " bpl_uv %d\n",
			q_data->bytesperline[VPE_CHROMA]);

	return 0;
}
1803 
1804 static int vpe_s_fmt(struct file *file, void *priv, struct v4l2_format *f)
1805 {
1806 	int ret;
1807 	struct vpe_ctx *ctx = file2ctx(file);
1808 
1809 	ret = vpe_try_fmt(file, priv, f);
1810 	if (ret)
1811 		return ret;
1812 
1813 	ret = __vpe_s_fmt(ctx, f);
1814 	if (ret)
1815 		return ret;
1816 
1817 	if (V4L2_TYPE_IS_OUTPUT(f->type))
1818 		set_src_registers(ctx);
1819 	else
1820 		set_dst_registers(ctx);
1821 
1822 	return set_srcdst_params(ctx);
1823 }
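/*
 * Example userspace usage (an illustrative sketch only, not part of the
 * driver): configuring the source side through the standard multiplanar
 * S_FMT ioctl.
 *
 *	struct v4l2_format f = { 0 };
 *
 *	f.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
 *	f.fmt.pix_mp.width = 1920;
 *	f.fmt.pix_mp.height = 1080;
 *	f.fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUYV;
 *	f.fmt.pix_mp.field = V4L2_FIELD_NONE;
 *	if (ioctl(fd, VIDIOC_S_FMT, &f) < 0)
 *		perror("VIDIOC_S_FMT");
 *
 * vpe_s_fmt() runs the request through vpe_try_fmt() first, so userspace
 * should read the adjusted width/height/bytesperline back from 'f' rather
 * than assume the requested values were taken as-is.
 */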
1824 
1825 static int __vpe_try_selection(struct vpe_ctx *ctx, struct v4l2_selection *s)
1826 {
1827 	struct vpe_q_data *q_data;
1828 	int height;
1829 
1830 	if ((s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
1831 	    (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT))
1832 		return -EINVAL;
1833 
1834 	q_data = get_q_data(ctx, s->type);
1835 	if (!q_data)
1836 		return -EINVAL;
1837 
1838 	switch (s->target) {
1839 	case V4L2_SEL_TGT_COMPOSE:
1840 		/*
1841 		 * COMPOSE target is only valid for the capture buffer type;
1842 		 * return an error for the output buffer type
1843 		 */
1844 		if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1845 			return -EINVAL;
1846 		break;
1847 	case V4L2_SEL_TGT_CROP:
1848 		/*
1849 		 * CROP target is only valid for the output buffer type;
1850 		 * return an error for the capture buffer type
1851 		 */
1852 		if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1853 			return -EINVAL;
1854 		break;
1855 	/*
1856 	 * BOUNDS and DEFAULT crop/compose targets are invalid targets to
1857 	 * try/set
1858 	 */
1859 	default:
1860 		return -EINVAL;
1861 	}
1862 
1863 	/*
1864 	 * For SEQ_TB buffers, the crop height must be bounded by the field
1865 	 * height, not the full buffer height
1866 	 */
1867 	if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB)
1868 		height = q_data->height / 2;
1869 	else
1870 		height = q_data->height;
1871 
1872 	if (s->r.top < 0 || s->r.left < 0) {
1873 		vpe_err(ctx->dev, "negative values for top and left\n");
1874 		s->r.top = s->r.left = 0;
1875 	}
1876 
1877 	v4l_bound_align_image(&s->r.width, MIN_W, q_data->width, 1,
1878 		&s->r.height, MIN_H, height, H_ALIGN, S_ALIGN);
1879 
1880 	/* adjust left/top if cropping rectangle is out of bounds */
1881 	if (s->r.left + s->r.width > q_data->width)
1882 		s->r.left = q_data->width - s->r.width;
1883 	if (s->r.top + s->r.height > q_data->height)
1884 		s->r.top = q_data->height - s->r.height;
1885 
1886 	return 0;
1887 }
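/*
 * Illustrative example of the clamping above: on a 1920x1080 OUTPUT queue
 * in SEQ_TB mode the crop height is bounded by the field height (540), so
 * a request of left=1800, top=0, 320x600 is first reduced to 320x540 by
 * v4l_bound_align_image(), and left is then pulled back to
 * 1920 - 320 = 1600 so the rectangle stays inside the frame.
 */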
1888 
1889 static int vpe_g_selection(struct file *file, void *fh,
1890 		struct v4l2_selection *s)
1891 {
1892 	struct vpe_ctx *ctx = file2ctx(file);
1893 	struct vpe_q_data *q_data;
1894 	bool use_c_rect = false;
1895 
1896 	if ((s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
1897 	    (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT))
1898 		return -EINVAL;
1899 
1900 	q_data = get_q_data(ctx, s->type);
1901 	if (!q_data)
1902 		return -EINVAL;
1903 
1904 	switch (s->target) {
1905 	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
1906 	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
1907 		if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1908 			return -EINVAL;
1909 		break;
1910 	case V4L2_SEL_TGT_CROP_BOUNDS:
1911 	case V4L2_SEL_TGT_CROP_DEFAULT:
1912 		if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1913 			return -EINVAL;
1914 		break;
1915 	case V4L2_SEL_TGT_COMPOSE:
1916 		if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1917 			return -EINVAL;
1918 		use_c_rect = true;
1919 		break;
1920 	case V4L2_SEL_TGT_CROP:
1921 		if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1922 			return -EINVAL;
1923 		use_c_rect = true;
1924 		break;
1925 	default:
1926 		return -EINVAL;
1927 	}
1928 
1929 	if (use_c_rect) {
1930 		/*
1931 		 * for CROP/COMPOSE target type, return c_rect params from the
1932 		 * respective buffer type
1933 		 */
1934 		s->r = q_data->c_rect;
1935 	} else {
1936 		/*
1937 		 * for DEFAULT/BOUNDS target type, return width and height from
1938 		 * S_FMT of the respective buffer type
1939 		 */
1940 		s->r.left = 0;
1941 		s->r.top = 0;
1942 		s->r.width = q_data->width;
1943 		s->r.height = q_data->height;
1944 	}
1945 
1946 	return 0;
1947 }
1948 
1949 
1950 static int vpe_s_selection(struct file *file, void *fh,
1951 		struct v4l2_selection *s)
1952 {
1953 	struct vpe_ctx *ctx = file2ctx(file);
1954 	struct vpe_q_data *q_data;
1955 	struct v4l2_selection sel = *s;
1956 	int ret;
1957 
1958 	ret = __vpe_try_selection(ctx, &sel);
1959 	if (ret)
1960 		return ret;
1961 
1962 	q_data = get_q_data(ctx, sel.type);
1963 	if (!q_data)
1964 		return -EINVAL;
1965 
1966 	if ((q_data->c_rect.left == sel.r.left) &&
1967 			(q_data->c_rect.top == sel.r.top) &&
1968 			(q_data->c_rect.width == sel.r.width) &&
1969 			(q_data->c_rect.height == sel.r.height)) {
1970 		vpe_dbg(ctx->dev,
1971 			"requested crop/compose values are already set\n");
1972 		return 0;
1973 	}
1974 
1975 	q_data->c_rect = sel.r;
1976 
1977 	return set_srcdst_params(ctx);
1978 }
1979 
1980 /*
1981  * Defines the number of buffers/frames a context can process with VPE
1982  * before switching to a different context. The default is 1 buffer per
1983  * context.
1983  */
1984 #define V4L2_CID_VPE_BUFS_PER_JOB		(V4L2_CID_USER_TI_VPE_BASE + 0)
1985 
1986 static int vpe_s_ctrl(struct v4l2_ctrl *ctrl)
1987 {
1988 	struct vpe_ctx *ctx =
1989 		container_of(ctrl->handler, struct vpe_ctx, hdl);
1990 
1991 	switch (ctrl->id) {
1992 	case V4L2_CID_VPE_BUFS_PER_JOB:
1993 		ctx->bufs_per_job = ctrl->val;
1994 		break;
1995 
1996 	default:
1997 		vpe_err(ctx->dev, "Invalid control\n");
1998 		return -EINVAL;
1999 	}
2000 
2001 	return 0;
2002 }
2003 
2004 static const struct v4l2_ctrl_ops vpe_ctrl_ops = {
2005 	.s_ctrl = vpe_s_ctrl,
2006 };
2007 
2008 static const struct v4l2_ioctl_ops vpe_ioctl_ops = {
2009 	.vidioc_querycap		= vpe_querycap,
2010 
2011 	.vidioc_enum_fmt_vid_cap_mplane	= vpe_enum_fmt,
2012 	.vidioc_g_fmt_vid_cap_mplane	= vpe_g_fmt,
2013 	.vidioc_try_fmt_vid_cap_mplane	= vpe_try_fmt,
2014 	.vidioc_s_fmt_vid_cap_mplane	= vpe_s_fmt,
2015 
2016 	.vidioc_enum_fmt_vid_out_mplane	= vpe_enum_fmt,
2017 	.vidioc_g_fmt_vid_out_mplane	= vpe_g_fmt,
2018 	.vidioc_try_fmt_vid_out_mplane	= vpe_try_fmt,
2019 	.vidioc_s_fmt_vid_out_mplane	= vpe_s_fmt,
2020 
2021 	.vidioc_g_selection		= vpe_g_selection,
2022 	.vidioc_s_selection		= vpe_s_selection,
2023 
2024 	.vidioc_reqbufs			= v4l2_m2m_ioctl_reqbufs,
2025 	.vidioc_querybuf		= v4l2_m2m_ioctl_querybuf,
2026 	.vidioc_qbuf			= v4l2_m2m_ioctl_qbuf,
2027 	.vidioc_dqbuf			= v4l2_m2m_ioctl_dqbuf,
2028 	.vidioc_expbuf			= v4l2_m2m_ioctl_expbuf,
2029 	.vidioc_streamon		= v4l2_m2m_ioctl_streamon,
2030 	.vidioc_streamoff		= v4l2_m2m_ioctl_streamoff,
2031 
2032 	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
2033 	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
2034 };
2035 
2036 /*
2037  * Queue operations
2038  */
2039 static int vpe_queue_setup(struct vb2_queue *vq,
2040 			   unsigned int *nbuffers, unsigned int *nplanes,
2041 			   unsigned int sizes[], struct device *alloc_devs[])
2042 {
2043 	int i;
2044 	struct vpe_ctx *ctx = vb2_get_drv_priv(vq);
2045 	struct vpe_q_data *q_data;
2046 
2047 	q_data = get_q_data(ctx, vq->type);
2048 
2049 	*nplanes = q_data->nplanes;
2050 
2051 	for (i = 0; i < *nplanes; i++)
2052 		sizes[i] = q_data->sizeimage[i];
2053 
2054 	vpe_dbg(ctx->dev, "get %d buffer(s) of size %d", *nbuffers,
2055 		sizes[VPE_LUMA]);
2056 	if (q_data->nplanes == 2)
2057 		vpe_dbg(ctx->dev, " and %d\n", sizes[VPE_CHROMA]);
2058 
2059 	return 0;
2060 }
2061 
2062 static int vpe_buf_prepare(struct vb2_buffer *vb)
2063 {
2064 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
2065 	struct vpe_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
2066 	struct vpe_q_data *q_data;
2067 	int i, num_planes;
2068 
2069 	vpe_dbg(ctx->dev, "type: %d\n", vb->vb2_queue->type);
2070 
2071 	q_data = get_q_data(ctx, vb->vb2_queue->type);
2072 	num_planes = q_data->nplanes;
2073 
2074 	if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
2075 		if (!(q_data->flags & Q_IS_INTERLACED)) {
2076 			vbuf->field = V4L2_FIELD_NONE;
2077 		} else {
2078 			if (vbuf->field != V4L2_FIELD_TOP &&
2079 			    vbuf->field != V4L2_FIELD_BOTTOM &&
2080 			    vbuf->field != V4L2_FIELD_SEQ_TB)
2081 				return -EINVAL;
2082 		}
2083 	}
2084 
2085 	for (i = 0; i < num_planes; i++) {
2086 		if (vb2_plane_size(vb, i) < q_data->sizeimage[i]) {
2087 			vpe_err(ctx->dev,
2088 				"data will not fit into plane (%lu < %lu)\n",
2089 				vb2_plane_size(vb, i),
2090 				(long) q_data->sizeimage[i]);
2091 			return -EINVAL;
2092 		}
2093 	}
2094 
2095 	for (i = 0; i < num_planes; i++)
2096 		vb2_set_plane_payload(vb, i, q_data->sizeimage[i]);
2097 
2098 	return 0;
2099 }
2100 
2101 static void vpe_buf_queue(struct vb2_buffer *vb)
2102 {
2103 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
2104 	struct vpe_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
2105 
2106 	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
2107 }
2108 
2109 static int check_srcdst_sizes(struct vpe_ctx *ctx)
2110 {
2111 	struct vpe_q_data *s_q_data =  &ctx->q_data[Q_DATA_SRC];
2112 	struct vpe_q_data *d_q_data =  &ctx->q_data[Q_DATA_DST];
2113 	unsigned int src_w = s_q_data->c_rect.width;
2114 	unsigned int src_h = s_q_data->c_rect.height;
2115 	unsigned int dst_w = d_q_data->c_rect.width;
2116 	unsigned int dst_h = d_q_data->c_rect.height;
2117 
2118 	if (src_w == dst_w && src_h == dst_h)
2119 		return 0;
2120 
2121 	if (src_h <= SC_MAX_PIXEL_HEIGHT &&
2122 	    src_w <= SC_MAX_PIXEL_WIDTH &&
2123 	    dst_h <= SC_MAX_PIXEL_HEIGHT &&
2124 	    dst_w <= SC_MAX_PIXEL_WIDTH)
2125 		return 0;
2126 
2127 	return -1;
2128 }
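/*
 * In other words: a 1:1 conversion (no scaling) is always allowed, while a
 * scaled conversion is only accepted when both the source and destination
 * rectangles fit within the scaler's SC_MAX_PIXEL_WIDTH and
 * SC_MAX_PIXEL_HEIGHT limits; otherwise start_streaming fails with
 * -EINVAL.
 */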
2129 
2130 static void vpe_return_all_buffers(struct vpe_ctx *ctx, struct vb2_queue *q,
2131 				   enum vb2_buffer_state state)
2132 {
2133 	struct vb2_v4l2_buffer *vb;
2134 	unsigned long flags;
2135 
2136 	for (;;) {
2137 		if (V4L2_TYPE_IS_OUTPUT(q->type))
2138 			vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
2139 		else
2140 			vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2141 		if (!vb)
2142 			break;
2143 		spin_lock_irqsave(&ctx->dev->lock, flags);
2144 		v4l2_m2m_buf_done(vb, state);
2145 		spin_unlock_irqrestore(&ctx->dev->lock, flags);
2146 	}
2147 
2148 	/*
2149 	 * Clean up the in-transit vb2 buffers that have already been
2150 	 * removed from their respective queues but for which
2151 	 * processing has not completed yet.
2152 	 */
2153 	if (V4L2_TYPE_IS_OUTPUT(q->type)) {
2154 		spin_lock_irqsave(&ctx->dev->lock, flags);
2155 
2156 		if (ctx->src_vbs[2])
2157 			v4l2_m2m_buf_done(ctx->src_vbs[2], state);
2158 
2159 		if (ctx->src_vbs[1] && (ctx->src_vbs[1] != ctx->src_vbs[2]))
2160 			v4l2_m2m_buf_done(ctx->src_vbs[1], state);
2161 
2162 		if (ctx->src_vbs[0] &&
2163 		    (ctx->src_vbs[0] != ctx->src_vbs[1]) &&
2164 		    (ctx->src_vbs[0] != ctx->src_vbs[2]))
2165 			v4l2_m2m_buf_done(ctx->src_vbs[0], state);
2166 
2167 		ctx->src_vbs[2] = NULL;
2168 		ctx->src_vbs[1] = NULL;
2169 		ctx->src_vbs[0] = NULL;
2170 
2171 		spin_unlock_irqrestore(&ctx->dev->lock, flags);
2172 	} else {
2173 		if (ctx->dst_vb) {
2174 			spin_lock_irqsave(&ctx->dev->lock, flags);
2175 
2176 			v4l2_m2m_buf_done(ctx->dst_vb, state);
2177 			ctx->dst_vb = NULL;
2178 			spin_unlock_irqrestore(&ctx->dev->lock, flags);
2179 		}
2180 	}
2181 }
2182 
2183 static int vpe_start_streaming(struct vb2_queue *q, unsigned int count)
2184 {
2185 	struct vpe_ctx *ctx = vb2_get_drv_priv(q);
2186 
2187 	/* Check if any of the sizes exceed the maximum scaling limits */
2188 	if (check_srcdst_sizes(ctx)) {
2189 		vpe_err(ctx->dev,
2190 			"Conversion setup failed, check source and destination parameters\n"
2191 			);
2192 		vpe_return_all_buffers(ctx, q, VB2_BUF_STATE_QUEUED);
2193 		return -EINVAL;
2194 	}
2195 
2196 	if (ctx->deinterlacing)
2197 		config_edi_input_mode(ctx, 0x0);
2198 
2199 	if (ctx->sequence != 0)
2200 		set_srcdst_params(ctx);
2201 
2202 	return 0;
2203 }
2204 
2205 static void vpe_stop_streaming(struct vb2_queue *q)
2206 {
2207 	struct vpe_ctx *ctx = vb2_get_drv_priv(q);
2208 
2209 	vpe_dump_regs(ctx->dev);
2210 	vpdma_dump_regs(ctx->dev->vpdma);
2211 
2212 	vpe_return_all_buffers(ctx, q, VB2_BUF_STATE_ERROR);
2213 }
2214 
2215 static const struct vb2_ops vpe_qops = {
2216 	.queue_setup	 = vpe_queue_setup,
2217 	.buf_prepare	 = vpe_buf_prepare,
2218 	.buf_queue	 = vpe_buf_queue,
2219 	.wait_prepare	 = vb2_ops_wait_prepare,
2220 	.wait_finish	 = vb2_ops_wait_finish,
2221 	.start_streaming = vpe_start_streaming,
2222 	.stop_streaming  = vpe_stop_streaming,
2223 };
2224 
2225 static int queue_init(void *priv, struct vb2_queue *src_vq,
2226 		      struct vb2_queue *dst_vq)
2227 {
2228 	struct vpe_ctx *ctx = priv;
2229 	struct vpe_dev *dev = ctx->dev;
2230 	int ret;
2231 
2232 	memset(src_vq, 0, sizeof(*src_vq));
2233 	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
2234 	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
2235 	src_vq->drv_priv = ctx;
2236 	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
2237 	src_vq->ops = &vpe_qops;
2238 	src_vq->mem_ops = &vb2_dma_contig_memops;
2239 	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
2240 	src_vq->lock = &dev->dev_mutex;
2241 	src_vq->dev = dev->v4l2_dev.dev;
2242 
2243 	ret = vb2_queue_init(src_vq);
2244 	if (ret)
2245 		return ret;
2246 
2247 	memset(dst_vq, 0, sizeof(*dst_vq));
2248 	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
2249 	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
2250 	dst_vq->drv_priv = ctx;
2251 	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
2252 	dst_vq->ops = &vpe_qops;
2253 	dst_vq->mem_ops = &vb2_dma_contig_memops;
2254 	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
2255 	dst_vq->lock = &dev->dev_mutex;
2256 	dst_vq->dev = dev->v4l2_dev.dev;
2257 
2258 	return vb2_queue_init(dst_vq);
2259 }
2260 
2261 static const struct v4l2_ctrl_config vpe_bufs_per_job = {
2262 	.ops = &vpe_ctrl_ops,
2263 	.id = V4L2_CID_VPE_BUFS_PER_JOB,
2264 	.name = "Buffers Per Transaction",
2265 	.type = V4L2_CTRL_TYPE_INTEGER,
2266 	.def = VPE_DEF_BUFS_PER_JOB,
2267 	.min = 1,
2268 	.max = VIDEO_MAX_FRAME,
2269 	.step = 1,
2270 };
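/*
 * Example userspace usage (an illustrative sketch only): batching four
 * buffers per VPE transaction through the custom control defined above,
 * using the control id exported in the UAPI headers.
 *
 *	struct v4l2_control ctrl = {
 *		.id = V4L2_CID_VPE_BUFS_PER_JOB,
 *		.value = 4,
 *	};
 *
 *	if (ioctl(fd, VIDIOC_S_CTRL, &ctrl) < 0)
 *		perror("VIDIOC_S_CTRL");
 *
 * Larger values let one context keep the hardware for several frames
 * before job_ready() hands it over to another context.
 */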
2271 
2272 /*
2273  * File operations
2274  */
2275 static int vpe_open(struct file *file)
2276 {
2277 	struct vpe_dev *dev = video_drvdata(file);
2278 	struct vpe_q_data *s_q_data;
2279 	struct v4l2_ctrl_handler *hdl;
2280 	struct vpe_ctx *ctx;
2281 	int ret;
2282 
2283 	vpe_dbg(dev, "vpe_open\n");
2284 
2285 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
2286 	if (!ctx)
2287 		return -ENOMEM;
2288 
2289 	ctx->dev = dev;
2290 
2291 	if (mutex_lock_interruptible(&dev->dev_mutex)) {
2292 		ret = -ERESTARTSYS;
2293 		goto free_ctx;
2294 	}
2295 
2296 	ret = vpdma_create_desc_list(&ctx->desc_list, VPE_DESC_LIST_SIZE,
2297 			VPDMA_LIST_TYPE_NORMAL);
2298 	if (ret != 0)
2299 		goto unlock;
2300 
2301 	ret = vpdma_alloc_desc_buf(&ctx->mmr_adb, sizeof(struct vpe_mmr_adb));
2302 	if (ret != 0)
2303 		goto free_desc_list;
2304 
2305 	ret = vpdma_alloc_desc_buf(&ctx->sc_coeff_h, SC_COEF_SRAM_SIZE);
2306 	if (ret != 0)
2307 		goto free_mmr_adb;
2308 
2309 	ret = vpdma_alloc_desc_buf(&ctx->sc_coeff_v, SC_COEF_SRAM_SIZE);
2310 	if (ret != 0)
2311 		goto free_sc_h;
2312 
2313 	init_adb_hdrs(ctx);
2314 
2315 	v4l2_fh_init(&ctx->fh, video_devdata(file));
2316 	file->private_data = &ctx->fh;
2317 
2318 	hdl = &ctx->hdl;
2319 	v4l2_ctrl_handler_init(hdl, 1);
2320 	v4l2_ctrl_new_custom(hdl, &vpe_bufs_per_job, NULL);
2321 	if (hdl->error) {
2322 		ret = hdl->error;
2323 		goto exit_fh;
2324 	}
2325 	ctx->fh.ctrl_handler = hdl;
2326 	v4l2_ctrl_handler_setup(hdl);
2327 
2328 	s_q_data = &ctx->q_data[Q_DATA_SRC];
2329 	s_q_data->fmt = __find_format(V4L2_PIX_FMT_YUYV);
2330 	s_q_data->width = 1920;
2331 	s_q_data->height = 1080;
2332 	s_q_data->nplanes = 1;
2333 	s_q_data->bytesperline[VPE_LUMA] = (s_q_data->width *
2334 			s_q_data->fmt->vpdma_fmt[VPE_LUMA]->depth) >> 3;
2335 	s_q_data->sizeimage[VPE_LUMA] = (s_q_data->bytesperline[VPE_LUMA] *
2336 			s_q_data->height);
2337 	s_q_data->colorspace = V4L2_COLORSPACE_REC709;
2338 	s_q_data->field = V4L2_FIELD_NONE;
2339 	s_q_data->c_rect.left = 0;
2340 	s_q_data->c_rect.top = 0;
2341 	s_q_data->c_rect.width = s_q_data->width;
2342 	s_q_data->c_rect.height = s_q_data->height;
2343 	s_q_data->flags = 0;
2344 
2345 	ctx->q_data[Q_DATA_DST] = *s_q_data;
2346 
2347 	set_dei_shadow_registers(ctx);
2348 	set_src_registers(ctx);
2349 	set_dst_registers(ctx);
2350 	ret = set_srcdst_params(ctx);
2351 	if (ret)
2352 		goto exit_fh;
2353 
2354 	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &queue_init);
2355 
2356 	if (IS_ERR(ctx->fh.m2m_ctx)) {
2357 		ret = PTR_ERR(ctx->fh.m2m_ctx);
2358 		goto exit_fh;
2359 	}
2360 
2361 	v4l2_fh_add(&ctx->fh);
2362 
2363 	/*
2364 	 * For now, just report the creation of the first instance; we can
2365 	 * later optimize the driver to enable or disable clocks when the
2366 	 * first instance is created or the last instance is released
2367 	 */
2368 	if (atomic_inc_return(&dev->num_instances) == 1)
2369 		vpe_dbg(dev, "first instance created\n");
2370 
2371 	ctx->bufs_per_job = VPE_DEF_BUFS_PER_JOB;
2372 
2373 	ctx->load_mmrs = true;
2374 
2375 	vpe_dbg(dev, "created instance %p, m2m_ctx: %p\n",
2376 		ctx, ctx->fh.m2m_ctx);
2377 
2378 	mutex_unlock(&dev->dev_mutex);
2379 
2380 	return 0;
2381 exit_fh:
2382 	v4l2_ctrl_handler_free(hdl);
2383 	v4l2_fh_exit(&ctx->fh);
2384 	vpdma_free_desc_buf(&ctx->sc_coeff_v);
2385 free_sc_h:
2386 	vpdma_free_desc_buf(&ctx->sc_coeff_h);
2387 free_mmr_adb:
2388 	vpdma_free_desc_buf(&ctx->mmr_adb);
2389 free_desc_list:
2390 	vpdma_free_desc_list(&ctx->desc_list);
2391 unlock:
2392 	mutex_unlock(&dev->dev_mutex);
2393 free_ctx:
2394 	kfree(ctx);
2395 	return ret;
2396 }
2397 
2398 static int vpe_release(struct file *file)
2399 {
2400 	struct vpe_dev *dev = video_drvdata(file);
2401 	struct vpe_ctx *ctx = file2ctx(file);
2402 
2403 	vpe_dbg(dev, "releasing instance %p\n", ctx);
2404 
2405 	mutex_lock(&dev->dev_mutex);
2406 	free_mv_buffers(ctx);
2407 
2408 	vpdma_unmap_desc_buf(dev->vpdma, &ctx->desc_list.buf);
2409 	vpdma_unmap_desc_buf(dev->vpdma, &ctx->mmr_adb);
2410 	vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_h);
2411 	vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_v);
2412 
2413 	vpdma_free_desc_list(&ctx->desc_list);
2414 	vpdma_free_desc_buf(&ctx->mmr_adb);
2415 
2416 	vpdma_free_desc_buf(&ctx->sc_coeff_v);
2417 	vpdma_free_desc_buf(&ctx->sc_coeff_h);
2418 
2419 	v4l2_fh_del(&ctx->fh);
2420 	v4l2_fh_exit(&ctx->fh);
2421 	v4l2_ctrl_handler_free(&ctx->hdl);
2422 	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
2423 
2424 	kfree(ctx);
2425 
2426 	/*
2427 	 * For now, just report the release of the last instance; we can
2428 	 * later optimize the driver to enable or disable clocks when the
2429 	 * first instance is created or the last instance is released
2430 	 */
2431 	if (atomic_dec_return(&dev->num_instances) == 0)
2432 		vpe_dbg(dev, "last instance released\n");
2433 
2434 	mutex_unlock(&dev->dev_mutex);
2435 
2436 	return 0;
2437 }
2438 
2439 static const struct v4l2_file_operations vpe_fops = {
2440 	.owner		= THIS_MODULE,
2441 	.open		= vpe_open,
2442 	.release	= vpe_release,
2443 	.poll		= v4l2_m2m_fop_poll,
2444 	.unlocked_ioctl	= video_ioctl2,
2445 	.mmap		= v4l2_m2m_fop_mmap,
2446 };
2447 
2448 static const struct video_device vpe_videodev = {
2449 	.name		= VPE_MODULE_NAME,
2450 	.fops		= &vpe_fops,
2451 	.ioctl_ops	= &vpe_ioctl_ops,
2452 	.minor		= -1,
2453 	.release	= video_device_release_empty,
2454 	.vfl_dir	= VFL_DIR_M2M,
2455 };
2456 
2457 static const struct v4l2_m2m_ops m2m_ops = {
2458 	.device_run	= device_run,
2459 	.job_ready	= job_ready,
2460 	.job_abort	= job_abort,
2461 	.lock		= vpe_lock,
2462 	.unlock		= vpe_unlock,
2463 };
2464 
2465 static int vpe_runtime_get(struct platform_device *pdev)
2466 {
2467 	int r;
2468 
2469 	dev_dbg(&pdev->dev, "vpe_runtime_get\n");
2470 
2471 	r = pm_runtime_get_sync(&pdev->dev);
2472 	WARN_ON(r < 0);
2473 	return r < 0 ? r : 0;
2474 }
2475 
2476 static void vpe_runtime_put(struct platform_device *pdev)
2477 {
2479 	int r;
2480 
2481 	dev_dbg(&pdev->dev, "vpe_runtime_put\n");
2482 
2483 	r = pm_runtime_put_sync(&pdev->dev);
2484 	WARN_ON(r < 0 && r != -ENOSYS);
2485 }
2486 
2487 static void vpe_fw_cb(struct platform_device *pdev)
2488 {
2489 	struct vpe_dev *dev = platform_get_drvdata(pdev);
2490 	struct video_device *vfd;
2491 	int ret;
2492 
2493 	vfd = &dev->vfd;
2494 	*vfd = vpe_videodev;
2495 	vfd->lock = &dev->dev_mutex;
2496 	vfd->v4l2_dev = &dev->v4l2_dev;
2497 
2498 	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
2499 	if (ret) {
2500 		vpe_err(dev, "Failed to register video device\n");
2501 
2502 		vpe_set_clock_enable(dev, 0);
2503 		vpe_runtime_put(pdev);
2504 		pm_runtime_disable(&pdev->dev);
2505 		v4l2_m2m_release(dev->m2m_dev);
2506 		v4l2_device_unregister(&dev->v4l2_dev);
2507 
2508 		return;
2509 	}
2510 
2511 	video_set_drvdata(vfd, dev);
2512 	snprintf(vfd->name, sizeof(vfd->name), "%s", vpe_videodev.name);
2513 	dev_info(dev->v4l2_dev.dev, "Device registered as /dev/video%d\n",
2514 		vfd->num);
2515 }
2516 
2517 static int vpe_probe(struct platform_device *pdev)
2518 {
2519 	struct vpe_dev *dev;
2520 	int ret, irq, func;
2521 
2522 	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
2523 	if (!dev)
2524 		return -ENOMEM;
2525 
2526 	spin_lock_init(&dev->lock);
2527 
2528 	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
2529 	if (ret)
2530 		return ret;
2531 
2532 	atomic_set(&dev->num_instances, 0);
2533 	mutex_init(&dev->dev_mutex);
2534 
2535 	dev->res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
2536 			"vpe_top");
2537 	/*
2538 	 * HACK: we get resource info from the device tree as a list of VPE
2539 	 * sub-blocks. The driver currently uses only the base of vpe_top for
2540 	 * register access; it should be changed later to access registers
2541 	 * based on the sub-block base addresses
2542 	 */
2543 	dev->base = devm_ioremap(&pdev->dev, dev->res->start, SZ_32K);
2544 	if (!dev->base) {
2545 		ret = -ENOMEM;
2546 		goto v4l2_dev_unreg;
2547 	}
2548 
2549 	irq = platform_get_irq(pdev, 0);
2550 	ret = devm_request_irq(&pdev->dev, irq, vpe_irq, 0, VPE_MODULE_NAME,
2551 			dev);
2552 	if (ret)
2553 		goto v4l2_dev_unreg;
2554 
2555 	platform_set_drvdata(pdev, dev);
2556 
2557 	dev->m2m_dev = v4l2_m2m_init(&m2m_ops);
2558 	if (IS_ERR(dev->m2m_dev)) {
2559 		vpe_err(dev, "Failed to init mem2mem device\n");
2560 		ret = PTR_ERR(dev->m2m_dev);
2561 		goto v4l2_dev_unreg;
2562 	}
2563 
2564 	pm_runtime_enable(&pdev->dev);
2565 
2566 	ret = vpe_runtime_get(pdev);
2567 	if (ret)
2568 		goto rel_m2m;
2569 
2570 	/* Perform clk enable followed by reset */
2571 	vpe_set_clock_enable(dev, 1);
2572 
2573 	vpe_top_reset(dev);
2574 
2575 	func = read_field_reg(dev, VPE_PID, VPE_PID_FUNC_MASK,
2576 		VPE_PID_FUNC_SHIFT);
2577 	vpe_dbg(dev, "VPE PID function %x\n", func);
2578 
2579 	vpe_top_vpdma_reset(dev);
2580 
2581 	dev->sc = sc_create(pdev, "sc");
2582 	if (IS_ERR(dev->sc)) {
2583 		ret = PTR_ERR(dev->sc);
2584 		goto runtime_put;
2585 	}
2586 
2587 	dev->csc = csc_create(pdev, "csc");
2588 	if (IS_ERR(dev->csc)) {
2589 		ret = PTR_ERR(dev->csc);
2590 		goto runtime_put;
2591 	}
2592 
2593 	dev->vpdma = &dev->vpdma_data;
2594 	ret = vpdma_create(pdev, dev->vpdma, vpe_fw_cb);
2595 	if (ret)
2596 		goto runtime_put;
2597 
2598 	return 0;
2599 
2600 runtime_put:
2601 	vpe_runtime_put(pdev);
2602 rel_m2m:
2603 	pm_runtime_disable(&pdev->dev);
2604 	v4l2_m2m_release(dev->m2m_dev);
2605 v4l2_dev_unreg:
2606 	v4l2_device_unregister(&dev->v4l2_dev);
2607 
2608 	return ret;
2609 }
2610 
2611 static int vpe_remove(struct platform_device *pdev)
2612 {
2613 	struct vpe_dev *dev = platform_get_drvdata(pdev);
2614 
2615 	v4l2_info(&dev->v4l2_dev, "Removing " VPE_MODULE_NAME);
2616 
2617 	v4l2_m2m_release(dev->m2m_dev);
2618 	video_unregister_device(&dev->vfd);
2619 	v4l2_device_unregister(&dev->v4l2_dev);
2620 
2621 	vpe_set_clock_enable(dev, 0);
2622 	vpe_runtime_put(pdev);
2623 	pm_runtime_disable(&pdev->dev);
2624 
2625 	return 0;
2626 }
2627 
2628 #if defined(CONFIG_OF)
2629 static const struct of_device_id vpe_of_match[] = {
2630 	{
2631 		.compatible = "ti,vpe",
2632 	},
2633 	{},
2634 };
2635 MODULE_DEVICE_TABLE(of, vpe_of_match);
2636 #endif
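/*
 * Example device tree node (an illustrative sketch; the unit address,
 * register size and interrupt below are assumptions, not taken from a
 * real board file):
 *
 *	vpe: vpe@489d0000 {
 *		compatible = "ti,vpe";
 *		reg = <0x489d0000 0x8000>;
 *		reg-names = "vpe_top";
 *		interrupts = <...>;
 *	};
 *
 * The driver matches on "ti,vpe" and looks up the "vpe_top" region by
 * name in vpe_probe() above.
 */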
2637 
2638 static struct platform_driver vpe_pdrv = {
2639 	.probe		= vpe_probe,
2640 	.remove		= vpe_remove,
2641 	.driver		= {
2642 		.name	= VPE_MODULE_NAME,
2643 		.of_match_table = of_match_ptr(vpe_of_match),
2644 	},
2645 };
2646 
2647 module_platform_driver(vpe_pdrv);
2648 
2649 MODULE_DESCRIPTION("TI VPE driver");
2650 MODULE_AUTHOR("Dale Farnsworth, <dale@farnsworth.org>");
2651 MODULE_LICENSE("GPL");
2652