/*
 * nvme-lightnvm.c - LightNVM NVMe device
 *
 * Copyright (C) 2014-2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <mb@lightnvm.io>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 *
 */

#include "nvme.h"

#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
#include <uapi/linux/lightnvm.h>

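/*
 * Vendor-specific admin opcodes used by LightNVM (open-channel SSD)
 * devices; the values below follow the LightNVM 1.2 specification.
 */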
enum nvme_nvm_admin_opcode {
	nvme_nvm_admin_identity		= 0xe2,
	nvme_nvm_admin_get_l2p_tbl	= 0xea,
	nvme_nvm_admin_get_bb_tbl	= 0xf2,
	nvme_nvm_admin_set_bb_tbl	= 0xf1,
};

struct nvme_nvm_hb_rw {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__u64			rsvd2;
	__le64			metadata;
	__le64			prp1;
	__le64			prp2;
	__le64			spba;
	__le16			length;
	__le16			control;
	__le32			dsmgmt;
	__le64			slba;
};

struct nvme_nvm_ph_rw {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__u64			rsvd2;
	__le64			metadata;
	__le64			prp1;
	__le64			prp2;
	__le64			spba;
	__le16			length;
	__le16			control;
	__le32			dsmgmt;
	__le64			resv;
};

struct nvme_nvm_identity {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__u64			rsvd[2];
	__le64			prp1;
	__le64			prp2;
	__le32			chnl_off;
	__u32			rsvd11[5];
};

struct nvme_nvm_l2ptbl {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__le32			cdw2[4];
	__le64			prp1;
	__le64			prp2;
	__le64			slba;
	__le32			nlb;
	__le16			cdw14[6];
};

struct nvme_nvm_getbbtbl {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__u64			rsvd[2];
	__le64			prp1;
	__le64			prp2;
	__le64			spba;
	__u32			rsvd4[4];
};

struct nvme_nvm_setbbtbl {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__le64			rsvd[2];
	__le64			prp1;
	__le64			prp2;
	__le64			spba;
	__le16			nlb;
	__u8			value;
	__u8			rsvd3;
	__u32			rsvd4[3];
};

struct nvme_nvm_erase_blk {
	__u8			opcode;
	__u8			flags;
	__u16			command_id;
	__le32			nsid;
	__u64			rsvd[2];
	__le64			prp1;
	__le64			prp2;
	__le64			spba;
	__le16			length;
	__le16			control;
	__le32			dsmgmt;
	__le64			resv;
};

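/*
 * All vendor-specific commands share the 64-byte NVMe submission queue
 * entry layout; this union lets each command view be built in place and
 * then submitted as a regular struct nvme_command.
 */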
struct nvme_nvm_command {
	union {
		struct nvme_common_command common;
		struct nvme_nvm_identity identity;
		struct nvme_nvm_hb_rw hb_rw;
		struct nvme_nvm_ph_rw ph_rw;
		struct nvme_nvm_l2ptbl l2p;
		struct nvme_nvm_getbbtbl get_bb;
		struct nvme_nvm_setbbtbl set_bb;
		struct nvme_nvm_erase_blk erase;
	};
};

#define NVME_NVM_LP_MLC_PAIRS 886
struct nvme_nvm_lp_mlc {
	__le16			num_pairs;
	__u8			pairs[NVME_NVM_LP_MLC_PAIRS];
};

struct nvme_nvm_lp_tbl {
	__u8			id[8];
	struct nvme_nvm_lp_mlc	mlc;
};

struct nvme_nvm_id_group {
	__u8			mtype;
	__u8			fmtype;
	__le16			res16;
	__u8			num_ch;
	__u8			num_lun;
	__u8			num_pln;
	__u8			rsvd1;
	__le16			num_blk;
	__le16			num_pg;
	__le16			fpg_sz;
	__le16			csecs;
	__le16			sos;
	__le16			rsvd2;
	__le32			trdt;
	__le32			trdm;
	__le32			tprt;
	__le32			tprm;
	__le32			tbet;
	__le32			tbem;
	__le32			mpos;
	__le32			mccap;
	__le16			cpar;
	__u8			reserved[10];
	struct nvme_nvm_lp_tbl lptbl;
} __packed;

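/*
 * Generic physical page address (PPA) format: for each address component
 * (channel, LUN, plane, block, page, sector) the device reports the bit
 * offset and bit length within the 64-bit PPA.
 */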
struct nvme_nvm_addr_format {
	__u8			ch_offset;
	__u8			ch_len;
	__u8			lun_offset;
	__u8			lun_len;
	__u8			pln_offset;
	__u8			pln_len;
	__u8			blk_offset;
	__u8			blk_len;
	__u8			pg_offset;
	__u8			pg_len;
	__u8			sect_offset;
	__u8			sect_len;
	__u8			res[4];
} __packed;

struct nvme_nvm_id {
	__u8			ver_id;
	__u8			vmnt;
	__u8			cgrps;
	__u8			res;
	__le32			cap;
	__le32			dom;
	struct nvme_nvm_addr_format ppaf;
	__u8			resv[228];
	struct nvme_nvm_id_group groups[4];
} __packed;

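/*
 * Bad block table as returned by the device: a fixed header followed by
 * one state byte per block. blk[] is a flexible array sized by the caller
 * from blocks-per-LUN times the plane mode.
 */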
struct nvme_nvm_bb_tbl {
	__u8	tblid[4];
	__le16	verid;
	__le16	revid;
	__le32	rvsd1;
	__le32	tblks;
	__le32	tfact;
	__le32	tgrown;
	__le32	tdresv;
	__le32	thresv;
	__le32	rsvd2[8];
	__u8	blk[0];
};

/*
 * Check we didn't inadvertently grow the command struct
 */
static inline void _nvme_nvm_check_size(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
}

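/*
 * Copy the single configuration group reported by the identity page into
 * the generic nvm_id, converting little-endian fields to CPU order.
 * Devices reporting more than one group are rejected.
 */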
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
	struct nvme_nvm_id_group *src;
	struct nvm_id_group *dst;

	if (nvme_nvm_id->cgrps != 1)
		return -EINVAL;

	src = &nvme_nvm_id->groups[0];
	dst = &nvm_id->grp;

	dst->mtype = src->mtype;
	dst->fmtype = src->fmtype;
	dst->num_ch = src->num_ch;
	dst->num_lun = src->num_lun;
	dst->num_pln = src->num_pln;

	dst->num_pg = le16_to_cpu(src->num_pg);
	dst->num_blk = le16_to_cpu(src->num_blk);
	dst->fpg_sz = le16_to_cpu(src->fpg_sz);
	dst->csecs = le16_to_cpu(src->csecs);
	dst->sos = le16_to_cpu(src->sos);

	dst->trdt = le32_to_cpu(src->trdt);
	dst->trdm = le32_to_cpu(src->trdm);
	dst->tprt = le32_to_cpu(src->tprt);
	dst->tprm = le32_to_cpu(src->tprm);
	dst->tbet = le32_to_cpu(src->tbet);
	dst->tbem = le32_to_cpu(src->tbem);
	dst->mpos = le32_to_cpu(src->mpos);
	dst->mccap = le32_to_cpu(src->mccap);

	dst->cpar = le16_to_cpu(src->cpar);

	if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
		memcpy(dst->lptbl.id, src->lptbl.id, 8);
		dst->lptbl.mlc.num_pairs =
				le16_to_cpu(src->lptbl.mlc.num_pairs);

		if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
			pr_err("nvm: number of MLC pairs not supported\n");
			return -EINVAL;
		}

		memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
					dst->lptbl.mlc.num_pairs);
	}

	return 0;
}

static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_id *nvme_nvm_id;
	struct nvme_nvm_command c = {};
	int ret;

	c.identity.opcode = nvme_nvm_admin_identity;
	c.identity.nsid = cpu_to_le32(ns->ns_id);
	c.identity.chnl_off = 0;

	nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL);
	if (!nvme_nvm_id)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
				nvme_nvm_id, sizeof(struct nvme_nvm_id));
	if (ret) {
		ret = -EIO;
		goto out;
	}

	nvm_id->ver_id = nvme_nvm_id->ver_id;
	nvm_id->vmnt = nvme_nvm_id->vmnt;
	nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
	nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
	memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
					sizeof(struct nvm_addr_format));

	ret = init_grps(nvm_id, nvme_nvm_id);
out:
	kfree(nvme_nvm_id);
	return ret;
}

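/*
 * Fetch the logical-to-physical table in chunks bounded by the admin
 * queue's maximum transfer size and hand each chunk to the caller's
 * update_l2p callback.
 */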
static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
				nvm_l2p_update_fn *update_l2p, void *priv)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_command c = {};
	u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
	u32 nlb_pr_rq = len / sizeof(u64);
	u64 cmd_slba = slba;
	void *entries;
	int ret = 0;

	c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
	c.l2p.nsid = cpu_to_le32(ns->ns_id);
	entries = kmalloc(len, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	while (nlb) {
		u32 cmd_nlb = min(nlb_pr_rq, nlb);
		u64 elba = slba + cmd_nlb;

		c.l2p.slba = cpu_to_le64(cmd_slba);
		c.l2p.nlb = cpu_to_le32(cmd_nlb);

		ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
				(struct nvme_command *)&c, entries, len);
		if (ret) {
			dev_err(ns->ctrl->device,
				"L2P table transfer failed (%d)\n", ret);
			ret = -EIO;
			goto out;
		}

		if (unlikely(elba > nvmdev->total_secs)) {
			pr_err("nvm: L2P data from device is out of bounds!\n");
			ret = -EINVAL;
			goto out;
		}

		/* Transform physical address to target address space */
		nvm_part_to_tgt(nvmdev, entries, cmd_nlb);

		if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
			ret = -EINTR;
			goto out;
		}

		cmd_slba += cmd_nlb;
		nlb -= cmd_nlb;
	}

out:
	kfree(entries);
	return ret;
}

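/*
 * Read the bad block table for the LUN addressed by @ppa and copy the
 * per-block state bytes into @blks after validating the table header.
 */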
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
								u8 *blks)
{
	struct request_queue *q = nvmdev->q;
	struct nvm_geo *geo = &nvmdev->geo;
	struct nvme_ns *ns = q->queuedata;
	struct nvme_ctrl *ctrl = ns->ctrl;
	struct nvme_nvm_command c = {};
	struct nvme_nvm_bb_tbl *bb_tbl;
	int nr_blks = geo->blks_per_lun * geo->plane_mode;
	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
	int ret = 0;

	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
	c.get_bb.nsid = cpu_to_le32(ns->ns_id);
	c.get_bb.spba = cpu_to_le64(ppa.ppa);

	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
	if (!bb_tbl)
		return -ENOMEM;

	ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
								bb_tbl, tblsz);
	if (ret) {
		dev_err(ctrl->device, "get bad block table failed (%d)\n", ret);
		ret = -EIO;
		goto out;
	}

	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
		dev_err(ctrl->device, "bbt format mismatch\n");
		ret = -EINVAL;
		goto out;
	}

	if (le16_to_cpu(bb_tbl->verid) != 1) {
		ret = -EINVAL;
		dev_err(ctrl->device, "bbt version not supported\n");
		goto out;
	}

	if (le32_to_cpu(bb_tbl->tblks) != nr_blks) {
		ret = -EINVAL;
		dev_err(ctrl->device,
				"bbt unexpected blocks returned (%u!=%u)",
				le32_to_cpu(bb_tbl->tblks), nr_blks);
		goto out;
	}

	memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode);
out:
	kfree(bb_tbl);
	return ret;
}

static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
							int nr_ppas, int type)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;
	struct nvme_nvm_command c = {};
	int ret = 0;

	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
	c.set_bb.nsid = cpu_to_le32(ns->ns_id);
	c.set_bb.spba = cpu_to_le64(ppas->ppa);
	c.set_bb.nlb = cpu_to_le16(nr_ppas - 1);
	c.set_bb.value = type;

	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
								NULL, 0);
	if (ret)
		dev_err(ns->ctrl->device, "set bad block table failed (%d)\n",
									ret);
	return ret;
}

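/*
 * Translate a generic nvm_rq into the vendor read/write command. Hybrid
 * commands (NVM_OP_HBREAD/NVM_OP_HBWRITE) additionally carry the logical
 * slba taken from the request's bio.
 */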
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
				    struct nvme_nvm_command *c)
{
	c->ph_rw.opcode = rqd->opcode;
	c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
	c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa);
	c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
	c->ph_rw.control = cpu_to_le16(rqd->flags);
	c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);

	if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
		c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
					rqd->bio->bi_iter.bi_sector));
}

static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
	struct nvm_rq *rqd = rq->end_io_data;

	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
	rqd->error = nvme_req(rq)->status;
	nvm_end_io(rqd);

	kfree(nvme_req(rq)->cmd);
	blk_mq_free_request(rq);
}

static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct nvme_ns *ns = q->queuedata;
	struct request *rq;
	struct bio *bio = rqd->bio;
	struct nvme_nvm_command *cmd;

	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;

	nvme_nvm_rqtocmd(rqd, ns, cmd);

	rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
	if (IS_ERR(rq)) {
		kfree(cmd);
		return PTR_ERR(rq);
	}
	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;

	if (bio) {
		blk_init_request_from_bio(rq, bio);
	} else {
		rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
		rq->__data_len = 0;
	}

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);

	return 0;
}

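/*
 * DMA pools handed back to the LightNVM core; they back the PPA list and
 * out-of-band metadata buffers attached to vendor commands.
 */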
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;

	return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}

static void nvme_nvm_destroy_dma_pool(void *pool)
{
	struct dma_pool *dma_pool = pool;

	dma_pool_destroy(dma_pool);
}

static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
				    gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return dma_pool_alloc(pool, mem_flags, dma_handler);
}

static void nvme_nvm_dev_dma_free(void *pool, void *addr,
							dma_addr_t dma_handler)
{
	dma_pool_free(pool, addr, dma_handler);
}

static struct nvm_dev_ops nvme_nvm_dev_ops = {
	.identity		= nvme_nvm_identity,

	.get_l2p_tbl		= nvme_nvm_get_l2p_tbl,

	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
	.set_bb_tbl		= nvme_nvm_set_bb_tbl,

	.submit_io		= nvme_nvm_submit_io,

	.create_dma_pool	= nvme_nvm_create_dma_pool,
	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
	.dev_dma_alloc		= nvme_nvm_dev_dma_alloc,
	.dev_dma_free		= nvme_nvm_dev_dma_free,

	.max_phys_sect		= 64,
};

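/*
 * Common helper for the vio ioctls: builds the request, maps the user
 * data buffer, and stages the PPA list and metadata through the device
 * DMA pool before executing the command synchronously.
 */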
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
				struct nvme_ns *ns,
				struct nvme_nvm_command *vcmd,
				void __user *ubuf, unsigned int bufflen,
				void __user *meta_buf, unsigned int meta_len,
				void __user *ppa_buf, unsigned int ppa_len,
				u32 *result, u64 *status, unsigned int timeout)
{
	bool write = nvme_is_write((struct nvme_command *)vcmd);
	struct nvm_dev *dev = ns->ndev;
	struct gendisk *disk = ns->disk;
	struct request *rq;
	struct bio *bio = NULL;
	__le64 *ppa_list = NULL;
	dma_addr_t ppa_dma;
	__le64 *metadata = NULL;
	dma_addr_t metadata_dma;
	DECLARE_COMPLETION_ONSTACK(wait);
	int ret = 0;

	rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
			NVME_QID_ANY);
	if (IS_ERR(rq)) {
		ret = -ENOMEM;
		goto err_cmd;
	}

	rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;

	rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;

	if (ppa_buf && ppa_len) {
		ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
		if (!ppa_list) {
			ret = -ENOMEM;
			goto err_rq;
		}
		if (copy_from_user(ppa_list, (void __user *)ppa_buf,
						sizeof(u64) * (ppa_len + 1))) {
			ret = -EFAULT;
			goto err_ppa;
		}
		vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
	} else {
		vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
	}

	if (ubuf && bufflen) {
		ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
		if (ret)
			goto err_ppa;
		bio = rq->bio;

		if (meta_buf && meta_len) {
			metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
								&metadata_dma);
			if (!metadata) {
				ret = -ENOMEM;
				goto err_map;
			}

			if (write) {
				if (copy_from_user(metadata,
						(void __user *)meta_buf,
						meta_len)) {
					ret = -EFAULT;
					goto err_meta;
				}
			}
			vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
		}

		bio->bi_disk = disk;
	}

	blk_execute_rq(q, NULL, rq, 0);

	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
		ret = -EINTR;
	else if (nvme_req(rq)->status & 0x7ff)
		ret = -EIO;
	if (result)
		*result = nvme_req(rq)->status & 0x7ff;
	if (status)
		*status = le64_to_cpu(nvme_req(rq)->result.u64);

	if (metadata && !ret && !write) {
		if (copy_to_user(meta_buf, (void *)metadata, meta_len))
			ret = -EFAULT;
	}
err_meta:
	if (meta_buf && meta_len)
		dma_pool_free(dev->dma_pool, metadata, metadata_dma);
err_map:
	if (bio)
		blk_rq_unmap_user(bio);
err_ppa:
	if (ppa_buf && ppa_len)
		dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
err_rq:
	blk_mq_free_request(rq);
err_cmd:
	return ret;
}

static int nvme_nvm_submit_vio(struct nvme_ns *ns,
					struct nvm_user_vio __user *uvio)
{
	struct nvm_user_vio vio;
	struct nvme_nvm_command c;
	unsigned int length;
	int ret;

	if (copy_from_user(&vio, uvio, sizeof(vio)))
		return -EFAULT;
	if (vio.flags)
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.ph_rw.opcode = vio.opcode;
	c.ph_rw.nsid = cpu_to_le32(ns->ns_id);
	c.ph_rw.control = cpu_to_le16(vio.control);
	c.ph_rw.length = cpu_to_le16(vio.nppas);

	length = (vio.nppas + 1) << ns->lba_shift;

	ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
			(void __user *)(uintptr_t)vio.addr, length,
			(void __user *)(uintptr_t)vio.metadata,
							vio.metadata_len,
			(void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
			&vio.result, &vio.status, 0);

	if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
		return -EFAULT;

	return ret;
}

static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
					struct nvm_passthru_vio __user *uvcmd)
{
	struct nvm_passthru_vio vcmd;
	struct nvme_nvm_command c;
	struct request_queue *q;
	unsigned int timeout = 0;
	int ret;

	if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
		return -EFAULT;
	if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
		return -EACCES;
	if (vcmd.flags)
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = vcmd.opcode;
	c.common.nsid = cpu_to_le32(ns->ns_id);
	c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
	/* cdw11-12 */
	c.ph_rw.length = cpu_to_le16(vcmd.nppas);
	c.ph_rw.control  = cpu_to_le16(vcmd.control);
	c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
	c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
	c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);

	if (vcmd.timeout_ms)
		timeout = msecs_to_jiffies(vcmd.timeout_ms);

	q = admin ? ns->ctrl->admin_q : ns->queue;

	ret = nvme_nvm_submit_user_cmd(q, ns,
			(struct nvme_nvm_command *)&c,
			(void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
			(void __user *)(uintptr_t)vcmd.metadata,
							vcmd.metadata_len,
			(void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
			&vcmd.result, &vcmd.status, timeout);

	if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
		return -EFAULT;

	return ret;
}

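/*
 * Entry point for the LightNVM ioctls issued on the namespace block
 * device: vendor admin/IO passthrough and user vectored I/O submission.
 */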
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NVME_NVM_IOCTL_ADMIN_VIO:
		return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
	case NVME_NVM_IOCTL_IO_VIO:
		return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
	case NVME_NVM_IOCTL_SUBMIT_VIO:
		return nvme_nvm_submit_vio(ns, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
{
	struct request_queue *q = ns->queue;
	struct nvm_dev *dev;

	_nvme_nvm_check_size();

	dev = nvm_alloc_dev(node);
	if (!dev)
		return -ENOMEM;

	dev->q = q;
	memcpy(dev->name, disk_name, DISK_NAME_LEN);
	dev->ops = &nvme_nvm_dev_ops;
	dev->private_data = ns;
	ns->ndev = dev;

	return nvm_register(dev);
}

void nvme_nvm_unregister(struct nvme_ns *ns)
{
	nvm_unregister(ns->ndev);
}

static ssize_t nvm_dev_attr_show(struct device *dev,
				 struct device_attribute *dattr, char *page)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
	struct nvm_dev *ndev = ns->ndev;
	struct nvm_id *id;
	struct nvm_id_group *grp;
	struct attribute *attr;

	if (!ndev)
		return 0;

	id = &ndev->identity;
	grp = &id->grp;
	attr = &dattr->attr;

	if (strcmp(attr->name, "version") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id);
	} else if (strcmp(attr->name, "vendor_opcode") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt);
	} else if (strcmp(attr->name, "capabilities") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
	} else if (strcmp(attr->name, "device_mode") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
	/* kept for compatibility */
	} else if (strcmp(attr->name, "media_manager") == 0) {
		return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
	} else if (strcmp(attr->name, "ppa_format") == 0) {
		return scnprintf(page, PAGE_SIZE,
			"0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
			id->ppaf.ch_offset, id->ppaf.ch_len,
			id->ppaf.lun_offset, id->ppaf.lun_len,
			id->ppaf.pln_offset, id->ppaf.pln_len,
			id->ppaf.blk_offset, id->ppaf.blk_len,
			id->ppaf.pg_offset, id->ppaf.pg_len,
			id->ppaf.sect_offset, id->ppaf.sect_len);
	} else if (strcmp(attr->name, "media_type") == 0) {	/* u8 */
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype);
	} else if (strcmp(attr->name, "flash_media_type") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype);
	} else if (strcmp(attr->name, "num_channels") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch);
	} else if (strcmp(attr->name, "num_luns") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun);
	} else if (strcmp(attr->name, "num_planes") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
	} else if (strcmp(attr->name, "num_blocks") == 0) {	/* u16 */
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk);
	} else if (strcmp(attr->name, "num_pages") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
	} else if (strcmp(attr->name, "page_size") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz);
	} else if (strcmp(attr->name, "hw_sector_size") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs);
	} else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos);
	} else if (strcmp(attr->name, "read_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt);
	} else if (strcmp(attr->name, "read_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm);
	} else if (strcmp(attr->name, "prog_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt);
	} else if (strcmp(attr->name, "prog_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm);
	} else if (strcmp(attr->name, "erase_typ") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet);
	} else if (strcmp(attr->name, "erase_max") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem);
	} else if (strcmp(attr->name, "multiplane_modes") == 0) {
		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos);
	} else if (strcmp(attr->name, "media_capabilities") == 0) {
		return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap);
	} else if (strcmp(attr->name, "max_phys_secs") == 0) {
		return scnprintf(page, PAGE_SIZE, "%u\n",
				ndev->ops->max_phys_sect);
	} else {
		return scnprintf(page,
				 PAGE_SIZE,
				 "Unhandled attr(%s) in `nvm_dev_attr_show`\n",
				 attr->name);
	}
}

#define NVM_DEV_ATTR_RO(_name)						\
	DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL)

static NVM_DEV_ATTR_RO(version);
static NVM_DEV_ATTR_RO(vendor_opcode);
static NVM_DEV_ATTR_RO(capabilities);
static NVM_DEV_ATTR_RO(device_mode);
static NVM_DEV_ATTR_RO(ppa_format);
static NVM_DEV_ATTR_RO(media_manager);

static NVM_DEV_ATTR_RO(media_type);
static NVM_DEV_ATTR_RO(flash_media_type);
static NVM_DEV_ATTR_RO(num_channels);
static NVM_DEV_ATTR_RO(num_luns);
static NVM_DEV_ATTR_RO(num_planes);
static NVM_DEV_ATTR_RO(num_blocks);
static NVM_DEV_ATTR_RO(num_pages);
static NVM_DEV_ATTR_RO(page_size);
static NVM_DEV_ATTR_RO(hw_sector_size);
static NVM_DEV_ATTR_RO(oob_sector_size);
static NVM_DEV_ATTR_RO(read_typ);
static NVM_DEV_ATTR_RO(read_max);
static NVM_DEV_ATTR_RO(prog_typ);
static NVM_DEV_ATTR_RO(prog_max);
static NVM_DEV_ATTR_RO(erase_typ);
static NVM_DEV_ATTR_RO(erase_max);
static NVM_DEV_ATTR_RO(multiplane_modes);
static NVM_DEV_ATTR_RO(media_capabilities);
static NVM_DEV_ATTR_RO(max_phys_secs);

static struct attribute *nvm_dev_attrs[] = {
	&dev_attr_version.attr,
	&dev_attr_vendor_opcode.attr,
	&dev_attr_capabilities.attr,
	&dev_attr_device_mode.attr,
	&dev_attr_media_manager.attr,

	&dev_attr_ppa_format.attr,
	&dev_attr_media_type.attr,
	&dev_attr_flash_media_type.attr,
	&dev_attr_num_channels.attr,
	&dev_attr_num_luns.attr,
	&dev_attr_num_planes.attr,
	&dev_attr_num_blocks.attr,
	&dev_attr_num_pages.attr,
	&dev_attr_page_size.attr,
	&dev_attr_hw_sector_size.attr,
	&dev_attr_oob_sector_size.attr,
	&dev_attr_read_typ.attr,
	&dev_attr_read_max.attr,
	&dev_attr_prog_typ.attr,
	&dev_attr_prog_max.attr,
	&dev_attr_erase_typ.attr,
	&dev_attr_erase_max.attr,
	&dev_attr_multiplane_modes.attr,
	&dev_attr_media_capabilities.attr,
	&dev_attr_max_phys_secs.attr,
	NULL,
};

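/*
 * The attributes above are exposed as the "lightnvm" group on the
 * namespace's gendisk device, typically /sys/block/<disk>/lightnvm/.
 */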
static const struct attribute_group nvm_dev_attr_group = {
	.name		= "lightnvm",
	.attrs		= nvm_dev_attrs,
};

int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
	return sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
					&nvm_dev_attr_group);
}

void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
{
	sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
					&nvm_dev_attr_group);
}