/*
 * NVDIMM Block Window Driver
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/nd.h>
#include <linux/sizes.h>
#include "nd.h"

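/*
 * Per-namespace driver state: the request queue and gendisk backing the
 * block device, the BLK namespace and region it sits on, and cached
 * geometry (total capacity, logical sector size, and the padded
 * internal LBA size used for media addressing).
 */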
struct nd_blk_device {
	struct request_queue *queue;
	struct gendisk *disk;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_region *ndbr;
	size_t disk_size;
	u32 sector_size;
	u32 internal_lbasize;
};

static int nd_blk_major;

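/* Bytes of per-sector metadata: namespace LBA size minus the data payload. */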
static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
{
	return blk_dev->nsblk->lbasize - blk_dev->sector_size;
}

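/*
 * Translate a linear namespace offset into an offset on the backing
 * device by walking the namespace's (possibly discontiguous) resources.
 * Returns SIZE_MAX if the request is out of range or would straddle a
 * resource boundary.
 */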
static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
				resource_size_t ns_offset, unsigned int len)
{
	int i;

	for (i = 0; i < nsblk->num_resources; i++) {
		if (ns_offset < resource_size(nsblk->res[i])) {
			if (ns_offset + len > resource_size(nsblk->res[i])) {
				dev_WARN_ONCE(&nsblk->common.dev, 1,
					"illegal request\n");
				return SIZE_MAX;
			}
			return nsblk->res[i]->start + ns_offset;
		}
		ns_offset -= resource_size(nsblk->res[i]);
	}

	dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
	return SIZE_MAX;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
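/*
 * Transfer the integrity metadata for one logical block: the metadata
 * lives immediately after the sector data within the namespace's
 * internal LBA format, so locate it there and then copy to/from the
 * bio's integrity payload one bvec segment at a time.
 */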
static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
				struct bio_integrity_payload *bip, u64 lba,
				int rw)
{
	unsigned int len = nd_blk_meta_size(blk_dev);
	resource_size_t	dev_offset, ns_offset;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_region *ndbr;
	int err = 0;

	nsblk = blk_dev->nsblk;
	ndbr = blk_dev->ndbr;
	ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size;
	dev_offset = to_dev_offset(nsblk, ns_offset, len);
	if (dev_offset == SIZE_MAX)
		return -EIO;

	while (len) {
		unsigned int cur_len;
		struct bio_vec bv;
		void *iobuf;

		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
		/*
		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
		 * can use those directly
		 */

		cur_len = min(len, bv.bv_len);
		iobuf = kmap_atomic(bv.bv_page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
				cur_len, rw);
		kunmap_atomic(iobuf);
		if (err)
			return err;

		len -= cur_len;
		dev_offset += cur_len;
		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
	}

	return err;
}

#else /* CONFIG_BLK_DEV_INTEGRITY */
static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
				struct bio_integrity_payload *bip, u64 lba,
				int rw)
{
	return 0;
}
#endif

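/*
 * Carry out the data transfer for a single bvec segment.  With an
 * integrity payload attached, the segment is processed one logical
 * sector at a time so that each sector's metadata can be transferred
 * alongside it; otherwise the whole segment goes to do_io in one shot.
 */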
static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
			struct bio_integrity_payload *bip, struct page *page,
			unsigned int len, unsigned int off, int rw,
			sector_t sector)
{
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	resource_size_t	dev_offset, ns_offset;
	int err = 0;
	void *iobuf;
	u64 lba;

	while (len) {
		unsigned int cur_len;

		/*
		 * If we don't have an integrity payload, we don't have to
		 * split the bvec into sectors, as this would cause unnecessary
		 * Block Window setup/move steps.  The do_io routine is capable
		 * of handling len <= PAGE_SIZE.
		 */
		cur_len = bip ? min(len, blk_dev->sector_size) : len;

		lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size);
		ns_offset = lba * blk_dev->internal_lbasize;
		dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len);
		if (dev_offset == SIZE_MAX)
			return -EIO;

		iobuf = kmap_atomic(page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
		kunmap_atomic(iobuf);
		if (err)
			return err;

		if (bip) {
			err = nd_blk_rw_integrity(blk_dev, bip, lba, rw);
			if (err)
				return err;
		}
		len -= cur_len;
		off += cur_len;
		sector += blk_dev->sector_size >> SECTOR_SHIFT;
	}

	return err;
}

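/*
 * bio submission entry point: walk every segment of the bio,
 * translating and transferring each one synchronously via
 * nd_blk_do_bvec, then complete the bio.
 */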
static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;
	struct gendisk *disk = bdev->bd_disk;
	struct bio_integrity_payload *bip;
	struct nd_blk_device *blk_dev;
	struct bvec_iter iter;
	unsigned long start;
	struct bio_vec bvec;
	int err = 0, rw;
	bool do_acct;

	/*
	 * bio_integrity_enabled also checks if the bio already has an
	 * integrity payload attached. If it does, we *don't* do a
	 * bio_integrity_prep here - the payload has been generated by
	 * another kernel subsystem, and we just pass it through.
	 */
	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
		bio->bi_error = -EIO;
		goto out;
	}

	bip = bio_integrity(bio);
	blk_dev = disk->private_data;
	rw = bio_data_dir(bio);
	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;

		BUG_ON(len > PAGE_SIZE);
		err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len,
					bvec.bv_offset, rw, iter.bi_sector);
		if (err) {
			dev_info(&blk_dev->nsblk->common.dev,
					"io error in %s sector %lld, len %d\n",
					(rw == READ) ? "READ" : "WRITE",
					(unsigned long long) iter.bi_sector, len);
			bio->bi_error = err;
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

 out:
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

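/*
 * rw_bytes() callback used by a stacked BTT instance to access the raw
 * namespace: bounds-check the request, translate the namespace offset,
 * and issue the transfer through the region's do_io routine.
 */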
static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *iobuf, size_t n, int rw)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	resource_size_t	dev_offset;

	dev_offset = to_dev_offset(nsblk, offset, n);

	if (unlikely(offset + n > blk_dev->disk_size)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}

	if (dev_offset == SIZE_MAX)
		return -EIO;

	return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
}

static const struct block_device_operations nd_blk_fops = {
	.owner = THIS_MODULE,
	.revalidate_disk = nvdimm_revalidate_disk,
};

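/*
 * Allocate and register the request queue and gendisk for the
 * namespace.  The disk is added with zero capacity first so integrity
 * can be registered before I/O is possible; the real capacity is set
 * only once setup has fully succeeded.
 */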
static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
		struct nd_blk_device *blk_dev)
{
	resource_size_t available_disk_size;
	struct gendisk *disk;
	u64 internal_nlba;

	internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize);
	available_disk_size = internal_nlba * blk_dev->sector_size;

	blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!blk_dev->queue)
		return -ENOMEM;

	blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
	blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
	blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
	blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);

	disk = blk_dev->disk = alloc_disk(0);
	if (!disk) {
		blk_cleanup_queue(blk_dev->queue);
		return -ENOMEM;
	}

	disk->driverfs_dev	= &ndns->dev;
	disk->major		= nd_blk_major;
	disk->first_minor	= 0;
	disk->fops		= &nd_blk_fops;
	disk->private_data	= blk_dev;
	disk->queue		= blk_dev->queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	set_capacity(disk, 0);
	add_disk(disk);

	if (nd_blk_meta_size(blk_dev)) {
		int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev));

		if (rc) {
			del_gendisk(disk);
			put_disk(disk);
			blk_cleanup_queue(blk_dev->queue);
			return rc;
		}
	}

	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
	revalidate_disk(disk);
	return 0;
}

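/*
 * Driver probe: cache the namespace geometry, round the LBA size up to
 * INT_LBASIZE_ALIGNMENT for media addressing, and either hand the
 * namespace to a BTT instance or surface it directly as a block
 * device.
 */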
static int nd_blk_probe(struct device *dev)
{
	struct nd_namespace_common *ndns;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_device *blk_dev;
	int rc;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
	if (!blk_dev)
		return -ENOMEM;

	nsblk = to_nd_namespace_blk(&ndns->dev);
	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
	blk_dev->ndbr = to_nd_blk_region(dev->parent);
	blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
	blk_dev->internal_lbasize = roundup(nsblk->lbasize,
						INT_LBASIZE_ALIGNMENT);
	blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
	dev_set_drvdata(dev, blk_dev);

	ndns->rw_bytes = nd_blk_rw_bytes;
	if (is_nd_btt(dev))
		rc = nvdimm_namespace_attach_btt(ndns);
	else if (nd_btt_probe(ndns, blk_dev) == 0) {
		/* we'll come back as btt-blk */
		rc = -ENXIO;
	} else
		rc = nd_blk_attach_disk(ndns, blk_dev);
	if (rc)
		kfree(blk_dev);
	return rc;
}

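/* Tear down the gendisk and request queue created by nd_blk_attach_disk(). */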
static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
{
	del_gendisk(blk_dev->disk);
	put_disk(blk_dev->disk);
	blk_cleanup_queue(blk_dev->queue);
}

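/* Driver remove: detach either the stacked BTT or the raw block disk. */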
static int nd_blk_remove(struct device *dev)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
	else
		nd_blk_detach_disk(blk_dev);
	kfree(blk_dev);

	return 0;
}

static struct nd_device_driver nd_blk_driver = {
	.probe = nd_blk_probe,
	.remove = nd_blk_remove,
	.drv = {
		.name = "nd_blk",
	},
	.type = ND_DRIVER_NAMESPACE_BLK,
};

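/*
 * Module init: reserve a dynamic block major for our disks, then
 * register with the NVDIMM bus; release the major again on failure.
 */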
static int __init nd_blk_init(void)
{
	int rc;

	rc = register_blkdev(0, "nd_blk");
	if (rc < 0)
		return rc;

	nd_blk_major = rc;
	rc = nd_driver_register(&nd_blk_driver);

	if (rc < 0)
		unregister_blkdev(nd_blk_major, "nd_blk");

	return rc;
}

static void __exit nd_blk_exit(void)
{
	driver_unregister(&nd_blk_driver.drv);
	unregister_blkdev(nd_blk_major, "nd_blk");
}

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
module_init(nd_blk_init);
module_exit(nd_blk_exit);