/*
 * linux/fs/nfs/blocklayout/blocklayout.c
 *
 * Module for the NFSv4.1 pNFS block layout driver.
 *
 * Copyright (c) 2006 The Regents of the University of Michigan.
 * All rights reserved.
 *
 * Andy Adamson <andros@citi.umich.edu>
 * Fred Isaman <iisaman@umich.edu>
 *
 * permission is granted to use, copy, create derivative works and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the university of michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization. if
 * the above copyright notice or any other identification of the
 * university of michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * this software is provided as is, without representation from the
 * university of michigan as to its fitness for any purpose, and without
 * warranty by the university of michigan of any kind, either express
 * or implied, including without limitation the implied warranties of
 * merchantability and fitness for a particular purpose. the regents
 * of the university of michigan shall not be liable for any damages,
 * including special, indirect, incidental, or consequential damages,
 * with respect to any claim arising out or in connection with the use
 * of the software, even if it has been or is hereafter advised of the
 * possibility of such damages.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/bio.h>		/* struct bio */
#include <linux/buffer_head.h>	/* various write calls */
#include <linux/prefetch.h>

#include "../pnfs.h"
#include "../internal.h"
#include "blocklayout.h"

#define NFSDBG_FACILITY NFSDBG_PNFS_LD

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");

static void print_page(struct page *page)
{
	dprintk("PRINTPAGE page %p\n", page);
	dprintk(" PagePrivate %d\n", PagePrivate(page));
	dprintk(" PageUptodate %d\n", PageUptodate(page));
	dprintk(" PageError %d\n", PageError(page));
	dprintk(" PageDirty %d\n", PageDirty(page));
	dprintk(" PageReferenced %d\n", PageReferenced(page));
	dprintk(" PageLocked %d\n", PageLocked(page));
	dprintk(" PageWriteback %d\n", PageWriteback(page));
	dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page));
	dprintk("\n");
}

/* Given the be associated with isect, determine if page data needs to be
 * initialized.
 */
static int is_hole(struct pnfs_block_extent *be, sector_t isect)
{
	if (be->be_state == PNFS_BLOCK_NONE_DATA)
		return 1;
	else if (be->be_state != PNFS_BLOCK_INVALID_DATA)
		return 0;
	else
		return !bl_is_sector_init(be->be_inval, isect);
}

/* Given the be associated with isect, determine if page data can be
 * written to disk.
 */
static int is_writable(struct pnfs_block_extent *be, sector_t isect)
{
	return (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
		be->be_state == PNFS_BLOCK_INVALID_DATA);
}

/* The data we are handed might be spread across several bios. We need
 * to track when the last one is finished.
 */
struct parallel_io {
	struct kref refcnt;
	void (*pnfs_callback) (void *data, int num_se);
	void *data;
	int bse_count;
};

static inline struct parallel_io *alloc_parallel(void *data)
{
	struct parallel_io *rv;

	rv = kmalloc(sizeof(*rv), GFP_NOFS);
	if (rv) {
		rv->data = data;
		kref_init(&rv->refcnt);
		rv->bse_count = 0;
	}
	return rv;
}

static inline void get_parallel(struct parallel_io *p)
{
	kref_get(&p->refcnt);
}

static void destroy_parallel(struct kref *kref)
{
	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);

	dprintk("%s enter\n", __func__);
	p->pnfs_callback(p->data, p->bse_count);
	kfree(p);
}

static inline void put_parallel(struct parallel_io *p)
{
	kref_put(&p->refcnt, destroy_parallel);
}

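/* Submit the bio, if any, after taking a reference on the parallel_io
 * tracker so its callback only fires once every submitted bio completes.
 * Always returns NULL so callers can reset their bio pointer in one step:
 *	bio = bl_submit_bio(READ, bio);
 */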
static struct bio *
bl_submit_bio(int rw, struct bio *bio)
{
	if (bio) {
		get_parallel(bio->bi_private);
		dprintk("%s submitting %s bio %u@%llu\n", __func__,
			rw == READ ? "read" : "write",
			bio->bi_size, (unsigned long long)bio->bi_sector);
		submit_bio(rw, bio);
	}
	return NULL;
}

static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
				     struct pnfs_block_extent *be,
				     void (*end_io)(struct bio *, int err),
				     struct parallel_io *par)
{
	struct bio *bio;

	npg = min(npg, BIO_MAX_PAGES);
	bio = bio_alloc(GFP_NOIO, npg);
	if (!bio && (current->flags & PF_MEMALLOC)) {
		while (!bio && (npg /= 2))
			bio = bio_alloc(GFP_NOIO, npg);
	}

	if (bio) {
		bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
		bio->bi_bdev = be->be_mdev;
		bio->bi_end_io = end_io;
		bio->bi_private = par;
	}
	return bio;
}

static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
				      sector_t isect, struct page *page,
				      struct pnfs_block_extent *be,
				      void (*end_io)(struct bio *, int err),
				      struct parallel_io *par,
				      unsigned int offset, int len)
{
	isect = isect + (offset >> SECTOR_SHIFT);
	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
		npg, rw, (unsigned long long)isect, offset, len);
retry:
	if (!bio) {
		bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
		if (!bio)
			return ERR_PTR(-ENOMEM);
	}
	if (bio_add_page(bio, page, len, offset) < len) {
		bio = bl_submit_bio(rw, bio);
		goto retry;
	}
	return bio;
}

static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
				      sector_t isect, struct page *page,
				      struct pnfs_block_extent *be,
				      void (*end_io)(struct bio *, int err),
				      struct parallel_io *par)
{
	return do_add_page_to_bio(bio, npg, rw, isect, page, be,
				  end_io, par, 0, PAGE_CACHE_SIZE);
}

/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;

	do {
		struct page *page = bvec->bv_page;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		if (uptodate)
			SetPageUptodate(page);
	} while (bvec >= bio->bi_io_vec);
	if (!uptodate) {
		if (!rdata->pnfs_error)
			rdata->pnfs_error = -EIO;
		pnfs_set_lo_fail(rdata->lseg);
	}
	bio_put(bio);
	put_parallel(par);
}

static void bl_read_cleanup(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_read_data *rdata;
	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	rdata = container_of(task, struct nfs_read_data, task);
	pnfs_ld_read_done(rdata);
}

static void
bl_end_par_io_read(void *data, int unused)
{
	struct nfs_read_data *rdata = data;

	rdata->task.tk_status = rdata->pnfs_error;
	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
	schedule_work(&rdata->task.u.tk_work);
}

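/* Issue reads for rdata->pages against the extents of the layout segment,
 * zero-filling pages that fall in holes.  Falls back to the MDS
 * (PNFS_NOT_ATTEMPTED) only if the parallel_io tracker cannot be allocated.
 */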
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
	int i, hole;
	struct bio *bio = NULL;
	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
	sector_t isect, extent_length = 0;
	struct parallel_io *par;
	loff_t f_offset = rdata->args.offset;
	struct page **pages = rdata->args.pages;
	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;

	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
		rdata->npages, f_offset, (unsigned int)rdata->args.count);

	par = alloc_parallel(rdata);
	if (!par)
		goto use_mds;
	par->pnfs_callback = bl_end_par_io_read;
	/* At this point, we can no longer jump to use_mds */

	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
	/* Code assumes extents are page-aligned */
	for (i = pg_index; i < rdata->npages; i++) {
		if (!extent_length) {
			/* We've used up the previous extent */
			bl_put_extent(be);
			bl_put_extent(cow_read);
			bio = bl_submit_bio(READ, bio);
			/* Get the next one */
			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
						isect, &cow_read);
			if (!be) {
				rdata->pnfs_error = -EIO;
				goto out;
			}
			extent_length = be->be_length -
					(isect - be->be_f_offset);
			if (cow_read) {
				sector_t cow_length = cow_read->be_length -
					(isect - cow_read->be_f_offset);
				extent_length = min(extent_length, cow_length);
			}
		}
		hole = is_hole(be, isect);
		if (hole && !cow_read) {
			bio = bl_submit_bio(READ, bio);
			/* Fill hole w/ zeroes w/o accessing device */
			dprintk("%s Zeroing page for hole\n", __func__);
			zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
			print_page(pages[i]);
			SetPageUptodate(pages[i]);
		} else {
			struct pnfs_block_extent *be_read;

			be_read = (hole && cow_read) ? cow_read : be;
			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
						 isect, pages[i], be_read,
						 bl_end_io_read, par);
			if (IS_ERR(bio)) {
				rdata->pnfs_error = PTR_ERR(bio);
				bio = NULL;
				goto out;
			}
		}
		isect += PAGE_CACHE_SECTORS;
		extent_length -= PAGE_CACHE_SECTORS;
	}
	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
		rdata->res.eof = 1;
		rdata->res.count = rdata->inode->i_size - f_offset;
	} else {
		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
	}
out:
	bl_put_extent(be);
	bl_put_extent(cow_read);
	bl_submit_bio(READ, bio);
	put_parallel(par);
	return PNFS_ATTEMPTED;

use_mds:
	dprintk("Giving up and using normal NFS\n");
	return PNFS_NOT_ATTEMPTED;
}

static void mark_extents_written(struct pnfs_block_layout *bl,
				 __u64 offset, __u32 count)
{
	sector_t isect, end;
	struct pnfs_block_extent *be;
	struct pnfs_block_short_extent *se;

	dprintk("%s(%llu, %u)\n", __func__, offset, count);
	if (count == 0)
		return;
	isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT;
	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
	end >>= SECTOR_SHIFT;
	while (isect < end) {
		sector_t len;
		be = bl_find_get_extent(bl, isect, NULL);
		BUG_ON(!be); /* FIXME */
		len = min(end, be->be_f_offset + be->be_length) - isect;
		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
			se = bl_pop_one_short_extent(be->be_inval);
			BUG_ON(!se);
			bl_mark_for_commit(be, isect, len, se);
		}
		isect += len;
		bl_put_extent(be);
	}
}

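/* Completion for bios built from the zeroing pages added around writes into
 * INVALID extents; ends writeback and drops the page references taken in
 * bl_find_get_zeroing_page.
 */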
static void bl_end_io_write_zero(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;

	do {
		struct page *page = bvec->bv_page;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		/* This is the zeroing page we added */
		end_page_writeback(page);
		page_cache_release(page);
	} while (bvec >= bio->bi_io_vec);

	if (unlikely(!uptodate)) {
		if (!wdata->pnfs_error)
			wdata->pnfs_error = -EIO;
		pnfs_set_lo_fail(wdata->lseg);
	}
	bio_put(bio);
	put_parallel(par);
}

static void bl_end_io_write(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;

	if (!uptodate) {
		if (!wdata->pnfs_error)
			wdata->pnfs_error = -EIO;
		pnfs_set_lo_fail(wdata->lseg);
	}
	bio_put(bio);
	put_parallel(par);
}

/* Function scheduled for call during bl_end_par_io_write;
 * it marks sectors as written and extends the commitlist.
 */
static void bl_write_cleanup(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_write_data *wdata;
	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	wdata = container_of(task, struct nfs_write_data, task);
	if (likely(!wdata->pnfs_error)) {
		/* Marks for LAYOUTCOMMIT */
		mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
				     wdata->args.offset, wdata->args.count);
	}
	pnfs_ld_write_done(wdata);
}

/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
	struct nfs_write_data *wdata = data;

	if (unlikely(wdata->pnfs_error)) {
		bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
				      num_se);
	}

	wdata->task.tk_status = wdata->pnfs_error;
	wdata->verf.committed = NFS_FILE_SYNC;
	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
	schedule_work(&wdata->task.u.tk_work);
}

/* FIXME STUB - mark intersection of layout and page as bad, so it is not
 * used again.
 */
static void mark_bad_read(void)
{
	return;
}

/*
 * map_block: map a requested I/O block (isect) into an offset in the LVM
 * block_device
 */
static void
map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
{
	dprintk("%s enter be=%p\n", __func__, be);

	set_buffer_mapped(bh);
	bh->b_bdev = be->be_mdev;
	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
			(be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT);

	dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n",
		__func__, (unsigned long long)isect, (long)bh->b_blocknr,
		bh->b_size);
	return;
}

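/* Completion for the synchronous single-page reads issued by
 * bl_do_readpage_sync; unlocking the shadow page wakes the submitter
 * sleeping in wait_on_page_locked().
 */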
static void
bl_read_single_end_io(struct bio *bio, int error)
{
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct page *page = bvec->bv_page;

	/* Only one page in bvec */
	unlock_page(page);
}

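/* Synchronously read sector data for @page from the extent @be into a
 * temporary shadow page, then copy the requested bytes back into @page.
 * Helper for the sector read-modify-write path below.
 */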
static int
bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
		    unsigned int offset, unsigned int len)
{
	struct bio *bio;
	struct page *shadow_page;
	sector_t isect;
	char *kaddr, *kshadow_addr;
	int ret = 0;

	dprintk("%s: offset %u len %u\n", __func__, offset, len);

	shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (shadow_page == NULL)
		return -ENOMEM;

	bio = bio_alloc(GFP_NOIO, 1);
	if (bio == NULL) {
		/* don't leak the shadow page on the early exit */
		__free_page(shadow_page);
		return -ENOMEM;
	}

	isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
		(offset / SECTOR_SIZE);

	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
	bio->bi_bdev = be->be_mdev;
	bio->bi_end_io = bl_read_single_end_io;

	lock_page(shadow_page);
	if (bio_add_page(bio, shadow_page,
			 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
		unlock_page(shadow_page);
		__free_page(shadow_page);
		bio_put(bio);
		return -EIO;
	}

	submit_bio(READ, bio);
	wait_on_page_locked(shadow_page);
	if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
		ret = -EIO;
	} else {
		kaddr = kmap_atomic(page);
		kshadow_addr = kmap_atomic(shadow_page);
		memcpy(kaddr + offset, kshadow_addr + offset, len);
		kunmap_atomic(kshadow_addr);
		kunmap_atomic(kaddr);
	}
	__free_page(shadow_page);
	bio_put(bio);

	return ret;
}

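/* Bring the parts of the page around the dirty range up to date: zero them
 * when there is no backing extent, otherwise read them in, so the caller can
 * expand a partial write to sector (or full-page) granularity.
 */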
static int
bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
			  unsigned int dirty_offset, unsigned int dirty_len,
			  bool full_page)
{
	int ret = 0;
	unsigned int start, end;

	if (full_page) {
		start = 0;
		end = PAGE_CACHE_SIZE;
	} else {
		start = round_down(dirty_offset, SECTOR_SIZE);
		end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
	}

	dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
	if (!be) {
		zero_user_segments(page, start, dirty_offset,
				   dirty_offset + dirty_len, end);
		if (start == 0 && end == PAGE_CACHE_SIZE &&
		    trylock_page(page)) {
			SetPageUptodate(page);
			unlock_page(page);
		}
		return ret;
	}

	if (start != dirty_offset)
		ret = bl_do_readpage_sync(page, be, start,
					  dirty_offset - start);

	if (!ret && (dirty_offset + dirty_len < end))
		ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
					  end - dirty_offset - dirty_len);

	return ret;
}

/* Given an unmapped page, zero it (or read it in for COW); the page is
 * locked by the caller.
 */
static int
init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
{
	struct buffer_head *bh = NULL;
	int ret = 0;
	sector_t isect;

	dprintk("%s enter, %p\n", __func__, page);
	BUG_ON(PageUptodate(page));
	if (!cow_read) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		goto cleanup;
	}

	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
	if (!bh) {
		ret = -ENOMEM;
		goto cleanup;
	}

	isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT;
	map_block(bh, isect, cow_read);
	if (!bh_uptodate_or_lock(bh))
		ret = bh_submit_read(bh);
	if (ret)
		goto cleanup;
	SetPageUptodate(page);

cleanup:
	if (bh)
		free_buffer_head(bh);
	if (ret) {
		/* Need to mark layout with bad read...should now
		 * just use nfs4 for reads and writes.
		 */
		mark_bad_read();
	}
	return ret;
}

/* Find or create a zeroing page marked as being under writeback.
 * Returns ERR_PTR on error, NULL to indicate the page should be skipped,
 * or the page itself to indicate it should be written out.
 */
static struct page *
bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
			 struct pnfs_block_extent *cow_read)
{
	struct page *page;
	int locked = 0;
	page = find_get_page(inode->i_mapping, index);
	if (page)
		goto check_page;

	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
	if (unlikely(!page)) {
		dprintk("%s oom\n", __func__);
		return ERR_PTR(-ENOMEM);
	}
	locked = 1;

check_page:
	/* PageDirty: Other will write this out
	 * PageWriteback: Other is writing this out
	 * PageUptodate: It was read before
	 */
	if (PageDirty(page) || PageWriteback(page)) {
		print_page(page);
		if (locked)
			unlock_page(page);
		page_cache_release(page);
		return NULL;
	}

	if (!locked) {
		lock_page(page);
		locked = 1;
		goto check_page;
	}
	if (!PageUptodate(page)) {
		/* New page, read it in or zero it */
		init_page_for_write(page, cow_read);
	}
	set_page_writeback(page);
	unlock_page(page);

	return page;
}

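/* Write out the pages in wdata against the layout's extents.  Writes landing
 * in an INVALID extent first zero-fill (or read in for copy-on-write) the
 * untouched pages of the surrounding block, and short extents are queued so
 * the written ranges can be reported at LAYOUTCOMMIT time.
 */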
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
	int i, ret, npg_zero, pg_index, last = 0;
	struct bio *bio = NULL;
	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
	sector_t isect, last_isect = 0, extent_length = 0;
	struct parallel_io *par;
	loff_t offset = wdata->args.offset;
	size_t count = wdata->args.count;
	unsigned int pg_offset, pg_len, saved_len;
	struct page **pages = wdata->args.pages;
	struct page *page;
	pgoff_t index;
	u64 temp;
	int npg_per_block =
		NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;

	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
	 * We want to write each, and if there is an error set pnfs_error
	 * to have it redone using nfs.
	 */
	par = alloc_parallel(wdata);
	if (!par)
		goto out_mds;
	par->pnfs_callback = bl_end_par_io_write;
	/* At this point, have to be more careful with error handling */

	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
	if (!be || !is_writable(be, isect)) {
		dprintk("%s no matching extents!\n", __func__);
		goto out_mds;
	}

	/* First page inside INVALID extent */
	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		if (likely(!bl_push_one_short_extent(be->be_inval)))
			par->bse_count++;
		else
			goto out_mds;
		temp = offset >> PAGE_CACHE_SHIFT;
		npg_zero = do_div(temp, npg_per_block);
		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
				     (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
		extent_length = be->be_length - (isect - be->be_f_offset);

fill_invalid_ext:
		dprintk("%s need to zero %d pages\n", __func__, npg_zero);
		for (; npg_zero > 0; npg_zero--) {
			if (bl_is_sector_init(be->be_inval, isect)) {
				dprintk("isect %llu already init\n",
					(unsigned long long)isect);
				goto next_page;
			}
			/* page ref released in bl_end_io_write_zero */
			index = isect >> PAGE_CACHE_SECTOR_SHIFT;
			dprintk("%s zero %dth page: index %lu isect %llu\n",
				__func__, npg_zero, index,
				(unsigned long long)isect);
			page = bl_find_get_zeroing_page(wdata->inode, index,
							cow_read);
			if (unlikely(IS_ERR(page))) {
				wdata->pnfs_error = PTR_ERR(page);
				goto out;
			} else if (page == NULL)
				goto next_page;

			ret = bl_mark_sectors_init(be->be_inval, isect,
						   PAGE_CACHE_SECTORS);
			if (unlikely(ret)) {
				dprintk("%s bl_mark_sectors_init fail %d\n",
					__func__, ret);
				end_page_writeback(page);
				page_cache_release(page);
				wdata->pnfs_error = ret;
				goto out;
			}
			if (likely(!bl_push_one_short_extent(be->be_inval)))
				par->bse_count++;
			else {
				end_page_writeback(page);
				page_cache_release(page);
				wdata->pnfs_error = -ENOMEM;
				goto out;
			}
			/* FIXME: This should be done in bi_end_io */
			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
					     page->index << PAGE_CACHE_SHIFT,
					     PAGE_CACHE_SIZE);

			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
						 isect, page, be,
						 bl_end_io_write_zero, par);
			if (IS_ERR(bio)) {
				wdata->pnfs_error = PTR_ERR(bio);
				bio = NULL;
				goto out;
			}
next_page:
			isect += PAGE_CACHE_SECTORS;
			extent_length -= PAGE_CACHE_SECTORS;
		}
		if (last)
			goto write_done;
	}
	bio = bl_submit_bio(WRITE, bio);

	/* Middle pages */
	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
	for (i = pg_index; i < wdata->npages; i++) {
		if (!extent_length) {
			/* We've used up the previous extent */
			bl_put_extent(be);
			bl_put_extent(cow_read);
			bio = bl_submit_bio(WRITE, bio);
			/* Get the next one */
			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
						isect, &cow_read);
			if (!be || !is_writable(be, isect)) {
				wdata->pnfs_error = -EINVAL;
				goto out;
			}
			if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
				if (likely(!bl_push_one_short_extent(
								be->be_inval)))
					par->bse_count++;
				else {
					wdata->pnfs_error = -ENOMEM;
					goto out;
				}
			}
			extent_length = be->be_length -
					(isect - be->be_f_offset);
		}

		dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
		pg_offset = offset & ~PAGE_CACHE_MASK;
		if (pg_offset + count > PAGE_CACHE_SIZE)
			pg_len = PAGE_CACHE_SIZE - pg_offset;
		else
			pg_len = count;

		saved_len = pg_len;
		if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
		    !bl_is_sector_init(be->be_inval, isect)) {
			ret = bl_read_partial_page_sync(pages[i], cow_read,
							pg_offset, pg_len, true);
			if (ret) {
				dprintk("%s bl_read_partial_page_sync fail %d\n",
					__func__, ret);
				wdata->pnfs_error = ret;
				goto out;
			}

			ret = bl_mark_sectors_init(be->be_inval, isect,
						   PAGE_CACHE_SECTORS);
			if (unlikely(ret)) {
				dprintk("%s bl_mark_sectors_init fail %d\n",
					__func__, ret);
				wdata->pnfs_error = ret;
				goto out;
			}

			/* Expand to full page write */
			pg_offset = 0;
			pg_len = PAGE_CACHE_SIZE;
		} else if ((pg_offset & (SECTOR_SIZE - 1)) ||
			   (pg_len & (SECTOR_SIZE - 1))) {
			/* ahh, nasty case. We have to do sync full sector
			 * read-modify-write cycles.
			 */
			unsigned int saved_offset = pg_offset;
			ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
							pg_len, false);
			pg_offset = round_down(pg_offset, SECTOR_SIZE);
			pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
				 - pg_offset;
		}
		bio = do_add_page_to_bio(bio, wdata->npages - i, WRITE,
					 isect, pages[i], be,
					 bl_end_io_write, par,
					 pg_offset, pg_len);
		if (IS_ERR(bio)) {
			wdata->pnfs_error = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
		offset += saved_len;
		count -= saved_len;
		isect += PAGE_CACHE_SECTORS;
		last_isect = isect;
		extent_length -= PAGE_CACHE_SECTORS;
	}

	/* Last page inside INVALID extent */
	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		bio = bl_submit_bio(WRITE, bio);
		temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT;
		npg_zero = npg_per_block - do_div(temp, npg_per_block);
		if (npg_zero < npg_per_block) {
			last = 1;
			goto fill_invalid_ext;
		}
	}

write_done:
	wdata->res.count = wdata->args.count;
out:
	bl_put_extent(be);
	bl_put_extent(cow_read);
	bl_submit_bio(WRITE, bio);
	put_parallel(par);
	return PNFS_ATTEMPTED;
out_mds:
	bl_put_extent(be);
	bl_put_extent(cow_read);
	kfree(par);
	return PNFS_NOT_ATTEMPTED;
}

/* FIXME - range ignored */
static void
release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range)
{
	int i;
	struct pnfs_block_extent *be;

	spin_lock(&bl->bl_ext_lock);
	for (i = 0; i < EXTENT_LISTS; i++) {
		while (!list_empty(&bl->bl_extents[i])) {
			be = list_first_entry(&bl->bl_extents[i],
					      struct pnfs_block_extent,
					      be_node);
			list_del(&be->be_node);
			bl_put_extent(be);
		}
	}
	spin_unlock(&bl->bl_ext_lock);
}

static void
release_inval_marks(struct pnfs_inval_markings *marks)
{
	struct pnfs_inval_tracking *pos, *temp;
	struct pnfs_block_short_extent *se, *stemp;

	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
		list_del(&pos->it_link);
		kfree(pos);
	}

	list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
		list_del(&se->bse_node);
		kfree(se);
	}
	return;
}

static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);

	dprintk("%s enter\n", __func__);
	release_extents(bl, NULL);
	release_inval_marks(&bl->bl_inval);
	kfree(bl);
}

static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
						   gfp_t gfp_flags)
{
	struct pnfs_block_layout *bl;

	dprintk("%s enter\n", __func__);
	bl = kzalloc(sizeof(*bl), gfp_flags);
	if (!bl)
		return NULL;
	spin_lock_init(&bl->bl_ext_lock);
	INIT_LIST_HEAD(&bl->bl_extents[0]);
	INIT_LIST_HEAD(&bl->bl_extents[1]);
	INIT_LIST_HEAD(&bl->bl_commit);
	INIT_LIST_HEAD(&bl->bl_committing);
	bl->bl_count = 0;
	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT;
	BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
	return &bl->bl_layout;
}

static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s enter\n", __func__);
	kfree(lseg);
}

/* We pretty much ignore lseg, and store all data layout wide, so we
 * can correctly merge.
 */
static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo,
						 struct nfs4_layoutget_res *lgr,
						 gfp_t gfp_flags)
{
	struct pnfs_layout_segment *lseg;
	int status;

	dprintk("%s enter\n", __func__);
	lseg = kzalloc(sizeof(*lseg), gfp_flags);
	if (!lseg)
		return ERR_PTR(-ENOMEM);
	status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags);
	if (status) {
		/* We don't want to call the full-blown bl_free_lseg,
		 * since on error extents were not touched.
		 */
		kfree(lseg);
		return ERR_PTR(status);
	}
	return lseg;
}

static void
bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
		       const struct nfs4_layoutcommit_args *arg)
{
	dprintk("%s enter\n", __func__);
	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
}

static void
bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
{
	struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout;

	dprintk("%s enter\n", __func__);
	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status);
}

static void free_blk_mountid(struct block_mount_id *mid)
{
	if (mid) {
		struct pnfs_block_dev *dev, *tmp;

		/* No need to take bm_lock as we are last user freeing bm_devlist */
		list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
			list_del(&dev->bm_node);
			bl_free_block_dev(dev);
		}
		kfree(mid);
	}
}

/* This is mostly copied from the filelayout's get_device_info function.
 * It seems much of this should be at the generic pnfs level.
 */
static struct pnfs_block_dev *
nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
			struct nfs4_deviceid *d_id)
{
	struct pnfs_device *dev;
	struct pnfs_block_dev *rv;
	u32 max_resp_sz;
	int max_pages;
	struct page **pages = NULL;
	int i, rc;

	/*
	 * Use the session max response size as the basis for setting
	 * GETDEVICEINFO's maxcount
	 */
	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
	max_pages = nfs_page_array_len(0, max_resp_sz);
	dprintk("%s max_resp_sz %u max_pages %d\n",
		__func__, max_resp_sz, max_pages);

	dev = kmalloc(sizeof(*dev), GFP_NOFS);
	if (!dev) {
		dprintk("%s kmalloc failed\n", __func__);
		return ERR_PTR(-ENOMEM);
	}

	pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
	if (pages == NULL) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}
	for (i = 0; i < max_pages; i++) {
		pages[i] = alloc_page(GFP_NOFS);
		if (!pages[i]) {
			rv = ERR_PTR(-ENOMEM);
			goto out_free;
		}
	}

	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
	dev->layout_type = LAYOUT_BLOCK_VOLUME;
	dev->pages = pages;
	dev->pgbase = 0;
	dev->pglen = PAGE_SIZE * max_pages;
	dev->mincount = 0;

	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
	rc = nfs4_proc_getdeviceinfo(server, dev);
	dprintk("%s getdevice info returns %d\n", __func__, rc);
	if (rc) {
		rv = ERR_PTR(rc);
		goto out_free;
	}

	rv = nfs4_blk_decode_device(server, dev);
out_free:
	for (i = 0; i < max_pages; i++) {
		/* entries past a failed allocation are still NULL */
		if (pages[i])
			__free_page(pages[i]);
	}
	kfree(pages);
	kfree(dev);
	return rv;
}

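/* Mount-time setup: require a server-provided block size, then walk the
 * GETDEVICELIST results and decode each device into a pnfs_block_dev hung
 * off the per-server block_mount_id.
 */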
static int
bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
{
	struct block_mount_id *b_mt_id = NULL;
	struct pnfs_devicelist *dlist = NULL;
	struct pnfs_block_dev *bdev;
	LIST_HEAD(block_disklist);
	int status, i;

	dprintk("%s enter\n", __func__);

	if (server->pnfs_blksize == 0) {
		dprintk("%s Server did not return blksize\n", __func__);
		return -EINVAL;
	}
	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS);
	if (!b_mt_id) {
		status = -ENOMEM;
		goto out_error;
	}
	/* Initialize nfs4 block layout mount id */
	spin_lock_init(&b_mt_id->bm_lock);
	INIT_LIST_HEAD(&b_mt_id->bm_devlist);

	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
	if (!dlist) {
		status = -ENOMEM;
		goto out_error;
	}
	dlist->eof = 0;
	while (!dlist->eof) {
		status = nfs4_proc_getdevicelist(server, fh, dlist);
		if (status)
			goto out_error;
		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
			__func__, dlist->num_devs, dlist->eof);
		for (i = 0; i < dlist->num_devs; i++) {
			bdev = nfs4_blk_get_deviceinfo(server, fh,
						       &dlist->dev_id[i]);
			if (IS_ERR(bdev)) {
				status = PTR_ERR(bdev);
				goto out_error;
			}
			spin_lock(&b_mt_id->bm_lock);
			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
			spin_unlock(&b_mt_id->bm_lock);
		}
	}
	dprintk("%s SUCCESS\n", __func__);
	server->pnfs_ld_data = b_mt_id;

out_return:
	kfree(dlist);
	return status;

out_error:
	free_blk_mountid(b_mt_id);
	goto out_return;
}

static int
bl_clear_layoutdriver(struct nfs_server *server)
{
	struct block_mount_id *b_mt_id = server->pnfs_ld_data;

	dprintk("%s enter\n", __func__);
	free_blk_mountid(b_mt_id);
	dprintk("%s RETURNS\n", __func__);
	return 0;
}

static const struct nfs_pageio_ops bl_pg_read_ops = {
	.pg_init = pnfs_generic_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops bl_pg_write_ops = {
	.pg_init = pnfs_generic_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
};

static struct pnfs_layoutdriver_type blocklayout_type = {
	.id = LAYOUT_BLOCK_VOLUME,
	.name = "LAYOUT_BLOCK_VOLUME",
	.owner = THIS_MODULE,
	.read_pagelist = bl_read_pagelist,
	.write_pagelist = bl_write_pagelist,
	.alloc_layout_hdr = bl_alloc_layout_hdr,
	.free_layout_hdr = bl_free_layout_hdr,
	.alloc_lseg = bl_alloc_lseg,
	.free_lseg = bl_free_lseg,
	.encode_layoutcommit = bl_encode_layoutcommit,
	.cleanup_layoutcommit = bl_cleanup_layoutcommit,
	.set_layoutdriver = bl_set_layoutdriver,
	.clear_layoutdriver = bl_clear_layoutdriver,
	.pg_read_ops = &bl_pg_read_ops,
	.pg_write_ops = &bl_pg_write_ops,
};

static const struct rpc_pipe_ops bl_upcall_ops = {
	.upcall = rpc_pipe_generic_upcall,
	.downcall = bl_pipe_downcall,
	.destroy_msg = bl_pipe_destroy_msg,
};

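/* Create the "blocklayout" upcall pipe under the NFS directory of the given
 * rpc_pipefs superblock; the pipe carries the driver's device-discovery
 * upcalls to user space.
 */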
static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb,
						  struct rpc_pipe *pipe)
{
	struct dentry *dir, *dentry;

	dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME);
	if (dir == NULL)
		return ERR_PTR(-ENOENT);
	dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe);
	dput(dir);
	return dentry;
}

static void nfs4blocklayout_unregister_sb(struct super_block *sb,
					  struct rpc_pipe *pipe)
{
	if (pipe->dentry)
		rpc_unlink(pipe->dentry);
}

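/* rpc_pipefs notifier: create or remove the blocklayout pipe dentry whenever
 * an rpc_pipefs instance is mounted or unmounted in a network namespace.
 */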
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
			    void *ptr)
{
	struct super_block *sb = ptr;
	struct net *net = sb->s_fs_info;
	struct nfs_net *nn = net_generic(net, nfs_net_id);
	struct dentry *dentry;
	int ret = 0;

	if (!try_module_get(THIS_MODULE))
		return 0;

	if (nn->bl_device_pipe == NULL) {
		module_put(THIS_MODULE);
		return 0;
	}

	switch (event) {
	case RPC_PIPEFS_MOUNT:
		dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe);
		if (IS_ERR(dentry)) {
			ret = PTR_ERR(dentry);
			break;
		}
		nn->bl_device_pipe->dentry = dentry;
		break;
	case RPC_PIPEFS_UMOUNT:
		if (nn->bl_device_pipe->dentry)
			nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe);
		break;
	default:
		ret = -ENOTSUPP;
		break;
	}
	module_put(THIS_MODULE);
	return ret;
}

static struct notifier_block nfs4blocklayout_block = {
	.notifier_call = rpc_pipefs_event,
};

static struct dentry *nfs4blocklayout_register_net(struct net *net,
						   struct rpc_pipe *pipe)
{
	struct super_block *pipefs_sb;
	struct dentry *dentry;

	pipefs_sb = rpc_get_sb_net(net);
	if (!pipefs_sb)
		return NULL;
	dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe);
	rpc_put_sb_net(net);
	return dentry;
}

static void nfs4blocklayout_unregister_net(struct net *net,
					   struct rpc_pipe *pipe)
{
	struct super_block *pipefs_sb;

	pipefs_sb = rpc_get_sb_net(net);
	if (pipefs_sb) {
		nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
		rpc_put_sb_net(net);
	}
}

static int nfs4blocklayout_net_init(struct net *net)
{
	struct nfs_net *nn = net_generic(net, nfs_net_id);
	struct dentry *dentry;

	init_waitqueue_head(&nn->bl_wq);
	nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
	if (IS_ERR(nn->bl_device_pipe))
		return PTR_ERR(nn->bl_device_pipe);
	dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe);
	if (IS_ERR(dentry)) {
		rpc_destroy_pipe_data(nn->bl_device_pipe);
		return PTR_ERR(dentry);
	}
	nn->bl_device_pipe->dentry = dentry;
	return 0;
}

static void nfs4blocklayout_net_exit(struct net *net)
{
	struct nfs_net *nn = net_generic(net, nfs_net_id);

	nfs4blocklayout_unregister_net(net, nn->bl_device_pipe);
	rpc_destroy_pipe_data(nn->bl_device_pipe);
	nn->bl_device_pipe = NULL;
}

static struct pernet_operations nfs4blocklayout_net_ops = {
	.init = nfs4blocklayout_net_init,
	.exit = nfs4blocklayout_net_exit,
};

static int __init nfs4blocklayout_init(void)
{
	int ret;

	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

	ret = pnfs_register_layoutdriver(&blocklayout_type);
	if (ret)
		goto out;

	ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block);
	if (ret)
		goto out_remove;
	ret = register_pernet_subsys(&nfs4blocklayout_net_ops);
	if (ret)
		goto out_notifier;
out:
	return ret;

out_notifier:
	rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
out_remove:
	pnfs_unregister_layoutdriver(&blocklayout_type);
	return ret;
}

static void __exit nfs4blocklayout_exit(void)
{
	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
		__func__);

	rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
	unregister_pernet_subsys(&nfs4blocklayout_net_ops);
	pnfs_unregister_layoutdriver(&blocklayout_type);
}

MODULE_ALIAS("nfs-layouttype4-3");

module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);