• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  pNFS Objects layout implementation over open-osd initiator library
3  *
4  *  Copyright (C) 2009 Panasas Inc. [year of first publication]
5  *  All rights reserved.
6  *
7  *  Benny Halevy <bhalevy@panasas.com>
8  *  Boaz Harrosh <ooo@electrozaur.com>
9  *
10  *  This program is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License version 2
12  *  See the file COPYING included with this distribution for more details.
13  *
14  *  Redistribution and use in source and binary forms, with or without
15  *  modification, are permitted provided that the following conditions
16  *  are met:
17  *
18  *  1. Redistributions of source code must retain the above copyright
19  *     notice, this list of conditions and the following disclaimer.
20  *  2. Redistributions in binary form must reproduce the above copyright
21  *     notice, this list of conditions and the following disclaimer in the
22  *     documentation and/or other materials provided with the distribution.
23  *  3. Neither the name of the Panasas company nor the names of its
24  *     contributors may be used to endorse or promote products derived
25  *     from this software without specific prior written permission.
26  *
27  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28  *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29  *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30  *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34  *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 #include <linux/module.h>
41 #include <scsi/osd_ore.h>
42 
43 #include "objlayout.h"
44 #include "../internal.h"
45 
46 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
47 
48 struct objio_dev_ent {
49 	struct nfs4_deviceid_node id_node;
50 	struct ore_dev od;
51 };
52 
53 static void
objio_free_deviceid_node(struct nfs4_deviceid_node * d)54 objio_free_deviceid_node(struct nfs4_deviceid_node *d)
55 {
56 	struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
57 
58 	dprintk("%s: free od=%p\n", __func__, de->od.od);
59 	osduld_put_device(de->od.od);
60 	kfree_rcu(d, rcu);
61 }
62 
63 struct objio_segment {
64 	struct pnfs_layout_segment lseg;
65 
66 	struct ore_layout layout;
67 	struct ore_components oc;
68 };
69 
70 static inline struct objio_segment *
OBJIO_LSEG(struct pnfs_layout_segment * lseg)71 OBJIO_LSEG(struct pnfs_layout_segment *lseg)
72 {
73 	return container_of(lseg, struct objio_segment, lseg);
74 }
75 
76 struct objio_state {
77 	/* Generic layer */
78 	struct objlayout_io_res oir;
79 
80 	bool sync;
81 	/*FIXME: Support for extra_bytes at ore_get_rw_state() */
82 	struct ore_io_state *ios;
83 };
84 
85 /* Send and wait for a get_device_info of devices in the layout,
86    then look them up with the osd_initiator library */
87 struct nfs4_deviceid_node *
objio_alloc_deviceid_node(struct nfs_server * server,struct pnfs_device * pdev,gfp_t gfp_flags)88 objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
89 			gfp_t gfp_flags)
90 {
91 	struct pnfs_osd_deviceaddr *deviceaddr;
92 	struct objio_dev_ent *ode = NULL;
93 	struct osd_dev *od;
94 	struct osd_dev_info odi;
95 	bool retry_flag = true;
96 	__be32 *p;
97 	int err;
98 
99 	deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
100 	if (!deviceaddr)
101 		return NULL;
102 
103 	p = page_address(pdev->pages[0]);
104 	pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
105 
106 	odi.systemid_len = deviceaddr->oda_systemid.len;
107 	if (odi.systemid_len > sizeof(odi.systemid)) {
108 		dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
109 			__func__, sizeof(odi.systemid));
110 		err = -EINVAL;
111 		goto out;
112 	} else if (odi.systemid_len)
113 		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
114 		       odi.systemid_len);
115 	odi.osdname_len	 = deviceaddr->oda_osdname.len;
116 	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
117 
118 	if (!odi.osdname_len && !odi.systemid_len) {
119 		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
120 			__func__);
121 		err = -ENODEV;
122 		goto out;
123 	}
124 
125 retry_lookup:
126 	od = osduld_info_lookup(&odi);
127 	if (IS_ERR(od)) {
128 		err = PTR_ERR(od);
129 		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
130 		if (err == -ENODEV && retry_flag) {
131 			err = objlayout_autologin(deviceaddr);
132 			if (likely(!err)) {
133 				retry_flag = false;
134 				goto retry_lookup;
135 			}
136 		}
137 		goto out;
138 	}
139 
140 	dprintk("Adding new dev_id(%llx:%llx)\n",
141 		_DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
142 
143 	ode = kzalloc(sizeof(*ode), gfp_flags);
144 	if (!ode) {
145 		dprintk("%s: -ENOMEM od=%p\n", __func__, od);
146 		goto out;
147 	}
148 
149 	nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
150 	kfree(deviceaddr);
151 
152 	ode->od.od = od;
153 	return &ode->id_node;
154 
155 out:
156 	kfree(deviceaddr);
157 	return NULL;
158 }
159 
copy_single_comp(struct ore_components * oc,unsigned c,struct pnfs_osd_object_cred * src_comp)160 static void copy_single_comp(struct ore_components *oc, unsigned c,
161 			     struct pnfs_osd_object_cred *src_comp)
162 {
163 	struct ore_comp *ocomp = &oc->comps[c];
164 
165 	WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
166 	WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
167 
168 	ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
169 	ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
170 
171 	memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
172 }
173 
/*
 * Allocate a zeroed objio_segment with room for @numdevs devices.
 *
 * A single allocation holds, back to back:
 *
 *	struct objio_segment	olseg;
 *	struct ore_dev		*ods[numdevs];
 *	struct ore_comp		comps[numdevs];
 *
 * The two trailing arrays are laid out by hand (rather than declared as
 * variable-length members) and oc.ods / oc.comps are pointed at them.
 *
 * Stores the new segment in *@pseg and returns 0, or returns -ENOMEM and
 * leaves *@pseg untouched.
 */
static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
		       struct objio_segment **pseg)
{
	struct objio_segment *seg;
	size_t ods_bytes = numdevs * sizeof(seg->oc.ods[0]);
	size_t comps_bytes = numdevs * sizeof(*seg->oc.comps);
	size_t total = sizeof(*seg) + ods_bytes + comps_bytes;

	seg = kzalloc(total, gfp_flags);
	if (unlikely(!seg)) {
		dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
			numdevs, total);
		return -ENOMEM;
	}

	seg->oc.numdevs = numdevs;
	seg->oc.single_comp = EC_MULTPLE_COMPS;

	/* Device pointers start right after the segment, components follow */
	seg->oc.ods = (void *)&seg[1];
	seg->oc.comps = (void *)&seg->oc.ods[numdevs];

	*pseg = seg;
	return 0;
}
209 
/*
 * Decode an objects layout from @xdr and build the matching objio_segment.
 *
 * The layout map is decoded and verified with ore_verify_layout(), then every
 * component is copied into the segment and its device-id node is looked up
 * (taking a reference) with nfs4_find_get_deviceid().
 *
 * On success *@outp points at the embedded generic segment and 0 is returned.
 * If decoding the map or allocating the segment fails, the error is returned
 * and *@outp is left untouched.  On any later failure every device-id
 * reference taken so far is dropped, the segment is freed, *@outp is set to
 * NULL and the error is returned.
 */
int objio_alloc_lseg(struct pnfs_layout_segment **outp,
	struct pnfs_layout_hdr *pnfslay,
	struct pnfs_layout_range *range,
	struct xdr_stream *xdr,
	gfp_t gfp_flags)
{
	struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
	struct objio_segment *objio_seg;
	struct pnfs_osd_xdr_decode_layout_iter iter;
	struct pnfs_osd_layout layout;
	struct pnfs_osd_object_cred src_comp;
	unsigned cur_comp = 0;
	unsigned i;
	int err;

	err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
	if (unlikely(err))
		return err;

	err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
	if (unlikely(err))
		return err;

	objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
	objio_seg->layout.group_width = layout.olo_map.odm_group_width;
	objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
	objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
	objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;

	err = ore_verify_layout(layout.olo_map.odm_num_comps,
					  &objio_seg->layout);
	if (unlikely(err))
		goto err;

	objio_seg->oc.first_dev = layout.olo_comps_index;
	while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
		struct nfs4_deviceid_node *d;
		struct objio_dev_ent *ode;

		/* Never write past the arrays sized from olo_num_comps */
		if (unlikely(cur_comp >= objio_seg->oc.numdevs)) {
			err = -EINVAL;
			goto err;
		}

		copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);

		d = nfs4_find_get_deviceid(server,
				&src_comp.oc_object_id.oid_device_id,
				pnfslay->plh_lc_cred, gfp_flags);
		if (!d) {
			err = -ENXIO;
			goto err;
		}

		ode = container_of(d, struct objio_dev_ent, id_node);
		objio_seg->oc.ods[cur_comp++] = &ode->od;
	}
	/* pnfs_osd_xdr_decode_layout_comp returns false on error */
	if (unlikely(err))
		goto err;

	*outp = &objio_seg->lseg;
	return 0;

err:
	/* Drop the device-id references collected before the failure */
	for (i = 0; i < cur_comp; i++) {
		struct objio_dev_ent *ode = container_of(objio_seg->oc.ods[i],
							 struct objio_dev_ent,
							 od);

		nfs4_put_deviceid_node(&ode->id_node);
	}
	kfree(objio_seg);
	dprintk("%s: Error: return %d\n", __func__, err);
	*outp = NULL;
	return err;
}
275 
objio_free_lseg(struct pnfs_layout_segment * lseg)276 void objio_free_lseg(struct pnfs_layout_segment *lseg)
277 {
278 	int i;
279 	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
280 
281 	for (i = 0; i < objio_seg->oc.numdevs; i++) {
282 		struct ore_dev *od = objio_seg->oc.ods[i];
283 		struct objio_dev_ent *ode;
284 
285 		if (!od)
286 			break;
287 		ode = container_of(od, typeof(*ode), od);
288 		nfs4_put_deviceid_node(&ode->id_node);
289 	}
290 	kfree(objio_seg);
291 }
292 
293 static int
objio_alloc_io_state(struct pnfs_layout_hdr * pnfs_layout_type,bool is_reading,struct pnfs_layout_segment * lseg,struct page ** pages,unsigned pgbase,loff_t offset,size_t count,void * rpcdata,gfp_t gfp_flags,struct objio_state ** outp)294 objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
295 	struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
296 	loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
297 	struct objio_state **outp)
298 {
299 	struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
300 	struct ore_io_state *ios;
301 	int ret;
302 	struct __alloc_objio_state {
303 		struct objio_state objios;
304 		struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
305 	} *aos;
306 
307 	aos = kzalloc(sizeof(*aos), gfp_flags);
308 	if (unlikely(!aos))
309 		return -ENOMEM;
310 
311 	objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
312 			aos->ioerrs, rpcdata, pnfs_layout_type);
313 
314 	ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
315 			       offset, count, &ios);
316 	if (unlikely(ret)) {
317 		kfree(aos);
318 		return ret;
319 	}
320 
321 	ios->pages = pages;
322 	ios->pgbase = pgbase;
323 	ios->private = aos;
324 	BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
325 
326 	aos->objios.sync = 0;
327 	aos->objios.ios = ios;
328 	*outp = &aos->objios;
329 	return 0;
330 }
331 
objio_free_result(struct objlayout_io_res * oir)332 void objio_free_result(struct objlayout_io_res *oir)
333 {
334 	struct objio_state *objios = container_of(oir, struct objio_state, oir);
335 
336 	ore_put_io_state(objios->ios);
337 	kfree(objios);
338 }
339 
osd_pri_2_pnfs_err(enum osd_err_priority oep)340 static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
341 {
342 	switch (oep) {
343 	case OSD_ERR_PRI_NO_ERROR:
344 		return (enum pnfs_osd_errno)0;
345 
346 	case OSD_ERR_PRI_CLEAR_PAGES:
347 		BUG_ON(1);
348 		return 0;
349 
350 	case OSD_ERR_PRI_RESOURCE:
351 		return PNFS_OSD_ERR_RESOURCE;
352 	case OSD_ERR_PRI_BAD_CRED:
353 		return PNFS_OSD_ERR_BAD_CRED;
354 	case OSD_ERR_PRI_NO_ACCESS:
355 		return PNFS_OSD_ERR_NO_ACCESS;
356 	case OSD_ERR_PRI_UNREACHABLE:
357 		return PNFS_OSD_ERR_UNREACHABLE;
358 	case OSD_ERR_PRI_NOT_FOUND:
359 		return PNFS_OSD_ERR_NOT_FOUND;
360 	case OSD_ERR_PRI_NO_SPACE:
361 		return PNFS_OSD_ERR_NO_SPACE;
362 	default:
363 		WARN_ON(1);
364 		/* fallthrough */
365 	case OSD_ERR_PRI_EIO:
366 		return PNFS_OSD_ERR_EIO;
367 	}
368 }
369 
/*
 * Per-device error callback passed to ore_check_io(): record the failing
 * object, the translated error and the affected device range in the generic
 * objlayout result attached to @ios.
 */
static void __on_dev_error(struct ore_io_state *ios,
	struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
	u64 dev_offset, u64  dev_len)
{
	struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
	struct objio_state *state = ios->private;
	struct pnfs_osd_objid failed_obj;
	/* FIXME: what to do with more-then-one-group layouts. We need to
	 * translate from ore_io_state index to oc->comps index
	 */
	unsigned comp = dev_index;
	bool is_write = !ios->reading;

	failed_obj.oid_device_id = ode->id_node.deviceid;
	failed_obj.oid_partition_id = ios->oc->comps[comp].obj.partition;
	failed_obj.oid_object_id = ios->oc->comps[comp].obj.id;

	objlayout_io_set_result(&state->oir, comp, &failed_obj,
				osd_pri_2_pnfs_err(oep),
				dev_offset, dev_len, is_write);
}
390 
391 /*
392  * read
393  */
_read_done(struct ore_io_state * ios,void * private)394 static void _read_done(struct ore_io_state *ios, void *private)
395 {
396 	struct objio_state *objios = private;
397 	ssize_t status;
398 	int ret = ore_check_io(ios, &__on_dev_error);
399 
400 	/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
401 
402 	if (likely(!ret))
403 		status = ios->length;
404 	else
405 		status = ret;
406 
407 	objlayout_read_done(&objios->oir, status, objios->sync);
408 }
409 
objio_read_pagelist(struct nfs_pgio_header * hdr)410 int objio_read_pagelist(struct nfs_pgio_header *hdr)
411 {
412 	struct objio_state *objios;
413 	int ret;
414 
415 	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
416 			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
417 			hdr->args.offset, hdr->args.count, hdr,
418 			GFP_KERNEL, &objios);
419 	if (unlikely(ret))
420 		return ret;
421 
422 	objios->ios->done = _read_done;
423 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
424 		hdr->args.offset, hdr->args.count);
425 	ret = ore_read(objios->ios);
426 	if (unlikely(ret))
427 		objio_free_result(&objios->oir);
428 	return ret;
429 }
430 
431 /*
432  * write
433  */
_write_done(struct ore_io_state * ios,void * private)434 static void _write_done(struct ore_io_state *ios, void *private)
435 {
436 	struct objio_state *objios = private;
437 	ssize_t status;
438 	int ret = ore_check_io(ios, &__on_dev_error);
439 
440 	/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
441 
442 	if (likely(!ret)) {
443 		/* FIXME: should be based on the OSD's persistence model
444 		 * See OSD2r05 Section 4.13 Data persistence model */
445 		objios->oir.committed = NFS_FILE_SYNC;
446 		status = ios->length;
447 	} else {
448 		status = ret;
449 	}
450 
451 	objlayout_write_done(&objios->oir, status, objios->sync);
452 }
453 
/*
 * get_page callback of _r4w_op: return the page-cache page covering file
 * @offset and report through @uptodate whether its contents are valid.
 *
 * Offsets at or beyond i_size yield ZERO_PAGE(0), flagged uptodate.  A page
 * missing from the cache is created (and unlocked again).  Returns NULL if
 * no page could be obtained.
 */
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
	struct objio_state *state = priv;
	struct nfs_pgio_header *hdr = state->oir.rpcdata;
	struct address_space *mapping = hdr->inode->i_mapping;
	pgoff_t index = offset >> PAGE_SHIFT;
	loff_t i_size = i_size_read(hdr->inode);
	struct page *page;

	if (offset >= i_size) {
		*uptodate = true;
		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
		return ZERO_PAGE(0);
	}

	page = find_get_page(mapping, index);
	if (page == NULL) {
		page = find_or_create_page(mapping, index, GFP_NOFS);
		if (unlikely(page == NULL)) {
			dprintk("%s: grab_cache_page Failed index=0x%lx\n",
				__func__, index);
			return NULL;
		}
		/* find_or_create_page() returns the page locked */
		unlock_page(page);
	}

	*uptodate = PageUptodate(page);
	dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
	return page;
}
483 
__r4w_put_page(void * priv,struct page * page)484 static void __r4w_put_page(void *priv, struct page *page)
485 {
486 	dprintk("%s: index=0x%lx\n", __func__,
487 		(page == ZERO_PAGE(0)) ? -1UL : page->index);
488 	if (ZERO_PAGE(0) != page)
489 		put_page(page);
490 	return;
491 }
492 
493 static const struct _ore_r4w_op _r4w_op = {
494 	.get_page = &__r4w_get_page,
495 	.put_page = &__r4w_put_page,
496 };
497 
objio_write_pagelist(struct nfs_pgio_header * hdr,int how)498 int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
499 {
500 	struct objio_state *objios;
501 	int ret;
502 
503 	ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
504 			hdr->lseg, hdr->args.pages, hdr->args.pgbase,
505 			hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
506 			&objios);
507 	if (unlikely(ret))
508 		return ret;
509 
510 	objios->sync = 0 != (how & FLUSH_SYNC);
511 	objios->ios->r4w = &_r4w_op;
512 
513 	if (!objios->sync)
514 		objios->ios->done = _write_done;
515 
516 	dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
517 		hdr->args.offset, hdr->args.count);
518 	ret = ore_write(objios->ios);
519 	if (unlikely(ret)) {
520 		objio_free_result(&objios->oir);
521 		return ret;
522 	}
523 
524 	if (objios->sync)
525 		_write_done(objios->ios, objios);
526 
527 	return 0;
528 }
529 
530 /*
531  * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
532  * of bytes (maximum @req->wb_bytes) that can be coalesced.
533  */
objio_pg_test(struct nfs_pageio_descriptor * pgio,struct nfs_page * prev,struct nfs_page * req)534 static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
535 			  struct nfs_page *prev, struct nfs_page *req)
536 {
537 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
538 	unsigned int size;
539 
540 	size = pnfs_generic_pg_test(pgio, prev, req);
541 
542 	if (!size || mirror->pg_count + req->wb_bytes >
543 	    (unsigned long)pgio->pg_layout_private)
544 		return 0;
545 
546 	return min(size, req->wb_bytes);
547 }
548 
objio_init_read(struct nfs_pageio_descriptor * pgio,struct nfs_page * req)549 static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
550 {
551 	pnfs_generic_pg_init_read(pgio, req);
552 	if (unlikely(pgio->pg_lseg == NULL))
553 		return; /* Not pNFS */
554 
555 	pgio->pg_layout_private = (void *)
556 				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
557 }
558 
aligned_on_raid_stripe(u64 offset,struct ore_layout * layout,unsigned long * stripe_end)559 static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
560 				   unsigned long *stripe_end)
561 {
562 	u32 stripe_off;
563 	unsigned stripe_size;
564 
565 	if (layout->raid_algorithm == PNFS_OSD_RAID_0)
566 		return true;
567 
568 	stripe_size = layout->stripe_unit *
569 				(layout->group_width - layout->parity);
570 
571 	div_u64_rem(offset, stripe_size, &stripe_off);
572 	if (!stripe_off)
573 		return true;
574 
575 	*stripe_end = stripe_size - stripe_off;
576 	return false;
577 }
578 
objio_init_write(struct nfs_pageio_descriptor * pgio,struct nfs_page * req)579 static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
580 {
581 	unsigned long stripe_end = 0;
582 	u64 wb_size;
583 
584 	if (pgio->pg_dreq == NULL)
585 		wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
586 	else
587 		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
588 
589 	pnfs_generic_pg_init_write(pgio, req, wb_size);
590 	if (unlikely(pgio->pg_lseg == NULL))
591 		return; /* Not pNFS */
592 
593 	if (req->wb_offset ||
594 	    !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
595 			       &OBJIO_LSEG(pgio->pg_lseg)->layout,
596 			       &stripe_end)) {
597 		pgio->pg_layout_private = (void *)stripe_end;
598 	} else {
599 		pgio->pg_layout_private = (void *)
600 				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
601 	}
602 }
603 
604 static const struct nfs_pageio_ops objio_pg_read_ops = {
605 	.pg_init = objio_init_read,
606 	.pg_test = objio_pg_test,
607 	.pg_doio = pnfs_generic_pg_readpages,
608 	.pg_cleanup = pnfs_generic_pg_cleanup,
609 };
610 
611 static const struct nfs_pageio_ops objio_pg_write_ops = {
612 	.pg_init = objio_init_write,
613 	.pg_test = objio_pg_test,
614 	.pg_doio = pnfs_generic_pg_writepages,
615 	.pg_cleanup = pnfs_generic_pg_cleanup,
616 };
617 
618 static struct pnfs_layoutdriver_type objlayout_type = {
619 	.id = LAYOUT_OSD2_OBJECTS,
620 	.name = "LAYOUT_OSD2_OBJECTS",
621 	.flags                   = PNFS_LAYOUTRET_ON_SETATTR |
622 				   PNFS_LAYOUTRET_ON_ERROR,
623 
624 	.max_deviceinfo_size	 = PAGE_SIZE,
625 	.owner		       	 = THIS_MODULE,
626 	.alloc_layout_hdr        = objlayout_alloc_layout_hdr,
627 	.free_layout_hdr         = objlayout_free_layout_hdr,
628 
629 	.alloc_lseg              = objlayout_alloc_lseg,
630 	.free_lseg               = objlayout_free_lseg,
631 
632 	.read_pagelist           = objlayout_read_pagelist,
633 	.write_pagelist          = objlayout_write_pagelist,
634 	.pg_read_ops             = &objio_pg_read_ops,
635 	.pg_write_ops            = &objio_pg_write_ops,
636 
637 	.sync			 = pnfs_generic_sync,
638 
639 	.free_deviceid_node	 = objio_free_deviceid_node,
640 
641 	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
642 	.encode_layoutreturn     = objlayout_encode_layoutreturn,
643 };
644 
645 MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
646 MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
647 MODULE_LICENSE("GPL");
648 
649 static int __init
objlayout_init(void)650 objlayout_init(void)
651 {
652 	int ret = pnfs_register_layoutdriver(&objlayout_type);
653 
654 	if (ret)
655 		printk(KERN_INFO
656 			"NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
657 			__func__, ret);
658 	else
659 		printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
660 			__func__);
661 	return ret;
662 }
663 
664 static void __exit
objlayout_exit(void)665 objlayout_exit(void)
666 {
667 	pnfs_unregister_layoutdriver(&objlayout_type);
668 	printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
669 	       __func__);
670 }
671 
672 MODULE_ALIAS("nfs-layouttype4-2");
673 
674 module_init(objlayout_init);
675 module_exit(objlayout_exit);
676