• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  pNFS Objects layout driver high level definitions
3  *
4  *  Copyright (C) 2007 Panasas Inc. [year of first publication]
5  *  All rights reserved.
6  *
7  *  Benny Halevy <bhalevy@panasas.com>
8  *  Boaz Harrosh <ooo@electrozaur.com>
9  *
10  *  This program is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License version 2
12  *  See the file COPYING included with this distribution for more details.
13  *
14  *  Redistribution and use in source and binary forms, with or without
15  *  modification, are permitted provided that the following conditions
16  *  are met:
17  *
18  *  1. Redistributions of source code must retain the above copyright
19  *     notice, this list of conditions and the following disclaimer.
20  *  2. Redistributions in binary form must reproduce the above copyright
21  *     notice, this list of conditions and the following disclaimer in the
22  *     documentation and/or other materials provided with the distribution.
23  *  3. Neither the name of the Panasas company nor the names of its
24  *     contributors may be used to endorse or promote products derived
25  *     from this software without specific prior written permission.
26  *
27  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28  *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29  *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30  *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34  *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 #include <linux/kmod.h>
41 #include <linux/moduleparam.h>
42 #include <linux/ratelimit.h>
43 #include <scsi/osd_initiator.h>
44 #include "objlayout.h"
45 
46 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
47 /*
48  * Create a objlayout layout structure for the given inode and return it.
49  */
50 struct pnfs_layout_hdr *
objlayout_alloc_layout_hdr(struct inode * inode,gfp_t gfp_flags)51 objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
52 {
53 	struct objlayout *objlay;
54 
55 	objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
56 	if (!objlay)
57 		return NULL;
58 	spin_lock_init(&objlay->lock);
59 	INIT_LIST_HEAD(&objlay->err_list);
60 	dprintk("%s: Return %p\n", __func__, objlay);
61 	return &objlay->pnfs_layout;
62 }
63 
64 /*
65  * Free an objlayout layout structure
66  */
67 void
objlayout_free_layout_hdr(struct pnfs_layout_hdr * lo)68 objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
69 {
70 	struct objlayout *objlay = OBJLAYOUT(lo);
71 
72 	dprintk("%s: objlay %p\n", __func__, objlay);
73 
74 	WARN_ON(!list_empty(&objlay->err_list));
75 	kfree(objlay);
76 }
77 
78 /*
79  * Unmarshall layout and store it in pnfslay.
80  */
81 struct pnfs_layout_segment *
objlayout_alloc_lseg(struct pnfs_layout_hdr * pnfslay,struct nfs4_layoutget_res * lgr,gfp_t gfp_flags)82 objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
83 		     struct nfs4_layoutget_res *lgr,
84 		     gfp_t gfp_flags)
85 {
86 	int status = -ENOMEM;
87 	struct xdr_stream stream;
88 	struct xdr_buf buf = {
89 		.pages =  lgr->layoutp->pages,
90 		.page_len =  lgr->layoutp->len,
91 		.buflen =  lgr->layoutp->len,
92 		.len = lgr->layoutp->len,
93 	};
94 	struct page *scratch;
95 	struct pnfs_layout_segment *lseg;
96 
97 	dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
98 
99 	scratch = alloc_page(gfp_flags);
100 	if (!scratch)
101 		goto err_nofree;
102 
103 	xdr_init_decode(&stream, &buf, NULL);
104 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
105 
106 	status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
107 	if (unlikely(status)) {
108 		dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
109 			status);
110 		goto err;
111 	}
112 
113 	__free_page(scratch);
114 
115 	dprintk("%s: Return %p\n", __func__, lseg);
116 	return lseg;
117 
118 err:
119 	__free_page(scratch);
120 err_nofree:
121 	dprintk("%s: Err Return=>%d\n", __func__, status);
122 	return ERR_PTR(status);
123 }
124 
/*
 * Free a layout segment.
 *
 * A NULL @lseg is tolerated (only the debug message is printed);
 * otherwise the segment is handed to objio_free_lseg().
 */
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s: freeing layout segment %p\n", __func__, lseg);

	if (likely(lseg))
		objio_free_lseg(lseg);
}
138 
139 /*
140  * I/O Operations
141  */
142 static inline u64
end_offset(u64 start,u64 len)143 end_offset(u64 start, u64 len)
144 {
145 	u64 end;
146 
147 	end = start + len;
148 	return end >= start ? end : NFS4_MAX_UINT64;
149 }
150 
/*
 * Sanity-check an I/O request against its layout segment and normalise
 * the page array / page base pair.
 *
 * @lseg:     layout segment the I/O is issued under
 * @p_pages:  in/out pointer to the page array; advanced past whole pages
 *            that are covered by *@p_pgbase
 * @p_pgbase: in/out byte offset into the first page; reduced to an
 *            in-page offset
 * @offset:   file offset of the I/O
 * @count:    byte count of the I/O
 *
 * BUGs if @offset lies outside the segment, and warns if the I/O runs
 * past the segment end.
 */
static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
			   struct page ***p_pages, unsigned *p_pgbase,
			   u64 offset, unsigned long count)
{
	u64 lseg_end_offset;

	BUG_ON(offset < lseg->pls_range.offset);
	lseg_end_offset = end_offset(lseg->pls_range.offset,
				     lseg->pls_range.length);
	BUG_ON(offset >= lseg_end_offset);
	WARN_ON(offset + count > lseg_end_offset);

	/*
	 * A page base of exactly PAGE_SIZE already points at the start of
	 * the next page, so it has to be normalised as well; hence ">=",
	 * not ">".
	 */
	if (*p_pgbase >= PAGE_SIZE) {
		dprintk("%s: pgbase(0x%x) >= PAGE_SIZE\n", __func__, *p_pgbase);
		*p_pages += *p_pgbase >> PAGE_SHIFT;
		*p_pgbase &= ~PAGE_MASK;
	}
}
169 
170 /*
171  * I/O done common code
172  */
173 static void
objlayout_iodone(struct objlayout_io_res * oir)174 objlayout_iodone(struct objlayout_io_res *oir)
175 {
176 	if (likely(oir->status >= 0)) {
177 		objio_free_result(oir);
178 	} else {
179 		struct objlayout *objlay = oir->objlay;
180 
181 		spin_lock(&objlay->lock);
182 		objlay->delta_space_valid = OBJ_DSU_INVALID;
183 		list_add(&objlay->err_list, &oir->err_list);
184 		spin_unlock(&objlay->lock);
185 	}
186 }
187 
188 /*
189  * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
190  *
191  * The @index component IO failed (error returned from target). Register
192  * the error for later reporting at layout-return.
193  */
194 void
objlayout_io_set_result(struct objlayout_io_res * oir,unsigned index,struct pnfs_osd_objid * pooid,int osd_error,u64 offset,u64 length,bool is_write)195 objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
196 			struct pnfs_osd_objid *pooid, int osd_error,
197 			u64 offset, u64 length, bool is_write)
198 {
199 	struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
200 
201 	BUG_ON(index >= oir->num_comps);
202 	if (osd_error) {
203 		ioerr->oer_component = *pooid;
204 		ioerr->oer_comp_offset = offset;
205 		ioerr->oer_comp_length = length;
206 		ioerr->oer_iswrite = is_write;
207 		ioerr->oer_errno = osd_error;
208 
209 		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
210 			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
211 			__func__, index, ioerr->oer_errno,
212 			ioerr->oer_iswrite,
213 			_DEVID_LO(&ioerr->oer_component.oid_device_id),
214 			_DEVID_HI(&ioerr->oer_component.oid_device_id),
215 			ioerr->oer_component.oid_partition_id,
216 			ioerr->oer_component.oid_object_id,
217 			ioerr->oer_comp_offset,
218 			ioerr->oer_comp_length);
219 	} else {
220 		/* User need not call if no error is reported */
221 		ioerr->oer_errno = 0;
222 	}
223 }
224 
225 /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
226  * This is because the osd completion is called with ints-off from
227  * the block layer
228  */
_rpc_read_complete(struct work_struct * work)229 static void _rpc_read_complete(struct work_struct *work)
230 {
231 	struct rpc_task *task;
232 	struct nfs_pgio_header *hdr;
233 
234 	dprintk("%s enter\n", __func__);
235 	task = container_of(work, struct rpc_task, u.tk_work);
236 	hdr = container_of(task, struct nfs_pgio_header, task);
237 
238 	pnfs_ld_read_done(hdr);
239 }
240 
241 void
objlayout_read_done(struct objlayout_io_res * oir,ssize_t status,bool sync)242 objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243 {
244 	struct nfs_pgio_header *hdr = oir->rpcdata;
245 
246 	oir->status = hdr->task.tk_status = status;
247 	if (status >= 0)
248 		hdr->res.count = status;
249 	else
250 		hdr->pnfs_error = status;
251 	objlayout_iodone(oir);
252 	/* must not use oir after this point */
253 
254 	dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
255 		status, hdr->res.eof, sync);
256 
257 	if (sync)
258 		pnfs_ld_read_done(hdr);
259 	else {
260 		INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
261 		schedule_work(&hdr->task.u.tk_work);
262 	}
263 }
264 
265 /*
266  * Perform sync or async reads.
267  */
268 enum pnfs_try_status
objlayout_read_pagelist(struct nfs_pgio_header * hdr)269 objlayout_read_pagelist(struct nfs_pgio_header *hdr)
270 {
271 	struct inode *inode = hdr->inode;
272 	loff_t offset = hdr->args.offset;
273 	size_t count = hdr->args.count;
274 	int err;
275 	loff_t eof;
276 
277 	eof = i_size_read(inode);
278 	if (unlikely(offset + count > eof)) {
279 		if (offset >= eof) {
280 			err = 0;
281 			hdr->res.count = 0;
282 			hdr->res.eof = 1;
283 			/*FIXME: do we need to call pnfs_ld_read_done() */
284 			goto out;
285 		}
286 		count = eof - offset;
287 	}
288 
289 	hdr->res.eof = (offset + count) >= eof;
290 	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
291 			      &hdr->args.pgbase,
292 			      hdr->args.offset, hdr->args.count);
293 
294 	dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
295 		__func__, inode->i_ino, offset, count, hdr->res.eof);
296 
297 	err = objio_read_pagelist(hdr);
298  out:
299 	if (unlikely(err)) {
300 		hdr->pnfs_error = err;
301 		dprintk("%s: Returned Error %d\n", __func__, err);
302 		return PNFS_NOT_ATTEMPTED;
303 	}
304 	return PNFS_ATTEMPTED;
305 }
306 
307 /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
308  * This is because the osd completion is called with ints-off from
309  * the block layer
310  */
_rpc_write_complete(struct work_struct * work)311 static void _rpc_write_complete(struct work_struct *work)
312 {
313 	struct rpc_task *task;
314 	struct nfs_pgio_header *hdr;
315 
316 	dprintk("%s enter\n", __func__);
317 	task = container_of(work, struct rpc_task, u.tk_work);
318 	hdr = container_of(task, struct nfs_pgio_header, task);
319 
320 	pnfs_ld_write_done(hdr);
321 }
322 
323 void
objlayout_write_done(struct objlayout_io_res * oir,ssize_t status,bool sync)324 objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
325 {
326 	struct nfs_pgio_header *hdr = oir->rpcdata;
327 
328 	oir->status = hdr->task.tk_status = status;
329 	if (status >= 0) {
330 		hdr->res.count = status;
331 		hdr->verf.committed = oir->committed;
332 	} else {
333 		hdr->pnfs_error = status;
334 	}
335 	objlayout_iodone(oir);
336 	/* must not use oir after this point */
337 
338 	dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
339 		status, hdr->verf.committed, sync);
340 
341 	if (sync)
342 		pnfs_ld_write_done(hdr);
343 	else {
344 		INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
345 		schedule_work(&hdr->task.u.tk_work);
346 	}
347 }
348 
349 /*
350  * Perform sync or async writes.
351  */
352 enum pnfs_try_status
objlayout_write_pagelist(struct nfs_pgio_header * hdr,int how)353 objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
354 {
355 	int err;
356 
357 	_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
358 			      &hdr->args.pgbase,
359 			      hdr->args.offset, hdr->args.count);
360 
361 	err = objio_write_pagelist(hdr, how);
362 	if (unlikely(err)) {
363 		hdr->pnfs_error = err;
364 		dprintk("%s: Returned Error %d\n", __func__, err);
365 		return PNFS_NOT_ATTEMPTED;
366 	}
367 	return PNFS_ATTEMPTED;
368 }
369 
370 void
objlayout_encode_layoutcommit(struct pnfs_layout_hdr * pnfslay,struct xdr_stream * xdr,const struct nfs4_layoutcommit_args * args)371 objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
372 			      struct xdr_stream *xdr,
373 			      const struct nfs4_layoutcommit_args *args)
374 {
375 	struct objlayout *objlay = OBJLAYOUT(pnfslay);
376 	struct pnfs_osd_layoutupdate lou;
377 	__be32 *start;
378 
379 	dprintk("%s: Begin\n", __func__);
380 
381 	spin_lock(&objlay->lock);
382 	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
383 	lou.dsu_delta = objlay->delta_space_used;
384 	objlay->delta_space_used = 0;
385 	objlay->delta_space_valid = OBJ_DSU_INIT;
386 	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
387 	spin_unlock(&objlay->lock);
388 
389 	start = xdr_reserve_space(xdr, 4);
390 
391 	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
392 
393 	*start = cpu_to_be32((xdr->p - start - 1) * 4);
394 
395 	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
396 		lou.dsu_delta, lou.olu_ioerr_flag);
397 }
398 
399 static int
err_prio(u32 oer_errno)400 err_prio(u32 oer_errno)
401 {
402 	switch (oer_errno) {
403 	case 0:
404 		return 0;
405 
406 	case PNFS_OSD_ERR_RESOURCE:
407 		return OSD_ERR_PRI_RESOURCE;
408 	case PNFS_OSD_ERR_BAD_CRED:
409 		return OSD_ERR_PRI_BAD_CRED;
410 	case PNFS_OSD_ERR_NO_ACCESS:
411 		return OSD_ERR_PRI_NO_ACCESS;
412 	case PNFS_OSD_ERR_UNREACHABLE:
413 		return OSD_ERR_PRI_UNREACHABLE;
414 	case PNFS_OSD_ERR_NOT_FOUND:
415 		return OSD_ERR_PRI_NOT_FOUND;
416 	case PNFS_OSD_ERR_NO_SPACE:
417 		return OSD_ERR_PRI_NO_SPACE;
418 	default:
419 		WARN_ON(1);
420 		/* fallthrough */
421 	case PNFS_OSD_ERR_EIO:
422 		return OSD_ERR_PRI_EIO;
423 	}
424 }
425 
426 static void
merge_ioerr(struct pnfs_osd_ioerr * dest_err,const struct pnfs_osd_ioerr * src_err)427 merge_ioerr(struct pnfs_osd_ioerr *dest_err,
428 	    const struct pnfs_osd_ioerr *src_err)
429 {
430 	u64 dest_end, src_end;
431 
432 	if (!dest_err->oer_errno) {
433 		*dest_err = *src_err;
434 		/* accumulated device must be blank */
435 		memset(&dest_err->oer_component.oid_device_id, 0,
436 			sizeof(dest_err->oer_component.oid_device_id));
437 
438 		return;
439 	}
440 
441 	if (dest_err->oer_component.oid_partition_id !=
442 				src_err->oer_component.oid_partition_id)
443 		dest_err->oer_component.oid_partition_id = 0;
444 
445 	if (dest_err->oer_component.oid_object_id !=
446 				src_err->oer_component.oid_object_id)
447 		dest_err->oer_component.oid_object_id = 0;
448 
449 	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
450 		dest_err->oer_comp_offset = src_err->oer_comp_offset;
451 
452 	dest_end = end_offset(dest_err->oer_comp_offset,
453 			      dest_err->oer_comp_length);
454 	src_end =  end_offset(src_err->oer_comp_offset,
455 			      src_err->oer_comp_length);
456 	if (dest_end < src_end)
457 		dest_end = src_end;
458 
459 	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
460 
461 	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
462 	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
463 			dest_err->oer_errno = src_err->oer_errno;
464 	} else if (src_err->oer_iswrite) {
465 		dest_err->oer_iswrite = true;
466 		dest_err->oer_errno = src_err->oer_errno;
467 	}
468 }
469 
470 static void
encode_accumulated_error(struct objlayout * objlay,__be32 * p)471 encode_accumulated_error(struct objlayout *objlay, __be32 *p)
472 {
473 	struct objlayout_io_res *oir, *tmp;
474 	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
475 
476 	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
477 		unsigned i;
478 
479 		for (i = 0; i < oir->num_comps; i++) {
480 			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
481 
482 			if (!ioerr->oer_errno)
483 				continue;
484 
485 			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
486 				"is_write=%d dev(%llx:%llx) par=0x%llx "
487 				"obj=0x%llx offset=0x%llx length=0x%llx\n",
488 				__func__, i, ioerr->oer_errno,
489 				ioerr->oer_iswrite,
490 				_DEVID_LO(&ioerr->oer_component.oid_device_id),
491 				_DEVID_HI(&ioerr->oer_component.oid_device_id),
492 				ioerr->oer_component.oid_partition_id,
493 				ioerr->oer_component.oid_object_id,
494 				ioerr->oer_comp_offset,
495 				ioerr->oer_comp_length);
496 
497 			merge_ioerr(&accumulated_err, ioerr);
498 		}
499 		list_del(&oir->err_list);
500 		objio_free_result(oir);
501 	}
502 
503 	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
504 }
505 
506 void
objlayout_encode_layoutreturn(struct pnfs_layout_hdr * pnfslay,struct xdr_stream * xdr,const struct nfs4_layoutreturn_args * args)507 objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
508 			      struct xdr_stream *xdr,
509 			      const struct nfs4_layoutreturn_args *args)
510 {
511 	struct objlayout *objlay = OBJLAYOUT(pnfslay);
512 	struct objlayout_io_res *oir, *tmp;
513 	__be32 *start;
514 
515 	dprintk("%s: Begin\n", __func__);
516 	start = xdr_reserve_space(xdr, 4);
517 	BUG_ON(!start);
518 
519 	spin_lock(&objlay->lock);
520 
521 	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
522 		__be32 *last_xdr = NULL, *p;
523 		unsigned i;
524 		int res = 0;
525 
526 		for (i = 0; i < oir->num_comps; i++) {
527 			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
528 
529 			if (!ioerr->oer_errno)
530 				continue;
531 
532 			dprintk("%s: err[%d]: errno=%d is_write=%d "
533 				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
534 				"offset=0x%llx length=0x%llx\n",
535 				__func__, i, ioerr->oer_errno,
536 				ioerr->oer_iswrite,
537 				_DEVID_LO(&ioerr->oer_component.oid_device_id),
538 				_DEVID_HI(&ioerr->oer_component.oid_device_id),
539 				ioerr->oer_component.oid_partition_id,
540 				ioerr->oer_component.oid_object_id,
541 				ioerr->oer_comp_offset,
542 				ioerr->oer_comp_length);
543 
544 			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
545 			if (unlikely(!p)) {
546 				res = -E2BIG;
547 				break; /* accumulated_error */
548 			}
549 
550 			last_xdr = p;
551 			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
552 		}
553 
554 		/* TODO: use xdr_write_pages */
555 		if (unlikely(res)) {
556 			/* no space for even one error descriptor */
557 			BUG_ON(!last_xdr);
558 
559 			/* we've encountered a situation with lots and lots of
560 			 * errors and no space to encode them all. Use the last
561 			 * available slot to report the union of all the
562 			 * remaining errors.
563 			 */
564 			encode_accumulated_error(objlay, last_xdr);
565 			goto loop_done;
566 		}
567 		list_del(&oir->err_list);
568 		objio_free_result(oir);
569 	}
570 loop_done:
571 	spin_unlock(&objlay->lock);
572 
573 	*start = cpu_to_be32((xdr->p - start - 1) * 4);
574 	dprintk("%s: Return\n", __func__);
575 }
576 
577 enum {
578 	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
579 	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
580 	OSD_LOGIN_UPCALL_PATHLEN  = 256
581 };
582 
583 static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
584 
585 module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
586 		    0600);
587 MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
588 
589 struct __auto_login {
590 	char uri[OBJLAYOUT_MAX_URI_LEN];
591 	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
592 	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
593 };
594 
__objlayout_upcall(struct __auto_login * login)595 static int __objlayout_upcall(struct __auto_login *login)
596 {
597 	static char *envp[] = { "HOME=/",
598 		"TERM=linux",
599 		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
600 		NULL
601 	};
602 	char *argv[8];
603 	int ret;
604 
605 	if (unlikely(!osd_login_prog[0])) {
606 		dprintk("%s: osd_login_prog is disabled\n", __func__);
607 		return -EACCES;
608 	}
609 
610 	dprintk("%s uri: %s\n", __func__, login->uri);
611 	dprintk("%s osdname %s\n", __func__, login->osdname);
612 	dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
613 
614 	argv[0] = (char *)osd_login_prog;
615 	argv[1] = "-u";
616 	argv[2] = login->uri;
617 	argv[3] = "-o";
618 	argv[4] = login->osdname;
619 	argv[5] = "-s";
620 	argv[6] = login->systemid_hex;
621 	argv[7] = NULL;
622 
623 	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
624 	/*
625 	 * Disable the upcall mechanism if we're getting an ENOENT or
626 	 * EACCES error. The admin can re-enable it on the fly by using
627 	 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
628 	 * the problem has been fixed.
629 	 */
630 	if (ret == -ENOENT || ret == -EACCES) {
631 		printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
632 			"objlayoutdriver.osd_login_prog kernel parameter!\n",
633 			osd_login_prog);
634 		osd_login_prog[0] = '\0';
635 	}
636 	dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
637 
638 	return ret;
639 }
640 
641 /* Assume dest is all zeros */
__copy_nfsS_and_zero_terminate(struct nfs4_string s,char * dest,int max_len,const char * var_name)642 static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
643 					   char *dest, int max_len,
644 					   const char *var_name)
645 {
646 	if (!s.len)
647 		return;
648 
649 	if (s.len >= max_len) {
650 		pr_warn_ratelimited(
651 			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
652 			var_name, s.len, max_len);
653 		s.len = max_len - 1; /* space for null terminator */
654 	}
655 
656 	memcpy(dest, s.data, s.len);
657 }
658 
659 /* Assume sysid is all zeros */
_sysid_2_hex(struct nfs4_string s,char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])660 static void _sysid_2_hex(struct nfs4_string s,
661 		  char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
662 {
663 	int i;
664 	char *cur;
665 
666 	if (!s.len)
667 		return;
668 
669 	if (s.len != OSD_SYSTEMID_LEN) {
670 		pr_warn_ratelimited(
671 		    "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
672 		    s.len);
673 		if (s.len > OSD_SYSTEMID_LEN)
674 			s.len = OSD_SYSTEMID_LEN;
675 	}
676 
677 	cur = sysid;
678 	for (i = 0; i < s.len; i++)
679 		cur = hex_byte_pack(cur, s.data[i]);
680 }
681 
objlayout_autologin(struct pnfs_osd_deviceaddr * deviceaddr)682 int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
683 {
684 	int rc;
685 	struct __auto_login login;
686 
687 	if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
688 		return -ENODEV;
689 
690 	memset(&login, 0, sizeof(login));
691 	__copy_nfsS_and_zero_terminate(
692 		deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
693 		login.uri, sizeof(login.uri), "URI");
694 
695 	__copy_nfsS_and_zero_terminate(
696 		deviceaddr->oda_osdname,
697 		login.osdname, sizeof(login.osdname), "OSDNAME");
698 
699 	_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
700 
701 	rc = __objlayout_upcall(&login);
702 	if (rc > 0) /* script returns positive values */
703 		rc = -ENODEV;
704 
705 	return rc;
706 }
707