1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26 /*
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
30 * Copyright (c) 2011, 2012, Intel Corporation.
31 */
32 /*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/obdclass/obd_mount.c
37 *
38 * Client mount routines
39 *
40 * Author: Nathan Rutman <nathan@clusterfs.com>
41 */
42
43 #define DEBUG_SUBSYSTEM S_CLASS
44 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
45 #define PRINT_CMD CDEBUG
46
47 #include "../include/obd.h"
48 #include "../include/linux/lustre_compat25.h"
49 #include "../include/obd_class.h"
50 #include "../include/lustre/lustre_user.h"
51 #include "../include/lustre_log.h"
52 #include "../include/lustre_disk.h"
53 #include "../include/lustre_param.h"
54
/*
 * Client mount entry point.  lustre_fill_super() calls through this pointer
 * to finish a client mount and, while it is still NULL, tries
 * request_module("lustre") first.  Nothing in the visible part of this file
 * assigns it.
 */
55 static int (*client_fill_super)(struct super_block *sb,
56 struct vfsmount *mnt);
57
/*
 * Superblock teardown callback.
 * NOTE(review): neither set nor called in the visible part of this file;
 * presumably registered together with client_fill_super -- confirm.
 */
58 static void (*kill_super_cb)(struct super_block *sb);
59
60 /**************** config llog ********************/
61
62 /** Get a config log from the MGS and process it.
63 * This func is called for both clients and servers.
64 * Continue to process new statements appended to the logs
65 * (whenever the config lock is revoked) until lustre_end_log
66 * is called.
67 * @param sb The superblock is used by the MGC to write to the local copy of
68 * the config log
69 * @param logname The name of the llog to replicate from the MGS
70 * @param cfg Since the same mgc may be used to follow multiple config logs
71 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
72 * this log, and is added to the mgc's list of logs to follow.
73 */
lustre_process_log(struct super_block * sb,char * logname,struct config_llog_instance * cfg)74 int lustre_process_log(struct super_block *sb, char *logname,
75 struct config_llog_instance *cfg)
76 {
77 struct lustre_cfg *lcfg;
78 struct lustre_cfg_bufs *bufs;
79 struct lustre_sb_info *lsi = s2lsi(sb);
80 struct obd_device *mgc = lsi->lsi_mgc;
81 int rc;
82
83 LASSERT(mgc);
84 LASSERT(cfg);
85
86 bufs = kzalloc(sizeof(*bufs), GFP_NOFS);
87 if (!bufs)
88 return -ENOMEM;
89
90 /* mgc_process_config */
91 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
92 lustre_cfg_bufs_set_string(bufs, 1, logname);
93 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
94 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
95 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
96 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
97 lustre_cfg_free(lcfg);
98
99 kfree(bufs);
100
101 if (rc == -EINVAL)
102 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
103 mgc->obd_name, logname, rc);
104
105 if (rc)
106 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
107 mgc->obd_name, logname,
108 rc);
109
110 /* class_obd_list(); */
111 return rc;
112 }
113 EXPORT_SYMBOL(lustre_process_log);
114
115 /* Stop watching this config log for updates */
lustre_end_log(struct super_block * sb,char * logname,struct config_llog_instance * cfg)116 int lustre_end_log(struct super_block *sb, char *logname,
117 struct config_llog_instance *cfg)
118 {
119 struct lustre_cfg *lcfg;
120 struct lustre_cfg_bufs bufs;
121 struct lustre_sb_info *lsi = s2lsi(sb);
122 struct obd_device *mgc = lsi->lsi_mgc;
123 int rc;
124
125 if (!mgc)
126 return -ENOENT;
127
128 /* mgc_process_config */
129 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
130 lustre_cfg_bufs_set_string(&bufs, 1, logname);
131 if (cfg)
132 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
133 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
134 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
135 lustre_cfg_free(lcfg);
136 return rc;
137 }
138 EXPORT_SYMBOL(lustre_end_log);
139
140 /**************** obd start *******************/
141
142 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
143 * lctl (and do for echo cli/srv.
144 */
do_lcfg(char * cfgname,lnet_nid_t nid,int cmd,char * s1,char * s2,char * s3,char * s4)145 static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
146 char *s1, char *s2, char *s3, char *s4)
147 {
148 struct lustre_cfg_bufs bufs;
149 struct lustre_cfg *lcfg = NULL;
150 int rc;
151
152 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
153 cmd, s1, s2, s3, s4);
154
155 lustre_cfg_bufs_reset(&bufs, cfgname);
156 if (s1)
157 lustre_cfg_bufs_set_string(&bufs, 1, s1);
158 if (s2)
159 lustre_cfg_bufs_set_string(&bufs, 2, s2);
160 if (s3)
161 lustre_cfg_bufs_set_string(&bufs, 3, s3);
162 if (s4)
163 lustre_cfg_bufs_set_string(&bufs, 4, s4);
164
165 lcfg = lustre_cfg_new(cmd, &bufs);
166 lcfg->lcfg_nid = nid;
167 rc = class_process_config(lcfg);
168 lustre_cfg_free(lcfg);
169 return rc;
170 }
171
172 /** Call class_attach and class_setup. These methods in turn call
173 * obd type-specific methods.
174 */
lustre_start_simple(char * obdname,char * type,char * uuid,char * s1,char * s2,char * s3,char * s4)175 static int lustre_start_simple(char *obdname, char *type, char *uuid,
176 char *s1, char *s2, char *s3, char *s4)
177 {
178 int rc;
179
180 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
181
182 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
183 if (rc) {
184 CERROR("%s attach error %d\n", obdname, rc);
185 return rc;
186 }
187 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
188 if (rc) {
189 CERROR("%s setup error %d\n", obdname, rc);
190 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
191 }
192 return rc;
193 }
194
/*
 * Held by lustre_start_mgc() and lustre_stop_mgc() while they look up,
 * start, re-reference or tear down an MGC, so that starting and stopping
 * do not interleave.
 */
195 DEFINE_MUTEX(mgc_start_lock);
196
197 /** Set up a mgc obd to process startup logs
198 *
199 * \param sb [in] super block of the mgc obd
200 *
201 * \retval 0 success, otherwise error code
202 */
lustre_start_mgc(struct super_block * sb)203 int lustre_start_mgc(struct super_block *sb)
204 {
205 struct obd_connect_data *data = NULL;
206 struct lustre_sb_info *lsi = s2lsi(sb);
207 struct obd_device *obd;
208 struct obd_export *exp;
209 struct obd_uuid *uuid;
210 class_uuid_t uuidc;
211 lnet_nid_t nid;
212 char nidstr[LNET_NIDSTR_SIZE];
213 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
214 char *ptr;
215 int rc = 0, i = 0, j;
216
217 LASSERT(lsi->lsi_lmd);
218
219 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220 ptr = lsi->lsi_lmd->lmd_dev;
221 if (class_parse_nid(ptr, &nid, &ptr) == 0)
222 i++;
223 if (i == 0) {
224 CERROR("No valid MGS nids found.\n");
225 return -EINVAL;
226 }
227
228 mutex_lock(&mgc_start_lock);
229
230 libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
231 mgcname = kasprintf(GFP_NOFS,
232 "%s%s", LUSTRE_MGC_OBDNAME, nidstr);
233 niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i);
234 if (!mgcname || !niduuid) {
235 rc = -ENOMEM;
236 goto out_free;
237 }
238
239 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
240
241 data = kzalloc(sizeof(*data), GFP_NOFS);
242 if (!data) {
243 rc = -ENOMEM;
244 goto out_free;
245 }
246
247 obd = class_name2obd(mgcname);
248 if (obd && !obd->obd_stopping) {
249 int recov_bk;
250
251 rc = obd_set_info_async(NULL, obd->obd_self_export,
252 strlen(KEY_MGSSEC), KEY_MGSSEC,
253 strlen(mgssec), mgssec, NULL);
254 if (rc)
255 goto out_free;
256
257 /* Re-using an existing MGC */
258 atomic_inc(&obd->u.cli.cl_mgc_refcount);
259
260 /* IR compatibility check, only for clients */
261 if (lmd_is_client(lsi->lsi_lmd)) {
262 int has_ir;
263 int vallen = sizeof(*data);
264 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
265
266 rc = obd_get_info(NULL, obd->obd_self_export,
267 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
268 &vallen, data, NULL);
269 LASSERT(rc == 0);
270 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
271 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
272 /* LMD_FLG_NOIR is for test purpose only */
273 LCONSOLE_WARN(
274 "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275 has_ir ? "enabled" : "disabled");
276 if (has_ir)
277 *flags &= ~LMD_FLG_NOIR;
278 else
279 *flags |= LMD_FLG_NOIR;
280 }
281 }
282
283 recov_bk = 0;
284
285 /* Try all connections, but only once (again).
286 We don't want to block another target from starting
287 (using its local copy of the log), but we do want to connect
288 if at all possible. */
289 recov_bk++;
290 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
291 recov_bk);
292 rc = obd_set_info_async(NULL, obd->obd_self_export,
293 sizeof(KEY_INIT_RECOV_BACKUP),
294 KEY_INIT_RECOV_BACKUP,
295 sizeof(recov_bk), &recov_bk, NULL);
296 rc = 0;
297 goto out;
298 }
299
300 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
301
302 /* Add the primary nids for the MGS */
303 i = 0;
304 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
305 ptr = lsi->lsi_lmd->lmd_dev;
306 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
307 rc = do_lcfg(mgcname, nid,
308 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
309 i++;
310 /* Stop at the first failover nid */
311 if (*ptr == ':')
312 break;
313 }
314 if (i == 0) {
315 CERROR("No valid MGS nids found.\n");
316 rc = -EINVAL;
317 goto out_free;
318 }
319 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
320
321 /* Random uuid for MGC allows easier reconnects */
322 uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
323 if (!uuid) {
324 rc = -ENOMEM;
325 goto out_free;
326 }
327
328 ll_generate_random_uuid(uuidc);
329 class_uuid_unparse(uuidc, uuid);
330
331 /* Start the MGC */
332 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
333 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
334 niduuid, NULL, NULL);
335 kfree(uuid);
336 if (rc)
337 goto out_free;
338
339 /* Add any failover MGS nids */
340 i = 1;
341 while (ptr && ((*ptr == ':' ||
342 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
343 /* New failover node */
344 sprintf(niduuid, "%s_%x", mgcname, i);
345 j = 0;
346 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
347 j++;
348 rc = do_lcfg(mgcname, nid,
349 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
350 if (*ptr == ':')
351 break;
352 }
353 if (j > 0) {
354 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
355 niduuid, NULL, NULL, NULL);
356 i++;
357 } else {
358 /* at ":/fsname" */
359 break;
360 }
361 }
362 lsi->lsi_lmd->lmd_mgs_failnodes = i;
363
364 obd = class_name2obd(mgcname);
365 if (!obd) {
366 CERROR("Can't find mgcobd %s\n", mgcname);
367 rc = -ENOTCONN;
368 goto out_free;
369 }
370
371 rc = obd_set_info_async(NULL, obd->obd_self_export,
372 strlen(KEY_MGSSEC), KEY_MGSSEC,
373 strlen(mgssec), mgssec, NULL);
374 if (rc)
375 goto out_free;
376
377 /* Keep a refcount of servers/clients who started with "mount",
378 so we know when we can get rid of the mgc. */
379 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
380
381 /* We connect to the MGS at setup, and don't disconnect until cleanup */
382 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
383 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
384 OBD_CONNECT_LVB_TYPE;
385
386 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
387 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
388 #else
389 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
390 #endif
391
392 if (lmd_is_client(lsi->lsi_lmd) &&
393 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
394 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
395 data->ocd_version = LUSTRE_VERSION_CODE;
396 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
397 if (rc) {
398 CERROR("connect failed %d\n", rc);
399 goto out;
400 }
401
402 obd->u.cli.cl_mgc_mgsexp = exp;
403
404 out:
405 /* Keep the mgc info in the sb. Note that many lsi's can point
406 to the same mgc.*/
407 lsi->lsi_mgc = obd;
408 out_free:
409 mutex_unlock(&mgc_start_lock);
410
411 kfree(data);
412 kfree(mgcname);
413 kfree(niduuid);
414 return rc;
415 }
416
lustre_stop_mgc(struct super_block * sb)417 static int lustre_stop_mgc(struct super_block *sb)
418 {
419 struct lustre_sb_info *lsi = s2lsi(sb);
420 struct obd_device *obd;
421 char *niduuid = NULL, *ptr = NULL;
422 int i, rc = 0, len = 0;
423
424 if (!lsi)
425 return -ENOENT;
426 obd = lsi->lsi_mgc;
427 if (!obd)
428 return -ENOENT;
429 lsi->lsi_mgc = NULL;
430
431 mutex_lock(&mgc_start_lock);
432 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
433 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
434 /* This is not fatal, every client that stops
435 will call in here. */
436 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
437 atomic_read(&obd->u.cli.cl_mgc_refcount));
438 rc = -EBUSY;
439 goto out;
440 }
441
442 /* The MGC has no recoverable data in any case.
443 * force shutdown set in umount_begin */
444 obd->obd_no_recov = 1;
445
446 if (obd->u.cli.cl_mgc_mgsexp) {
447 /* An error is not fatal, if we are unable to send the
448 disconnect mgs ping evictor cleans up the export */
449 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
450 if (rc)
451 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
452 }
453
454 /* Save the obdname for cleaning the nid uuids, which are
455 obdname_XX */
456 len = strlen(obd->obd_name) + 6;
457 niduuid = kzalloc(len, GFP_NOFS);
458 if (niduuid) {
459 strcpy(niduuid, obd->obd_name);
460 ptr = niduuid + strlen(niduuid);
461 }
462
463 rc = class_manual_cleanup(obd);
464 if (rc)
465 goto out;
466
467 /* Clean the nid uuids */
468 if (!niduuid) {
469 rc = -ENOMEM;
470 goto out;
471 }
472
473 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
474 sprintf(ptr, "_%x", i);
475 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
476 niduuid, NULL, NULL, NULL);
477 if (rc)
478 CERROR("del MDC UUID %s failed: rc = %d\n",
479 niduuid, rc);
480 }
481 out:
482 kfree(niduuid);
483
484 /* class_import_put will get rid of the additional connections */
485 mutex_unlock(&mgc_start_lock);
486 return rc;
487 }
488
489 /***************** lustre superblock **************/
490
lustre_init_lsi(struct super_block * sb)491 static struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
492 {
493 struct lustre_sb_info *lsi;
494
495 lsi = kzalloc(sizeof(*lsi), GFP_NOFS);
496 if (!lsi)
497 return NULL;
498 lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS);
499 if (!lsi->lsi_lmd) {
500 kfree(lsi);
501 return NULL;
502 }
503
504 lsi->lsi_lmd->lmd_exclude_count = 0;
505 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
506 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
507 s2lsi_nocast(sb) = lsi;
508 /* we take 1 extra ref for our setup */
509 atomic_set(&lsi->lsi_mounts, 1);
510
511 /* Default umount style */
512 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
513
514 return lsi;
515 }
516
lustre_free_lsi(struct super_block * sb)517 static int lustre_free_lsi(struct super_block *sb)
518 {
519 struct lustre_sb_info *lsi = s2lsi(sb);
520
521 LASSERT(lsi != NULL);
522 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
523
524 /* someone didn't call server_put_mount. */
525 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
526
527 if (lsi->lsi_lmd != NULL) {
528 kfree(lsi->lsi_lmd->lmd_dev);
529 kfree(lsi->lsi_lmd->lmd_profile);
530 kfree(lsi->lsi_lmd->lmd_mgssec);
531 kfree(lsi->lsi_lmd->lmd_opts);
532 if (lsi->lsi_lmd->lmd_exclude_count)
533 kfree(lsi->lsi_lmd->lmd_exclude);
534 kfree(lsi->lsi_lmd->lmd_mgs);
535 kfree(lsi->lsi_lmd->lmd_osd_type);
536 kfree(lsi->lsi_lmd->lmd_params);
537
538 kfree(lsi->lsi_lmd);
539 }
540
541 LASSERT(lsi->lsi_llsbi == NULL);
542 kfree(lsi);
543 s2lsi_nocast(sb) = NULL;
544
545 return 0;
546 }
547
548 /* The lsi has one reference for every server that is using the disk -
549 e.g. MDT, MGS, and potentially MGC */
lustre_put_lsi(struct super_block * sb)550 static int lustre_put_lsi(struct super_block *sb)
551 {
552 struct lustre_sb_info *lsi = s2lsi(sb);
553
554 LASSERT(lsi != NULL);
555
556 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
557 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
558 lustre_free_lsi(sb);
559 return 1;
560 }
561 return 0;
562 }
563
564 /*** SERVER NAME ***
565 * <FSNAME><SEPARATOR><TYPE><INDEX>
566 * FSNAME is between 1 and 8 characters (inclusive).
567 * Excluded characters are '/' and ':'
568 * SEPARATOR is either ':' or '-'
569 * TYPE: "OST", "MDT", etc.
570 * INDEX: Hex representation of the index
571 */
572
/** Get the fsname ("lustre") from the server name ("lustre-OST003F").
 *
 * The fsname ends at the last '-' or ':' found at offsets 1..8 of
 * \a svname (an fsname is at most 8 characters long and cannot be empty).
 *
 * @param [in]  svname server name including type and index
 * @param [out] fsname Buffer to copy filesystem name prefix into, may be
 *                     NULL.  Must have room for the prefix plus a NUL.
 * @param [out] endptr if endptr isn't NULL it is set to end of fsname,
 *                     i.e. it points at the separator
 *
 * @return 0 on success, -EINVAL if no separator is found in that range
 */
static int server_name2fsname(const char *svname, char *fsname,
			      const char **endptr)
{
	size_t sep = 0;	/* offset of the separator; 0 means "none seen" */
	size_t pos;

	/*
	 * Walk offsets 1..8 without running past the terminator and
	 * remember the last separator seen.
	 */
	for (pos = 1; pos <= 8 && svname[pos - 1] != '\0'; pos++) {
		if (svname[pos] == '-' || svname[pos] == ':')
			sep = pos;
	}
	if (sep == 0)
		return -EINVAL;

	if (fsname != NULL) {
		memcpy(fsname, svname, sep);
		fsname[sep] = '\0';
	}

	if (endptr != NULL)
		*endptr = svname + sep;

	return 0;
}
601
602 /* Get the index from the obd name.
603 rc = server type, or
604 rc < 0 on error
605 if endptr isn't NULL it is set to end of name */
server_name2index(const char * svname,__u32 * idx,const char ** endptr)606 static int server_name2index(const char *svname, __u32 *idx,
607 const char **endptr)
608 {
609 unsigned long index;
610 int rc;
611 const char *dash;
612
613 /* We use server_name2fsname() just for parsing */
614 rc = server_name2fsname(svname, NULL, &dash);
615 if (rc != 0)
616 return rc;
617
618 dash++;
619
620 if (strncmp(dash, "MDT", 3) == 0)
621 rc = LDD_F_SV_TYPE_MDT;
622 else if (strncmp(dash, "OST", 3) == 0)
623 rc = LDD_F_SV_TYPE_OST;
624 else
625 return -EINVAL;
626
627 dash += 3;
628
629 if (strncmp(dash, "all", 3) == 0) {
630 if (endptr != NULL)
631 *endptr = dash + 3;
632 return rc | LDD_F_SV_ALL;
633 }
634
635 index = simple_strtoul(dash, (char **)endptr, 16);
636 if (idx != NULL)
637 *idx = index;
638
639 /* Account for -mdc after index that is possible when specifying mdt */
640 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
641 sizeof(LUSTRE_MDC_NAME)-1) == 0)
642 *endptr += sizeof(LUSTRE_MDC_NAME);
643
644 return rc;
645 }
646
647 /*************** mount common between server and client ***************/
648
649 /* Common umount */
lustre_common_put_super(struct super_block * sb)650 int lustre_common_put_super(struct super_block *sb)
651 {
652 int rc;
653
654 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
655
656 /* Drop a ref to the MGC */
657 rc = lustre_stop_mgc(sb);
658 if (rc && (rc != -ENOENT)) {
659 if (rc != -EBUSY) {
660 CERROR("Can't stop MGC: %d\n", rc);
661 return rc;
662 }
663 /* BUSY just means that there's some other obd that
664 needs the mgc. Let him clean it up. */
665 CDEBUG(D_MOUNT, "MGC still in use\n");
666 }
667 /* Drop a ref to the mounted disk */
668 lustre_put_lsi(sb);
669 lu_types_stop();
670 return rc;
671 }
672 EXPORT_SYMBOL(lustre_common_put_super);
673
lmd_print(struct lustre_mount_data * lmd)674 static void lmd_print(struct lustre_mount_data *lmd)
675 {
676 int i;
677
678 PRINT_CMD(D_MOUNT, " mount data:\n");
679 if (lmd_is_client(lmd))
680 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
681 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
682 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
683
684 if (lmd->lmd_opts)
685 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
686
687 if (lmd->lmd_recovery_time_soft)
688 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
689 lmd->lmd_recovery_time_soft);
690
691 if (lmd->lmd_recovery_time_hard)
692 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
693 lmd->lmd_recovery_time_hard);
694
695 for (i = 0; i < lmd->lmd_exclude_count; i++) {
696 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
697 lmd->lmd_exclude[i]);
698 }
699 }
700
701 /* Is this server on the exclusion list */
lustre_check_exclusion(struct super_block * sb,char * svname)702 int lustre_check_exclusion(struct super_block *sb, char *svname)
703 {
704 struct lustre_sb_info *lsi = s2lsi(sb);
705 struct lustre_mount_data *lmd = lsi->lsi_lmd;
706 __u32 index;
707 int i, rc;
708
709 rc = server_name2index(svname, &index, NULL);
710 if (rc != LDD_F_SV_TYPE_OST)
711 /* Only exclude OSTs */
712 return 0;
713
714 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
715 index, lmd->lmd_exclude_count, lmd->lmd_dev);
716
717 for (i = 0; i < lmd->lmd_exclude_count; i++) {
718 if (index == lmd->lmd_exclude[i]) {
719 CWARN("Excluding %s (on exclusion list)\n", svname);
720 return 1;
721 }
722 }
723 return 0;
724 }
725
726 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
lmd_make_exclusion(struct lustre_mount_data * lmd,const char * ptr)727 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
728 {
729 const char *s1 = ptr, *s2;
730 __u32 index, *exclude_list;
731 int rc = 0, devmax;
732
733 /* The shortest an ost name can be is 8 chars: -OST0000.
734 We don't actually know the fsname at this time, so in fact
735 a user could specify any fsname. */
736 devmax = strlen(ptr) / 8 + 1;
737
738 /* temp storage until we figure out how many we have */
739 exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS);
740 if (!exclude_list)
741 return -ENOMEM;
742
743 /* we enter this fn pointing at the '=' */
744 while (*s1 && *s1 != ' ' && *s1 != ',') {
745 s1++;
746 rc = server_name2index(s1, &index, &s2);
747 if (rc < 0) {
748 CERROR("Can't parse server name '%s': rc = %d\n",
749 s1, rc);
750 break;
751 }
752 if (rc == LDD_F_SV_TYPE_OST)
753 exclude_list[lmd->lmd_exclude_count++] = index;
754 else
755 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
756 (uint)(s2-s1), s1, rc);
757 s1 = s2;
758 /* now we are pointing at ':' (next exclude)
759 or ',' (end of excludes) */
760 if (lmd->lmd_exclude_count >= devmax)
761 break;
762 }
763 if (rc >= 0) /* non-err */
764 rc = 0;
765
766 if (lmd->lmd_exclude_count) {
767 /* permanent, freed in lustre_free_lsi */
768 lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count,
769 sizeof(index), GFP_NOFS);
770 if (lmd->lmd_exclude) {
771 memcpy(lmd->lmd_exclude, exclude_list,
772 sizeof(index) * lmd->lmd_exclude_count);
773 } else {
774 rc = -ENOMEM;
775 lmd->lmd_exclude_count = 0;
776 }
777 }
778 kfree(exclude_list);
779 return rc;
780 }
781
lmd_parse_mgssec(struct lustre_mount_data * lmd,char * ptr)782 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
783 {
784 char *tail;
785 int length;
786
787 kfree(lmd->lmd_mgssec);
788 lmd->lmd_mgssec = NULL;
789
790 tail = strchr(ptr, ',');
791 if (tail == NULL)
792 length = strlen(ptr);
793 else
794 length = tail - ptr;
795
796 lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS);
797 if (!lmd->lmd_mgssec)
798 return -ENOMEM;
799
800 memcpy(lmd->lmd_mgssec, ptr, length);
801 lmd->lmd_mgssec[length] = '\0';
802 return 0;
803 }
804
lmd_parse_string(char ** handle,char * ptr)805 static int lmd_parse_string(char **handle, char *ptr)
806 {
807 char *tail;
808 int length;
809
810 if ((handle == NULL) || (ptr == NULL))
811 return -EINVAL;
812
813 kfree(*handle);
814 *handle = NULL;
815
816 tail = strchr(ptr, ',');
817 if (tail == NULL)
818 length = strlen(ptr);
819 else
820 length = tail - ptr;
821
822 *handle = kzalloc(length + 1, GFP_NOFS);
823 if (!*handle)
824 return -ENOMEM;
825
826 memcpy(*handle, ptr, length);
827 (*handle)[length] = '\0';
828
829 return 0;
830 }
831
832 /* Collect multiple values for mgsnid specifiers */
lmd_parse_mgs(struct lustre_mount_data * lmd,char ** ptr)833 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
834 {
835 lnet_nid_t nid;
836 char *tail = *ptr;
837 char *mgsnid;
838 int length;
839 int oldlen = 0;
840
841 /* Find end of nidlist */
842 while (class_parse_nid_quiet(tail, &nid, &tail) == 0)
843 ;
844 length = tail - *ptr;
845 if (length == 0) {
846 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
847 return -EINVAL;
848 }
849
850 if (lmd->lmd_mgs != NULL)
851 oldlen = strlen(lmd->lmd_mgs) + 1;
852
853 mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS);
854 if (!mgsnid)
855 return -ENOMEM;
856
857 if (lmd->lmd_mgs != NULL) {
858 /* Multiple mgsnid= are taken to mean failover locations */
859 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
860 mgsnid[oldlen - 1] = ':';
861 kfree(lmd->lmd_mgs);
862 }
863 memcpy(mgsnid + oldlen, *ptr, length);
864 mgsnid[oldlen + length] = '\0';
865 lmd->lmd_mgs = mgsnid;
866 *ptr = tail;
867
868 return 0;
869 }
870
871 /** Parse mount line options
872 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
873 * dev is passed as device=uml1:/lustre by mount.lustre
874 */
lmd_parse(char * options,struct lustre_mount_data * lmd)875 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
876 {
877 char *s1, *s2, *devname = NULL;
878 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
879 int rc = 0;
880
881 LASSERT(lmd);
882 if (!options) {
883 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
884 return -EINVAL;
885 }
886
887 /* Options should be a string - try to detect old lmd data */
888 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
889 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. Please install version %s\n",
890 LUSTRE_VERSION_STRING);
891 return -EINVAL;
892 }
893 lmd->lmd_magic = LMD_MAGIC;
894
895 lmd->lmd_params = kzalloc(LMD_PARAMS_MAXLEN, GFP_NOFS);
896 if (!lmd->lmd_params)
897 return -ENOMEM;
898 lmd->lmd_params[0] = '\0';
899
900 /* Set default flags here */
901
902 s1 = options;
903 while (*s1) {
904 int clear = 0;
905 int time_min = OBD_RECOVERY_TIME_MIN;
906
907 /* Skip whitespace and extra commas */
908 while (*s1 == ' ' || *s1 == ',')
909 s1++;
910
911 /* Client options are parsed in ll_options: eg. flock,
912 user_xattr, acl */
913
914 /* Parse non-ldiskfs options here. Rather than modifying
915 ldiskfs, we just zero these out here */
916 if (strncmp(s1, "abort_recov", 11) == 0) {
917 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
918 clear++;
919 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
920 lmd->lmd_recovery_time_soft = max_t(int,
921 simple_strtoul(s1 + 19, NULL, 10), time_min);
922 clear++;
923 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
924 lmd->lmd_recovery_time_hard = max_t(int,
925 simple_strtoul(s1 + 19, NULL, 10), time_min);
926 clear++;
927 } else if (strncmp(s1, "noir", 4) == 0) {
928 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
929 clear++;
930 } else if (strncmp(s1, "nosvc", 5) == 0) {
931 lmd->lmd_flags |= LMD_FLG_NOSVC;
932 clear++;
933 } else if (strncmp(s1, "nomgs", 5) == 0) {
934 lmd->lmd_flags |= LMD_FLG_NOMGS;
935 clear++;
936 } else if (strncmp(s1, "noscrub", 7) == 0) {
937 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
938 clear++;
939 } else if (strncmp(s1, PARAM_MGSNODE,
940 sizeof(PARAM_MGSNODE) - 1) == 0) {
941 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
942 /* Assume the next mount opt is the first
943 invalid nid we get to. */
944 rc = lmd_parse_mgs(lmd, &s2);
945 if (rc)
946 goto invalid;
947 clear++;
948 } else if (strncmp(s1, "writeconf", 9) == 0) {
949 lmd->lmd_flags |= LMD_FLG_WRITECONF;
950 clear++;
951 } else if (strncmp(s1, "update", 6) == 0) {
952 lmd->lmd_flags |= LMD_FLG_UPDATE;
953 clear++;
954 } else if (strncmp(s1, "virgin", 6) == 0) {
955 lmd->lmd_flags |= LMD_FLG_VIRGIN;
956 clear++;
957 } else if (strncmp(s1, "noprimnode", 10) == 0) {
958 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
959 clear++;
960 } else if (strncmp(s1, "mgssec=", 7) == 0) {
961 rc = lmd_parse_mgssec(lmd, s1 + 7);
962 if (rc)
963 goto invalid;
964 clear++;
965 /* ost exclusion list */
966 } else if (strncmp(s1, "exclude=", 8) == 0) {
967 rc = lmd_make_exclusion(lmd, s1 + 7);
968 if (rc)
969 goto invalid;
970 clear++;
971 } else if (strncmp(s1, "mgs", 3) == 0) {
972 /* We are an MGS */
973 lmd->lmd_flags |= LMD_FLG_MGS;
974 clear++;
975 } else if (strncmp(s1, "svname=", 7) == 0) {
976 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
977 if (rc)
978 goto invalid;
979 clear++;
980 } else if (strncmp(s1, "param=", 6) == 0) {
981 size_t length, params_length;
982 char *tail = strchr(s1 + 6, ',');
983
984 if (tail == NULL)
985 length = strlen(s1);
986 else
987 length = tail - s1;
988 length -= 6;
989 params_length = strlen(lmd->lmd_params);
990 if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
991 return -E2BIG;
992 strncat(lmd->lmd_params, s1 + 6, length);
993 lmd->lmd_params[params_length + length] = '\0';
994 strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
995 clear++;
996 } else if (strncmp(s1, "osd=", 4) == 0) {
997 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
998 if (rc)
999 goto invalid;
1000 clear++;
1001 }
1002 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1003 end of the options. */
1004 else if (strncmp(s1, "device=", 7) == 0) {
1005 devname = s1 + 7;
1006 /* terminate options right before device. device
1007 must be the last one. */
1008 *s1 = '\0';
1009 break;
1010 }
1011
1012 /* Find next opt */
1013 s2 = strchr(s1, ',');
1014 if (s2 == NULL) {
1015 if (clear)
1016 *s1 = '\0';
1017 break;
1018 }
1019 s2++;
1020 if (clear)
1021 memmove(s1, s2, strlen(s2) + 1);
1022 else
1023 s1 = s2;
1024 }
1025
1026 if (!devname) {
1027 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1028 goto invalid;
1029 }
1030
1031 s1 = strstr(devname, ":/");
1032 if (s1) {
1033 ++s1;
1034 lmd->lmd_flags |= LMD_FLG_CLIENT;
1035 /* Remove leading /s from fsname */
1036 while (*++s1 == '/')
1037 ;
1038 /* Freed in lustre_free_lsi */
1039 lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1);
1040 if (!lmd->lmd_profile)
1041 return -ENOMEM;
1042 }
1043
1044 /* Freed in lustre_free_lsi */
1045 lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS);
1046 if (!lmd->lmd_dev)
1047 return -ENOMEM;
1048 strcpy(lmd->lmd_dev, devname);
1049
1050 /* Save mount options */
1051 s1 = options + strlen(options) - 1;
1052 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1053 *s1-- = 0;
1054 if (*options != 0) {
1055 /* Freed in lustre_free_lsi */
1056 lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS);
1057 if (!lmd->lmd_opts)
1058 return -ENOMEM;
1059 strcpy(lmd->lmd_opts, options);
1060 }
1061
1062 lmd_print(lmd);
1063 lmd->lmd_magic = LMD_MAGIC;
1064
1065 return rc;
1066
1067 invalid:
1068 CERROR("Bad mount options %s\n", options);
1069 return -EINVAL;
1070 }
1071
/*
 * Argument bundle that lustre_fill_super() receives as its opaque mount
 * data pointer.
 */
1072 struct lustre_mount_data2 {
/* mount option string; handed to lmd_parse() */
1073 void *lmd2_data;
/* vfsmount passed on to the client_fill_super callback */
1074 struct vfsmount *lmd2_mnt;
1075 };
1076
1077 /** This is the entry point for the mount call into Lustre.
1078 * This is called when a server or client is mounted,
1079 * and this is where we start setting things up.
1080 * @param data Mount options (e.g. -o flock,abort_recov)
1081 */
lustre_fill_super(struct super_block * sb,void * data,int silent)1082 static int lustre_fill_super(struct super_block *sb, void *data, int silent)
1083 {
1084 struct lustre_mount_data *lmd;
1085 struct lustre_mount_data2 *lmd2 = data;
1086 struct lustre_sb_info *lsi;
1087 int rc;
1088
1089 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1090
1091 lsi = lustre_init_lsi(sb);
1092 if (!lsi)
1093 return -ENOMEM;
1094 lmd = lsi->lsi_lmd;
1095
1096 /*
1097 * Disable lockdep during mount, because mount locking patterns are
1098 * `special'.
1099 */
1100 lockdep_off();
1101
1102 /*
1103 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1104 */
1105 obd_zombie_barrier();
1106
1107 /* Figure out the lmd from the mount options */
1108 if (lmd_parse((lmd2->lmd2_data), lmd)) {
1109 lustre_put_lsi(sb);
1110 rc = -EINVAL;
1111 goto out;
1112 }
1113
1114 if (lmd_is_client(lmd)) {
1115 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1116 if (client_fill_super == NULL)
1117 request_module("lustre");
1118 if (client_fill_super == NULL) {
1119 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1120 lustre_put_lsi(sb);
1121 rc = -ENODEV;
1122 } else {
1123 rc = lustre_start_mgc(sb);
1124 if (rc) {
1125 lustre_put_lsi(sb);
1126 goto out;
1127 }
1128 /* Connect and start */
1129 /* (should always be ll_fill_super) */
1130 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1131 /* c_f_s will call lustre_common_put_super on failure */
1132 }
1133 } else {
1134 CERROR("This is client-side-only module, cannot handle server mount.\n");
1135 rc = -EINVAL;
1136 }
1137
1138 /* If error happens in fill_super() call, @lsi will be killed there.
1139 * This is why we do not put it here. */
1140 goto out;
1141 out:
1142 if (rc) {
1143 CERROR("Unable to mount %s (%d)\n",
1144 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1145 } else {
1146 CDEBUG(D_SUPER, "Mount %s complete\n",
1147 lmd->lmd_dev);
1148 }
1149 lockdep_on();
1150 return rc;
1151 }
1152
1153 /* We can't call ll_fill_super by name because it lives in a module that
1154 must be loaded after this one. */
lustre_register_client_fill_super(int (* cfs)(struct super_block * sb,struct vfsmount * mnt))1155 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1156 struct vfsmount *mnt))
1157 {
1158 client_fill_super = cfs;
1159 }
1160 EXPORT_SYMBOL(lustre_register_client_fill_super);
1161
lustre_register_kill_super_cb(void (* cfs)(struct super_block * sb))1162 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1163 {
1164 kill_super_cb = cfs;
1165 }
1166 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1167
1168 /***************** FS registration ******************/
lustre_mount(struct file_system_type * fs_type,int flags,const char * devname,void * data)1169 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1170 const char *devname, void *data)
1171 {
1172 struct lustre_mount_data2 lmd2 = {
1173 .lmd2_data = data,
1174 .lmd2_mnt = NULL
1175 };
1176
1177 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
1178 }
1179
lustre_kill_super(struct super_block * sb)1180 static void lustre_kill_super(struct super_block *sb)
1181 {
1182 struct lustre_sb_info *lsi = s2lsi(sb);
1183
1184 if (kill_super_cb && lsi)
1185 (*kill_super_cb)(sb);
1186
1187 kill_anon_super(sb);
1188 }
1189
1190 /** Register the "lustre" fs type
1191 */
1192 static struct file_system_type lustre_fs_type = {
1193 .owner = THIS_MODULE,
1194 .name = "lustre",
1195 .mount = lustre_mount,
1196 .kill_sb = lustre_kill_super,
1197 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1198 FS_RENAME_DOES_D_MOVE,
1199 };
1200 MODULE_ALIAS_FS("lustre");
1201
lustre_register_fs(void)1202 int lustre_register_fs(void)
1203 {
1204 return register_filesystem(&lustre_fs_type);
1205 }
1206
lustre_unregister_fs(void)1207 int lustre_unregister_fs(void)
1208 {
1209 return unregister_filesystem(&lustre_fs_type);
1210 }
1211