• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/include/lustre_disk.h
37  *
38  * Lustre disk format definitions.
39  *
40  * Author: Nathan Rutman <nathan@clusterfs.com>
41  */
42 
43 #ifndef _LUSTRE_DISK_H
44 #define _LUSTRE_DISK_H
45 
46 /** \defgroup disk disk
47  *
48  * @{
49  */
50 
51 #include "../../include/linux/libcfs/libcfs.h"
52 #include "../../include/linux/lnet/types.h"
53 #include <linux/backing-dev.h>
54 
55 /****************** persistent mount data *********************/
56 
/* Server-type bits; going by the names: MDT, OST and MGS. */
#define LDD_F_SV_TYPE_MDT	0x0001
#define LDD_F_SV_TYPE_OST	0x0002
#define LDD_F_SV_TYPE_MGS	0x0004
/* All three server-type bits OR-ed together. */
#define LDD_F_SV_TYPE_MASK \
	(LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MGS)
/* Separate bit, not included in LDD_F_SV_TYPE_MASK. */
#define LDD_F_SV_ALL		0x0008
64 
65 /****************** mount command *********************/
66 
/*
 * The lmd (struct lustre_mount_data) is only used internally by Lustre;
 * mount itself simply passes everything along as string options.
 */

#define LMD_MAGIC		0xbdacbd03
#define LMD_PARAMS_MAXLEN	4096
72 
73 /* gleaned from the mount command - no persistent info here */
74 struct lustre_mount_data {
75 	__u32      lmd_magic;
76 	__u32      lmd_flags;	 /* lustre mount flags */
77 	int	lmd_mgs_failnodes; /* mgs failover node count */
78 	int	lmd_exclude_count;
79 	int	lmd_recovery_time_soft;
80 	int	lmd_recovery_time_hard;
81 	char      *lmd_dev;	   /* device name */
82 	char      *lmd_profile;       /* client only */
83 	char      *lmd_mgssec;	/* sptlrpc flavor to mgs */
84 	char      *lmd_opts;	  /* lustre mount options (as opposed to
85 					 _device_ mount options) */
86 	char      *lmd_params;	/* lustre params */
87 	__u32     *lmd_exclude;       /* array of OSTs to ignore */
88 	char	*lmd_mgs;	   /* MGS nid */
89 	char	*lmd_osd_type;      /* OSD type */
90 };
91 
/* Flag bits for lustre_mount_data::lmd_flags. */
#define LMD_FLG_SERVER		0x0001	/* mounting a server */
#define LMD_FLG_CLIENT		0x0002	/* mounting a client */
#define LMD_FLG_ABORT_RECOV	0x0008	/* abort recovery */
/* servers: only start MGS/MGC, no other services */
#define LMD_FLG_NOSVC		0x0010
/* servers: only start the target, reusing existing MGS services */
#define LMD_FLG_NOMGS		0x0020
#define LMD_FLG_WRITECONF	0x0040	/* rewrite config log */
#define LMD_FLG_NOIR		0x0080	/* no imperative recovery */
#define LMD_FLG_NOSCRUB		0x0100	/* do not trigger scrub automatically */
#define LMD_FLG_MGS		0x0200	/* also start MGS along with server */
#define LMD_FLG_IAM		0x0400	/* IAM dir */
#define LMD_FLG_NO_PRIMNODE	0x0800	/* all nodes are service nodes */
#define LMD_FLG_VIRGIN		0x1000	/* the service registers first time */
#define LMD_FLG_UPDATE		0x2000	/* update parameters */
#define LMD_FLG_HSM		0x4000	/* start coordinator */

/* Non-zero when LMD_FLG_CLIENT is set in (x)->lmd_flags. */
#define lmd_is_client(x)	((x)->lmd_flags & LMD_FLG_CLIENT)
110 
111 /****************** last_rcvd file *********************/
112 
/** version recovery epoch: lr_epoch() drops the low LR_EPOCH_BITS bits */
#define LR_EPOCH_BITS		32
#define lr_epoch(a)		((a) >> LR_EPOCH_BITS)
/** number of intervals to track transno */
#define LR_EXPIRE_INTERVALS	16
/** 'virtual' version of non-existent object */
#define ENOENT_VERSION		1

/* last_rcvd geometry, in bytes */
#define LR_SERVER_SIZE		512	/* size of the server record */
#define LR_CLIENT_START		8192	/* where the per-client data starts */
#define LR_CLIENT_SIZE		128	/* size of one client record */
#if LR_SERVER_SIZE > LR_CLIENT_START
#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
#endif
125 
/*
 * LR_MAX_CLIENTS is the larger of 128K (131072) and the number of bits in
 * one page (PAGE_CACHE_SIZE * 8).
 *
 * The limit is arbitrary (131072 clients on x86), but it is convenient to use
 * 2^n * PAGE_CACHE_SIZE * 8, i.e. the number of bits that fit an order-n
 * allocation.  If more than 131072 clients (an order-2 allocation on x86) are
 * ever needed, this should become an array of single-page pointers that are
 * allocated on demand.
 */
#if (PAGE_CACHE_SIZE * 8) >= (128 * 1024UL)
#define LR_MAX_CLIENTS (PAGE_CACHE_SIZE * 8)
#else
#define LR_MAX_CLIENTS (128 * 1024UL)
#endif
137 
/* Compatible feature flags. */
#define OBD_COMPAT_OST		0x00000002 /**< COMPAT_146: this is an OST (temporary) */
#define OBD_COMPAT_MDT		0x00000004 /**< COMPAT_146: this is an MDT (temporary) */
#define OBD_COMPAT_20		0x00000008 /**< 2.0 server, interop flag to show
					    *   server version is changed */

/* Read-only compatible feature flags. */
#define OBD_ROCOMPAT_LOVOBJID	0x00000001 /**< MDS handles LOV_OBJID file */

/* Incompatible feature flags. */
#define OBD_INCOMPAT_GROUPS	0x00000001 /**< OST handles group subdirs */
#define OBD_INCOMPAT_OST	0x00000002 /**< this is an OST */
#define OBD_INCOMPAT_MDT	0x00000004 /**< this is an MDT */
#define OBD_INCOMPAT_COMMON_LR	0x00000008 /**< common last_rcvd format */
#define OBD_INCOMPAT_FID	0x00000010 /**< FID is enabled */
#define OBD_INCOMPAT_SOM	0x00000020 /**< Size-on-MDS is enabled */
#define OBD_INCOMPAT_IAM_DIR	0x00000040 /**< filesystem using iam format to
					    *   store directory entries */
#define OBD_INCOMPAT_LMA	0x00000080 /**< LMA attribute contains per-inode
					    *   incompatible flags */
/**
 * lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining
 * 16 bits are now used to store a generation.  Once the layout starts
 * changing and the generation gets bumped, old versions expecting a 32-bit
 * lmm_stripe_count would be confused by interpreting
 * stripe_count | gen << 16 as the actual stripe count.
 */
#define OBD_INCOMPAT_LMM_VER	0x00000100
#define OBD_INCOMPAT_MULTI_OI	0x00000200 /**< multiple OI files for MDT */
172 
173 /* Data stored per server at the head of the last_rcvd file.  In le32 order.
174    This should be common to filter_internal.h, lustre_mds.h */
175 struct lr_server_data {
176 	__u8  lsd_uuid[40];	/* server UUID */
177 	__u64 lsd_last_transno;    /* last completed transaction ID */
178 	__u64 lsd_compat14;	/* reserved - compat with old last_rcvd */
179 	__u64 lsd_mount_count;     /* incarnation number */
180 	__u32 lsd_feature_compat;  /* compatible feature flags */
181 	__u32 lsd_feature_rocompat;/* read-only compatible feature flags */
182 	__u32 lsd_feature_incompat;/* incompatible feature flags */
183 	__u32 lsd_server_size;     /* size of server data area */
184 	__u32 lsd_client_start;    /* start of per-client data area */
185 	__u16 lsd_client_size;     /* size of per-client data area */
186 	__u16 lsd_subdir_count;    /* number of subdirectories for objects */
187 	__u64 lsd_catalog_oid;     /* recovery catalog object id */
188 	__u32 lsd_catalog_ogen;    /* recovery catalog inode generation */
189 	__u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
190 	__u32 lsd_osd_index;       /* index number of OST in LOV */
191 	__u32 lsd_padding1;	/* was lsd_mdt_index, unused in 2.4.0 */
192 	__u32 lsd_start_epoch;     /* VBR: start epoch from last boot */
193 	/** transaction values since lsd_trans_table_time */
194 	__u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
195 	/** start point of transno table below */
196 	__u32 lsd_trans_table_time; /* time of first slot in table above */
197 	__u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
198 	__u8  lsd_padding[LR_SERVER_SIZE - 288];
199 };
200 
201 /* Data stored per client in the last_rcvd file.  In le32 order. */
202 struct lsd_client_data {
203 	__u8  lcd_uuid[40];      /* client UUID */
204 	__u64 lcd_last_transno; /* last completed transaction ID */
205 	__u64 lcd_last_xid;     /* xid for the last transaction */
206 	__u32 lcd_last_result;  /* result from last RPC */
207 	__u32 lcd_last_data;    /* per-op data (disposition for open &c.) */
208 	/* for MDS_CLOSE requests */
209 	__u64 lcd_last_close_transno; /* last completed transaction ID */
210 	__u64 lcd_last_close_xid;     /* xid for the last transaction */
211 	__u32 lcd_last_close_result;  /* result from last RPC */
212 	__u32 lcd_last_close_data;    /* per-op data */
213 	/* VBR: last versions */
214 	__u64 lcd_pre_versions[4];
215 	__u32 lcd_last_epoch;
216 	/** orphans handling for delayed export rely on that */
217 	__u32 lcd_first_epoch;
218 	__u8  lcd_padding[LR_CLIENT_SIZE - 128];
219 };
220 
221 /* bug20354: the lcd_uuid for export of clients may be wrong */
check_lcd(char * obd_name,int index,struct lsd_client_data * lcd)222 static inline void check_lcd(char *obd_name, int index,
223 			     struct lsd_client_data *lcd)
224 {
225 	int length = sizeof(lcd->lcd_uuid);
226 
227 	if (strnlen((char *)lcd->lcd_uuid, length) == length) {
228 		lcd->lcd_uuid[length - 1] = '\0';
229 
230 		LCONSOLE_ERROR("the client UUID (%s) on %s for exports stored in last_rcvd(index = %d) is bad!\n",
231 			       lcd->lcd_uuid, obd_name, index);
232 	}
233 }
234 
235 /* last_rcvd handling */
lsd_le_to_cpu(struct lr_server_data * buf,struct lr_server_data * lsd)236 static inline void lsd_le_to_cpu(struct lr_server_data *buf,
237 				 struct lr_server_data *lsd)
238 {
239 	int i;
240 
241 	memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
242 	lsd->lsd_last_transno     = le64_to_cpu(buf->lsd_last_transno);
243 	lsd->lsd_compat14	 = le64_to_cpu(buf->lsd_compat14);
244 	lsd->lsd_mount_count      = le64_to_cpu(buf->lsd_mount_count);
245 	lsd->lsd_feature_compat   = le32_to_cpu(buf->lsd_feature_compat);
246 	lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
247 	lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
248 	lsd->lsd_server_size      = le32_to_cpu(buf->lsd_server_size);
249 	lsd->lsd_client_start     = le32_to_cpu(buf->lsd_client_start);
250 	lsd->lsd_client_size      = le16_to_cpu(buf->lsd_client_size);
251 	lsd->lsd_subdir_count     = le16_to_cpu(buf->lsd_subdir_count);
252 	lsd->lsd_catalog_oid      = le64_to_cpu(buf->lsd_catalog_oid);
253 	lsd->lsd_catalog_ogen     = le32_to_cpu(buf->lsd_catalog_ogen);
254 	memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid));
255 	lsd->lsd_osd_index	= le32_to_cpu(buf->lsd_osd_index);
256 	lsd->lsd_padding1	= le32_to_cpu(buf->lsd_padding1);
257 	lsd->lsd_start_epoch      = le32_to_cpu(buf->lsd_start_epoch);
258 	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
259 		lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]);
260 	lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
261 	lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
262 }
263 
lsd_cpu_to_le(struct lr_server_data * lsd,struct lr_server_data * buf)264 static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
265 				 struct lr_server_data *buf)
266 {
267 	int i;
268 
269 	memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
270 	buf->lsd_last_transno     = cpu_to_le64(lsd->lsd_last_transno);
271 	buf->lsd_compat14	 = cpu_to_le64(lsd->lsd_compat14);
272 	buf->lsd_mount_count      = cpu_to_le64(lsd->lsd_mount_count);
273 	buf->lsd_feature_compat   = cpu_to_le32(lsd->lsd_feature_compat);
274 	buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
275 	buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
276 	buf->lsd_server_size      = cpu_to_le32(lsd->lsd_server_size);
277 	buf->lsd_client_start     = cpu_to_le32(lsd->lsd_client_start);
278 	buf->lsd_client_size      = cpu_to_le16(lsd->lsd_client_size);
279 	buf->lsd_subdir_count     = cpu_to_le16(lsd->lsd_subdir_count);
280 	buf->lsd_catalog_oid      = cpu_to_le64(lsd->lsd_catalog_oid);
281 	buf->lsd_catalog_ogen     = cpu_to_le32(lsd->lsd_catalog_ogen);
282 	memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid));
283 	buf->lsd_osd_index	  = cpu_to_le32(lsd->lsd_osd_index);
284 	buf->lsd_padding1	  = cpu_to_le32(lsd->lsd_padding1);
285 	buf->lsd_start_epoch      = cpu_to_le32(lsd->lsd_start_epoch);
286 	for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
287 		buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]);
288 	buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
289 	buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
290 }
291 
lcd_le_to_cpu(struct lsd_client_data * buf,struct lsd_client_data * lcd)292 static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
293 				 struct lsd_client_data *lcd)
294 {
295 	memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid));
296 	lcd->lcd_last_transno       = le64_to_cpu(buf->lcd_last_transno);
297 	lcd->lcd_last_xid	   = le64_to_cpu(buf->lcd_last_xid);
298 	lcd->lcd_last_result	= le32_to_cpu(buf->lcd_last_result);
299 	lcd->lcd_last_data	  = le32_to_cpu(buf->lcd_last_data);
300 	lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
301 	lcd->lcd_last_close_xid     = le64_to_cpu(buf->lcd_last_close_xid);
302 	lcd->lcd_last_close_result  = le32_to_cpu(buf->lcd_last_close_result);
303 	lcd->lcd_last_close_data    = le32_to_cpu(buf->lcd_last_close_data);
304 	lcd->lcd_pre_versions[0]    = le64_to_cpu(buf->lcd_pre_versions[0]);
305 	lcd->lcd_pre_versions[1]    = le64_to_cpu(buf->lcd_pre_versions[1]);
306 	lcd->lcd_pre_versions[2]    = le64_to_cpu(buf->lcd_pre_versions[2]);
307 	lcd->lcd_pre_versions[3]    = le64_to_cpu(buf->lcd_pre_versions[3]);
308 	lcd->lcd_last_epoch	 = le32_to_cpu(buf->lcd_last_epoch);
309 	lcd->lcd_first_epoch	= le32_to_cpu(buf->lcd_first_epoch);
310 }
311 
lcd_cpu_to_le(struct lsd_client_data * lcd,struct lsd_client_data * buf)312 static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
313 				 struct lsd_client_data *buf)
314 {
315 	memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid));
316 	buf->lcd_last_transno       = cpu_to_le64(lcd->lcd_last_transno);
317 	buf->lcd_last_xid	   = cpu_to_le64(lcd->lcd_last_xid);
318 	buf->lcd_last_result	= cpu_to_le32(lcd->lcd_last_result);
319 	buf->lcd_last_data	  = cpu_to_le32(lcd->lcd_last_data);
320 	buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
321 	buf->lcd_last_close_xid     = cpu_to_le64(lcd->lcd_last_close_xid);
322 	buf->lcd_last_close_result  = cpu_to_le32(lcd->lcd_last_close_result);
323 	buf->lcd_last_close_data    = cpu_to_le32(lcd->lcd_last_close_data);
324 	buf->lcd_pre_versions[0]    = cpu_to_le64(lcd->lcd_pre_versions[0]);
325 	buf->lcd_pre_versions[1]    = cpu_to_le64(lcd->lcd_pre_versions[1]);
326 	buf->lcd_pre_versions[2]    = cpu_to_le64(lcd->lcd_pre_versions[2]);
327 	buf->lcd_pre_versions[3]    = cpu_to_le64(lcd->lcd_pre_versions[3]);
328 	buf->lcd_last_epoch	 = cpu_to_le32(lcd->lcd_last_epoch);
329 	buf->lcd_first_epoch	= cpu_to_le32(lcd->lcd_first_epoch);
330 }
331 
lcd_last_transno(struct lsd_client_data * lcd)332 static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
333 {
334 	return (lcd->lcd_last_transno > lcd->lcd_last_close_transno ?
335 		lcd->lcd_last_transno : lcd->lcd_last_close_transno);
336 }
337 
lcd_last_xid(struct lsd_client_data * lcd)338 static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
339 {
340 	return (lcd->lcd_last_xid > lcd->lcd_last_close_xid ?
341 		lcd->lcd_last_xid : lcd->lcd_last_close_xid);
342 }
343 
344 /****************** superblock additional info *********************/
345 
346 struct ll_sb_info;
347 
348 struct lustre_sb_info {
349 	int		       lsi_flags;
350 	struct obd_device	*lsi_mgc;     /* mgc obd */
351 	struct lustre_mount_data *lsi_lmd;     /* mount command info */
352 	struct ll_sb_info	*lsi_llsbi;   /* add'l client sbi info */
353 	struct dt_device	 *lsi_dt_dev;  /* dt device to access disk fs*/
354 	struct vfsmount	  *lsi_srv_mnt; /* the one server mount */
355 	atomic_t	      lsi_mounts;  /* references to the srv_mnt */
356 	char			  lsi_svname[MTI_NAME_MAXLEN];
357 	char			  lsi_osd_obdname[64];
358 	char			  lsi_osd_uuid[64];
359 	struct obd_export	 *lsi_osd_exp;
360 	char			  lsi_osd_type[16];
361 	char			  lsi_fstype[16];
362 	struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
363 						  own backing_dev_info */
364 };
365 
/* NOTE(review): presumably bits for lustre_sb_info::lsi_flags - confirm. */
#define LSI_UMOUNT_FAILOVER	0x00200000
#define LSI_BDI_INITIALIZED	0x00400000

/* superblock -> lustre_sb_info through s_fs_info, with and without a cast */
#define s2lsi(sb)		((struct lustre_sb_info *)((sb)->s_fs_info))
#define s2lsi_nocast(sb)	((sb)->s_fs_info)

/* shortcuts into the mount data hanging off the superblock's lsi_lmd */
#define get_profile_name(sb)	(s2lsi(sb)->lsi_lmd->lmd_profile)
#define get_mount_flags(sb)	(s2lsi(sb)->lsi_lmd->lmd_flags)
#define get_mntdev_name(sb)	(s2lsi(sb)->lsi_lmd->lmd_dev)
375 
376 /****************** mount lookup info *********************/
377 
378 struct lustre_mount_info {
379 	char		 *lmi_name;
380 	struct super_block   *lmi_sb;
381 	struct vfsmount      *lmi_mnt;
382 	struct list_head	    lmi_list_chain;
383 };
384 
385 /****************** prototypes *********************/
386 
/*
 * obd_mount.c
 *
 * NOTE(review): the definitions are not visible from this header; the
 * declarations below are reproduced unchanged, only re-wrapped.
 */

int lustre_start_mgc(struct super_block *sb);
int lustre_common_put_super(struct super_block *sb);

/* registration of the client fill_super and kill_super callbacks */
void lustre_register_client_fill_super(
	int (*cfs)(struct super_block *sb, struct vfsmount *mnt));
void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));

int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
396 
397 /** @} disk */
398 
399 #endif /* _LUSTRE_DISK_H */
400