• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/llite/llite_lib.c
37  *
38  * Lustre Light Super operations
39  */
40 
41 #define DEBUG_SUBSYSTEM S_LLITE
42 
43 #include <linux/module.h>
44 #include <linux/statfs.h>
45 #include <linux/types.h>
46 #include <linux/mm.h>
47 
48 #include "../include/lustre_lite.h"
49 #include "../include/lustre_ha.h"
50 #include "../include/lustre_dlm.h"
51 #include "../include/lprocfs_status.h"
52 #include "../include/lustre_disk.h"
53 #include "../include/lustre_param.h"
54 #include "../include/lustre_log.h"
55 #include "../include/cl_object.h"
56 #include "../include/obd_cksum.h"
57 #include "llite_internal.h"
58 
/* File-scope llite objects; they are only defined here. */
struct kmem_cache *ll_file_data_slab;
/* When non-NULL, mount points are registered under this dentry. */
struct dentry *llite_root;
struct kset *llite_kset;

/* Fallback log2(): index of the first zero bit of the complement of n. */
#if !defined(log2)
#define log2(n) ffz(~(n))
#endif
66 
ll_init_sbi(struct super_block * sb)67 static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
68 {
69 	struct ll_sb_info *sbi = NULL;
70 	unsigned long pages;
71 	unsigned long lru_page_max;
72 	struct sysinfo si;
73 	class_uuid_t uuid;
74 	int i;
75 
76 	sbi = kzalloc(sizeof(*sbi), GFP_NOFS);
77 	if (!sbi)
78 		return NULL;
79 
80 	spin_lock_init(&sbi->ll_lock);
81 	mutex_init(&sbi->ll_lco.lco_lock);
82 	spin_lock_init(&sbi->ll_pp_extent_lock);
83 	spin_lock_init(&sbi->ll_process_lock);
84 	sbi->ll_rw_stats_on = 0;
85 
86 	si_meminfo(&si);
87 	pages = si.totalram - si.totalhigh;
88 	if (pages >> (20 - PAGE_CACHE_SHIFT) < 512)
89 		lru_page_max = pages / 2;
90 	else
91 		lru_page_max = (pages / 4) * 3;
92 
93 	/* initialize lru data */
94 	atomic_set(&sbi->ll_cache.ccc_users, 0);
95 	sbi->ll_cache.ccc_lru_max = lru_page_max;
96 	atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
97 	spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
98 	INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
99 
100 	sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
101 					   SBI_DEFAULT_READAHEAD_MAX);
102 	sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
103 	sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
104 					   SBI_DEFAULT_READAHEAD_WHOLE_MAX;
105 	INIT_LIST_HEAD(&sbi->ll_conn_chain);
106 	INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
107 
108 	ll_generate_random_uuid(uuid);
109 	class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
110 	CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
111 
112 	sbi->ll_flags |= LL_SBI_VERBOSE;
113 	sbi->ll_flags |= LL_SBI_CHECKSUM;
114 
115 	sbi->ll_flags |= LL_SBI_LRU_RESIZE;
116 
117 	for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
118 		spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
119 			       pp_r_hist.oh_lock);
120 		spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
121 			       pp_w_hist.oh_lock);
122 	}
123 
124 	/* metadata statahead is enabled by default */
125 	sbi->ll_sa_max = LL_SA_RPC_DEF;
126 	atomic_set(&sbi->ll_sa_total, 0);
127 	atomic_set(&sbi->ll_sa_wrong, 0);
128 	atomic_set(&sbi->ll_agl_total, 0);
129 	sbi->ll_flags |= LL_SBI_AGL_ENABLED;
130 
131 	sbi->ll_sb = sb;
132 
133 	return sbi;
134 }
135 
/* Free the llite superblock info that ll_s2sbi() returns for @sb. */
static void ll_free_sbi(struct super_block *sb)
{
	kfree(ll_s2sbi(sb));
}
142 
client_common_fill_super(struct super_block * sb,char * md,char * dt,struct vfsmount * mnt)143 static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
144 				    struct vfsmount *mnt)
145 {
146 	struct inode *root = NULL;
147 	struct ll_sb_info *sbi = ll_s2sbi(sb);
148 	struct obd_device *obd;
149 	struct obd_statfs *osfs = NULL;
150 	struct ptlrpc_request *request = NULL;
151 	struct obd_connect_data *data = NULL;
152 	struct obd_uuid *uuid;
153 	struct md_op_data *op_data;
154 	struct lustre_md lmd;
155 	u64 valid;
156 	int size, err, checksum;
157 
158 	obd = class_name2obd(md);
159 	if (!obd) {
160 		CERROR("MD %s: not setup or attached\n", md);
161 		return -EINVAL;
162 	}
163 
164 	data = kzalloc(sizeof(*data), GFP_NOFS);
165 	if (!data)
166 		return -ENOMEM;
167 
168 	osfs = kzalloc(sizeof(*osfs), GFP_NOFS);
169 	if (!osfs) {
170 		kfree(data);
171 		return -ENOMEM;
172 	}
173 
174 	if (llite_root != NULL) {
175 		err = ldebugfs_register_mountpoint(llite_root, sb, dt, md);
176 		if (err < 0)
177 			CERROR("could not register mount in <debugfs>/lustre/llite\n");
178 	}
179 
180 	/* indicate the features supported by this client */
181 	data->ocd_connect_flags = OBD_CONNECT_IBITS    | OBD_CONNECT_NODEVOH  |
182 				  OBD_CONNECT_ATTRFID  |
183 				  OBD_CONNECT_VERSION  | OBD_CONNECT_BRW_SIZE |
184 				  OBD_CONNECT_CANCELSET | OBD_CONNECT_FID     |
185 				  OBD_CONNECT_AT       | OBD_CONNECT_LOV_V3   |
186 				  OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR    |
187 				  OBD_CONNECT_FULL20   | OBD_CONNECT_64BITHASH|
188 				  OBD_CONNECT_EINPROGRESS |
189 				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
190 				  OBD_CONNECT_LAYOUTLOCK |
191 				  OBD_CONNECT_PINGLESS |
192 				  OBD_CONNECT_MAX_EASIZE |
193 				  OBD_CONNECT_FLOCK_DEAD |
194 				  OBD_CONNECT_DISP_STRIPE;
195 
196 	if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
197 		data->ocd_connect_flags |= OBD_CONNECT_SOM;
198 
199 	if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
200 		data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
201 #ifdef CONFIG_FS_POSIX_ACL
202 	data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK;
203 #endif
204 
205 	if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
206 		/* flag mdc connection as lightweight, only used for test
207 		 * purpose, use with care */
208 		data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
209 
210 	data->ocd_ibits_known = MDS_INODELOCK_FULL;
211 	data->ocd_version = LUSTRE_VERSION_CODE;
212 
213 	if (sb->s_flags & MS_RDONLY)
214 		data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
215 	if (sbi->ll_flags & LL_SBI_USER_XATTR)
216 		data->ocd_connect_flags |= OBD_CONNECT_XATTR;
217 
218 	if (sbi->ll_flags & LL_SBI_FLOCK)
219 		sbi->ll_fop = &ll_file_operations_flock;
220 	else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
221 		sbi->ll_fop = &ll_file_operations;
222 	else
223 		sbi->ll_fop = &ll_file_operations_noflock;
224 
225 	/* real client */
226 	data->ocd_connect_flags |= OBD_CONNECT_REAL;
227 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
228 		data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
229 
230 	data->ocd_brw_size = MD_MAX_BRW_SIZE;
231 
232 	err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid,
233 			  data, NULL);
234 	if (err == -EBUSY) {
235 		LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing recovery, of which this client is not a part. Please wait for recovery to complete, abort, or time out.\n",
236 				   md);
237 		goto out;
238 	} else if (err) {
239 		CERROR("cannot connect to %s: rc = %d\n", md, err);
240 		goto out;
241 	}
242 
243 	sbi->ll_md_exp->exp_connect_data = *data;
244 
245 	err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
246 			   LUSTRE_SEQ_METADATA);
247 	if (err) {
248 		CERROR("%s: Can't init metadata layer FID infrastructure, rc = %d\n",
249 		       sbi->ll_md_exp->exp_obd->obd_name, err);
250 		goto out_md;
251 	}
252 
253 	/* For mount, we only need fs info from MDT0, and also in DNE, it
254 	 * can make sure the client can be mounted as long as MDT0 is
255 	 * available */
256 	err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
257 			cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
258 			OBD_STATFS_FOR_MDT0);
259 	if (err)
260 		goto out_md_fid;
261 
262 	/* This needs to be after statfs to ensure connect has finished.
263 	 * Note that "data" does NOT contain the valid connect reply.
264 	 * If connecting to a 1.8 server there will be no LMV device, so
265 	 * we can access the MDC export directly and exp_connect_flags will
266 	 * be non-zero, but if accessing an upgraded 2.1 server it will
267 	 * have the correct flags filled in.
268 	 * XXX: fill in the LMV exp_connect_flags from MDC(s). */
269 	valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
270 	if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
271 	    valid != CLIENT_CONNECT_MDT_REQD) {
272 		char *buf;
273 
274 		buf = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
275 		if (!buf) {
276 			err = -ENOMEM;
277 			goto out_md_fid;
278 		}
279 		obd_connect_flags2str(buf, PAGE_CACHE_SIZE,
280 				      valid ^ CLIENT_CONNECT_MDT_REQD, ",");
281 		LCONSOLE_ERROR_MSG(0x170, "Server %s does not support feature(s) needed for correct operation of this client (%s). Please upgrade server or downgrade client.\n",
282 				   sbi->ll_md_exp->exp_obd->obd_name, buf);
283 		kfree(buf);
284 		err = -EPROTO;
285 		goto out_md_fid;
286 	}
287 
288 	size = sizeof(*data);
289 	err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
290 			   KEY_CONN_DATA,  &size, data, NULL);
291 	if (err) {
292 		CERROR("%s: Get connect data failed: rc = %d\n",
293 		       sbi->ll_md_exp->exp_obd->obd_name, err);
294 		goto out_md_fid;
295 	}
296 
297 	LASSERT(osfs->os_bsize);
298 	sb->s_blocksize = osfs->os_bsize;
299 	sb->s_blocksize_bits = log2(osfs->os_bsize);
300 	sb->s_magic = LL_SUPER_MAGIC;
301 	sb->s_maxbytes = MAX_LFS_FILESIZE;
302 	sbi->ll_namelen = osfs->os_namelen;
303 
304 	if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
305 	    !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
306 		LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
307 		sbi->ll_flags &= ~LL_SBI_USER_XATTR;
308 	}
309 
310 	if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
311 #ifdef MS_POSIXACL
312 		sb->s_flags |= MS_POSIXACL;
313 #endif
314 		sbi->ll_flags |= LL_SBI_ACL;
315 	} else {
316 		LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
317 #ifdef MS_POSIXACL
318 		sb->s_flags &= ~MS_POSIXACL;
319 #endif
320 		sbi->ll_flags &= ~LL_SBI_ACL;
321 	}
322 
323 	if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) {
324 		if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
325 			sbi->ll_flags |= LL_SBI_RMT_CLIENT;
326 			LCONSOLE_INFO("client is set as remote by default.\n");
327 		}
328 	} else {
329 		if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
330 			sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
331 			LCONSOLE_INFO("client claims to be remote, but server rejected, forced to be local.\n");
332 		}
333 	}
334 
335 	if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
336 		sbi->ll_flags |= LL_SBI_64BIT_HASH;
337 
338 	if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
339 		sbi->ll_md_brw_size = data->ocd_brw_size;
340 	else
341 		sbi->ll_md_brw_size = PAGE_CACHE_SIZE;
342 
343 	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) {
344 		LCONSOLE_INFO("Layout lock feature supported.\n");
345 		sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
346 	}
347 
348 	if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
349 		if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
350 			LCONSOLE_INFO(
351 				"%s: disabling xattr cache due to unknown maximum xattr size.\n",
352 				dt);
353 		} else {
354 			sbi->ll_flags |= LL_SBI_XATTR_CACHE;
355 			sbi->ll_xattr_cache_enabled = 1;
356 		}
357 	}
358 
359 	obd = class_name2obd(dt);
360 	if (!obd) {
361 		CERROR("DT %s: not setup or attached\n", dt);
362 		err = -ENODEV;
363 		goto out_md_fid;
364 	}
365 
366 	data->ocd_connect_flags = OBD_CONNECT_GRANT     | OBD_CONNECT_VERSION  |
367 				  OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
368 				  OBD_CONNECT_CANCELSET | OBD_CONNECT_FID      |
369 				  OBD_CONNECT_SRVLOCK   | OBD_CONNECT_TRUNCLOCK|
370 				  OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
371 				  OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR|
372 				  OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH |
373 				  OBD_CONNECT_MAXBYTES |
374 				  OBD_CONNECT_EINPROGRESS |
375 				  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
376 				  OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
377 
378 	if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
379 		data->ocd_connect_flags |= OBD_CONNECT_SOM;
380 
381 	if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
382 		/* OBD_CONNECT_CKSUM should always be set, even if checksums are
383 		 * disabled by default, because it can still be enabled on the
384 		 * fly via /sys. As a consequence, we still need to come to an
385 		 * agreement on the supported algorithms at connect time */
386 		data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
387 
388 		if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
389 			data->ocd_cksum_types = OBD_CKSUM_ADLER;
390 		else
391 			data->ocd_cksum_types = cksum_types_supported_client();
392 	}
393 
394 	data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
395 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
396 		data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
397 
398 	CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n",
399 	       data->ocd_connect_flags,
400 	       data->ocd_version, data->ocd_grant);
401 
402 	obd->obd_upcall.onu_owner = &sbi->ll_lco;
403 	obd->obd_upcall.onu_upcall = cl_ocd_update;
404 
405 	data->ocd_brw_size = DT_MAX_BRW_SIZE;
406 
407 	err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data,
408 			  NULL);
409 	if (err == -EBUSY) {
410 		LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing recovery, of which this client is not a part.  Please wait for recovery to complete, abort, or time out.\n",
411 				   dt);
412 		goto out_md;
413 	} else if (err) {
414 		CERROR("%s: Cannot connect to %s: rc = %d\n",
415 		       sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
416 		goto out_md;
417 	}
418 
419 	sbi->ll_dt_exp->exp_connect_data = *data;
420 
421 	err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
422 			   LUSTRE_SEQ_METADATA);
423 	if (err) {
424 		CERROR("%s: Can't init data layer FID infrastructure, rc = %d\n",
425 		       sbi->ll_dt_exp->exp_obd->obd_name, err);
426 		goto out_dt;
427 	}
428 
429 	mutex_lock(&sbi->ll_lco.lco_lock);
430 	sbi->ll_lco.lco_flags = data->ocd_connect_flags;
431 	sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
432 	sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
433 	mutex_unlock(&sbi->ll_lco.lco_lock);
434 
435 	fid_zero(&sbi->ll_root_fid);
436 	err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid);
437 	if (err) {
438 		CERROR("cannot mds_connect: rc = %d\n", err);
439 		goto out_lock_cn_cb;
440 	}
441 	if (!fid_is_sane(&sbi->ll_root_fid)) {
442 		CERROR("%s: Invalid root fid "DFID" during mount\n",
443 		       sbi->ll_md_exp->exp_obd->obd_name,
444 		       PFID(&sbi->ll_root_fid));
445 		err = -EINVAL;
446 		goto out_lock_cn_cb;
447 	}
448 	CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
449 
450 	sb->s_op = &lustre_super_operations;
451 #if THREAD_SIZE >= 8192 /*b=17630*/
452 	sb->s_export_op = &lustre_export_operations;
453 #endif
454 
455 	/* make root inode
456 	 * XXX: move this to after cbd setup? */
457 	valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
458 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
459 		valid |= OBD_MD_FLRMTPERM;
460 	else if (sbi->ll_flags & LL_SBI_ACL)
461 		valid |= OBD_MD_FLACL;
462 
463 	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
464 	if (!op_data) {
465 		err = -ENOMEM;
466 		goto out_lock_cn_cb;
467 	}
468 
469 	op_data->op_fid1 = sbi->ll_root_fid;
470 	op_data->op_mode = 0;
471 	op_data->op_valid = valid;
472 
473 	err = md_getattr(sbi->ll_md_exp, op_data, &request);
474 	kfree(op_data);
475 	if (err) {
476 		CERROR("%s: md_getattr failed for root: rc = %d\n",
477 		       sbi->ll_md_exp->exp_obd->obd_name, err);
478 		goto out_lock_cn_cb;
479 	}
480 
481 	err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
482 			       sbi->ll_md_exp, &lmd);
483 	if (err) {
484 		CERROR("failed to understand root inode md: rc = %d\n", err);
485 		ptlrpc_req_finished(request);
486 		goto out_lock_cn_cb;
487 	}
488 
489 	LASSERT(fid_is_sane(&sbi->ll_root_fid));
490 	root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
491 					    sbi->ll_flags & LL_SBI_32BIT_API),
492 		       &lmd);
493 	md_free_lustre_md(sbi->ll_md_exp, &lmd);
494 	ptlrpc_req_finished(request);
495 
496 	if (root == NULL || IS_ERR(root)) {
497 		if (lmd.lsm)
498 			obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
499 #ifdef CONFIG_FS_POSIX_ACL
500 		if (lmd.posix_acl) {
501 			posix_acl_release(lmd.posix_acl);
502 			lmd.posix_acl = NULL;
503 		}
504 #endif
505 		err = IS_ERR(root) ? PTR_ERR(root) : -EBADF;
506 		root = NULL;
507 		CERROR("lustre_lite: bad iget4 for root\n");
508 		goto out_root;
509 	}
510 
511 	err = ll_close_thread_start(&sbi->ll_lcq);
512 	if (err) {
513 		CERROR("cannot start close thread: rc %d\n", err);
514 		goto out_root;
515 	}
516 
517 #ifdef CONFIG_FS_POSIX_ACL
518 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
519 		rct_init(&sbi->ll_rct);
520 		et_init(&sbi->ll_et);
521 	}
522 #endif
523 
524 	checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
525 	err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
526 				 KEY_CHECKSUM, sizeof(checksum), &checksum,
527 				 NULL);
528 	cl_sb_init(sb);
529 
530 	err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
531 				 KEY_CACHE_SET, sizeof(sbi->ll_cache),
532 				 &sbi->ll_cache, NULL);
533 
534 	sb->s_root = d_make_root(root);
535 	if (sb->s_root == NULL) {
536 		CERROR("%s: can't make root dentry\n",
537 			ll_get_fsname(sb, NULL, 0));
538 		err = -ENOMEM;
539 		goto out_lock_cn_cb;
540 	}
541 
542 	sbi->ll_sdev_orig = sb->s_dev;
543 
544 	/* We set sb->s_dev equal on all lustre clients in order to support
545 	 * NFS export clustering.  NFSD requires that the FSID be the same
546 	 * on all clients. */
547 	/* s_dev is also used in lt_compare() to compare two fs, but that is
548 	 * only a node-local comparison. */
549 	uuid = obd_get_uuid(sbi->ll_md_exp);
550 	if (uuid != NULL) {
551 		sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
552 		get_uuid2fsid(uuid->uuid, strlen(uuid->uuid), &sbi->ll_fsid);
553 	}
554 
555 	kfree(data);
556 	kfree(osfs);
557 
558 	return err;
559 out_root:
560 	iput(root);
561 out_lock_cn_cb:
562 	obd_fid_fini(sbi->ll_dt_exp->exp_obd);
563 out_dt:
564 	obd_disconnect(sbi->ll_dt_exp);
565 	sbi->ll_dt_exp = NULL;
566 	/* Make sure all OScs are gone, since cl_cache is accessing sbi. */
567 	obd_zombie_barrier();
568 out_md_fid:
569 	obd_fid_fini(sbi->ll_md_exp->exp_obd);
570 out_md:
571 	obd_disconnect(sbi->ll_md_exp);
572 	sbi->ll_md_exp = NULL;
573 out:
574 	kfree(data);
575 	kfree(osfs);
576 	ldebugfs_unregister_mountpoint(sbi);
577 	return err;
578 }
579 
ll_get_max_mdsize(struct ll_sb_info * sbi,int * lmmsize)580 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
581 {
582 	int size, rc;
583 
584 	*lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL);
585 	size = sizeof(int);
586 	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
587 			  KEY_MAX_EASIZE, &size, lmmsize, NULL);
588 	if (rc)
589 		CERROR("Get max mdsize error rc %d\n", rc);
590 
591 	return rc;
592 }
593 
ll_get_default_mdsize(struct ll_sb_info * sbi,int * lmmsize)594 int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
595 {
596 	int size, rc;
597 
598 	size = sizeof(int);
599 	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
600 			 KEY_DEFAULT_EASIZE, &size, lmmsize, NULL);
601 	if (rc)
602 		CERROR("Get default mdsize error rc %d\n", rc);
603 
604 	return rc;
605 }
606 
client_common_put_super(struct super_block * sb)607 static void client_common_put_super(struct super_block *sb)
608 {
609 	struct ll_sb_info *sbi = ll_s2sbi(sb);
610 
611 #ifdef CONFIG_FS_POSIX_ACL
612 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
613 		et_fini(&sbi->ll_et);
614 		rct_fini(&sbi->ll_rct);
615 	}
616 #endif
617 
618 	ll_close_thread_shutdown(sbi->ll_lcq);
619 
620 	cl_sb_fini(sb);
621 
622 	list_del(&sbi->ll_conn_chain);
623 
624 	obd_fid_fini(sbi->ll_dt_exp->exp_obd);
625 	obd_disconnect(sbi->ll_dt_exp);
626 	sbi->ll_dt_exp = NULL;
627 	/* wait till all OSCs are gone, since cl_cache is accessing sbi.
628 	 * see LU-2543. */
629 	obd_zombie_barrier();
630 
631 	ldebugfs_unregister_mountpoint(sbi);
632 
633 	obd_fid_fini(sbi->ll_md_exp->exp_obd);
634 	obd_disconnect(sbi->ll_md_exp);
635 	sbi->ll_md_exp = NULL;
636 }
637 
ll_kill_super(struct super_block * sb)638 void ll_kill_super(struct super_block *sb)
639 {
640 	struct ll_sb_info *sbi;
641 
642 	/* not init sb ?*/
643 	if (!(sb->s_flags & MS_ACTIVE))
644 		return;
645 
646 	sbi = ll_s2sbi(sb);
647 	/* we need to restore s_dev from changed for clustered NFS before
648 	 * put_super because new kernels have cached s_dev and change sb->s_dev
649 	 * in put_super not affected real removing devices */
650 	if (sbi) {
651 		sb->s_dev = sbi->ll_sdev_orig;
652 		sbi->ll_umounting = 1;
653 	}
654 }
655 
/*
 * Test whether the option at the start of @data is exactly @opt.
 *
 * @opt:  option name to look for
 * @data: remaining comma-separated option string; the option under test
 *        starts at data[0]
 * @fl:   value to return on a match
 *
 * The option name has to be followed by ',' or the end of the string, so
 * a token that merely begins with @opt (for example "flockx" for "flock")
 * is not accepted.
 *
 * Returns @fl when the option matches, 0 otherwise.
 */
static inline int ll_set_opt(const char *opt, char *data, int fl)
{
	size_t len = strlen(opt);

	if (strncmp(opt, data, len) != 0)
		return 0;

	/* strncmp() matched len bytes, so data[len] is within the string
	 * or is its terminator */
	if (data[len] != '\0' && data[len] != ',')
		return 0;

	return fl;
}
663 
664 /* non-client-specific mount options are parsed in lmd_parse */
ll_options(char * options,int * flags)665 static int ll_options(char *options, int *flags)
666 {
667 	int tmp;
668 	char *s1 = options, *s2;
669 
670 	if (!options)
671 		return 0;
672 
673 	CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
674 
675 	while (*s1) {
676 		CDEBUG(D_SUPER, "next opt=%s\n", s1);
677 		tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
678 		if (tmp) {
679 			*flags |= tmp;
680 			goto next;
681 		}
682 		tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
683 		if (tmp) {
684 			*flags |= tmp;
685 			goto next;
686 		}
687 		tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
688 		if (tmp) {
689 			*flags |= tmp;
690 			goto next;
691 		}
692 		tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
693 		if (tmp) {
694 			*flags &= ~tmp;
695 			goto next;
696 		}
697 		tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
698 		if (tmp) {
699 			*flags |= tmp;
700 			goto next;
701 		}
702 		tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
703 		if (tmp) {
704 			*flags &= ~tmp;
705 			goto next;
706 		}
707 		tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
708 		if (tmp) {
709 			*flags |= tmp;
710 			goto next;
711 		}
712 		tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
713 		if (tmp) {
714 			*flags |= tmp;
715 			goto next;
716 		}
717 		tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
718 		if (tmp) {
719 			*flags &= ~tmp;
720 			goto next;
721 		}
722 
723 		tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
724 		if (tmp) {
725 			*flags |= tmp;
726 			goto next;
727 		}
728 		tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
729 		if (tmp) {
730 			*flags &= ~tmp;
731 			goto next;
732 		}
733 		tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
734 		if (tmp) {
735 			*flags |= tmp;
736 			goto next;
737 		}
738 		tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
739 		if (tmp) {
740 			*flags &= ~tmp;
741 			goto next;
742 		}
743 		tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
744 		if (tmp) {
745 			*flags |= tmp;
746 			goto next;
747 		}
748 		tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
749 		if (tmp) {
750 			*flags &= ~tmp;
751 			goto next;
752 		}
753 		tmp = ll_set_opt("som_preview", s1, LL_SBI_SOM_PREVIEW);
754 		if (tmp) {
755 			*flags |= tmp;
756 			goto next;
757 		}
758 		tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
759 		if (tmp) {
760 			*flags |= tmp;
761 			goto next;
762 		}
763 		tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
764 		if (tmp) {
765 			*flags |= tmp;
766 			goto next;
767 		}
768 		tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
769 		if (tmp) {
770 			*flags &= ~tmp;
771 			goto next;
772 		}
773 		LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
774 				   s1);
775 		return -EINVAL;
776 
777 next:
778 		/* Find next opt */
779 		s2 = strchr(s1, ',');
780 		if (s2 == NULL)
781 			break;
782 		s1 = s2 + 1;
783 	}
784 	return 0;
785 }
786 
ll_lli_init(struct ll_inode_info * lli)787 void ll_lli_init(struct ll_inode_info *lli)
788 {
789 	lli->lli_inode_magic = LLI_INODE_MAGIC;
790 	lli->lli_flags = 0;
791 	lli->lli_ioepoch = 0;
792 	lli->lli_maxbytes = MAX_LFS_FILESIZE;
793 	spin_lock_init(&lli->lli_lock);
794 	lli->lli_posix_acl = NULL;
795 	lli->lli_remote_perms = NULL;
796 	mutex_init(&lli->lli_rmtperm_mutex);
797 	/* Do not set lli_fid, it has been initialized already. */
798 	fid_zero(&lli->lli_pfid);
799 	INIT_LIST_HEAD(&lli->lli_close_list);
800 	atomic_set(&lli->lli_open_count, 0);
801 	lli->lli_rmtperm_time = 0;
802 	lli->lli_pending_och = NULL;
803 	lli->lli_mds_read_och = NULL;
804 	lli->lli_mds_write_och = NULL;
805 	lli->lli_mds_exec_och = NULL;
806 	lli->lli_open_fd_read_count = 0;
807 	lli->lli_open_fd_write_count = 0;
808 	lli->lli_open_fd_exec_count = 0;
809 	mutex_init(&lli->lli_och_mutex);
810 	spin_lock_init(&lli->lli_agl_lock);
811 	lli->lli_has_smd = false;
812 	spin_lock_init(&lli->lli_layout_lock);
813 	ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE);
814 	lli->lli_clob = NULL;
815 
816 	init_rwsem(&lli->lli_xattrs_list_rwsem);
817 	mutex_init(&lli->lli_xattrs_enq_lock);
818 
819 	LASSERT(lli->lli_vfs_inode.i_mode != 0);
820 	if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
821 		mutex_init(&lli->lli_readdir_mutex);
822 		lli->lli_opendir_key = NULL;
823 		lli->lli_sai = NULL;
824 		spin_lock_init(&lli->lli_sa_lock);
825 		lli->lli_opendir_pid = 0;
826 	} else {
827 		mutex_init(&lli->lli_size_mutex);
828 		lli->lli_symlink_name = NULL;
829 		init_rwsem(&lli->lli_trunc_sem);
830 		mutex_init(&lli->lli_write_mutex);
831 		init_rwsem(&lli->lli_glimpse_sem);
832 		lli->lli_glimpse_time = 0;
833 		INIT_LIST_HEAD(&lli->lli_agl_list);
834 		lli->lli_agl_index = 0;
835 		lli->lli_async_rc = 0;
836 	}
837 	mutex_init(&lli->lli_layout_mutex);
838 }
839 
ll_bdi_register(struct backing_dev_info * bdi)840 static inline int ll_bdi_register(struct backing_dev_info *bdi)
841 {
842 	static atomic_t ll_bdi_num = ATOMIC_INIT(0);
843 
844 	bdi->name = "lustre";
845 	return bdi_register(bdi, NULL, "lustre-%d",
846 			    atomic_inc_return(&ll_bdi_num));
847 }
848 
ll_fill_super(struct super_block * sb,struct vfsmount * mnt)849 int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
850 {
851 	struct lustre_profile *lprof = NULL;
852 	struct lustre_sb_info *lsi = s2lsi(sb);
853 	struct ll_sb_info *sbi;
854 	char  *dt = NULL, *md = NULL;
855 	char  *profilenm = get_profile_name(sb);
856 	struct config_llog_instance *cfg;
857 	int    err;
858 
859 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
860 
861 	cfg = kzalloc(sizeof(*cfg), GFP_NOFS);
862 	if (!cfg)
863 		return -ENOMEM;
864 
865 	try_module_get(THIS_MODULE);
866 
867 	/* client additional sb info */
868 	lsi->lsi_llsbi = sbi = ll_init_sbi(sb);
869 	if (!sbi) {
870 		module_put(THIS_MODULE);
871 		kfree(cfg);
872 		return -ENOMEM;
873 	}
874 
875 	err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
876 	if (err)
877 		goto out_free;
878 
879 	err = bdi_init(&lsi->lsi_bdi);
880 	if (err)
881 		goto out_free;
882 	lsi->lsi_flags |= LSI_BDI_INITIALIZED;
883 	lsi->lsi_bdi.capabilities = 0;
884 	err = ll_bdi_register(&lsi->lsi_bdi);
885 	if (err)
886 		goto out_free;
887 
888 	sb->s_bdi = &lsi->lsi_bdi;
889 	/* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
890 	sb->s_d_op = &ll_d_ops;
891 
892 	/* Generate a string unique to this super, in case some joker tries
893 	   to mount the same fs at two mount points.
894 	   Use the address of the super itself.*/
895 	cfg->cfg_instance = sb;
896 	cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
897 	cfg->cfg_callback = class_config_llog_handler;
898 	/* set up client obds */
899 	err = lustre_process_log(sb, profilenm, cfg);
900 	if (err < 0) {
901 		CERROR("Unable to process log: %d\n", err);
902 		goto out_free;
903 	}
904 
905 	/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
906 	lprof = class_get_profile(profilenm);
907 	if (lprof == NULL) {
908 		LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be read from the MGS.  Does that filesystem exist?\n",
909 				   profilenm);
910 		err = -EINVAL;
911 		goto out_free;
912 	}
913 	CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
914 	       lprof->lp_md, lprof->lp_dt);
915 
916 	dt = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_dt, cfg->cfg_instance);
917 	if (!dt) {
918 		err = -ENOMEM;
919 		goto out_free;
920 	}
921 
922 	md = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_md, cfg->cfg_instance);
923 	if (!md) {
924 		err = -ENOMEM;
925 		goto out_free;
926 	}
927 
928 	/* connections, registrations, sb setup */
929 	err = client_common_fill_super(sb, md, dt, mnt);
930 
931 out_free:
932 	kfree(md);
933 	kfree(dt);
934 	if (err)
935 		ll_put_super(sb);
936 	else if (sbi->ll_flags & LL_SBI_VERBOSE)
937 		LCONSOLE_WARN("Mounted %s\n", profilenm);
938 
939 	kfree(cfg);
940 	return err;
941 } /* ll_fill_super */
942 
ll_put_super(struct super_block * sb)943 void ll_put_super(struct super_block *sb)
944 {
945 	struct config_llog_instance cfg, params_cfg;
946 	struct obd_device *obd;
947 	struct lustre_sb_info *lsi = s2lsi(sb);
948 	struct ll_sb_info *sbi = ll_s2sbi(sb);
949 	char *profilenm = get_profile_name(sb);
950 	int next, force = 1;
951 
952 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
953 
954 	cfg.cfg_instance = sb;
955 	lustre_end_log(sb, profilenm, &cfg);
956 
957 	params_cfg.cfg_instance = sb;
958 	lustre_end_log(sb, PARAMS_FILENAME, &params_cfg);
959 
960 	if (sbi->ll_md_exp) {
961 		obd = class_exp2obd(sbi->ll_md_exp);
962 		if (obd)
963 			force = obd->obd_force;
964 	}
965 
966 	/* We need to set force before the lov_disconnect in
967 	   lustre_common_put_super, since l_d cleans up osc's as well. */
968 	if (force) {
969 		next = 0;
970 		while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
971 						     &next)) != NULL) {
972 			obd->obd_force = force;
973 		}
974 	}
975 
976 	if (sbi->ll_lcq) {
977 		/* Only if client_common_fill_super succeeded */
978 		client_common_put_super(sb);
979 	}
980 
981 	next = 0;
982 	while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)))
983 		class_manual_cleanup(obd);
984 
985 	if (sbi->ll_flags & LL_SBI_VERBOSE)
986 		LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
987 
988 	if (profilenm)
989 		class_del_profile(profilenm);
990 
991 	if (lsi->lsi_flags & LSI_BDI_INITIALIZED) {
992 		bdi_destroy(&lsi->lsi_bdi);
993 		lsi->lsi_flags &= ~LSI_BDI_INITIALIZED;
994 	}
995 
996 	ll_free_sbi(sb);
997 	lsi->lsi_llsbi = NULL;
998 
999 	lustre_common_put_super(sb);
1000 
1001 	module_put(THIS_MODULE);
1002 } /* client_put_super */
1003 
ll_inode_from_resource_lock(struct ldlm_lock * lock)1004 struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
1005 {
1006 	struct inode *inode = NULL;
1007 
1008 	/* NOTE: we depend on atomic igrab() -bzzz */
1009 	lock_res_and_lock(lock);
1010 	if (lock->l_resource->lr_lvb_inode) {
1011 		struct ll_inode_info *lli;
1012 
1013 		lli = ll_i2info(lock->l_resource->lr_lvb_inode);
1014 		if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
1015 			inode = igrab(lock->l_resource->lr_lvb_inode);
1016 		} else {
1017 			inode = lock->l_resource->lr_lvb_inode;
1018 			LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ?  D_INFO :
1019 					 D_WARNING, lock, "lr_lvb_inode %p is bogus: magic %08x",
1020 					 lock->l_resource->lr_lvb_inode,
1021 					 lli->lli_inode_magic);
1022 			inode = NULL;
1023 		}
1024 	}
1025 	unlock_res_and_lock(lock);
1026 	return inode;
1027 }
1028 
ll_clear_inode(struct inode * inode)1029 void ll_clear_inode(struct inode *inode)
1030 {
1031 	struct ll_inode_info *lli = ll_i2info(inode);
1032 	struct ll_sb_info *sbi = ll_i2sbi(inode);
1033 
1034 	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1035 	       inode->i_generation, inode);
1036 
1037 	if (S_ISDIR(inode->i_mode)) {
1038 		/* these should have been cleared in ll_file_release */
1039 		LASSERT(lli->lli_opendir_key == NULL);
1040 		LASSERT(lli->lli_sai == NULL);
1041 		LASSERT(lli->lli_opendir_pid == 0);
1042 	}
1043 
1044 	spin_lock(&lli->lli_lock);
1045 	ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
1046 	spin_unlock(&lli->lli_lock);
1047 	md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
1048 
1049 	LASSERT(!lli->lli_open_fd_write_count);
1050 	LASSERT(!lli->lli_open_fd_read_count);
1051 	LASSERT(!lli->lli_open_fd_exec_count);
1052 
1053 	if (lli->lli_mds_write_och)
1054 		ll_md_real_close(inode, FMODE_WRITE);
1055 	if (lli->lli_mds_exec_och)
1056 		ll_md_real_close(inode, FMODE_EXEC);
1057 	if (lli->lli_mds_read_och)
1058 		ll_md_real_close(inode, FMODE_READ);
1059 
1060 	if (S_ISLNK(inode->i_mode)) {
1061 		kfree(lli->lli_symlink_name);
1062 		lli->lli_symlink_name = NULL;
1063 	}
1064 
1065 	ll_xattr_cache_destroy(inode);
1066 
1067 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
1068 		LASSERT(lli->lli_posix_acl == NULL);
1069 		if (lli->lli_remote_perms) {
1070 			free_rmtperm_hash(lli->lli_remote_perms);
1071 			lli->lli_remote_perms = NULL;
1072 		}
1073 	}
1074 #ifdef CONFIG_FS_POSIX_ACL
1075 	else if (lli->lli_posix_acl) {
1076 		LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
1077 		LASSERT(lli->lli_remote_perms == NULL);
1078 		posix_acl_release(lli->lli_posix_acl);
1079 		lli->lli_posix_acl = NULL;
1080 	}
1081 #endif
1082 	lli->lli_inode_magic = LLI_INODE_DEAD;
1083 
1084 	if (!S_ISDIR(inode->i_mode))
1085 		LASSERT(list_empty(&lli->lli_agl_list));
1086 
1087 	/*
1088 	 * XXX This has to be done before lsm is freed below, because
1089 	 * cl_object still uses inode lsm.
1090 	 */
1091 	cl_inode_fini(inode);
1092 	lli->lli_has_smd = false;
1093 }
1094 
1095 #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
1096 
ll_md_setattr(struct dentry * dentry,struct md_op_data * op_data,struct md_open_data ** mod)1097 static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
1098 		  struct md_open_data **mod)
1099 {
1100 	struct lustre_md md;
1101 	struct inode *inode = d_inode(dentry);
1102 	struct ll_sb_info *sbi = ll_i2sbi(inode);
1103 	struct ptlrpc_request *request = NULL;
1104 	int rc, ia_valid;
1105 
1106 	op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
1107 				     LUSTRE_OPC_ANY, NULL);
1108 	if (IS_ERR(op_data))
1109 		return PTR_ERR(op_data);
1110 
1111 	rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0,
1112 			&request, mod);
1113 	if (rc) {
1114 		ptlrpc_req_finished(request);
1115 		if (rc == -ENOENT) {
1116 			clear_nlink(inode);
1117 			/* Unlinked special device node? Or just a race?
1118 			 * Pretend we done everything. */
1119 			if (!S_ISREG(inode->i_mode) &&
1120 			    !S_ISDIR(inode->i_mode)) {
1121 				ia_valid = op_data->op_attr.ia_valid;
1122 				op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
1123 				rc = simple_setattr(dentry, &op_data->op_attr);
1124 				op_data->op_attr.ia_valid = ia_valid;
1125 			}
1126 		} else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
1127 			CERROR("md_setattr fails: rc = %d\n", rc);
1128 		}
1129 		return rc;
1130 	}
1131 
1132 	rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
1133 			      sbi->ll_md_exp, &md);
1134 	if (rc) {
1135 		ptlrpc_req_finished(request);
1136 		return rc;
1137 	}
1138 
1139 	ia_valid = op_data->op_attr.ia_valid;
1140 	/* inode size will be in cl_setattr_ost, can't do it now since dirty
1141 	 * cache is not cleared yet. */
1142 	op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
1143 	rc = simple_setattr(dentry, &op_data->op_attr);
1144 	op_data->op_attr.ia_valid = ia_valid;
1145 
1146 	/* Extract epoch data if obtained. */
1147 	op_data->op_handle = md.body->handle;
1148 	op_data->op_ioepoch = md.body->ioepoch;
1149 
1150 	ll_update_inode(inode, &md);
1151 	ptlrpc_req_finished(request);
1152 
1153 	return rc;
1154 }
1155 
1156 /* Close IO epoch and send Size-on-MDS attribute update. */
ll_setattr_done_writing(struct inode * inode,struct md_op_data * op_data,struct md_open_data * mod)1157 static int ll_setattr_done_writing(struct inode *inode,
1158 				   struct md_op_data *op_data,
1159 				   struct md_open_data *mod)
1160 {
1161 	struct ll_inode_info *lli = ll_i2info(inode);
1162 	int rc = 0;
1163 
1164 	LASSERT(op_data != NULL);
1165 	if (!S_ISREG(inode->i_mode))
1166 		return 0;
1167 
1168 	CDEBUG(D_INODE, "Epoch %llu closed on "DFID" for truncate\n",
1169 	       op_data->op_ioepoch, PFID(&lli->lli_fid));
1170 
1171 	op_data->op_flags = MF_EPOCH_CLOSE;
1172 	ll_done_writing_attr(inode, op_data);
1173 	ll_pack_inode2opdata(inode, op_data, NULL);
1174 
1175 	rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
1176 	if (rc == -EAGAIN)
1177 		/* MDS has instructed us to obtain Size-on-MDS attribute
1178 		 * from OSTs and send setattr to back to MDS. */
1179 		rc = ll_som_update(inode, op_data);
1180 	else if (rc)
1181 		CERROR("inode %lu mdc truncate failed: rc = %d\n",
1182 		       inode->i_ino, rc);
1183 	return rc;
1184 }
1185 
1186 /* If this inode has objects allocated to it (lsm != NULL), then the OST
1187  * object(s) determine the file size and mtime.  Otherwise, the MDS will
1188  * keep these values until such a time that objects are allocated for it.
1189  * We do the MDS operations first, as it is checking permissions for us.
1190  * We don't to the MDS RPC if there is nothing that we want to store there,
1191  * otherwise there is no harm in updating mtime/atime on the MDS if we are
1192  * going to do an RPC anyways.
1193  *
1194  * If we are doing a truncate, we will send the mtime and ctime updates
1195  * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
1196  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
1197  * at the same time.
1198  *
1199  * In case of HSMimport, we only set attr on MDS.
1200  */
ll_setattr_raw(struct dentry * dentry,struct iattr * attr,bool hsm_import)1201 int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
1202 {
1203 	struct inode *inode = d_inode(dentry);
1204 	struct ll_inode_info *lli = ll_i2info(inode);
1205 	struct md_op_data *op_data = NULL;
1206 	struct md_open_data *mod = NULL;
1207 	bool file_is_released = false;
1208 	int rc = 0, rc1 = 0;
1209 
1210 	CDEBUG(D_VFSTRACE,
1211 		"%s: setattr inode %p/fid:"DFID
1212 		" from %llu to %llu, valid %x, hsm_import %d\n",
1213 		ll_get_fsname(inode->i_sb, NULL, 0), inode,
1214 		PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size,
1215 		attr->ia_valid, hsm_import);
1216 
1217 	if (attr->ia_valid & ATTR_SIZE) {
1218 		/* Check new size against VFS/VM file size limit and rlimit */
1219 		rc = inode_newsize_ok(inode, attr->ia_size);
1220 		if (rc)
1221 			return rc;
1222 
1223 		/* The maximum Lustre file size is variable, based on the
1224 		 * OST maximum object size and number of stripes.  This
1225 		 * needs another check in addition to the VFS check above. */
1226 		if (attr->ia_size > ll_file_maxbytes(inode)) {
1227 			CDEBUG(D_INODE, "file "DFID" too large %llu > %llu\n",
1228 			       PFID(&lli->lli_fid), attr->ia_size,
1229 			       ll_file_maxbytes(inode));
1230 			return -EFBIG;
1231 		}
1232 
1233 		attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1234 	}
1235 
1236 	/* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
1237 	if (attr->ia_valid & TIMES_SET_FLAGS) {
1238 		if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
1239 		    !capable(CFS_CAP_FOWNER))
1240 			return -EPERM;
1241 	}
1242 
1243 	/* We mark all of the fields "set" so MDS/OST does not re-set them */
1244 	if (attr->ia_valid & ATTR_CTIME) {
1245 		attr->ia_ctime = CURRENT_TIME;
1246 		attr->ia_valid |= ATTR_CTIME_SET;
1247 	}
1248 	if (!(attr->ia_valid & ATTR_ATIME_SET) &&
1249 	    (attr->ia_valid & ATTR_ATIME)) {
1250 		attr->ia_atime = CURRENT_TIME;
1251 		attr->ia_valid |= ATTR_ATIME_SET;
1252 	}
1253 	if (!(attr->ia_valid & ATTR_MTIME_SET) &&
1254 	    (attr->ia_valid & ATTR_MTIME)) {
1255 		attr->ia_mtime = CURRENT_TIME;
1256 		attr->ia_valid |= ATTR_MTIME_SET;
1257 	}
1258 
1259 	if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
1260 		CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n",
1261 		       LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
1262 		       (s64)ktime_get_real_seconds());
1263 
1264 	/* If we are changing file size, file content is modified, flag it. */
1265 	if (attr->ia_valid & ATTR_SIZE) {
1266 		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
1267 		spin_lock(&lli->lli_lock);
1268 		lli->lli_flags |= LLIF_DATA_MODIFIED;
1269 		spin_unlock(&lli->lli_lock);
1270 	}
1271 
1272 	/* We always do an MDS RPC, even if we're only changing the size;
1273 	 * only the MDS knows whether truncate() should fail with -ETXTBUSY */
1274 
1275 	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
1276 	if (!op_data)
1277 		return -ENOMEM;
1278 
1279 	if (!S_ISDIR(inode->i_mode))
1280 		mutex_unlock(&inode->i_mutex);
1281 
1282 	memcpy(&op_data->op_attr, attr, sizeof(*attr));
1283 
1284 	/* Open epoch for truncate. */
1285 	if (exp_connect_som(ll_i2mdexp(inode)) &&
1286 	    (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
1287 		op_data->op_flags = MF_EPOCH_OPEN;
1288 
1289 	/* truncate on a released file must failed with -ENODATA,
1290 	 * so size must not be set on MDS for released file
1291 	 * but other attributes must be set
1292 	 */
1293 	if (S_ISREG(inode->i_mode)) {
1294 		struct lov_stripe_md *lsm;
1295 		__u32 gen;
1296 
1297 		ll_layout_refresh(inode, &gen);
1298 		lsm = ccc_inode_lsm_get(inode);
1299 		if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
1300 			file_is_released = true;
1301 		ccc_inode_lsm_put(inode, lsm);
1302 	}
1303 
1304 	/* if not in HSM import mode, clear size attr for released file
1305 	 * we clear the attribute send to MDT in op_data, not the original
1306 	 * received from caller in attr which is used later to
1307 	 * decide return code */
1308 	if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import)
1309 		op_data->op_attr.ia_valid &= ~ATTR_SIZE;
1310 
1311 	rc = ll_md_setattr(dentry, op_data, &mod);
1312 	if (rc)
1313 		goto out;
1314 
1315 	/* truncate failed (only when non HSM import), others succeed */
1316 	if (file_is_released) {
1317 		if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
1318 			rc = -ENODATA;
1319 		else
1320 			rc = 0;
1321 		goto out;
1322 	}
1323 
1324 	/* RPC to MDT is sent, cancel data modification flag */
1325 	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
1326 		spin_lock(&lli->lli_lock);
1327 		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
1328 		spin_unlock(&lli->lli_lock);
1329 	}
1330 
1331 	ll_ioepoch_open(lli, op_data->op_ioepoch);
1332 	if (!S_ISREG(inode->i_mode)) {
1333 		rc = 0;
1334 		goto out;
1335 	}
1336 
1337 	if (attr->ia_valid & (ATTR_SIZE |
1338 			      ATTR_ATIME | ATTR_ATIME_SET |
1339 			      ATTR_MTIME | ATTR_MTIME_SET)) {
1340 		/* For truncate and utimes sending attributes to OSTs, setting
1341 		 * mtime/atime to the past will be performed under PW [0:EOF]
1342 		 * extent lock (new_size:EOF for truncate).  It may seem
1343 		 * excessive to send mtime/atime updates to OSTs when not
1344 		 * setting times to past, but it is necessary due to possible
1345 		 * time de-synchronization between MDT inode and OST objects */
1346 		if (attr->ia_valid & ATTR_SIZE)
1347 			down_write(&lli->lli_trunc_sem);
1348 		rc = cl_setattr_ost(inode, attr);
1349 		if (attr->ia_valid & ATTR_SIZE)
1350 			up_write(&lli->lli_trunc_sem);
1351 	}
1352 out:
1353 	if (op_data->op_ioepoch) {
1354 		rc1 = ll_setattr_done_writing(inode, op_data, mod);
1355 		if (!rc)
1356 			rc = rc1;
1357 	}
1358 	ll_finish_md_op_data(op_data);
1359 
1360 	if (!S_ISDIR(inode->i_mode)) {
1361 		mutex_lock(&inode->i_mutex);
1362 		if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
1363 			inode_dio_wait(inode);
1364 	}
1365 
1366 	ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ?
1367 			LPROC_LL_TRUNC : LPROC_LL_SETATTR, 1);
1368 
1369 	return rc;
1370 }
1371 
ll_setattr(struct dentry * de,struct iattr * attr)1372 int ll_setattr(struct dentry *de, struct iattr *attr)
1373 {
1374 	int mode = d_inode(de)->i_mode;
1375 
1376 	if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
1377 			      (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
1378 		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
1379 
1380 	if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
1381 			       (ATTR_SIZE|ATTR_MODE)) &&
1382 	    (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
1383 	     (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
1384 	      !(attr->ia_mode & S_ISGID))))
1385 		attr->ia_valid |= ATTR_FORCE;
1386 
1387 	if ((attr->ia_valid & ATTR_MODE) &&
1388 	    (mode & S_ISUID) &&
1389 	    !(attr->ia_mode & S_ISUID) &&
1390 	    !(attr->ia_valid & ATTR_KILL_SUID))
1391 		attr->ia_valid |= ATTR_KILL_SUID;
1392 
1393 	if ((attr->ia_valid & ATTR_MODE) &&
1394 	    ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
1395 	    !(attr->ia_mode & S_ISGID) &&
1396 	    !(attr->ia_valid & ATTR_KILL_SGID))
1397 		attr->ia_valid |= ATTR_KILL_SGID;
1398 
1399 	return ll_setattr_raw(de, attr, false);
1400 }
1401 
/*
 * Collect file system statistics from the metadata and data exports.
 *
 * \param sb       super block of the mount
 * \param osfs     result; filled from the metadata export first, then
 *                 its block counters are replaced by the data export's
 * \param max_age  passed through to the obd statfs calls
 * \param flags    OBD_STATFS_* flags; OBD_STATFS_NODELAY is added for the
 *                 data export when the mount has LL_SBI_LAZYSTATFS
 *
 * \retval 0 on success, negative errno from the failing statfs call
 */
int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
		       __u64 max_age, __u32 flags)
{
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	struct obd_statfs ost_osfs;
	int rc;

	rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
	if (rc) {
		CERROR("md_statfs fails: rc = %d\n", rc);
		return rc;
	}

	osfs->os_type = sb->s_magic;

	CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
	       osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,
	       osfs->os_files);

	if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
		flags |= OBD_STATFS_NODELAY;

	rc = obd_statfs_rqset(sbi->ll_dt_exp, &ost_osfs, max_age, flags);
	if (rc) {
		CERROR("obd_statfs fails: rc = %d\n", rc);
		return rc;
	}

	CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
	       ost_osfs.os_bavail, ost_osfs.os_blocks, ost_osfs.os_ffree,
	       ost_osfs.os_files);

	/* block accounting comes from the data export */
	osfs->os_bsize = ost_osfs.os_bsize;
	osfs->os_blocks = ost_osfs.os_blocks;
	osfs->os_bfree = ost_osfs.os_bfree;
	osfs->os_bavail = ost_osfs.os_bavail;

	/* If we don't have as many objects free on the OST as inodes
	 * on the MDS, we reduce the total number of inodes to
	 * compensate, so that the "inodes in use" number is correct.
	 */
	if (ost_osfs.os_ffree < osfs->os_ffree) {
		osfs->os_files -= osfs->os_ffree - ost_osfs.os_ffree;
		osfs->os_ffree = ost_osfs.os_ffree;
	}

	return 0;
}
1451 
ll_statfs(struct dentry * de,struct kstatfs * sfs)1452 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
1453 {
1454 	struct super_block *sb = de->d_sb;
1455 	struct obd_statfs osfs;
1456 	int rc;
1457 
1458 	CDEBUG(D_VFSTRACE, "VFS Op: at %llu jiffies\n", get_jiffies_64());
1459 	ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
1460 
1461 	/* Some amount of caching on the client is allowed */
1462 	rc = ll_statfs_internal(sb, &osfs,
1463 				cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
1464 				0);
1465 	if (rc)
1466 		return rc;
1467 
1468 	statfs_unpack(sfs, &osfs);
1469 
1470 	/* We need to downshift for all 32-bit kernels, because we can't
1471 	 * tell if the kernel is being called via sys_statfs64() or not.
1472 	 * Stop before overflowing f_bsize - in which case it is better
1473 	 * to just risk EOVERFLOW if caller is using old sys_statfs(). */
1474 	if (sizeof(long) < 8) {
1475 		while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
1476 			sfs->f_bsize <<= 1;
1477 
1478 			osfs.os_blocks >>= 1;
1479 			osfs.os_bfree >>= 1;
1480 			osfs.os_bavail >>= 1;
1481 		}
1482 	}
1483 
1484 	sfs->f_blocks = osfs.os_blocks;
1485 	sfs->f_bfree = osfs.os_bfree;
1486 	sfs->f_bavail = osfs.os_bavail;
1487 	sfs->f_fsid = ll_s2sbi(sb)->ll_fsid;
1488 	return 0;
1489 }
1490 
ll_inode_size_lock(struct inode * inode)1491 void ll_inode_size_lock(struct inode *inode)
1492 {
1493 	struct ll_inode_info *lli;
1494 
1495 	LASSERT(!S_ISDIR(inode->i_mode));
1496 
1497 	lli = ll_i2info(inode);
1498 	mutex_lock(&lli->lli_size_mutex);
1499 }
1500 
ll_inode_size_unlock(struct inode * inode)1501 void ll_inode_size_unlock(struct inode *inode)
1502 {
1503 	struct ll_inode_info *lli;
1504 
1505 	lli = ll_i2info(inode);
1506 	mutex_unlock(&lli->lli_size_mutex);
1507 }
1508 
ll_update_inode(struct inode * inode,struct lustre_md * md)1509 void ll_update_inode(struct inode *inode, struct lustre_md *md)
1510 {
1511 	struct ll_inode_info *lli = ll_i2info(inode);
1512 	struct mdt_body *body = md->body;
1513 	struct lov_stripe_md *lsm = md->lsm;
1514 	struct ll_sb_info *sbi = ll_i2sbi(inode);
1515 
1516 	LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
1517 	if (lsm != NULL) {
1518 		if (!lli->lli_has_smd &&
1519 		    !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
1520 			cl_file_inode_init(inode, md);
1521 
1522 		lli->lli_maxbytes = lsm->lsm_maxbytes;
1523 		if (lli->lli_maxbytes > MAX_LFS_FILESIZE)
1524 			lli->lli_maxbytes = MAX_LFS_FILESIZE;
1525 	}
1526 
1527 	if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
1528 		if (body->valid & OBD_MD_FLRMTPERM)
1529 			ll_update_remote_perm(inode, md->remote_perm);
1530 	}
1531 #ifdef CONFIG_FS_POSIX_ACL
1532 	else if (body->valid & OBD_MD_FLACL) {
1533 		spin_lock(&lli->lli_lock);
1534 		if (lli->lli_posix_acl)
1535 			posix_acl_release(lli->lli_posix_acl);
1536 		lli->lli_posix_acl = md->posix_acl;
1537 		spin_unlock(&lli->lli_lock);
1538 	}
1539 #endif
1540 	inode->i_ino = cl_fid_build_ino(&body->fid1,
1541 					sbi->ll_flags & LL_SBI_32BIT_API);
1542 	inode->i_generation = cl_fid_build_gen(&body->fid1);
1543 
1544 	if (body->valid & OBD_MD_FLATIME) {
1545 		if (body->atime > LTIME_S(inode->i_atime))
1546 			LTIME_S(inode->i_atime) = body->atime;
1547 		lli->lli_lvb.lvb_atime = body->atime;
1548 	}
1549 	if (body->valid & OBD_MD_FLMTIME) {
1550 		if (body->mtime > LTIME_S(inode->i_mtime)) {
1551 			CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
1552 			       inode->i_ino, LTIME_S(inode->i_mtime),
1553 			       body->mtime);
1554 			LTIME_S(inode->i_mtime) = body->mtime;
1555 		}
1556 		lli->lli_lvb.lvb_mtime = body->mtime;
1557 	}
1558 	if (body->valid & OBD_MD_FLCTIME) {
1559 		if (body->ctime > LTIME_S(inode->i_ctime))
1560 			LTIME_S(inode->i_ctime) = body->ctime;
1561 		lli->lli_lvb.lvb_ctime = body->ctime;
1562 	}
1563 	if (body->valid & OBD_MD_FLMODE)
1564 		inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
1565 	if (body->valid & OBD_MD_FLTYPE)
1566 		inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
1567 	LASSERT(inode->i_mode != 0);
1568 	if (S_ISREG(inode->i_mode))
1569 		inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
1570 				       LL_MAX_BLKSIZE_BITS);
1571 	else
1572 		inode->i_blkbits = inode->i_sb->s_blocksize_bits;
1573 	if (body->valid & OBD_MD_FLUID)
1574 		inode->i_uid = make_kuid(&init_user_ns, body->uid);
1575 	if (body->valid & OBD_MD_FLGID)
1576 		inode->i_gid = make_kgid(&init_user_ns, body->gid);
1577 	if (body->valid & OBD_MD_FLFLAGS)
1578 		inode->i_flags = ll_ext_to_inode_flags(body->flags);
1579 	if (body->valid & OBD_MD_FLNLINK)
1580 		set_nlink(inode, body->nlink);
1581 	if (body->valid & OBD_MD_FLRDEV)
1582 		inode->i_rdev = old_decode_dev(body->rdev);
1583 
1584 	if (body->valid & OBD_MD_FLID) {
1585 		/* FID shouldn't be changed! */
1586 		if (fid_is_sane(&lli->lli_fid)) {
1587 			LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
1588 				 "Trying to change FID "DFID
1589 				 " to the "DFID", inode %lu/%u(%p)\n",
1590 				 PFID(&lli->lli_fid), PFID(&body->fid1),
1591 				 inode->i_ino, inode->i_generation, inode);
1592 		} else
1593 			lli->lli_fid = body->fid1;
1594 	}
1595 
1596 	LASSERT(fid_seq(&lli->lli_fid) != 0);
1597 
1598 	if (body->valid & OBD_MD_FLSIZE) {
1599 		if (exp_connect_som(ll_i2mdexp(inode)) &&
1600 		    S_ISREG(inode->i_mode)) {
1601 			struct lustre_handle lockh;
1602 			ldlm_mode_t mode;
1603 
1604 			/* As it is possible a blocking ast has been processed
1605 			 * by this time, we need to check there is an UPDATE
1606 			 * lock on the client and set LLIF_MDS_SIZE_LOCK holding
1607 			 * it. */
1608 			mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE,
1609 					       &lockh, LDLM_FL_CBPENDING,
1610 					       LCK_CR | LCK_CW |
1611 					       LCK_PR | LCK_PW);
1612 			if (mode) {
1613 				if (lli->lli_flags & (LLIF_DONE_WRITING |
1614 						      LLIF_EPOCH_PENDING |
1615 						      LLIF_SOM_DIRTY)) {
1616 					CERROR("ino %lu flags %u still has size authority! do not trust the size got from MDS\n",
1617 					       inode->i_ino, lli->lli_flags);
1618 				} else {
1619 					/* Use old size assignment to avoid
1620 					 * deadlock bz14138 & bz14326 */
1621 					i_size_write(inode, body->size);
1622 					spin_lock(&lli->lli_lock);
1623 					lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
1624 					spin_unlock(&lli->lli_lock);
1625 				}
1626 				ldlm_lock_decref(&lockh, mode);
1627 			}
1628 		} else {
1629 			/* Use old size assignment to avoid
1630 			 * deadlock bz14138 & bz14326 */
1631 			i_size_write(inode, body->size);
1632 
1633 			CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n",
1634 			       inode->i_ino, (unsigned long long)body->size);
1635 		}
1636 
1637 		if (body->valid & OBD_MD_FLBLOCKS)
1638 			inode->i_blocks = body->blocks;
1639 	}
1640 
1641 	if (body->valid & OBD_MD_TSTATE) {
1642 		if (body->t_state & MS_RESTORE)
1643 			lli->lli_flags |= LLIF_FILE_RESTORING;
1644 	}
1645 }
1646 
ll_read_inode2(struct inode * inode,void * opaque)1647 void ll_read_inode2(struct inode *inode, void *opaque)
1648 {
1649 	struct lustre_md *md = opaque;
1650 	struct ll_inode_info *lli = ll_i2info(inode);
1651 
1652 	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
1653 	       PFID(&lli->lli_fid), inode);
1654 
1655 	LASSERT(!lli->lli_has_smd);
1656 
1657 	/* Core attributes from the MDS first.  This is a new inode, and
1658 	 * the VFS doesn't zero times in the core inode so we have to do
1659 	 * it ourselves.  They will be overwritten by either MDS or OST
1660 	 * attributes - we just need to make sure they aren't newer. */
1661 	LTIME_S(inode->i_mtime) = 0;
1662 	LTIME_S(inode->i_atime) = 0;
1663 	LTIME_S(inode->i_ctime) = 0;
1664 	inode->i_rdev = 0;
1665 	ll_update_inode(inode, md);
1666 
1667 	/* OIDEBUG(inode); */
1668 
1669 	if (S_ISREG(inode->i_mode)) {
1670 		struct ll_sb_info *sbi = ll_i2sbi(inode);
1671 
1672 		inode->i_op = &ll_file_inode_operations;
1673 		inode->i_fop = sbi->ll_fop;
1674 		inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops;
1675 	} else if (S_ISDIR(inode->i_mode)) {
1676 		inode->i_op = &ll_dir_inode_operations;
1677 		inode->i_fop = &ll_dir_operations;
1678 	} else if (S_ISLNK(inode->i_mode)) {
1679 		inode->i_op = &ll_fast_symlink_inode_operations;
1680 	} else {
1681 		inode->i_op = &ll_special_inode_operations;
1682 
1683 		init_special_inode(inode, inode->i_mode,
1684 				   inode->i_rdev);
1685 	}
1686 }
1687 
ll_delete_inode(struct inode * inode)1688 void ll_delete_inode(struct inode *inode)
1689 {
1690 	struct cl_inode_info *lli = cl_i2info(inode);
1691 
1692 	if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL)
1693 		/* discard all dirty pages before truncating them, required by
1694 		 * osc_extent implementation at LU-1030. */
1695 		cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
1696 				   CL_FSYNC_DISCARD, 1);
1697 
1698 	truncate_inode_pages_final(&inode->i_data);
1699 
1700 	/* Workaround for LU-118 */
1701 	if (inode->i_data.nrpages) {
1702 		spin_lock_irq(&inode->i_data.tree_lock);
1703 		spin_unlock_irq(&inode->i_data.tree_lock);
1704 		LASSERTF(inode->i_data.nrpages == 0,
1705 			 "inode=%lu/%u(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
1706 			 inode->i_ino, inode->i_generation, inode,
1707 			 inode->i_data.nrpages);
1708 	}
1709 	/* Workaround end */
1710 
1711 	ll_clear_inode(inode);
1712 	clear_inode(inode);
1713 }
1714 
ll_iocontrol(struct inode * inode,struct file * file,unsigned int cmd,unsigned long arg)1715 int ll_iocontrol(struct inode *inode, struct file *file,
1716 		 unsigned int cmd, unsigned long arg)
1717 {
1718 	struct ll_sb_info *sbi = ll_i2sbi(inode);
1719 	struct ptlrpc_request *req = NULL;
1720 	int rc, flags = 0;
1721 
1722 	switch (cmd) {
1723 	case FSFILT_IOC_GETFLAGS: {
1724 		struct mdt_body *body;
1725 		struct md_op_data *op_data;
1726 
1727 		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
1728 					     0, 0, LUSTRE_OPC_ANY,
1729 					     NULL);
1730 		if (IS_ERR(op_data))
1731 			return PTR_ERR(op_data);
1732 
1733 		op_data->op_valid = OBD_MD_FLFLAGS;
1734 		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
1735 		ll_finish_md_op_data(op_data);
1736 		if (rc) {
1737 			CERROR("failure %d inode %lu\n", rc, inode->i_ino);
1738 			return -abs(rc);
1739 		}
1740 
1741 		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1742 
1743 		flags = body->flags;
1744 
1745 		ptlrpc_req_finished(req);
1746 
1747 		return put_user(flags, (int *)arg);
1748 	}
1749 	case FSFILT_IOC_SETFLAGS: {
1750 		struct lov_stripe_md *lsm;
1751 		struct obd_info oinfo = { };
1752 		struct md_op_data *op_data;
1753 
1754 		if (get_user(flags, (int *)arg))
1755 			return -EFAULT;
1756 
1757 		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1758 					     LUSTRE_OPC_ANY, NULL);
1759 		if (IS_ERR(op_data))
1760 			return PTR_ERR(op_data);
1761 
1762 		((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
1763 		op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
1764 		rc = md_setattr(sbi->ll_md_exp, op_data,
1765 				NULL, 0, NULL, 0, &req, NULL);
1766 		ll_finish_md_op_data(op_data);
1767 		ptlrpc_req_finished(req);
1768 		if (rc)
1769 			return rc;
1770 
1771 		inode->i_flags = ll_ext_to_inode_flags(flags);
1772 
1773 		lsm = ccc_inode_lsm_get(inode);
1774 		if (!lsm_has_objects(lsm)) {
1775 			ccc_inode_lsm_put(inode, lsm);
1776 			return 0;
1777 		}
1778 
1779 		oinfo.oi_oa = kmem_cache_alloc(obdo_cachep,
1780 					       GFP_NOFS | __GFP_ZERO);
1781 		if (!oinfo.oi_oa) {
1782 			ccc_inode_lsm_put(inode, lsm);
1783 			return -ENOMEM;
1784 		}
1785 		oinfo.oi_md = lsm;
1786 		oinfo.oi_oa->o_oi = lsm->lsm_oi;
1787 		oinfo.oi_oa->o_flags = flags;
1788 		oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS |
1789 				       OBD_MD_FLGROUP;
1790 		obdo_set_parent_fid(oinfo.oi_oa, &ll_i2info(inode)->lli_fid);
1791 		rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
1792 		kmem_cache_free(obdo_cachep, oinfo.oi_oa);
1793 		ccc_inode_lsm_put(inode, lsm);
1794 
1795 		if (rc && rc != -EPERM && rc != -EACCES)
1796 			CERROR("osc_setattr_async fails: rc = %d\n", rc);
1797 
1798 		return rc;
1799 	}
1800 	default:
1801 		return -ENOSYS;
1802 	}
1803 
1804 	return 0;
1805 }
1806 
ll_flush_ctx(struct inode * inode)1807 int ll_flush_ctx(struct inode *inode)
1808 {
1809 	struct ll_sb_info  *sbi = ll_i2sbi(inode);
1810 
1811 	CDEBUG(D_SEC, "flush context for user %d\n",
1812 		      from_kuid(&init_user_ns, current_uid()));
1813 
1814 	obd_set_info_async(NULL, sbi->ll_md_exp,
1815 			   sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
1816 			   0, NULL, NULL);
1817 	obd_set_info_async(NULL, sbi->ll_dt_exp,
1818 			   sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
1819 			   0, NULL, NULL);
1820 	return 0;
1821 }
1822 
1823 /* umount -f client means force down, don't save state */
ll_umount_begin(struct super_block * sb)1824 void ll_umount_begin(struct super_block *sb)
1825 {
1826 	struct ll_sb_info *sbi = ll_s2sbi(sb);
1827 	struct obd_device *obd;
1828 	struct obd_ioctl_data *ioc_data;
1829 
1830 	CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
1831 	       sb->s_count, atomic_read(&sb->s_active));
1832 
1833 	obd = class_exp2obd(sbi->ll_md_exp);
1834 	if (obd == NULL) {
1835 		CERROR("Invalid MDC connection handle %#llx\n",
1836 		       sbi->ll_md_exp->exp_handle.h_cookie);
1837 		return;
1838 	}
1839 	obd->obd_force = 1;
1840 
1841 	obd = class_exp2obd(sbi->ll_dt_exp);
1842 	if (obd == NULL) {
1843 		CERROR("Invalid LOV connection handle %#llx\n",
1844 		       sbi->ll_dt_exp->exp_handle.h_cookie);
1845 		return;
1846 	}
1847 	obd->obd_force = 1;
1848 
1849 	ioc_data = kzalloc(sizeof(*ioc_data), GFP_NOFS);
1850 	if (ioc_data) {
1851 		obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp,
1852 			      sizeof(*ioc_data), ioc_data, NULL);
1853 
1854 		obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp,
1855 			      sizeof(*ioc_data), ioc_data, NULL);
1856 
1857 		kfree(ioc_data);
1858 	}
1859 
1860 	/* Really, we'd like to wait until there are no requests outstanding,
1861 	 * and then continue.  For now, we just invalidate the requests,
1862 	 * schedule() and sleep one second if needed, and hope.
1863 	 */
1864 	schedule();
1865 }
1866 
ll_remount_fs(struct super_block * sb,int * flags,char * data)1867 int ll_remount_fs(struct super_block *sb, int *flags, char *data)
1868 {
1869 	struct ll_sb_info *sbi = ll_s2sbi(sb);
1870 	char *profilenm = get_profile_name(sb);
1871 	int err;
1872 	__u32 read_only;
1873 
1874 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
1875 		read_only = *flags & MS_RDONLY;
1876 		err = obd_set_info_async(NULL, sbi->ll_md_exp,
1877 					 sizeof(KEY_READ_ONLY),
1878 					 KEY_READ_ONLY, sizeof(read_only),
1879 					 &read_only, NULL);
1880 		if (err) {
1881 			LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
1882 				      profilenm, read_only ?
1883 				      "read-only" : "read-write", err);
1884 			return err;
1885 		}
1886 
1887 		if (read_only)
1888 			sb->s_flags |= MS_RDONLY;
1889 		else
1890 			sb->s_flags &= ~MS_RDONLY;
1891 
1892 		if (sbi->ll_flags & LL_SBI_VERBOSE)
1893 			LCONSOLE_WARN("Remounted %s %s\n", profilenm,
1894 				      read_only ?  "read-only" : "read-write");
1895 	}
1896 	return 0;
1897 }
1898 
1899 /**
1900  * Cleanup the open handle that is cached on MDT-side.
1901  *
1902  * For open case, the client side open handling thread may hit error
1903  * after the MDT grant the open. Under such case, the client should
1904  * send close RPC to the MDT as cleanup; otherwise, the open handle
1905  * on the MDT will be leaked there until the client umount or evicted.
1906  *
1907  * In further, if someone unlinked the file, because the open handle
1908  * holds the reference on such file/object, then it will block the
1909  * subsequent threads that want to locate such object via FID.
1910  *
1911  * \param[in] sb	super block for this file-system
1912  * \param[in] open_req	pointer to the original open request
1913  */
ll_open_cleanup(struct super_block * sb,struct ptlrpc_request * open_req)1914 void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
1915 {
1916 	struct mdt_body			*body;
1917 	struct md_op_data		*op_data;
1918 	struct ptlrpc_request		*close_req = NULL;
1919 	struct obd_export		*exp	   = ll_s2sbi(sb)->ll_md_exp;
1920 
1921 	body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
1922 	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
1923 	if (!op_data) {
1924 		CWARN("%s: cannot allocate op_data to release open handle for "
1925 		      DFID "\n",
1926 		      ll_get_fsname(sb, NULL, 0), PFID(&body->fid1));
1927 
1928 		return;
1929 	}
1930 
1931 	op_data->op_fid1 = body->fid1;
1932 	op_data->op_ioepoch = body->ioepoch;
1933 	op_data->op_handle = body->handle;
1934 	op_data->op_mod_time = get_seconds();
1935 	md_close(exp, op_data, NULL, &close_req);
1936 	ptlrpc_req_finished(close_req);
1937 	ll_finish_md_op_data(op_data);
1938 }
1939 
ll_prep_inode(struct inode ** inode,struct ptlrpc_request * req,struct super_block * sb,struct lookup_intent * it)1940 int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
1941 		  struct super_block *sb, struct lookup_intent *it)
1942 {
1943 	struct ll_sb_info *sbi = NULL;
1944 	struct lustre_md md;
1945 	int rc;
1946 
1947 	LASSERT(*inode || sb);
1948 	sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
1949 	rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
1950 			      sbi->ll_md_exp, &md);
1951 	if (rc)
1952 		goto cleanup;
1953 
1954 	if (*inode) {
1955 		ll_update_inode(*inode, &md);
1956 	} else {
1957 		LASSERT(sb != NULL);
1958 
1959 		/*
1960 		 * At this point server returns to client's same fid as client
1961 		 * generated for creating. So using ->fid1 is okay here.
1962 		 */
1963 		LASSERT(fid_is_sane(&md.body->fid1));
1964 
1965 		*inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
1966 					     sbi->ll_flags & LL_SBI_32BIT_API),
1967 				 &md);
1968 		if (*inode == NULL || IS_ERR(*inode)) {
1969 #ifdef CONFIG_FS_POSIX_ACL
1970 			if (md.posix_acl) {
1971 				posix_acl_release(md.posix_acl);
1972 				md.posix_acl = NULL;
1973 			}
1974 #endif
1975 			rc = IS_ERR(*inode) ? PTR_ERR(*inode) : -ENOMEM;
1976 			*inode = NULL;
1977 			CERROR("new_inode -fatal: rc %d\n", rc);
1978 			goto out;
1979 		}
1980 	}
1981 
1982 	/* Handling piggyback layout lock.
1983 	 * Layout lock can be piggybacked by getattr and open request.
1984 	 * The lsm can be applied to inode only if it comes with a layout lock
1985 	 * otherwise correct layout may be overwritten, for example:
1986 	 * 1. proc1: mdt returns a lsm but not granting layout
1987 	 * 2. layout was changed by another client
1988 	 * 3. proc2: refresh layout and layout lock granted
1989 	 * 4. proc1: to apply a stale layout */
1990 	if (it != NULL && it->d.lustre.it_lock_mode != 0) {
1991 		struct lustre_handle lockh;
1992 		struct ldlm_lock *lock;
1993 
1994 		lockh.cookie = it->d.lustre.it_lock_handle;
1995 		lock = ldlm_handle2lock(&lockh);
1996 		LASSERT(lock != NULL);
1997 		if (ldlm_has_layout(lock)) {
1998 			struct cl_object_conf conf;
1999 
2000 			memset(&conf, 0, sizeof(conf));
2001 			conf.coc_opc = OBJECT_CONF_SET;
2002 			conf.coc_inode = *inode;
2003 			conf.coc_lock = lock;
2004 			conf.u.coc_md = &md;
2005 			(void)ll_layout_conf(*inode, &conf);
2006 		}
2007 		LDLM_LOCK_PUT(lock);
2008 	}
2009 
2010 out:
2011 	if (md.lsm != NULL)
2012 		obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
2013 	md_free_lustre_md(sbi->ll_md_exp, &md);
2014 
2015 cleanup:
2016 	if (rc != 0 && it && it->it_op & IT_OPEN)
2017 		ll_open_cleanup(sb ? sb : (*inode)->i_sb, req);
2018 
2019 	return rc;
2020 }
2021 
ll_obd_statfs(struct inode * inode,void * arg)2022 int ll_obd_statfs(struct inode *inode, void *arg)
2023 {
2024 	struct ll_sb_info *sbi = NULL;
2025 	struct obd_export *exp;
2026 	char *buf = NULL;
2027 	struct obd_ioctl_data *data = NULL;
2028 	__u32 type;
2029 	__u32 flags;
2030 	int len = 0, rc;
2031 
2032 	if (!inode) {
2033 		rc = -EINVAL;
2034 		goto out_statfs;
2035 	}
2036 
2037 	sbi = ll_i2sbi(inode);
2038 	if (!sbi) {
2039 		rc = -EINVAL;
2040 		goto out_statfs;
2041 	}
2042 
2043 	rc = obd_ioctl_getdata(&buf, &len, arg);
2044 	if (rc)
2045 		goto out_statfs;
2046 
2047 	data = (void *)buf;
2048 	if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
2049 	    !data->ioc_pbuf1 || !data->ioc_pbuf2) {
2050 		rc = -EINVAL;
2051 		goto out_statfs;
2052 	}
2053 
2054 	if (data->ioc_inllen1 != sizeof(__u32) ||
2055 	    data->ioc_inllen2 != sizeof(__u32) ||
2056 	    data->ioc_plen1 != sizeof(struct obd_statfs) ||
2057 	    data->ioc_plen2 != sizeof(struct obd_uuid)) {
2058 		rc = -EINVAL;
2059 		goto out_statfs;
2060 	}
2061 
2062 	memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
2063 	if (type & LL_STATFS_LMV)
2064 		exp = sbi->ll_md_exp;
2065 	else if (type & LL_STATFS_LOV)
2066 		exp = sbi->ll_dt_exp;
2067 	else {
2068 		rc = -ENODEV;
2069 		goto out_statfs;
2070 	}
2071 
2072 	flags = (type & LL_STATFS_NODELAY) ? OBD_STATFS_NODELAY : 0;
2073 	rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, &flags);
2074 	if (rc)
2075 		goto out_statfs;
2076 out_statfs:
2077 	if (buf)
2078 		obd_ioctl_freedata(buf, len);
2079 	return rc;
2080 }
2081 
ll_process_config(struct lustre_cfg * lcfg)2082 int ll_process_config(struct lustre_cfg *lcfg)
2083 {
2084 	char *ptr;
2085 	void *sb;
2086 	struct lprocfs_static_vars lvars;
2087 	unsigned long x;
2088 	int rc = 0;
2089 
2090 	lprocfs_llite_init_vars(&lvars);
2091 
2092 	/* The instance name contains the sb: lustre-client-aacfe000 */
2093 	ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
2094 	if (!ptr || !*(++ptr))
2095 		return -EINVAL;
2096 	rc = kstrtoul(ptr, 16, &x);
2097 	if (rc != 0)
2098 		return -EINVAL;
2099 	sb = (void *)x;
2100 	/* This better be a real Lustre superblock! */
2101 	LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC);
2102 
2103 	/* Note we have not called client_common_fill_super yet, so
2104 	   proc fns must be able to handle that! */
2105 	rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
2106 				      lcfg, sb);
2107 	if (rc > 0)
2108 		rc = 0;
2109 	return rc;
2110 }
2111 
2112 /* this function prepares md_op_data hint for passing ot down to MD stack. */
ll_prep_md_op_data(struct md_op_data * op_data,struct inode * i1,struct inode * i2,const char * name,int namelen,int mode,__u32 opc,void * data)2113 struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
2114 				       struct inode *i1, struct inode *i2,
2115 				       const char *name, int namelen,
2116 				       int mode, __u32 opc, void *data)
2117 {
2118 	LASSERT(i1 != NULL);
2119 
2120 	if (namelen > ll_i2sbi(i1)->ll_namelen)
2121 		return ERR_PTR(-ENAMETOOLONG);
2122 
2123 	if (op_data == NULL)
2124 		op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
2125 
2126 	if (op_data == NULL)
2127 		return ERR_PTR(-ENOMEM);
2128 
2129 	ll_i2gids(op_data->op_suppgids, i1, i2);
2130 	op_data->op_fid1 = *ll_inode2fid(i1);
2131 
2132 	if (i2)
2133 		op_data->op_fid2 = *ll_inode2fid(i2);
2134 	else
2135 		fid_zero(&op_data->op_fid2);
2136 
2137 	op_data->op_name = name;
2138 	op_data->op_namelen = namelen;
2139 	op_data->op_mode = mode;
2140 	op_data->op_mod_time = ktime_get_real_seconds();
2141 	op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
2142 	op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
2143 	op_data->op_cap = cfs_curproc_cap_pack();
2144 	op_data->op_bias = 0;
2145 	op_data->op_cli_flags = 0;
2146 	if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) &&
2147 	     filename_is_volatile(name, namelen, NULL))
2148 		op_data->op_bias |= MDS_CREATE_VOLATILE;
2149 	op_data->op_opc = opc;
2150 	op_data->op_mds = 0;
2151 	op_data->op_data = data;
2152 
2153 	/* If the file is being opened after mknod() (normally due to NFS)
2154 	 * try to use the default stripe data from parent directory for
2155 	 * allocating OST objects.  Try to pass the parent FID to MDS. */
2156 	if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
2157 	    !ll_i2info(i2)->lli_has_smd) {
2158 		struct ll_inode_info *lli = ll_i2info(i2);
2159 
2160 		spin_lock(&lli->lli_lock);
2161 		if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
2162 			op_data->op_fid1 = lli->lli_pfid;
2163 		spin_unlock(&lli->lli_lock);
2164 	}
2165 
2166 	/* When called by ll_setattr_raw, file is i1. */
2167 	if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
2168 		op_data->op_bias |= MDS_DATA_MODIFIED;
2169 
2170 	return op_data;
2171 }
2172 
/**
 * Release an md_op_data structure by handing it to kfree().
 *
 * \param[in] op_data	structure to free
 */
void ll_finish_md_op_data(struct md_op_data *op_data)
{
	kfree(op_data);
}
2177 
ll_show_options(struct seq_file * seq,struct dentry * dentry)2178 int ll_show_options(struct seq_file *seq, struct dentry *dentry)
2179 {
2180 	struct ll_sb_info *sbi;
2181 
2182 	LASSERT((seq != NULL) && (dentry != NULL));
2183 	sbi = ll_s2sbi(dentry->d_sb);
2184 
2185 	if (sbi->ll_flags & LL_SBI_NOLCK)
2186 		seq_puts(seq, ",nolock");
2187 
2188 	if (sbi->ll_flags & LL_SBI_FLOCK)
2189 		seq_puts(seq, ",flock");
2190 
2191 	if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
2192 		seq_puts(seq, ",localflock");
2193 
2194 	if (sbi->ll_flags & LL_SBI_USER_XATTR)
2195 		seq_puts(seq, ",user_xattr");
2196 
2197 	if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
2198 		seq_puts(seq, ",lazystatfs");
2199 
2200 	if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
2201 		seq_puts(seq, ",user_fid2path");
2202 
2203 	return 0;
2204 }
2205 
2206 /**
2207  * Get obd name by cmd, and copy out to user space
2208  */
ll_get_obd_name(struct inode * inode,unsigned int cmd,unsigned long arg)2209 int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
2210 {
2211 	struct ll_sb_info *sbi = ll_i2sbi(inode);
2212 	struct obd_device *obd;
2213 
2214 	if (cmd == OBD_IOC_GETDTNAME)
2215 		obd = class_exp2obd(sbi->ll_dt_exp);
2216 	else if (cmd == OBD_IOC_GETMDNAME)
2217 		obd = class_exp2obd(sbi->ll_md_exp);
2218 	else
2219 		return -EINVAL;
2220 
2221 	if (!obd)
2222 		return -ENOENT;
2223 
2224 	if (copy_to_user((void *)arg, obd->obd_name,
2225 			     strlen(obd->obd_name) + 1))
2226 		return -EFAULT;
2227 
2228 	return 0;
2229 }
2230 
2231 /**
2232  * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the
2233  * fsname will be returned in this buffer; otherwise, a static buffer will be
2234  * used to store the fsname and returned to caller.
2235  */
ll_get_fsname(struct super_block * sb,char * buf,int buflen)2236 char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
2237 {
2238 	static char fsname_static[MTI_NAME_MAXLEN];
2239 	struct lustre_sb_info *lsi = s2lsi(sb);
2240 	char *ptr;
2241 	int len;
2242 
2243 	if (buf == NULL) {
2244 		/* this means the caller wants to use static buffer
2245 		 * and it doesn't care about race. Usually this is
2246 		 * in error reporting path */
2247 		buf = fsname_static;
2248 		buflen = sizeof(fsname_static);
2249 	}
2250 
2251 	len = strlen(lsi->lsi_lmd->lmd_profile);
2252 	ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
2253 	if (ptr && (strcmp(ptr, "-client") == 0))
2254 		len -= 7;
2255 
2256 	if (unlikely(len >= buflen))
2257 		len = buflen - 1;
2258 	strncpy(buf, lsi->lsi_lmd->lmd_profile, len);
2259 	buf[len] = '\0';
2260 
2261 	return buf;
2262 }
2263 
ll_dirty_page_discard_warn(struct page * page,int ioret)2264 void ll_dirty_page_discard_warn(struct page *page, int ioret)
2265 {
2266 	char *buf, *path = NULL;
2267 	struct dentry *dentry = NULL;
2268 	struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
2269 
2270 	/* this can be called inside spin lock so use GFP_ATOMIC. */
2271 	buf = (char *)__get_free_page(GFP_ATOMIC);
2272 	if (buf != NULL) {
2273 		dentry = d_find_alias(page->mapping->host);
2274 		if (dentry != NULL)
2275 			path = dentry_path_raw(dentry, buf, PAGE_SIZE);
2276 	}
2277 
2278 	CDEBUG(D_WARNING,
2279 	       "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n",
2280 	       ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
2281 	       s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
2282 	       PFID(&obj->cob_header.coh_lu.loh_fid),
2283 	       (path && !IS_ERR(path)) ? path : "", ioret);
2284 
2285 	if (dentry != NULL)
2286 		dput(dentry);
2287 
2288 	if (buf != NULL)
2289 		free_page((unsigned long)buf);
2290 }
2291