• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright (c) 2011, 2012, Intel Corporation.
28  */
29 /*
30  * This file is part of Lustre, http://www.lustre.org/
31  * Lustre is a trademark of Sun Microsystems, Inc.
32  *
33  * lustre/include/lustre_mdc.h
34  *
35  * MDS data structures.
36  * See also lustre_idl.h for wire formats of requests.
37  */
38 
39 #ifndef _LUSTRE_MDC_H
40 #define _LUSTRE_MDC_H
41 
42 /** \defgroup mdc mdc
43  *
44  * @{
45  */
46 
47 #include <linux/fs.h>
48 #include <linux/dcache.h>
49 #include <lustre_intent.h>
50 #include <lustre_handles.h>
51 #include <linux/libcfs/libcfs.h>
52 #include <obd_class.h>
53 #include <lustre_lib.h>
54 #include <lustre_dlm.h>
55 #include <lustre_export.h>
56 
57 struct ptlrpc_client;
58 struct obd_export;
59 struct ptlrpc_request;
60 struct obd_device;
61 
62 /**
63  * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
64  *
65  * This mutex is used to implement execute-once semantics on the MDT.
66  * The MDT stores the last transaction ID and result for every client in
67  * its last_rcvd file. If the client doesn't get a reply, it can safely
68  * resend the request and the MDT will reconstruct the reply being aware
69  * that the request has already been executed. Without this lock,
70  * execution status of concurrent in-flight requests would be
71  * overwritten.
72  *
73  * This design limits the extent to which we can keep a full pipeline of
74  * in-flight requests from a single client.  This limitation could be
75  * overcome by allowing multiple slots per client in the last_rcvd file.
76  */
77 struct mdc_rpc_lock {
78 	/** Lock protecting in-flight RPC concurrency. */
79 	struct mutex		rpcl_mutex;
80 	/** Intent associated with currently executing request. */
81 	struct lookup_intent	*rpcl_it;
82 	/** Used for MDS/RPC load testing purposes. */
83 	int			rpcl_fakes;
84 };
85 
/**
 * Sentinel stored in mdc_rpc_lock::rpcl_it while requests admitted via
 * the OBD_FAIL_MDC_RPCS_SEM fail check are in flight.
 */
#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
87 
mdc_init_rpc_lock(struct mdc_rpc_lock * lck)88 static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
89 {
90 	mutex_init(&lck->rpcl_mutex);
91 	lck->rpcl_it = NULL;
92 }
93 
mdc_get_rpc_lock(struct mdc_rpc_lock * lck,struct lookup_intent * it)94 static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
95 				    struct lookup_intent *it)
96 {
97 	if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
98 		   it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
99 		return;
100 
101 	/* This would normally block until the existing request finishes.
102 	 * If fail_loc is set it will block until the regular request is
103 	 * done, then set rpcl_it to MDC_FAKE_RPCL_IT.  Once that is set
104 	 * it will only be cleared when all fake requests are finished.
105 	 * Only when all fake requests are finished can normal requests
106 	 * be sent, to ensure they are recoverable again.
107 	 */
108  again:
109 	mutex_lock(&lck->rpcl_mutex);
110 
111 	if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
112 		lck->rpcl_it = MDC_FAKE_RPCL_IT;
113 		lck->rpcl_fakes++;
114 		mutex_unlock(&lck->rpcl_mutex);
115 		return;
116 	}
117 
118 	/* This will only happen when the CFS_FAIL_CHECK() was
119 	 * just turned off but there are still requests in progress.
120 	 * Wait until they finish.  It doesn't need to be efficient
121 	 * in this extremely rare case, just have low overhead in
122 	 * the common case when it isn't true.
123 	 */
124 	while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
125 		mutex_unlock(&lck->rpcl_mutex);
126 		schedule_timeout(cfs_time_seconds(1) / 4);
127 		goto again;
128 	}
129 
130 	LASSERT(!lck->rpcl_it);
131 	lck->rpcl_it = it;
132 }
133 
mdc_put_rpc_lock(struct mdc_rpc_lock * lck,struct lookup_intent * it)134 static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
135 				    struct lookup_intent *it)
136 {
137 	if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
138 		   it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
139 		return;
140 
141 	if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
142 		mutex_lock(&lck->rpcl_mutex);
143 
144 		LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
145 		lck->rpcl_fakes--;
146 
147 		if (lck->rpcl_fakes == 0)
148 			lck->rpcl_it = NULL;
149 
150 	} else {
151 		LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
152 		lck->rpcl_it = NULL;
153 	}
154 
155 	mutex_unlock(&lck->rpcl_mutex);
156 }
157 
mdc_get_mod_rpc_slot(struct ptlrpc_request * req,struct lookup_intent * it)158 static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req,
159 					struct lookup_intent *it)
160 {
161 	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
162 	u32 opc;
163 	u16 tag;
164 
165 	opc = lustre_msg_get_opc(req->rq_reqmsg);
166 	tag = obd_get_mod_rpc_slot(cli, opc, it);
167 	lustre_msg_set_tag(req->rq_reqmsg, tag);
168 }
169 
mdc_put_mod_rpc_slot(struct ptlrpc_request * req,struct lookup_intent * it)170 static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req,
171 					struct lookup_intent *it)
172 {
173 	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
174 	u32 opc;
175 	u16 tag;
176 
177 	opc = lustre_msg_get_opc(req->rq_reqmsg);
178 	tag = lustre_msg_get_tag(req->rq_reqmsg);
179 	obd_put_mod_rpc_slot(cli, opc, it, tag);
180 }
181 
182 /**
183  * Update the maximum possible easize.
184  *
185  * This value is learned from ptlrpc replies sent by the MDT. The
186  * default easize is initialized to the minimum value but allowed
187  * to grow up to a single page in size if required to handle the
188  * common case.
189  *
190  * \see client_obd::cl_default_mds_easize
191  *
192  * \param[in] exp	export for MDC device
193  * \param[in] body	body of ptlrpc reply from MDT
194  *
195  */
mdc_update_max_ea_from_body(struct obd_export * exp,struct mdt_body * body)196 static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
197 					       struct mdt_body *body)
198 {
199 	if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
200 		struct client_obd *cli = &exp->exp_obd->u.cli;
201 		u32 def_easize;
202 
203 		if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
204 			cli->cl_max_mds_easize = body->mbo_max_mdsize;
205 
206 		def_easize = min_t(__u32, body->mbo_max_mdsize,
207 				   OBD_MAX_DEFAULT_EA_SIZE);
208 		cli->cl_default_mds_easize = def_easize;
209 	}
210 }
211 
212 /* mdc/mdc_locks.c */
213 int it_open_error(int phase, struct lookup_intent *it);
214 
cl_is_lov_delay_create(unsigned int flags)215 static inline bool cl_is_lov_delay_create(unsigned int flags)
216 {
217 	return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE;
218 }
219 
cl_lov_delay_create_clear(unsigned int * flags)220 static inline void cl_lov_delay_create_clear(unsigned int *flags)
221 {
222 	if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE)
223 		*flags &= ~O_LOV_DELAY_CREATE;
224 }
225 
226 /** @} mdc */
227 
228 #endif
229