• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36 
37 #ifndef __LUSTRE_LU_OBJECT_H
38 #define __LUSTRE_LU_OBJECT_H
39 
40 #include <stdarg.h>
41 #include "../../include/linux/libcfs/libcfs.h"
42 #include "lustre/lustre_idl.h"
43 #include "lu_ref.h"
44 
45 struct seq_file;
46 struct lustre_cfg;
47 struct lprocfs_stats;
48 
49 /** \defgroup lu lu
50  * lu_* data-types represent server-side entities shared by data and meta-data
51  * stacks.
52  *
53  * Design goals:
54  *
55  * -# support for layering.
56  *
57  *     Server side object is split into layers, one per device in the
58  *     corresponding device stack. Individual layer is represented by struct
59  *     lu_object. Compound layered object --- by struct lu_object_header. Most
60  *     interface functions take lu_object as an argument and operate on the
61  *     whole compound object. This decision was made due to the following
62  *     reasons:
63  *
64  *	- it's envisaged that lu_object will be used much more often than
65  *	lu_object_header;
66  *
67  *	- we want lower (non-top) layers to be able to initiate operations
68  *	on the whole object.
69  *
70  *     Generic code supports layering more complex than simple stacking, e.g.,
71  *     it is possible that at some layer object "spawns" multiple sub-objects
72  *     on the lower layer.
73  *
74  * -# fid-based identification.
75  *
76  *     Compound object is uniquely identified by its fid. Objects are indexed
77  *     by their fids (hash table is used for index).
78  *
79  * -# caching and life-cycle management.
80  *
81  *     Object's life-time is controlled by reference counting. When reference
82  *     count drops to 0, object is returned to cache. Cached objects still
83  *     retain their identity (i.e., fid), and can be recovered from cache.
84  *
85  *     Objects are kept in the global LRU list, and lu_site_purge() function
86  *     can be used to reclaim given number of unused objects from the tail of
87  *     the LRU.
88  *
89  * -# avoiding recursion.
90  *
91  *     Generic code tries to replace recursion through layers by iterations
92  *     where possible. Additionally to the end of reducing stack consumption,
93  *     data, when practically possible, are allocated through lu_context_key
94  *     interface rather than on stack.
95  * @{
96  */
97 
98 struct lu_site;
99 struct lu_object;
100 struct lu_device;
101 struct lu_object_header;
102 struct lu_context;
103 struct lu_env;
104 
/**
 * Operations common for data and meta-data devices.
 */
struct lu_device_operations {
	/**
	 * Allocate object for the given device (without lower-layer
	 * parts). This is called by lu_object_operations::loo_object_init()
	 * from the parent layer, and should setup at least lu_object::lo_dev
	 * and lu_object::lo_ops fields of resulting lu_object.
	 *
	 * Object creation protocol.
	 *
	 * Due to design goal of avoiding recursion, object creation (see
	 * lu_object_alloc()) is somewhat involved:
	 *
	 *  - first, lu_device_operations::ldo_object_alloc() method of the
	 *  top-level device in the stack is called. It should allocate top
	 *  level object (including lu_object_header), but without any
	 *  lower-layer sub-object(s).
	 *
	 *  - then lu_object_alloc() sets fid in the header of newly created
	 *  object.
	 *
	 *  - then lu_object_operations::loo_object_init() is called. It has
	 *  to allocate lower-layer object(s). To do this,
	 *  lu_object_operations::loo_object_init() calls ldo_object_alloc()
	 *  of the lower-layer device(s).
	 *
	 *  - for all new objects allocated by
	 *  lu_object_operations::loo_object_init() (and inserted into object
	 *  stack), lu_object_operations::loo_object_init() is called again
	 *  repeatedly, until no new objects are created.
	 *
	 * \post ergo(!IS_ERR(result), result->lo_dev == d &&
	 *			     result->lo_ops != NULL);
	 */
	struct lu_object *(*ldo_object_alloc)(const struct lu_env *env,
					      const struct lu_object_header *h,
					      struct lu_device *d);
	/**
	 * Process configuration command specific to this device.
	 */
	int (*ldo_process_config)(const struct lu_env *env,
				  struct lu_device *, struct lustre_cfg *);
	/**
	 * Notification that recovery has completed for this device.
	 */
	int (*ldo_recovery_complete)(const struct lu_env *,
				     struct lu_device *);

	/**
	 * Initialize local objects for the device. This method is called
	 * after the layer has been initialized (after LCFG_SETUP stage) and
	 * before it starts serving user requests.
	 */

	int (*ldo_prepare)(const struct lu_env *,
			   struct lu_device *parent,
			   struct lu_device *dev);

};
163 
/**
 * For lu_object_conf flags
 */
typedef enum {
	/* This is a new object to be allocated, or the file
	 * corresponding to the object does not exist. */
	LOC_F_NEW	= 0x00000001,
} loc_flags_t;
172 
/**
 * Object configuration, describing particulars of object being created. On
 * server this is not used, as server objects are fully identified by fid. On
 * client configuration contains struct lustre_md.
 */
struct lu_object_conf {
	/**
	 * Some hints for object find and alloc (see loc_flags_t).
	 */
	loc_flags_t     loc_flags;
};
184 
/**
 * Type of "printer" function used by lu_object_operations::loo_object_print()
 * method.
 *
 * Printer function is needed to provide some flexibility in (semi-)debugging
 * output: possible implementations: printk, CDEBUG, sysfs/seq_file.
 */
typedef int (*lu_printer_t)(const struct lu_env *env,
			    void *cookie, const char *format, ...)
	__printf(3, 4);
195 
/**
 * Operations specific for particular lu_object.
 */
struct lu_object_operations {

	/**
	 * Allocate lower-layer parts of the object by calling
	 * lu_device_operations::ldo_object_alloc() of the corresponding
	 * underlying device.
	 *
	 * This method is called once for each object inserted into object
	 * stack. It's responsibility of this method to insert lower-layer
	 * object(s) it creates into appropriate places of object stack.
	 */
	int (*loo_object_init)(const struct lu_env *env,
			       struct lu_object *o,
			       const struct lu_object_conf *conf);
	/**
	 * Called (in top-to-bottom order) during object allocation after all
	 * layers were allocated and initialized. Can be used to perform
	 * initialization depending on lower layers.
	 */
	int (*loo_object_start)(const struct lu_env *env,
				struct lu_object *o);
	/**
	 * Called before lu_object_operations::loo_object_free() to signal
	 * that object is being destroyed. Dual to
	 * lu_object_operations::loo_object_init().
	 */
	void (*loo_object_delete)(const struct lu_env *env,
				  struct lu_object *o);
	/**
	 * Dual to lu_device_operations::ldo_object_alloc(). Called when
	 * object is removed from memory.
	 */
	void (*loo_object_free)(const struct lu_env *env,
				struct lu_object *o);
	/**
	 * Called when last active reference to the object is released (and
	 * object returns to the cache). This method is optional.
	 */
	void (*loo_object_release)(const struct lu_env *env,
				   struct lu_object *o);
	/**
	 * Optional debugging helper. Print given object.
	 */
	int (*loo_object_print)(const struct lu_env *env, void *cookie,
				lu_printer_t p, const struct lu_object *o);
	/**
	 * Optional debugging method. Returns true iff the object is
	 * internally consistent.
	 */
	int (*loo_object_invariant)(const struct lu_object *o);
};
250 
/**
 * Type of lu_device. Forward declaration; full definition below.
 */
struct lu_device_type;
255 
/**
 * Device: a layer in the server side abstraction stacking.
 */
struct lu_device {
	/**
	 * Reference count. This is incremented, in particular, on each object
	 * created at this layer.
	 *
	 * \todo XXX which means that atomic_t is probably too small.
	 */
	atomic_t		       ld_ref;
	/**
	 * Pointer to device type. Never modified once set.
	 */
	struct lu_device_type       *ld_type;
	/**
	 * Operation vector for this device.
	 */
	const struct lu_device_operations *ld_ops;
	/**
	 * Stack this device belongs to.
	 */
	struct lu_site		    *ld_site;

	/** \todo XXX: temporary back pointer into obd. */
	struct obd_device		 *ld_obd;
	/**
	 * A list of references to this device, for debugging.
	 */
	struct lu_ref		      ld_reference;
	/**
	 * Link the device to the site (see lu_site::ls_ld_linkage).
	 **/
	struct list_head			 ld_linkage;
};
291 
struct lu_device_type_operations;

/**
 * Tag bits for device type. They are used to distinguish certain groups of
 * device types (see lu_device_type::ldt_tags).
 */
enum lu_device_tag {
	/** this is meta-data device */
	LU_DEVICE_MD = (1 << 0),
	/** this is data device */
	LU_DEVICE_DT = (1 << 1),
	/** data device in the client stack */
	LU_DEVICE_CL = (1 << 2)
};
306 
/**
 * Type of device.
 */
struct lu_device_type {
	/**
	 * Tag bits. Taken from enum lu_device_tag. Never modified once set.
	 */
	__u32				   ldt_tags;
	/**
	 * Name of this class. Unique system-wide. Never modified once set.
	 */
	char				   *ldt_name;
	/**
	 * Operations for this type.
	 */
	const struct lu_device_type_operations *ldt_ops;
	/**
	 * \todo XXX: temporary pointer to associated obd_type.
	 */
	struct obd_type			*ldt_obd_type;
	/**
	 * \todo XXX: temporary: context tags used by obd_*() calls.
	 */
	__u32				   ldt_ctx_tags;
	/**
	 * Number of existing device type instances.
	 */
	unsigned				ldt_device_nr;
	/**
	 * Linkage into a global list of all device types.
	 *
	 * \see lu_device_types.
	 */
	struct list_head			      ldt_linkage;
};
342 
/**
 * Operations on a device type.
 */
struct lu_device_type_operations {
	/**
	 * Allocate new device.
	 */
	struct lu_device *(*ldto_device_alloc)(const struct lu_env *env,
					       struct lu_device_type *t,
					       struct lustre_cfg *lcfg);
	/**
	 * Free device. Dual to
	 * lu_device_type_operations::ldto_device_alloc(). Returns pointer to
	 * the next device in the stack.
	 */
	struct lu_device *(*ldto_device_free)(const struct lu_env *,
					      struct lu_device *);

	/**
	 * Initialize the devices after allocation.
	 */
	int  (*ldto_device_init)(const struct lu_env *env,
				 struct lu_device *, const char *,
				 struct lu_device *);
	/**
	 * Finalize device. Dual to
	 * lu_device_type_operations::ldto_device_init(). Returns pointer to
	 * the next device in the stack.
	 */
	struct lu_device *(*ldto_device_fini)(const struct lu_env *env,
					      struct lu_device *);
	/**
	 * Initialize device type. This is called on module load.
	 */
	int  (*ldto_init)(struct lu_device_type *t);
	/**
	 * Finalize device type. Dual to
	 * lu_device_type_operations::ldto_init(). Called on module unload.
	 */
	void (*ldto_fini)(struct lu_device_type *t);
	/**
	 * Called when the first device is created.
	 */
	void (*ldto_start)(struct lu_device_type *t);
	/**
	 * Called when number of devices drops to 0.
	 */
	void (*ldto_stop)(struct lu_device_type *t);
};
392 
lu_device_is_md(const struct lu_device * d)393 static inline int lu_device_is_md(const struct lu_device *d)
394 {
395 	return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD);
396 }
397 
/**
 * Common object attributes.
 */
struct lu_attr {
	/** size in bytes */
	__u64	  la_size;
	/** modification time in seconds since Epoch */
	s64	  la_mtime;
	/** access time in seconds since Epoch */
	s64	  la_atime;
	/** change time in seconds since Epoch */
	s64	  la_ctime;
	/** 512-byte blocks allocated to object */
	__u64	  la_blocks;
	/** permission bits and file type */
	__u32	  la_mode;
	/** owner id */
	__u32	  la_uid;
	/** group id */
	__u32	  la_gid;
	/** object flags */
	__u32	  la_flags;
	/** number of persistent references to this object */
	__u32	  la_nlink;
	/** block bits of the object */
	__u32	  la_blkbits;
	/** block size of the object */
	__u32	  la_blksize;
	/** real device */
	__u32	  la_rdev;
	/**
	 * valid bits: which of the fields above carry meaningful values
	 *
	 * \see enum la_valid
	 */
	__u64	  la_valid;
};
435 
/** Bit-mask of valid attributes, for lu_attr::la_valid */
enum la_valid {
	LA_ATIME = 1 << 0,
	LA_MTIME = 1 << 1,
	LA_CTIME = 1 << 2,
	LA_SIZE  = 1 << 3,
	LA_MODE  = 1 << 4,
	LA_UID   = 1 << 5,
	LA_GID   = 1 << 6,
	LA_BLOCKS = 1 << 7,
	LA_TYPE   = 1 << 8,
	LA_FLAGS  = 1 << 9,
	LA_NLINK  = 1 << 10,
	LA_RDEV   = 1 << 11,
	LA_BLKSIZE = 1 << 12,
	LA_KILL_SUID = 1 << 13,
	LA_KILL_SGID = 1 << 14,
};
454 
/**
 * Layer in the layered object.
 */
struct lu_object {
	/**
	 * Header for this object (shared by all layers of one compound
	 * object).
	 */
	struct lu_object_header	   *lo_header;
	/**
	 * Device for this layer.
	 */
	struct lu_device		  *lo_dev;
	/**
	 * Operations for this object.
	 */
	const struct lu_object_operations *lo_ops;
	/**
	 * Linkage into list of all layers (lu_object_header::loh_layers).
	 */
	struct list_head			 lo_linkage;
	/**
	 * Link to the device, for debugging.
	 */
	struct lu_ref_link                 lo_dev_ref;
};
480 
/*
 * Values are bit numbers, tested/set atomically against
 * lu_object_header::loh_flags (see lu_object_is_dying()).
 */
enum lu_object_header_flags {
	/**
	 * Don't keep this object in cache. Object will be destroyed as soon
	 * as last reference to it is released. This flag cannot be cleared
	 * once set.
	 */
	LU_OBJECT_HEARD_BANSHEE = 0,
	/**
	 * Mark this object as having already been taken out of cache.
	 */
	LU_OBJECT_UNHASHED = 1
};
493 
/* Bits cached in lu_object_header::loh_attr. */
enum lu_object_header_attr {
	LOHA_EXISTS   = 1 << 0,
	LOHA_REMOTE   = 1 << 1,
	/**
	 * UNIX file type is stored in S_IFMT bits.
	 */
	LOHA_FT_START = 001 << 12, /**< S_IFIFO */
	LOHA_FT_END   = 017 << 12, /**< S_IFMT */
};
503 
/**
 * "Compound" object, consisting of multiple layers.
 *
 * Compound object with given fid is unique with given lu_site.
 *
 * Note, that object does *not* necessarily correspond to the real object in
 * the persistent storage: object is an anchor for locking and method calling,
 * so it is created for things like not-yet-existing child created by mkdir or
 * create calls. lu_object_operations::loo_exists() can be used to check
 * whether object is backed by persistent storage entity.
 */
struct lu_object_header {
	/**
	 * Fid, uniquely identifying this object.
	 */
	struct lu_fid		loh_fid;
	/**
	 * Object flags from enum lu_object_header_flags. Set and checked
	 * atomically.
	 */
	unsigned long	  loh_flags;
	/**
	 * Object reference count. Protected by lu_site::ls_guard.
	 */
	atomic_t	   loh_ref;
	/**
	 * Common object attributes, cached for efficiency. From enum
	 * lu_object_header_attr.
	 */
	__u32		  loh_attr;
	/**
	 * Linkage into per-site hash table. Protected by lu_site::ls_guard.
	 */
	struct hlist_node       loh_hash;
	/**
	 * Linkage into per-site LRU list. Protected by lu_site::ls_guard.
	 */
	struct list_head	     loh_lru;
	/**
	 * Linkage into list of layers. Never modified once set (except lately
	 * during object destruction). No locking is necessary.
	 */
	struct list_head	     loh_layers;
	/**
	 * A list of references to this object, for debugging.
	 */
	struct lu_ref	  loh_reference;
};
552 
struct fld;

/** Per-bucket data of the lu_site object hash table. */
struct lu_site_bkt_data {
	/**
	 * number of objects in this bucket on the lsb_lru list.
	 */
	long			lsb_lru_len;
	/**
	 * LRU list, updated on each access to object. Protected by
	 * bucket lock of lu_site::ls_obj_hash.
	 *
	 * "Cold" end of LRU is lu_site::ls_lru.next. Accessed objects are
	 * moved to the lu_site::ls_lru.prev (this is due to the non-existence
	 * of list_for_each_entry_safe_reverse()).
	 */
	struct list_head		lsb_lru;
	/**
	 * Wait-queue signaled when an object in this site is ultimately
	 * destroyed (lu_object_free()). It is used by lu_object_find() to
	 * wait before re-trying when object in the process of destruction is
	 * found in the hash table.
	 *
	 * \see htable_lookup().
	 */
	wait_queue_head_t	       lsb_marche_funebre;
};
579 
/*
 * Per-site statistics counters; presumably indices into
 * lu_site::ls_stats -- verify against lu_site_init().
 */
enum {
	LU_SS_CREATED	 = 0,
	LU_SS_CACHE_HIT,
	LU_SS_CACHE_MISS,
	LU_SS_CACHE_RACE,
	LU_SS_CACHE_DEATH_RACE,
	LU_SS_LRU_PURGED,
	LU_SS_LRU_LEN,	/* # of objects in lsb_lru lists */
	LU_SS_LAST_STAT	/* number of counters, not a real statistic */
};
590 
/**
 * lu_site is a "compartment" within which objects are unique, and LRU
 * discipline is maintained.
 *
 * lu_site exists so that multiple layered stacks can co-exist in the same
 * address space.
 *
 * lu_site has the same relation to lu_device as lu_object_header to
 * lu_object.
 */
struct lu_site {
	/**
	 * objects hash table
	 */
	struct cfs_hash	       *ls_obj_hash;
	/**
	 * index of bucket on hash table while purging
	 */
	int		       ls_purge_start;
	/**
	 * Top-level device for this stack.
	 */
	struct lu_device	 *ls_top_dev;
	/**
	 * Bottom-level device for this stack
	 */
	struct lu_device	*ls_bottom_dev;
	/**
	 * Linkage into global list of sites.
	 */
	struct list_head		ls_linkage;
	/**
	 * List for lu device for this site, protected
	 * by ls_ld_lock.
	 **/
	struct list_head		ls_ld_linkage;
	spinlock_t		ls_ld_lock;

	/**
	 * lu_site stats
	 */
	struct lprocfs_stats	*ls_stats;
	/**
	 * XXX: a hack! fld has to find md_site via site, remove when possible
	 */
	struct seq_server_site	*ld_seq_site;
};
638 
639 static inline struct lu_site_bkt_data *
lu_site_bkt_from_fid(struct lu_site * site,struct lu_fid * fid)640 lu_site_bkt_from_fid(struct lu_site *site, struct lu_fid *fid)
641 {
642 	struct cfs_hash_bd bd;
643 
644 	cfs_hash_bd_get(site->ls_obj_hash, fid, &bd);
645 	return cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
646 }
647 
lu_site2seq(const struct lu_site * s)648 static inline struct seq_server_site *lu_site2seq(const struct lu_site *s)
649 {
650 	return s->ld_seq_site;
651 }
652 
/** \name ctors
 * Constructors/destructors.
 *
 * Each *_init() has a matching *_fini(); see lu_object.c for definitions.
 * @{
 */

int  lu_site_init	 (struct lu_site *s, struct lu_device *d);
void lu_site_fini	 (struct lu_site *s);
int  lu_site_init_finish  (struct lu_site *s);
void lu_stack_fini	(const struct lu_env *env, struct lu_device *top);
void lu_device_get	(struct lu_device *d);
void lu_device_put	(struct lu_device *d);
int  lu_device_init       (struct lu_device *d, struct lu_device_type *t);
void lu_device_fini       (struct lu_device *d);
int  lu_object_header_init(struct lu_object_header *h);
void lu_object_header_fini(struct lu_object_header *h);
int  lu_object_init       (struct lu_object *o,
			   struct lu_object_header *h, struct lu_device *d);
void lu_object_fini       (struct lu_object *o);
void lu_object_add_top    (struct lu_object_header *h, struct lu_object *o);
void lu_object_add	(struct lu_object *before, struct lu_object *o);

/**
 * Helpers to initialize and finalize device types.
 */

int  lu_device_type_init(struct lu_device_type *ldt);
void lu_device_type_fini(struct lu_device_type *ldt);
void lu_types_stop(void);

/** @} ctors */
683 
/** \name caching
 * Caching and reference counting.
 * @{
 */

/**
 * Acquire additional reference to the given object. This function is used to
 * attain additional reference. To acquire initial reference use
 * lu_object_find().
 *
 * The caller must already hold a reference: the assertion enforces a
 * non-zero count before incrementing.
 */
static inline void lu_object_get(struct lu_object *o)
{
	LASSERT(atomic_read(&o->lo_header->loh_ref) > 0);
	atomic_inc(&o->lo_header->loh_ref);
}
699 
/**
 * Return true if object will not be cached after last reference to it is
 * released (i.e. the LU_OBJECT_HEARD_BANSHEE bit is set).
 */
static inline int lu_object_is_dying(const struct lu_object_header *h)
{
	return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
}
708 
/** Release a reference on \a o; dual to lu_object_get()/lu_object_find(). */
void lu_object_put(const struct lu_env *env, struct lu_object *o);
/* presumably removes \a o from the site hash (see LU_OBJECT_UNHASHED) --
 * verify against lu_object.c */
void lu_object_unhash(const struct lu_env *env, struct lu_object *o);

/** Reclaim up to \a nr unused objects from the cold end of the site LRU. */
int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr);

/** Print all objects in \a s through \a printer (debugging). */
void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
		   lu_printer_t printer);
/** Find or create the object with fid \a f at device \a dev. */
struct lu_object *lu_object_find_at(const struct lu_env *env,
				    struct lu_device *dev,
				    const struct lu_fid *f,
				    const struct lu_object_conf *conf);
/** As lu_object_find_at(), returning the slice belonging to \a dev. */
struct lu_object *lu_object_find_slice(const struct lu_env *env,
				       struct lu_device *dev,
				       const struct lu_fid *f,
				       const struct lu_object_conf *conf);
/** @} caching */
725 
/** \name helpers
 * Helpers.
 * @{
 */

/**
 * First (topmost) sub-object of given compound object.
 */
static inline struct lu_object *lu_object_top(struct lu_object_header *h)
{
	/* a compound object always has at least one layer */
	LASSERT(!list_empty(&h->loh_layers));
	return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
}
739 
/**
 * Next sub-object in the layering, following \a o on
 * lu_object_header::loh_layers.
 */
static inline struct lu_object *lu_object_next(const struct lu_object *o)
{
	return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
}
747 
/**
 * Pointer to the fid of this object (stored once in the shared header).
 */
static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
{
	return &o->lo_header->loh_fid;
}
755 
756 /**
757  * return device operations vector for this object
758  */
759 static const inline struct lu_device_operations *
lu_object_ops(const struct lu_object * o)760 lu_object_ops(const struct lu_object *o)
761 {
762 	return o->lo_dev->ld_ops;
763 }
764 
765 /**
766  * Given a compound object, find its slice, corresponding to the device type
767  * \a dtype.
768  */
769 struct lu_object *lu_object_locate(struct lu_object_header *h,
770 				   const struct lu_device_type *dtype);
771 
772 /**
773  * Printer function emitting messages through libcfs_debug_msg().
774  */
775 int lu_cdebug_printer(const struct lu_env *env,
776 		      void *cookie, const char *format, ...);
777 
/**
 * Print object description followed by a user-supplied message.
 *
 * The object is printed through lu_cdebug_printer() only when debug
 * \a mask is enabled for the current DEBUG_SUBSYSTEM; \a format and the
 * trailing arguments go to CDEBUG().
 */
#define LU_OBJECT_DEBUG(mask, env, object, format, ...)		   \
do {								      \
	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		  \
									  \
	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		     \
		lu_object_print(env, &msgdata, lu_cdebug_printer, object);\
		CDEBUG(mask, format, ## __VA_ARGS__);		    \
	}								 \
} while (0)
790 
/**
 * Print short object description followed by a user-supplied message.
 *
 * Like LU_OBJECT_DEBUG(), but prints only the object header
 * (lu_object_header_print()) of \a object rather than all layers.
 */
#define LU_OBJECT_HEADER(mask, env, object, format, ...)		\
do {								    \
	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL);		\
									\
	if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) {		   \
		lu_object_header_print(env, &msgdata, lu_cdebug_printer,\
				       (object)->lo_header);	    \
		lu_cdebug_printer(env, &msgdata, "\n");		 \
		CDEBUG(mask, format, ## __VA_ARGS__);		  \
	}							       \
} while (0)
805 
/** Print a full description of \a o through \a printer. */
void lu_object_print       (const struct lu_env *env, void *cookie,
			    lu_printer_t printer, const struct lu_object *o);
/** Print a description of header \a hdr through \a printer. */
void lu_object_header_print(const struct lu_env *env, void *cookie,
			    lu_printer_t printer,
			    const struct lu_object_header *hdr);

/**
 * Check object consistency.
 */
int lu_object_invariant(const struct lu_object *o);
816 
/**
 * Check whether object exists, no matter on local or remote storage.
 * Note: LOHA_EXISTS will be set once someone created the object,
 * and it does not need to be committed to storage.
 */
#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS)

/**
 * Check whether object is on remote storage (LOHA_REMOTE set).
 */
#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE)
828 
/** Assertion predicate: \a o must exist (see lu_object_exists()). */
static inline int lu_object_assert_exists(const struct lu_object *o)
{
	return lu_object_exists(o);
}
833 
/** Assertion predicate: \a o must not exist (see lu_object_exists()). */
static inline int lu_object_assert_not_exists(const struct lu_object *o)
{
	return !lu_object_exists(o);
}
838 
/**
 * Attr of this object.
 *
 * Only valid for existing objects (enforced by the assertion); returns the
 * cached lu_object_header::loh_attr bits.
 */
static inline __u32 lu_object_attr(const struct lu_object *o)
{
	LASSERT(lu_object_exists(o) != 0);
	return o->lo_header->loh_attr;
}
847 
/** Record a debugging reference (\a scope, \a source) on \a o's header. */
static inline void lu_object_ref_add(struct lu_object *o,
				     const char *scope,
				     const void *source)
{
	lu_ref_add(&o->lo_header->loh_reference, scope, source);
}
854 
/** As lu_object_ref_add(), using caller-supplied \a link storage. */
static inline void lu_object_ref_add_at(struct lu_object *o,
					struct lu_ref_link *link,
					const char *scope,
					const void *source)
{
	lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source);
}
862 
/** Drop the debugging reference (\a scope, \a source) from \a o's header. */
static inline void lu_object_ref_del(struct lu_object *o,
				     const char *scope, const void *source)
{
	lu_ref_del(&o->lo_header->loh_reference, scope, source);
}
868 
/** As lu_object_ref_del(), using caller-supplied \a link storage. */
static inline void lu_object_ref_del_at(struct lu_object *o,
					struct lu_ref_link *link,
					const char *scope, const void *source)
{
	lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source);
}
875 
/** Readdir-page request parameters; input params, should be filled out by mdt. */
struct lu_rdpg {
	/** hash */
	__u64		   rp_hash;
	/** count in bytes */
	unsigned int	    rp_count;
	/** number of pages */
	unsigned int	    rp_npages;
	/** requested attr */
	__u32		   rp_attrs;
	/** pointers to pages */
	struct page	   **rp_pages;
};
889 
/** Extended-attribute operation flags. */
enum lu_xattr_flags {
	LU_XATTR_REPLACE = (1 << 0),
	LU_XATTR_CREATE  = (1 << 1)
};
894 
/** @} helpers */

/** \name lu_context
 * @{ */

/** For lu_context health-checks: lifecycle states of a lu_context. */
enum lu_context_state {
	LCS_INITIALIZED = 1,
	LCS_ENTERED,
	LCS_LEFT,
	LCS_FINALIZED
};
907 
/**
 * lu_context. Execution context for lu_object methods. Currently associated
 * with thread.
 *
 * All lu_object methods, except device and device type methods (called during
 * system initialization and shutdown) are executed "within" some
 * lu_context. This means, that pointer to some "current" lu_context is passed
 * as an argument to all methods.
 *
 * All service ptlrpc threads create lu_context as part of their
 * initialization. It is possible to create "stand-alone" context for other
 * execution environments (like system calls).
 *
 * lu_object methods mainly use lu_context through lu_context_key interface
 * that allows each layer to associate arbitrary pieces of data with each
 * context (see pthread_key_create(3) for similar interface).
 *
 * On a client, lu_context is bound to a thread, see cl_env_get().
 *
 * \see lu_context_key
 */
struct lu_context {
	/**
	 * lu_context is used on the client side too. Yet we don't want to
	 * allocate values of server-side keys for the client contexts and
	 * vice versa.
	 *
	 * To achieve this, a set of tags is introduced. Contexts and keys are
	 * marked with tags. Key values are created only for contexts whose set
	 * of tags has non-empty intersection with one for key. Tags are taken
	 * from enum lu_context_tag.
	 */
	__u32		  lc_tags;
	enum lu_context_state  lc_state;
	/**
	 * Pointer to the home service thread. NULL for other execution
	 * contexts.
	 */
	struct ptlrpc_thread  *lc_thread;
	/**
	 * Pointer to an array with key values. Internal implementation
	 * detail.
	 */
	void		 **lc_value;
	/**
	 * Linkage into a list of all remembered contexts. Only
	 * `non-transient' contexts, i.e., ones created for service threads
	 * are placed here.
	 */
	struct list_head	     lc_remember;
	/**
	 * Version counter used to skip calls to lu_context_refill() when no
	 * keys were registered.
	 */
	unsigned	       lc_version;
	/**
	 * Debugging cookie.
	 */
	unsigned	       lc_cookie;
};
968 
/**
 * lu_context_key interface. Similar to pthread_key.
 */

enum lu_context_tag {
	/**
	 * Thread on md server
	 */
	LCT_MD_THREAD = 1 << 0,
	/**
	 * Thread on dt server
	 */
	LCT_DT_THREAD = 1 << 1,
	/**
	 * Context for transaction handle
	 */
	LCT_TX_HANDLE = 1 << 2,
	/**
	 * Thread on client
	 */
	LCT_CL_THREAD = 1 << 3,
	/**
	 * A per-request session on a server, and a per-system-call session on
	 * a client.
	 */
	LCT_SESSION   = 1 << 4,
	/**
	 * A per-request data on OSP device
	 */
	LCT_OSP_THREAD = 1 << 5,
	/**
	 * MGS device thread
	 */
	LCT_MG_THREAD = 1 << 6,
	/**
	 * Context for local operations
	 */
	LCT_LOCAL = 1 << 7,
	/**
	 * Set when at least one of keys, having values in this context has
	 * non-NULL lu_context_key::lct_exit() method. This is used to
	 * optimize lu_context_exit() call.
	 */
	LCT_HAS_EXIT  = 1 << 28,
	/**
	 * Don't add references for modules creating key values in that context.
	 * This is only for contexts used internally by lu_object framework.
	 */
	LCT_NOREF     = 1 << 29,
	/**
	 * Key is being prepared for retiring, don't create new values for it.
	 */
	LCT_QUIESCENT = 1 << 30,
	/**
	 * Context should be remembered.
	 *
	 * NOTE(review): `1 << 31` left-shifts into the sign bit of int,
	 * which is undefined behavior in ISO C; this relies on the
	 * compiler's benign treatment -- consider `1U << 31`.
	 */
	LCT_REMEMBER  = 1 << 31,
	/**
	 * Contexts usable in cache shrinker thread.
	 */
	LCT_SHRINKER  = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
};
1031 
1032 /**
1033  * Key. Represents per-context value slot.
1034  *
1035  * Keys are usually registered when module owning the key is initialized, and
1036  * de-registered when module is unloaded. Once key is registered, all new
1037  * contexts with matching tags, will get key value. "Old" contexts, already
1038  * initialized at the time of key registration, can be forced to get key value
1039  * by calling lu_context_refill().
1040  *
1041  * Every key value is counted in lu_context_key::lct_used and acquires a
1042  * reference on an owning module. This means, that all key values have to be
1043  * destroyed before module can be unloaded. This is usually achieved by
1044  * stopping threads started by the module, that created contexts in their
1045  * entry functions. Situation is complicated by the threads shared by multiple
1046  * modules, like ptlrpcd daemon on a client. To work around this problem,
1047  * contexts, created in such threads, are `remembered' (see
1048  * LCT_REMEMBER)---i.e., added into a global list. When module is preparing
1049  * for unloading it does the following:
1050  *
1051  *     - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT)
1052  *       preventing new key values from being allocated in the new contexts,
1053  *       and
1054  *
1055  *     - scans a list of remembered contexts, destroying values of module
1056  *       keys, thus releasing references to the module.
1057  *
1058  * This is done by lu_context_key_quiesce(). If module is re-activated
1059  * before key has been de-registered, lu_context_key_revive() call clears
1060  * `quiescent' marker.
1061  *
1062  * lu_context code doesn't provide any internal synchronization for these
1063  * activities---it's assumed that startup (including threads start-up) and
1064  * shutdown are serialized by some external means.
1065  *
1066  * \see lu_context
1067  */
struct lu_context_key {
	/**
	 * Set of tags for which values of this key are to be instantiated.
	 */
	__u32 lct_tags;
	/**
	 * Value constructor. This is called when new value is created for a
	 * context. Returns pointer to a new value, or an ERR_PTR() on
	 * failure.
	 */
	void  *(*lct_init)(const struct lu_context *ctx,
			   struct lu_context_key *key);
	/**
	 * Value destructor. Called when context with previously allocated
	 * value of this slot is destroyed. \a data is a value that was returned
	 * by a matching call to lu_context_key::lct_init().
	 */
	void   (*lct_fini)(const struct lu_context *ctx,
			   struct lu_context_key *key, void *data);
	/**
	 * Optional method called on lu_context_exit() for all allocated
	 * keys. Can be used by debugging code checking that locks are
	 * released, etc.
	 */
	void   (*lct_exit)(const struct lu_context *ctx,
			   struct lu_context_key *key, void *data);
	/**
	 * Internal implementation detail: index within lu_context::lc_value[]
	 * reserved for this key.
	 */
	int      lct_index;
	/**
	 * Internal implementation detail: number of values created for this
	 * key.
	 */
	atomic_t lct_used;
	/**
	 * Internal implementation detail: module for this key. Key values
	 * acquire a reference on this module (unless LCT_NOREF is set).
	 */
	struct module *lct_owner;
	/**
	 * References to this key. For debugging.
	 */
	struct lu_ref  lct_reference;
};
1112 
/**
 * Generate mod##_key_init(): allocate a zeroed \a type as the new context
 * value. Returns the allocation or ERR_PTR(-ENOMEM).
 */
#define LU_KEY_INIT(mod, type)						\
	static void *mod##_key_init(const struct lu_context *ctx,	\
				    struct lu_context_key *key)		\
	{								\
		type *value;						\
									\
		CLASSERT(PAGE_CACHE_SIZE >= sizeof(*value));		\
									\
		value = kzalloc(sizeof(*value), GFP_NOFS);		\
		if (!value)						\
			value = ERR_PTR(-ENOMEM);			\
									\
		return value;						\
	}								\
	struct __##mod##__dummy_init {; } /* semicolon catcher */
1128 
/**
 * Generate mod##_key_fini(): free a context value that was produced by the
 * matching mod##_key_init().
 */
#define LU_KEY_FINI(mod, type)						\
	static void mod##_key_fini(const struct lu_context *ctx,	\
				   struct lu_context_key *key, void *data) \
	{								\
		type *value = data;					\
									\
		kfree(value);						\
	}								\
	struct __##mod##__dummy_fini {; } /* semicolon catcher */
1138 
/** Convenience: emit both mod##_key_init() and mod##_key_fini(). */
#define LU_KEY_INIT_FINI(mod, type)   \
	LU_KEY_INIT(mod, type);	\
	LU_KEY_FINI(mod, type)
1142 
/**
 * Define a lu_context_key named mod##_thread_key wired to the
 * mod##_key_init()/mod##_key_fini() helpers (see LU_KEY_INIT_FINI()).
 */
#define LU_CONTEXT_KEY_DEFINE(mod, tags)		\
	struct lu_context_key mod##_thread_key = {      \
		.lct_tags = tags,		       \
		.lct_init = mod##_key_init,	     \
		.lct_fini = mod##_key_fini	      \
	}
1149 
/*
 * Record the owning module in lct_owner. This must stay a macro so that
 * THIS_MODULE expands in the caller's module, not in obdclass.
 */
#define LU_CONTEXT_KEY_INIT(key)			\
do {						    \
	(key)->lct_owner = THIS_MODULE;		 \
} while (0)
1154 
/* Register \a key, reserving its lu_context::lc_value[] slot (lct_index). */
int   lu_context_key_register(struct lu_context_key *key);
/* Dual to lu_context_key_register(); all key values must be destroyed first. */
void  lu_context_key_degister(struct lu_context_key *key);
/* Return the value of \a key stored in \a ctx. */
void *lu_context_key_get     (const struct lu_context *ctx,
			       const struct lu_context_key *key);
/* Mark \a key quiescent and purge its values from remembered contexts. */
void  lu_context_key_quiesce (struct lu_context_key *key);
/* Clear the quiescent marker set by lu_context_key_quiesce(). */
void  lu_context_key_revive  (struct lu_context_key *key);
1161 
1162 /*
1163  * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an
1164  * owning module.
1165  */
1166 
/**
 * Generate mod##_key_init_generic(): walk a NULL-terminated varargs list of
 * keys (the first one passed as \a k) and stamp each with the owning module
 * via LU_CONTEXT_KEY_INIT().
 */
#define LU_KEY_INIT_GENERIC(mod)					\
	static void mod##_key_init_generic(struct lu_context_key *k, ...) \
	{								\
		struct lu_context_key *cursor = k;			\
		va_list ap;						\
									\
		va_start(ap, k);					\
		do {							\
			LU_CONTEXT_KEY_INIT(cursor);			\
			cursor = va_arg(ap, struct lu_context_key *);	\
		} while (cursor);					\
		va_end(ap);						\
	}
1180 
/**
 * Generate mod##_type_init(): set the owner of, and register, every listed
 * key (the list is NULL-terminated internally via __VA_ARGS__, NULL).
 */
#define LU_TYPE_INIT(mod, ...)					  \
	LU_KEY_INIT_GENERIC(mod)					\
	static int mod##_type_init(struct lu_device_type *t)	    \
	{							       \
		mod##_key_init_generic(__VA_ARGS__, NULL);	      \
		return lu_context_key_register_many(__VA_ARGS__, NULL); \
	}							       \
	struct __##mod##_dummy_type_init {; }
1189 
/** Generate mod##_type_fini(): de-register every listed key. */
#define LU_TYPE_FINI(mod, ...)					  \
	static void mod##_type_fini(struct lu_device_type *t)	   \
	{							       \
		lu_context_key_degister_many(__VA_ARGS__, NULL);	\
	}							       \
	struct __##mod##_dummy_type_fini {; }
1196 
/** Generate mod##_type_start(): revive (un-quiesce) every listed key. */
#define LU_TYPE_START(mod, ...)				 \
	static void mod##_type_start(struct lu_device_type *t)  \
	{						       \
		lu_context_key_revive_many(__VA_ARGS__, NULL);  \
	}						       \
	struct __##mod##_dummy_type_start {; }
1203 
/** Generate mod##_type_stop(): quiesce every listed key. */
#define LU_TYPE_STOP(mod, ...)				  \
	static void mod##_type_stop(struct lu_device_type *t)   \
	{						       \
		lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
	}						       \
	struct __##mod##_dummy_type_stop {; }
1210 
/**
 * Emit the full set of lu_device_type key helpers:
 * mod##_type_{init,fini,start,stop}() for the listed keys.
 */
#define LU_TYPE_INIT_FINI(mod, ...)	     \
	LU_TYPE_INIT(mod, __VA_ARGS__);	 \
	LU_TYPE_FINI(mod, __VA_ARGS__);	 \
	LU_TYPE_START(mod, __VA_ARGS__);	\
	LU_TYPE_STOP(mod, __VA_ARGS__)
1216 
/* Initialize \a ctx with the given tag set; dual is lu_context_fini(). */
int   lu_context_init  (struct lu_context *ctx, __u32 tags);
void  lu_context_fini  (struct lu_context *ctx);
/* Bracket a period of context use; lu_context_exit() runs lct_exit() hooks. */
void  lu_context_enter (struct lu_context *ctx);
void  lu_context_exit  (struct lu_context *ctx);
/* Allocate values for keys registered after the context was initialized
 * (skipped via lu_context::lc_version when nothing changed). */
int   lu_context_refill(struct lu_context *ctx);

/*
 * Helper functions to operate on multiple keys. These are used by the default
 * device type operations, defined by LU_TYPE_INIT_FINI(). Each takes a
 * NULL-terminated list of key pointers.
 */

int  lu_context_key_register_many(struct lu_context_key *k, ...);
void lu_context_key_degister_many(struct lu_context_key *k, ...);
void lu_context_key_revive_many  (struct lu_context_key *k, ...);
void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
1232 
1233 /**
1234  * Environment.
1235  */
struct lu_env {
	/**
	 * "Local" context, used to store data instead of stack.
	 */
	struct lu_context  le_ctx;
	/**
	 * "Session" context for per-request data. Stored by pointer —
	 * NOTE(review): ownership/lifetime of the session context is managed
	 * by the caller; confirm against lu_env users.
	 */
	struct lu_context *le_ses;
};
1246 
/* Initialize env->le_ctx with \a tags; dual is lu_env_fini(). */
int  lu_env_init  (struct lu_env *env, __u32 tags);
void lu_env_fini  (struct lu_env *env);
/* Refill the environment's context(s) after new keys were registered. */
int  lu_env_refill(struct lu_env *env);

/** @} lu_context */

/**
 * Output site statistical counters into a buffer. Suitable for
 * ll_rd_*()-style functions.
 *
 * \param s site whose counters are reported
 * \param m seq_file to print into
 */
int lu_site_stats_print(const struct lu_site *s, struct seq_file *m);
1258 
1259 /**
1260  * Common name structure to be passed around for various name related methods.
1261  */
struct lu_name {
	/** name string — NOTE(review): not necessarily NUL-terminated;
	 *  confirm with callers before relying on termination */
	const char    *ln_name;
	/** length of ln_name in bytes */
	int	    ln_namelen;
};
1266 
1267 /**
1268  * Common buffer structure to be passed around for various xattr_{s,g}et()
1269  * methods.
1270  */
struct lu_buf {
	void   *lb_buf;		/** buffer address */
	ssize_t lb_len;		/** buffer length in bytes (signed) */
};

/*
 * printk/CDEBUG helpers for struct lu_buf: DLUBUF is the format string and
 * PLUBUF supplies the matching arguments.
 *
 * lb_len is ssize_t, so the correct length conversion is %zd; the previous
 * %zu (size_t) mismatched the signedness and would render a negative
 * length as a huge unsigned value.
 */
#define DLUBUF "(%p %zd)"
#define PLUBUF(buf) (buf)->lb_buf, (buf)->lb_len
1278 /**
1279  * One-time initializers, called at obdclass module initialization, not
1280  * exported.
1281  */
1282 
1283 /**
1284  * Initialization of global lu_* data.
1285  */
1286 int lu_global_init(void);
1287 
1288 /**
1289  * Dual to lu_global_init().
1290  */
1291 void lu_global_fini(void);
1292 
1293 struct lu_kmem_descr {
1294 	struct kmem_cache **ckd_cache;
1295 	const char       *ckd_name;
1296 	const size_t      ckd_size;
1297 };
1298 
1299 int  lu_kmem_init(struct lu_kmem_descr *caches);
1300 void lu_kmem_fini(struct lu_kmem_descr *caches);
1301 
1302 /** @} lu */
1303 #endif /* __LUSTRE_LU_OBJECT_H */
1304