• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lnet/include/lnet/lib-types.h
37  *
38  * Types used by the library side routines that do not need to be
39  * exposed to the user application
40  */
41 
42 #ifndef __LNET_LIB_TYPES_H__
43 #define __LNET_LIB_TYPES_H__
44 
45 #include "linux/lib-types.h"
46 
47 #include "../libcfs/libcfs.h"
48 #include <linux/list.h>
49 #include "types.h"
50 
/* Attribute applied to every structure that is sent over the wire: the
 * structure is packed, so the compiler inserts no padding between members. */
#define WIRE_ATTR	__attribute__((__packed__))
52 
53 /* Packed version of lnet_process_id_t to transfer via network */
54 typedef struct {
55 	lnet_nid_t nid;
56 	lnet_pid_t pid;   /* node id / process id */
57 } WIRE_ATTR lnet_process_id_packed_t;
58 
59 /* The wire handle's interface cookie only matches one network interface in
60  * one epoch (i.e. new cookie when the interface restarts or the node
61  * reboots).  The object cookie only matches one object on that interface
62  * during that object's lifetime (i.e. no cookie re-use). */
63 typedef struct {
64 	__u64 wh_interface_cookie;
65 	__u64 wh_object_cookie;
66 } WIRE_ATTR lnet_handle_wire_t;
67 
/* Message types.  The numeric values are spelled out because the type is
 * carried in the wire header (see the 'type' member of lnet_hdr_t). */
typedef enum {
	LNET_MSG_ACK	= 0,
	LNET_MSG_PUT	= 1,
	LNET_MSG_GET	= 2,
	LNET_MSG_REPLY	= 3,
	LNET_MSG_HELLO	= 4,
} lnet_msg_type_t;
75 
76 /* The variant fields of the portals message header are aligned on an 8
77  * byte boundary in the message header.  Note that all types used in these
78  * wire structs MUST be fixed size and the smaller types are placed at the
79  * end. */
80 typedef struct lnet_ack {
81 	lnet_handle_wire_t  dst_wmd;
82 	__u64	       match_bits;
83 	__u32	       mlength;
84 } WIRE_ATTR lnet_ack_t;
85 
86 typedef struct lnet_put {
87 	lnet_handle_wire_t  ack_wmd;
88 	__u64	       match_bits;
89 	__u64	       hdr_data;
90 	__u32	       ptl_index;
91 	__u32	       offset;
92 } WIRE_ATTR lnet_put_t;
93 
94 typedef struct lnet_get {
95 	lnet_handle_wire_t  return_wmd;
96 	__u64	       match_bits;
97 	__u32	       ptl_index;
98 	__u32	       src_offset;
99 	__u32	       sink_length;
100 } WIRE_ATTR lnet_get_t;
101 
102 typedef struct lnet_reply {
103 	lnet_handle_wire_t  dst_wmd;
104 } WIRE_ATTR lnet_reply_t;
105 
106 typedef struct lnet_hello {
107 	__u64	      incarnation;
108 	__u32	      type;
109 } WIRE_ATTR lnet_hello_t;
110 
111 typedef struct {
112 	lnet_nid_t	  dest_nid;
113 	lnet_nid_t	  src_nid;
114 	lnet_pid_t	  dest_pid;
115 	lnet_pid_t	  src_pid;
116 	__u32	       type;	       /* lnet_msg_type_t */
117 	__u32	       payload_length;     /* payload data to follow */
118 	/*<------__u64 aligned------->*/
119 	union {
120 		lnet_ack_t   ack;
121 		lnet_put_t   put;
122 		lnet_get_t   get;
123 		lnet_reply_t reply;
124 		lnet_hello_t hello;
125 	} msg;
126 } WIRE_ATTR lnet_hdr_t;
127 
128 /* A HELLO message contains a magic number and protocol version
129  * code in the header's dest_nid, the peer's NID in the src_nid, and
130  * LNET_MSG_HELLO in the type field.  All other common fields are zero
131  * (including payload_size; i.e. no payload).
132  * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is
133  * running the same protocol and to find out its NID. These LNDs should
134  * exchange HELLO messages when a connection is first established.  Individual
135  * LNDs can put whatever else they fancy in lnet_hdr_t::msg.
136  */
137 typedef struct {
138 	__u32   magic;			  /* LNET_PROTO_TCP_MAGIC */
139 	__u16   version_major;		  /* increment on incompatible change */
140 	__u16   version_minor;		  /* increment on compatible change */
141 } WIRE_ATTR lnet_magicversion_t;
142 
/* Protocol magic numbers, one per LND */
#define LNET_PROTO_IB_MAGIC		0x0be91b91
#define LNET_PROTO_RA_MAGIC		0x0be91b92
#define LNET_PROTO_QSW_MAGIC		0x0be91b93
#define LNET_PROTO_GNI_MAGIC		0xb00fbabe	/* ask Kim */
#define LNET_PROTO_TCP_MAGIC		0xeebc0ded
#define LNET_PROTO_PTL_MAGIC		0x50746C4E	/* ASCII 'PtlN', unique magic */
#define LNET_PROTO_MX_MAGIC		0x4d583130	/* ASCII 'MX10' */
#define LNET_PROTO_ACCEPTOR_MAGIC	0xacce7100
#define LNET_PROTO_PING_MAGIC		0x70696E67	/* ASCII 'ping' */
153 
/* Placeholder for a future "unified" protocol across all LNDs.
 * Current LNDs that receive a request carrying this magic respond with a
 * "stub" reply using their current protocol. */
#define LNET_PROTO_MAGIC		0x45726963	/* ! */

/* Version of the TCP protocol, as major.minor */
#define LNET_PROTO_TCP_VERSION_MAJOR	1
#define LNET_PROTO_TCP_VERSION_MINOR	0
161 
162 /* Acceptor connection request */
163 typedef struct {
164 	__u32       acr_magic;		  /* PTL_ACCEPTOR_PROTO_MAGIC */
165 	__u32       acr_version;		/* protocol version */
166 	__u64       acr_nid;		    /* target NID */
167 } WIRE_ATTR lnet_acceptor_connreq_t;
168 
/* Acceptor protocol version (cf. lnet_acceptor_connreq_t::acr_version) */
#define LNET_PROTO_ACCEPTOR_VERSION	1
170 
171 /* forward refs */
172 struct lnet_libmd;
173 
174 typedef struct lnet_msg {
175 	struct list_head	    msg_activelist;
176 	struct list_head	    msg_list;	   /* Q for credits/MD */
177 
178 	lnet_process_id_t     msg_target;
179 	/* where is it from, it's only for building event */
180 	lnet_nid_t		msg_from;
181 	__u32			msg_type;
182 
183 	/* committed for sending */
184 	unsigned int		msg_tx_committed:1;
185 	/* CPT # this message committed for sending */
186 	unsigned int		msg_tx_cpt:15;
187 	/* committed for receiving */
188 	unsigned int		msg_rx_committed:1;
189 	/* CPT # this message committed for receiving */
190 	unsigned int		msg_rx_cpt:15;
191 	/* queued for tx credit */
192 	unsigned int		msg_tx_delayed:1;
193 	/* queued for RX buffer */
194 	unsigned int		msg_rx_delayed:1;
195 	/* ready for pending on RX delay list */
196 	unsigned int		msg_rx_ready_delay:1;
197 
198 	unsigned int	  msg_vmflush:1;      /* VM trying to free memory */
199 	unsigned int	  msg_target_is_router:1; /* sending to a router */
200 	unsigned int	  msg_routing:1;      /* being forwarded */
201 	unsigned int	  msg_ack:1;	  /* ack on finalize (PUT) */
202 	unsigned int	  msg_sending:1;      /* outgoing message */
203 	unsigned int	  msg_receiving:1;    /* being received */
204 	unsigned int	  msg_txcredit:1;     /* taken an NI send credit */
205 	unsigned int	  msg_peertxcredit:1; /* taken a peer send credit */
206 	unsigned int	  msg_rtrcredit:1;    /* taken a global router credit */
207 	unsigned int	  msg_peerrtrcredit:1; /* taken a peer router credit */
208 	unsigned int	  msg_onactivelist:1; /* on the activelist */
209 
210 	struct lnet_peer     *msg_txpeer;	 /* peer I'm sending to */
211 	struct lnet_peer     *msg_rxpeer;	 /* peer I received from */
212 
213 	void		 *msg_private;
214 	struct lnet_libmd    *msg_md;
215 
216 	unsigned int	  msg_len;
217 	unsigned int	  msg_wanted;
218 	unsigned int	  msg_offset;
219 	unsigned int	  msg_niov;
220 	struct iovec	 *msg_iov;
221 	lnet_kiov_t	  *msg_kiov;
222 
223 	lnet_event_t	  msg_ev;
224 	lnet_hdr_t	    msg_hdr;
225 } lnet_msg_t;
226 
227 typedef struct lnet_libhandle {
228 	struct list_head	    lh_hash_chain;
229 	__u64		 lh_cookie;
230 } lnet_libhandle_t;
231 
/*
 * lh_entry - recover the structure that embeds a given member.
 * @ptr:    address of the embedded member (e.g. an lnet_libhandle_t)
 * @type:   type of the enclosing structure
 * @member: name of the member of @type that @ptr points at
 *
 * Evaluates to a 'type *' addressing the enclosing object.
 *
 * The member offset is obtained with offsetof() and subtracted from a
 * byte pointer.  The previous form computed the offset by taking the
 * address of a member of a null pointer and then subtracted two unrelated
 * pointers, both of which are undefined behaviour, and converted the
 * resulting integer to a pointer.
 */
#define lh_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
234 
235 typedef struct lnet_eq {
236 	struct list_head		eq_list;
237 	lnet_libhandle_t	eq_lh;
238 	lnet_seq_t		eq_enq_seq;
239 	lnet_seq_t		eq_deq_seq;
240 	unsigned int		eq_size;
241 	lnet_eq_handler_t	eq_callback;
242 	lnet_event_t		*eq_events;
243 	int			**eq_refs;	/* percpt refcount for EQ */
244 } lnet_eq_t;
245 
246 typedef struct lnet_me {
247 	struct list_head	     me_list;
248 	lnet_libhandle_t       me_lh;
249 	lnet_process_id_t      me_match_id;
250 	unsigned int	   me_portal;
251 	unsigned int	   me_pos;		/* hash offset in mt_hash */
252 	__u64		  me_match_bits;
253 	__u64		  me_ignore_bits;
254 	lnet_unlink_t	  me_unlink;
255 	struct lnet_libmd     *me_md;
256 } lnet_me_t;
257 
258 typedef struct lnet_libmd {
259 	struct list_head	    md_list;
260 	lnet_libhandle_t      md_lh;
261 	lnet_me_t	    *md_me;
262 	char		 *md_start;
263 	unsigned int	  md_offset;
264 	unsigned int	  md_length;
265 	unsigned int	  md_max_size;
266 	int		   md_threshold;
267 	int		   md_refcount;
268 	unsigned int	  md_options;
269 	unsigned int	  md_flags;
270 	void		 *md_user_ptr;
271 	lnet_eq_t	    *md_eq;
272 	unsigned int	  md_niov;		/* # frags */
273 	union {
274 		struct iovec  iov[LNET_MAX_IOV];
275 		lnet_kiov_t   kiov[LNET_MAX_IOV];
276 	} md_iov;
277 } lnet_libmd_t;
278 
/* MD flag bits (one bit each, may be OR-ed together) */
#define LNET_MD_FLAG_ZOMBIE		(1 << 0)
#define LNET_MD_FLAG_AUTO_UNLINK	(1 << 1)
#define LNET_MD_FLAG_ABORTED		(1 << 2)
282 
#ifdef LNET_USE_LIB_FREELIST
/* Freelist: one contiguous array of equally sized objects */
typedef struct {
	void			*fl_objs;	/* single contiguous array of objects */
	int			fl_nobjs;	/* how many objects there are */
	int			fl_objsize;	/* size of each object, overhead included */
	struct list_head	fl_list;	/* where the objects are enqueued */
} lnet_freelist_t;

/* One object on a freelist */
typedef struct {
	struct list_head	fo_list;	/* enqueue on fl_list */
	void			*fo_contents;	/* aligned contents */
} lnet_freeobj_t;
#endif
296 
297 typedef struct {
298 	/* info about peers we are trying to fail */
299 	struct list_head	     tp_list;	     /* ln_test_peers */
300 	lnet_nid_t	     tp_nid;	      /* matching nid */
301 	unsigned int	   tp_threshold;	/* # failures to simulate */
302 } lnet_test_peer_t;
303 
/* Cookie types, and the mask covering the LNET_COOKIE_TYPE_BITS low bits */
#define LNET_COOKIE_TYPE_MD	1
#define LNET_COOKIE_TYPE_ME	2
#define LNET_COOKIE_TYPE_EQ	3
#define LNET_COOKIE_TYPE_BITS	2
#define LNET_COOKIE_MASK	((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL)
309 
310 struct lnet_ni;				  /* forward ref */
311 
312 typedef struct lnet_lnd {
313 	/* fields managed by portals */
314 	struct list_head	    lnd_list;	     /* stash in the LND table */
315 	int		   lnd_refcount;	 /* # active instances */
316 
317 	/* fields initialised by the LND */
318 	unsigned int	  lnd_type;
319 
320 	int  (*lnd_startup)(struct lnet_ni *ni);
321 	void (*lnd_shutdown)(struct lnet_ni *ni);
322 	int  (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg);
323 
324 	/* In data movement APIs below, payload buffers are described as a set
325 	 * of 'niov' fragments which are...
326 	 * EITHER
327 	 *    in virtual memory (struct iovec *iov != NULL)
328 	 * OR
329 	 *    in pages (kernel only: plt_kiov_t *kiov != NULL).
330 	 * The LND may NOT overwrite these fragment descriptors.
331 	 * An 'offset' and may specify a byte offset within the set of
332 	 * fragments to start from
333 	 */
334 
335 	/* Start sending a preformatted message.  'private' is NULL for PUT and
336 	 * GET messages; otherwise this is a response to an incoming message
337 	 * and 'private' is the 'private' passed to lnet_parse().  Return
338 	 * non-zero for immediate failure, otherwise complete later with
339 	 * lnet_finalize() */
340 	int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg);
341 
342 	/* Start receiving 'mlen' bytes of payload data, skipping the following
343 	 * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to
344 	 * lnet_parse().  Return non-zero for immediate failure, otherwise
345 	 * complete later with lnet_finalize().  This also gives back a receive
346 	 * credit if the LND does flow control. */
347 	int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
348 			int delayed, unsigned int niov,
349 			struct iovec *iov, lnet_kiov_t *kiov,
350 			unsigned int offset, unsigned int mlen, unsigned int rlen);
351 
352 	/* lnet_parse() has had to delay processing of this message
353 	 * (e.g. waiting for a forwarding buffer or send credits).  Give the
354 	 * LND a chance to free urgently needed resources.  If called, return 0
355 	 * for success and do NOT give back a receive credit; that has to wait
356 	 * until lnd_recv() gets called.  On failure return < 0 and
357 	 * release resources; lnd_recv() will not be called. */
358 	int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
359 			      void **new_privatep);
360 
361 	/* notification of peer health */
362 	void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
363 
364 	/* query of peer aliveness */
365 	void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, unsigned long *when);
366 
367 	/* accept a new connection */
368 	int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
369 
370 } lnd_t;
371 
/* Values stored in lnet_ni_status_t::ns_status */
#define LNET_NI_STATUS_UP	0x15aac0de
#define LNET_NI_STATUS_DOWN	0xdeadface
#define LNET_NI_STATUS_INVALID	0x00000000
375 typedef struct {
376 	lnet_nid_t ns_nid;
377 	__u32      ns_status;
378 	__u32      ns_unused;
379 } WIRE_ATTR lnet_ni_status_t;
380 
381 struct lnet_tx_queue {
382 	int			tq_credits;	/* # tx credits free */
383 	int			tq_credits_min;	/* lowest it's been */
384 	int			tq_credits_max;	/* total # tx credits */
385 	struct list_head		tq_delayed;	/* delayed TXs */
386 };
387 
/* Number of slots in lnet_ni_t::ni_interfaces */
#define LNET_MAX_INTERFACES	16
389 
390 typedef struct lnet_ni {
391 	spinlock_t		ni_lock;
392 	struct list_head		ni_list;	/* chain on ln_nis */
393 	struct list_head		ni_cptlist;	/* chain on ln_nis_cpt */
394 	int			ni_maxtxcredits; /* # tx credits  */
395 	/* # per-peer send credits */
396 	int			ni_peertxcredits;
397 	/* # per-peer router buffer credits */
398 	int			ni_peerrtrcredits;
399 	/* seconds to consider peer dead */
400 	int			ni_peertimeout;
401 	int			ni_ncpts;	/* number of CPTs */
402 	__u32			*ni_cpts;	/* bond NI on some CPTs */
403 	lnet_nid_t		ni_nid;		/* interface's NID */
404 	void			*ni_data;	/* instance-specific data */
405 	lnd_t			*ni_lnd;	/* procedural interface */
406 	struct lnet_tx_queue	**ni_tx_queues;	/* percpt TX queues */
407 	int			**ni_refs;	/* percpt reference count */
408 	long			ni_last_alive;	/* when I was last alive */
409 	lnet_ni_status_t	*ni_status;	/* my health status */
410 	/* equivalent interfaces to use */
411 	char			*ni_interfaces[LNET_MAX_INTERFACES];
412 } lnet_ni_t;
413 
/* Only the top bit of the 64-bit match bits is set.  The constant has type
 * unsigned long long, since the value does not fit in a signed long long. */
#define LNET_PROTO_PING_MATCHBITS	0x8000000000000000ULL

/* Ping feature bits.
 * NB: the values of these features are equal to LNET_PROTO_PING_VERSION_x
 * of old LNet, so there shouldn't be any compatibility issue. */
#define LNET_PING_FEAT_INVAL		(0)		/* no feature */
#define LNET_PING_FEAT_BASE		(1 << 0)	/* just a ping */
#define LNET_PING_FEAT_NI_STATUS	(1 << 1)	/* return NI status */

/* every feature bit defined above */
#define LNET_PING_FEAT_MASK \
	(LNET_PING_FEAT_BASE | LNET_PING_FEAT_NI_STATUS)
424 
425 typedef struct {
426 	__u32			pi_magic;
427 	__u32			pi_features;
428 	lnet_pid_t		pi_pid;
429 	__u32			pi_nnis;
430 	lnet_ni_status_t	pi_ni[0];
431 } WIRE_ATTR lnet_ping_info_t;
432 
433 /* router checker data, per router */
434 #define LNET_MAX_RTR_NIS   16
435 #define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS])
436 typedef struct {
437 	/* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */
438 	struct list_head		rcd_list;
439 	lnet_handle_md_t	rcd_mdh;	/* ping buffer MD */
440 	struct lnet_peer	*rcd_gateway;	/* reference to gateway */
441 	lnet_ping_info_t	*rcd_pinginfo;	/* ping buffer */
442 } lnet_rc_data_t;
443 
444 typedef struct lnet_peer {
445 	struct list_head	lp_hashlist;	  /* chain on peer hash */
446 	struct list_head	lp_txq;	       /* messages blocking for tx credits */
447 	struct list_head	lp_rtrq;	      /* messages blocking for router credits */
448 	struct list_head	lp_rtr_list;	  /* chain on router list */
449 	int	       lp_txcredits;	 /* # tx credits available */
450 	int	       lp_mintxcredits;      /* low water mark */
451 	int	       lp_rtrcredits;	/* # router credits */
452 	int	       lp_minrtrcredits;     /* low water mark */
453 	unsigned int      lp_alive:1;	   /* alive/dead? */
454 	unsigned int      lp_notify:1;	  /* notification outstanding? */
455 	unsigned int      lp_notifylnd:1;       /* outstanding notification for LND? */
456 	unsigned int      lp_notifying:1;       /* some thread is handling notification */
457 	unsigned int      lp_ping_notsent;      /* SEND event outstanding from ping */
458 	int	       lp_alive_count;       /* # times router went dead<->alive */
459 	long	      lp_txqnob;	    /* bytes queued for sending */
460 	unsigned long	lp_timestamp;	 /* time of last aliveness news */
461 	unsigned long	lp_ping_timestamp;    /* time of last ping attempt */
462 	unsigned long	lp_ping_deadline;     /* != 0 if ping reply expected */
463 	unsigned long	lp_last_alive;	/* when I was last alive */
464 	unsigned long	lp_last_query;	/* when lp_ni was queried last time */
465 	lnet_ni_t	*lp_ni;		/* interface peer is on */
466 	lnet_nid_t	lp_nid;	       /* peer's NID */
467 	int	       lp_refcount;	  /* # refs */
468 	int			lp_cpt;		/* CPT this peer attached on */
469 	/* # refs from lnet_route_t::lr_gateway */
470 	int			lp_rtr_refcount;
471 	/* returned RC ping features */
472 	unsigned int		lp_ping_feats;
473 	struct list_head		lp_routes;	/* routers on this peer */
474 	lnet_rc_data_t		*lp_rcd;	/* router checker state */
475 } lnet_peer_t;
476 
/* Peer hash size: 2^LNET_PEER_HASH_BITS entries */
#define LNET_PEER_HASH_BITS	9
#define LNET_PEER_HASH_SIZE	(1 << LNET_PEER_HASH_BITS)
480 
481 /* peer hash table */
482 struct lnet_peer_table {
483 	int			pt_version;	/* /proc validity stamp */
484 	int			pt_number;	/* # peers extant */
485 	struct list_head		pt_deathrow;	/* zombie peers */
486 	struct list_head		*pt_hash;	/* NID->peer hash */
487 };
488 
/* Peer aliveness is enabled only on routers, and only for peers in a
 * network where lnet_ni_t::ni_peertimeout has been set to a positive value.
 * 'lp' is a pointer to the peer; it is not dereferenced unless
 * the_lnet.ln_routing is non-zero. */
#define lnet_peer_aliveness_enabled(lp) \
	((the_lnet.ln_routing != 0) && ((lp)->lp_ni->ni_peertimeout > 0))
493 
494 typedef struct {
495 	struct list_head		lr_list;	/* chain on net */
496 	struct list_head		lr_gwlist;	/* chain on gateway */
497 	lnet_peer_t		*lr_gateway;	/* router node */
498 	__u32			lr_net;		/* remote network number */
499 	int			lr_seq;		/* sequence for round-robin */
500 	unsigned int		lr_downis;	/* number of down NIs */
501 	unsigned int		lr_hops;	/* how far I am */
502 	unsigned int            lr_priority;    /* route priority */
503 } lnet_route_t;
504 
/* Remote-nets hash sizing.  The current size is derived at run time from
 * the_lnet.ln_remote_nets_hbits. */
#define LNET_REMOTE_NETS_HASH_DEFAULT	(1U << 7)
#define LNET_REMOTE_NETS_HASH_MAX	(1U << 16)
#define LNET_REMOTE_NETS_HASH_SIZE	(1 << the_lnet.ln_remote_nets_hbits)
508 
509 typedef struct {
510 	struct list_head	      lrn_list;       /* chain on ln_remote_nets_hash */
511 	struct list_head	      lrn_routes;     /* routes to me */
512 	__u32		   lrn_net;	/* my net number */
513 } lnet_remotenet_t;
514 
515 typedef struct {
516 	struct list_head rbp_bufs;	     /* my free buffer pool */
517 	struct list_head rbp_msgs;	     /* messages blocking for a buffer */
518 	int	rbp_npages;	   /* # pages in each buffer */
519 	int	rbp_nbuffers;	 /* # buffers */
520 	int	rbp_credits;	  /* # free buffers / blocked messages */
521 	int	rbp_mincredits;       /* low water mark */
522 } lnet_rtrbufpool_t;
523 
524 typedef struct {
525 	struct list_head	     rb_list;	     /* chain on rbp_bufs */
526 	lnet_rtrbufpool_t     *rb_pool;	     /* owning pool */
527 	lnet_kiov_t	    rb_kiov[0];	  /* the buffer space */
528 } lnet_rtrbuf_t;
529 
530 typedef struct {
531 	__u32	msgs_alloc;
532 	__u32	msgs_max;
533 	__u32	errors;
534 	__u32	send_count;
535 	__u32	recv_count;
536 	__u32	route_count;
537 	__u32	drop_count;
538 	__u64	send_length;
539 	__u64	recv_length;
540 	__u64	route_length;
541 	__u64	drop_length;
542 } WIRE_ATTR lnet_counters_t;
543 
#define LNET_PEER_HASHSIZE	503	/* a prime number */

#define LNET_NRBPOOLS		3	/* # different router buffer pools */
547 
/* MD matching results; each basic result is a distinct bit */
enum {
	LNET_MATCHMD_NONE	= 0x01,	/* didn't match anything */
	LNET_MATCHMD_OK		= 0x02,	/* matched OK */
	LNET_MATCHMD_DROP	= 0x04,	/* must be discarded */
	LNET_MATCHMD_EXHAUSTED	= 0x08,	/* match and buffer is exhausted */
	/* match or drop */
	LNET_MATCHMD_FINISH	= LNET_MATCHMD_OK | LNET_MATCHMD_DROP,
};
560 
/* Option bits for lnet_portal_t::ptl_options */
#define LNET_PTL_LAZY			(1 << 0)
#define LNET_PTL_MATCH_UNIQUE		(1 << 1)	/* unique match, for RDMA */
#define LNET_PTL_MATCH_WILDCARD		(1 << 2)	/* wildcard match, request portal */
565 
566 /* parameter for matching operations (GET, PUT) */
567 struct lnet_match_info {
568 	__u64			mi_mbits;
569 	lnet_process_id_t	mi_id;
570 	unsigned int		mi_opc;
571 	unsigned int		mi_portal;
572 	unsigned int		mi_rlength;
573 	unsigned int		mi_roffset;
574 };
575 
/* ME hash of an RDMA portal */
#define LNET_MT_HASH_BITS		8
#define LNET_MT_HASH_SIZE		(1 << LNET_MT_HASH_BITS)
#define LNET_MT_HASH_MASK		(LNET_MT_HASH_SIZE - 1)

/* (LNET_MT_HASH_SIZE + 1) entries are allocated for
 * lnet_match_table::mt_mhash; the last entry is reserved for MEs with
 * ignore-bits. */
#define LNET_MT_HASH_IGNORE		LNET_MT_HASH_SIZE

/* Exhausted bitmap sizing: a __u64 has 2^6 bits, so the hash needs
 * 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) = 4 __u64s as bitmap, plus one
 * extra __u64 (of which only one bit is used) for the ME-list with
 * ignore-bits, i.e. mt_mhash[LNET_MT_HASH_IGNORE]. */
#define LNET_MT_BITS_U64		6	/* 2^6 bits */
#define LNET_MT_EXHAUSTED_BITS		(LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
#define LNET_MT_EXHAUSTED_BMAP		((1 << LNET_MT_EXHAUSTED_BITS) + 1)
589 
590 /* portal match table */
591 struct lnet_match_table {
592 	/* reserved for upcoming patches, CPU partition ID */
593 	unsigned int		mt_cpt;
594 	unsigned int		mt_portal;      /* portal index */
595 	/* match table is set as "enabled" if there's non-exhausted MD
596 	 * attached on mt_mhash, it's only valid for wildcard portal */
597 	unsigned int		mt_enabled;
598 	/* bitmap to flag whether MEs on mt_hash are exhausted or not */
599 	__u64			mt_exhausted[LNET_MT_EXHAUSTED_BMAP];
600 	struct list_head		*mt_mhash;      /* matching hash */
601 };
602 
/* Message rotor settings; these are only useful for wildcard portals */
#define LNET_PTL_ROTOR_OFF	0	/* message rotor turned off */
#define LNET_PTL_ROTOR_ON	1	/* round-robin dispatch all PUT messages */
#define LNET_PTL_ROTOR_RR_RT	2	/* round-robin dispatch routed PUT messages */
/* dispatch routed PUT messages by hashing the source NID */
#define LNET_PTL_ROTOR_HASH_RT	3
612 
613 typedef struct lnet_portal {
614 	spinlock_t		ptl_lock;
615 	unsigned int		ptl_index;	/* portal ID, reserved */
616 	/* flags on this portal: lazy, unique... */
617 	unsigned int		ptl_options;
618 	/* list of messages which are stealing buffer */
619 	struct list_head		ptl_msg_stealing;
620 	/* messages blocking for MD */
621 	struct list_head		ptl_msg_delayed;
622 	/* Match table for each CPT */
623 	struct lnet_match_table	**ptl_mtables;
624 	/* spread rotor of incoming "PUT" */
625 	int			ptl_rotor;
626 	/* # active entries for this portal */
627 	int		     ptl_mt_nmaps;
628 	/* array of active entries' cpu-partition-id */
629 	int		     ptl_mt_maps[0];
630 } lnet_portal_t;
631 
/* Handle hash: 2^LNET_LH_HASH_BITS entries; size and mask are 64-bit values */
#define LNET_LH_HASH_BITS	12
#define LNET_LH_HASH_SIZE	(1ULL << LNET_LH_HASH_BITS)
#define LNET_LH_HASH_MASK	(LNET_LH_HASH_SIZE - 1)
635 
636 /* resource container (ME, MD, EQ) */
637 struct lnet_res_container {
638 	unsigned int		rec_type;	/* container type */
639 	__u64			rec_lh_cookie;	/* cookie generator */
640 	struct list_head		rec_active;	/* active resource list */
641 	struct list_head		*rec_lh_hash;	/* handle hash */
642 #ifdef LNET_USE_LIB_FREELIST
643 	lnet_freelist_t		rec_freelist;	/* freelist for resources */
644 #endif
645 };
646 
647 /* message container */
648 struct lnet_msg_container {
649 	int			msc_init;	/* initialized or not */
650 	/* max # threads finalizing */
651 	int			msc_nfinalizers;
652 	/* msgs waiting to complete finalizing */
653 	struct list_head		msc_finalizing;
654 	struct list_head		msc_active;	/* active message list */
655 	/* threads doing finalization */
656 	void			**msc_finalizers;
657 #ifdef LNET_USE_LIB_FREELIST
658 	lnet_freelist_t		msc_freelist;	/* freelist for messages */
659 #endif
660 };
661 
/* Router checker states (stored in lnet_t::ln_rc_state) */
#define LNET_RC_STATE_SHUTDOWN	0	/* not started */
#define LNET_RC_STATE_RUNNING	1	/* started up OK */
#define LNET_RC_STATE_STOPPING	2	/* telling thread to stop */
666 
667 typedef struct {
668 	/* CPU partition table of LNet */
669 	struct cfs_cpt_table		*ln_cpt_table;
670 	/* number of CPTs in ln_cpt_table */
671 	unsigned int			ln_cpt_number;
672 	unsigned int			ln_cpt_bits;
673 
674 	/* protect LNet resources (ME/MD/EQ) */
675 	struct cfs_percpt_lock		*ln_res_lock;
676 	/* # portals */
677 	int				ln_nportals;
678 	/* the vector of portals */
679 	lnet_portal_t			**ln_portals;
680 	/* percpt ME containers */
681 	struct lnet_res_container	**ln_me_containers;
682 	/* percpt MD container */
683 	struct lnet_res_container	**ln_md_containers;
684 
685 	/* Event Queue container */
686 	struct lnet_res_container	ln_eq_container;
687 	wait_queue_head_t			ln_eq_waitq;
688 	spinlock_t			ln_eq_wait_lock;
689 	unsigned int			ln_remote_nets_hbits;
690 
691 	/* protect NI, peer table, credits, routers, rtrbuf... */
692 	struct cfs_percpt_lock		*ln_net_lock;
693 	/* percpt message containers for active/finalizing/freed message */
694 	struct lnet_msg_container	**ln_msg_containers;
695 	lnet_counters_t			**ln_counters;
696 	struct lnet_peer_table		**ln_peer_tables;
697 	/* failure simulation */
698 	struct list_head			ln_test_peers;
699 
700 	struct list_head			ln_nis;		/* LND instances */
701 	/* NIs bond on specific CPT(s) */
702 	struct list_head			ln_nis_cpt;
703 	/* dying LND instances */
704 	struct list_head			ln_nis_zombie;
705 	lnet_ni_t			*ln_loni;	/* the loopback NI */
706 	/* NI to wait for events in */
707 	lnet_ni_t			*ln_eq_waitni;
708 
709 	/* remote networks with routes to them */
710 	struct list_head			*ln_remote_nets_hash;
711 	/* validity stamp */
712 	__u64				ln_remote_nets_version;
713 	/* list of all known routers */
714 	struct list_head			ln_routers;
715 	/* validity stamp */
716 	__u64				ln_routers_version;
717 	/* percpt router buffer pools */
718 	lnet_rtrbufpool_t		**ln_rtrpools;
719 
720 	lnet_handle_md_t		ln_ping_target_md;
721 	lnet_handle_eq_t		ln_ping_target_eq;
722 	lnet_ping_info_t		*ln_ping_info;
723 
724 	/* router checker startup/shutdown state */
725 	int				ln_rc_state;
726 	/* router checker's event queue */
727 	lnet_handle_eq_t		ln_rc_eqh;
728 	/* rcd still pending on net */
729 	struct list_head			ln_rcd_deathrow;
730 	/* rcd ready for free */
731 	struct list_head			ln_rcd_zombie;
732 	/* serialise startup/shutdown */
733 	struct semaphore		ln_rc_signal;
734 
735 	struct mutex			ln_api_mutex;
736 	struct mutex			ln_lnd_mutex;
737 	int				ln_init;	/* LNetInit() called? */
738 	/* Have I called LNetNIInit myself? */
739 	int				ln_niinit_self;
740 	/* LNetNIInit/LNetNIFini counter */
741 	int				ln_refcount;
742 	/* shutdown in progress */
743 	int				ln_shutdown;
744 
745 	int				ln_routing;	/* am I a router? */
746 	lnet_pid_t			ln_pid;		/* requested pid */
747 	/* uniquely identifies this ni in this epoch */
748 	__u64				ln_interface_cookie;
749 	/* registered LNDs */
750 	struct list_head			ln_lnds;
751 
752 	/* space for network names */
753 	char				*ln_network_tokens;
754 	int				ln_network_tokens_nob;
755 	/* test protocol compatibility flags */
756 	int				ln_testprotocompat;
757 
758 } lnet_t;
759 
760 #endif
761