1 /*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 #ifndef __XFS_DA_FORMAT_H__
20 #define __XFS_DA_FORMAT_H__
21
22 /*
23 * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
24 *
25 * It is used to manage a doubly linked list of all blocks at the same
26 * level in the Btree, and to identify which type of block this is.
27 */
/* 16-bit magic numbers stored in xfs_da_blkinfo.magic for v2 blocks. */
#define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */
#define XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */
#define XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */
32
33 typedef struct xfs_da_blkinfo {
34 __be32 forw; /* previous block in list */
35 __be32 back; /* following block in list */
36 __be16 magic; /* validity check on block */
37 __be16 pad; /* unused */
38 } xfs_da_blkinfo_t;
39
40 /*
41 * CRC enabled directory structure types
42 *
43 * The headers change size for the additional verification information, but
44 * otherwise the tree layouts and contents are unchanged. Hence the da btree
45 * code can use the struct xfs_da_blkinfo for manipulating the tree links and
46 * magic numbers without modification for both v2 and v3 nodes.
47 */
/* 16-bit magic numbers for the CRC enabled (v3) variants of the blocks. */
#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */
#define XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v3 dirlf single blks */
#define XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v3 dirlf multi blks */
52
53 struct xfs_da3_blkinfo {
54 /*
55 * the node link manipulation code relies on the fact that the first
56 * element of this structure is the struct xfs_da_blkinfo so it can
57 * ignore the differences in the rest of the structures.
58 */
59 struct xfs_da_blkinfo hdr;
60 __be32 crc; /* CRC of block */
61 __be64 blkno; /* first block of the buffer */
62 __be64 lsn; /* sequence number of last write */
63 uuid_t uuid; /* filesystem we belong to */
64 __be64 owner; /* inode that owns the block */
65 };
66
67 /*
68 * This is the structure of the root and intermediate nodes in the Btree.
69 * The leaf nodes are defined above.
70 *
71 * Entries are not packed.
72 *
73 * Since we have duplicate keys, use a binary search but always follow
74 * all matches in the block, not just the first match found.
75 */
#define XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */
77
78 typedef struct xfs_da_node_hdr {
79 struct xfs_da_blkinfo info; /* block type, links, etc. */
80 __be16 __count; /* count of active entries */
81 __be16 __level; /* level above leaves (leaf == 0) */
82 } xfs_da_node_hdr_t;
83
84 struct xfs_da3_node_hdr {
85 struct xfs_da3_blkinfo info; /* block type, links, etc. */
86 __be16 __count; /* count of active entries */
87 __be16 __level; /* level above leaves (leaf == 0) */
88 __be32 __pad32;
89 };
90
/* Byte offset of the CRC field within a v3 node header. */
#define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc))
92
93 typedef struct xfs_da_node_entry {
94 __be32 hashval; /* hash value for this descendant */
95 __be32 before; /* Btree block before this key */
96 } xfs_da_node_entry_t;
97
98 typedef struct xfs_da_intnode {
99 struct xfs_da_node_hdr hdr;
100 struct xfs_da_node_entry __btree[];
101 } xfs_da_intnode_t;
102
103 struct xfs_da3_intnode {
104 struct xfs_da3_node_hdr hdr;
105 struct xfs_da_node_entry __btree[];
106 };
107
/*
 * In-core version of the node header to abstract the differences in the v2 and
 * v3 disk format of the headers. Callers need to convert to/from disk format as
 * appropriate.
 *
 * The field names mirror the on-disk blkinfo (forw, back, magic) and node
 * header (count, level) fields, but use plain unsigned integer types rather
 * than the __be* on-disk types.
 */
struct xfs_da3_icnode_hdr {
	__uint32_t	forw;
	__uint32_t	back;
	__uint16_t	magic;
	__uint16_t	count;
	__uint16_t	level;
};
120
121 /*
122 * Directory version 2.
123 *
124 * There are 4 possible formats:
125 * - shortform - embedded into the inode
126 * - single block - data with embedded leaf at the end
127 * - multiple data blocks, single leaf+freeindex block
128 * - data blocks, node and leaf blocks (btree), freeindex blocks
129 *
130 * Note: many node blocks structures and constants are shared with the attr
131 * code and defined in xfs_da_btree.h.
132 */
133
/* 32-bit magic numbers of the v2 directory data/free blocks (ASCII tags). */
#define XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: single block dirs */
#define XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: multiblock dirs */
#define XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F: free index blocks */
137
138 /*
139 * Directory Version 3 With CRCs.
140 *
141 * The tree formats are the same as for version 2 directories. The difference
142 * is in the block header and dirent formats. In many cases the v3 structures
143 * use v2 definitions as they are no different and this makes code sharing much
144 * easier.
145 *
146 * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
147 * format is v2 then they switch to the existing v2 code, or if the format is v3
148 * they implement the v3 functionality. This means the existing dir2 is a mix of
149 * xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called
150 * where there is a difference in the formats, otherwise the code is unchanged.
151 *
152 * Where it is possible, the code decides what to do based on the magic numbers
153 * in the blocks rather than feature bits in the superblock. This means the code
154 * is as independent of the external XFS code as possible and doesn't require
155 * passing struct xfs_mount pointers into places where it isn't really
156 * necessary.
157 *
158 * Version 3 includes:
159 *
160 * - a larger block header for CRC and identification purposes and so the
161 * offsets of all the structures inside the blocks are different.
162 *
163 * - new magic numbers to be able to detect the v2/v3 types on the fly.
164 */
165
/* 32-bit magic numbers of the v3 directory data/free blocks (ASCII tags). */
#define XFS_DIR3_BLOCK_MAGIC	0x58444233	/* XDB3: single block dirs */
#define XFS_DIR3_DATA_MAGIC	0x58444433	/* XDD3: multiblock dirs */
#define XFS_DIR3_FREE_MAGIC	0x58444633	/* XDF3: free index blocks */
169
170 /*
171 * Dirents in version 3 directories have a file type field. Additions to this
172 * list are an on-disk format change, requiring feature bits. Valid values
173 * are as follows:
174 */
#define XFS_DIR3_FT_UNKNOWN		0
#define XFS_DIR3_FT_REG_FILE		1
#define XFS_DIR3_FT_DIR			2
#define XFS_DIR3_FT_CHRDEV		3
#define XFS_DIR3_FT_BLKDEV		4
#define XFS_DIR3_FT_FIFO		5
#define XFS_DIR3_FT_SOCK		6
#define XFS_DIR3_FT_SYMLINK		7
#define XFS_DIR3_FT_WHT			8

/* One more than the largest file type value defined above. */
#define XFS_DIR3_FT_MAX			9
186
187 /*
188 * Byte offset in data block and shortform entry.
189 */
190 typedef __uint16_t xfs_dir2_data_off_t;
191 #define NULLDATAOFF 0xffffU
192 typedef uint xfs_dir2_data_aoff_t; /* argument form */
193
194 /*
195 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
196 * Only need 16 bits, this is the byte offset into the single block form.
197 */
198 typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
199
200 /*
201 * Offset in data space of a data entry.
202 */
203 typedef __uint32_t xfs_dir2_dataptr_t;
204 #define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
205 #define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
206
207 /*
208 * Byte offset in a directory.
209 */
210 typedef xfs_off_t xfs_dir2_off_t;
211
212 /*
213 * Directory block number (logical dirblk in file)
214 */
215 typedef __uint32_t xfs_dir2_db_t;
216
/*
 * Inode number stored as 8 8-bit values.
 */
typedef	struct { __uint8_t i[8]; } xfs_dir2_ino8_t;

/*
 * Inode number stored as 4 8-bit values.
 * Works a lot of the time, when all the inode numbers in a directory
 * fit in 32 bits.
 */
typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;

/* Either encoding of an inode number; sized for the 8-byte form. */
typedef union {
	xfs_dir2_ino8_t	i8;
	xfs_dir2_ino4_t	i4;
} xfs_dir2_inou_t;

/* Largest inode number that fits the 4-byte encoding. */
#define	XFS_DIR2_MAX_SHORT_INUM	((xfs_ino_t)0xffffffffULL)
234
235 /*
236 * Directory layout when stored internal to an inode.
237 *
238 * Small directories are packed as tightly as possible so as to fit into the
239 * literal area of the inode. These "shortform" directories consist of a
240 * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
241 * structures. Due the different inode number storage size and the variable
242 * length name field in the xfs_dir2_sf_entry all these structure are
243 * variable length, and the accessors in this file should be used to iterate
244 * over them.
245 */
246 typedef struct xfs_dir2_sf_hdr {
247 __uint8_t count; /* count of entries */
248 __uint8_t i8count; /* count of 8-byte inode #s */
249 xfs_dir2_inou_t parent; /* parent dir inode number */
250 } __arch_pack xfs_dir2_sf_hdr_t;
251
252 typedef struct xfs_dir2_sf_entry {
253 __u8 namelen; /* actual name length */
254 xfs_dir2_sf_off_t offset; /* saved offset */
255 __u8 name[]; /* name, variable size */
256 /*
257 * A single byte containing the file type field follows the inode
258 * number for version 3 directory entries.
259 *
260 * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
261 * variable offset after the name.
262 */
263 } __arch_pack xfs_dir2_sf_entry_t;
264
xfs_dir2_sf_hdr_size(int i8count)265 static inline int xfs_dir2_sf_hdr_size(int i8count)
266 {
267 return sizeof(struct xfs_dir2_sf_hdr) -
268 (i8count == 0) *
269 (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
270 }
271
272 static inline xfs_dir2_data_aoff_t
xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t * sfep)273 xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
274 {
275 return get_unaligned_be16(&sfep->offset.i);
276 }
277
278 static inline void
xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t * sfep,xfs_dir2_data_aoff_t off)279 xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
280 {
281 put_unaligned_be16(off, &sfep->offset.i);
282 }
283
284 static inline struct xfs_dir2_sf_entry *
xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr * hdr)285 xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
286 {
287 return (struct xfs_dir2_sf_entry *)
288 ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
289 }
290
291 /*
292 * Data block structures.
293 *
294 * A pure data block looks like the following drawing on disk:
295 *
296 * +-------------------------------------------------+
297 * | xfs_dir2_data_hdr_t |
298 * +-------------------------------------------------+
299 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
300 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
301 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
302 * | ... |
303 * +-------------------------------------------------+
304 * | unused space |
305 * +-------------------------------------------------+
306 *
307 * As all the entries are variable size structures the accessors below should
308 * be used to iterate over them.
309 *
310 * In addition to the pure data blocks for the data and node formats,
311 * most structures are also used for the combined data/freespace "block"
312 * format below.
313 */
314
#define	XFS_DIR2_DATA_ALIGN_LOG	3		/* i.e., 8 bytes */
#define	XFS_DIR2_DATA_ALIGN	(1 << XFS_DIR2_DATA_ALIGN_LOG)
#define	XFS_DIR2_DATA_FREE_TAG	0xffff
#define	XFS_DIR2_DATA_FD_COUNT	3

/*
 * Directory address space divided into sections,
 * spaces separated by 32GB.
 */
#define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
#define	XFS_DIR2_DATA_SPACE	0
#define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
327
328 /*
329 * Describe a free area in the data block.
330 *
331 * The freespace will be formatted as a xfs_dir2_data_unused_t.
332 */
333 typedef struct xfs_dir2_data_free {
334 __be16 offset; /* start of freespace */
335 __be16 length; /* length of freespace */
336 } xfs_dir2_data_free_t;
337
338 /*
339 * Header for the data blocks.
340 *
341 * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
342 */
343 typedef struct xfs_dir2_data_hdr {
344 __be32 magic; /* XFS_DIR2_DATA_MAGIC or */
345 /* XFS_DIR2_BLOCK_MAGIC */
346 xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
347 } xfs_dir2_data_hdr_t;
348
349 /*
350 * define a structure for all the verification fields we are adding to the
351 * directory block structures. This will be used in several structures.
352 * The magic number must be the first entry to align with all the dir2
353 * structures so we determine how to decode them just by the magic number.
354 */
355 struct xfs_dir3_blk_hdr {
356 __be32 magic; /* magic number */
357 __be32 crc; /* CRC of block */
358 __be64 blkno; /* first block of the buffer */
359 __be64 lsn; /* sequence number of last write */
360 uuid_t uuid; /* filesystem we belong to */
361 __be64 owner; /* inode that owns the block */
362 };
363
364 struct xfs_dir3_data_hdr {
365 struct xfs_dir3_blk_hdr hdr;
366 xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT];
367 __be32 pad; /* 64 bit alignment */
368 };
369
/* Byte offset of the CRC field within a v3 data block header. */
#define XFS_DIR3_DATA_CRC_OFF  offsetof(struct xfs_dir3_data_hdr, hdr.crc)
371
372 /*
373 * Active entry in a data block.
374 *
375 * Aligned to 8 bytes. After the variable length name field there is a
376 * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
377 *
378 * For dir3 structures, there is file type field between the name and the tag.
379 * This can only be manipulated by helper functions. It is packed hard against
380 * the end of the name so any padding for rounding is between the file type and
381 * the tag.
382 */
383 typedef struct xfs_dir2_data_entry {
384 __be64 inumber; /* inode number */
385 __u8 namelen; /* name length */
386 __u8 name[]; /* name bytes, no null */
387 /* __u8 filetype; */ /* type of inode we point to */
388 /* __be16 tag; */ /* starting offset of us */
389 } xfs_dir2_data_entry_t;
390
391 /*
392 * Unused entry in a data block.
393 *
394 * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed
395 * using xfs_dir2_data_unused_tag_p.
396 */
397 typedef struct xfs_dir2_data_unused {
398 __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
399 __be16 length; /* total free length */
400 /* variable offset */
401 __be16 tag; /* starting offset of us */
402 } xfs_dir2_data_unused_t;
403
404 /*
405 * Pointer to a freespace's tag word.
406 */
407 static inline __be16 *
xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused * dup)408 xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
409 {
410 return (__be16 *)((char *)dup +
411 be16_to_cpu(dup->length) - sizeof(__be16));
412 }
413
414 /*
415 * Leaf block structures.
416 *
417 * A pure leaf block looks like the following drawing on disk:
418 *
419 * +---------------------------+
420 * | xfs_dir2_leaf_hdr_t |
421 * +---------------------------+
422 * | xfs_dir2_leaf_entry_t |
423 * | xfs_dir2_leaf_entry_t |
424 * | xfs_dir2_leaf_entry_t |
425 * | xfs_dir2_leaf_entry_t |
426 * | ... |
427 * +---------------------------+
428 * | xfs_dir2_data_off_t |
429 * | xfs_dir2_data_off_t |
430 * | xfs_dir2_data_off_t |
431 * | ... |
432 * +---------------------------+
433 * | xfs_dir2_leaf_tail_t |
434 * +---------------------------+
435 *
436 * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
437 * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
438 * for directories with separate leaf nodes and free space blocks
439 * (magic = XFS_DIR2_LEAFN_MAGIC).
440 *
441 * As all the entries are variable size structures the accessors below should
442 * be used to iterate over them.
443 */
444
/*
 * Offset of the leaf/node space.  First block in this space
 * is the btree root.
 */
#define	XFS_DIR2_LEAF_SPACE	1
#define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
451
452 /*
453 * Leaf block header.
454 */
455 typedef struct xfs_dir2_leaf_hdr {
456 xfs_da_blkinfo_t info; /* header for da routines */
457 __be16 count; /* count of entries */
458 __be16 stale; /* count of stale entries */
459 } xfs_dir2_leaf_hdr_t;
460
461 struct xfs_dir3_leaf_hdr {
462 struct xfs_da3_blkinfo info; /* header for da routines */
463 __be16 count; /* count of entries */
464 __be16 stale; /* count of stale entries */
465 __be32 pad; /* 64 bit alignment */
466 };
467
/*
 * Leaf header held in plain unsigned integer fields; the field names mirror
 * those of the on-disk leaf headers (blkinfo forw/back/magic, count, stale).
 */
struct xfs_dir3_icleaf_hdr {
	__uint32_t		forw;
	__uint32_t		back;
	__uint16_t		magic;
	__uint16_t		count;
	__uint16_t		stale;
};
475
476 /*
477 * Leaf block entry.
478 */
479 typedef struct xfs_dir2_leaf_entry {
480 __be32 hashval; /* hash value of name */
481 __be32 address; /* address of data entry */
482 } xfs_dir2_leaf_entry_t;
483
484 /*
485 * Leaf block tail.
486 */
487 typedef struct xfs_dir2_leaf_tail {
488 __be32 bestcount;
489 } xfs_dir2_leaf_tail_t;
490
491 /*
492 * Leaf block.
493 */
494 typedef struct xfs_dir2_leaf {
495 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
496 xfs_dir2_leaf_entry_t __ents[]; /* entries */
497 } xfs_dir2_leaf_t;
498
499 struct xfs_dir3_leaf {
500 struct xfs_dir3_leaf_hdr hdr; /* leaf header */
501 struct xfs_dir2_leaf_entry __ents[]; /* entries */
502 };
503
/* Byte offset of the CRC field within a v3 leaf header. */
#define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
505
506 /*
507 * Get address of the bests array in the single-leaf block.
508 */
509 static inline __be16 *
xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail * ltp)510 xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
511 {
512 return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
513 }
514
515 /*
516 * Free space block definitions for the node format.
517 */
518
/*
 * Offset of the freespace index.
 */
#define	XFS_DIR2_FREE_SPACE	2
#define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
524
525 typedef struct xfs_dir2_free_hdr {
526 __be32 magic; /* XFS_DIR2_FREE_MAGIC */
527 __be32 firstdb; /* db of first entry */
528 __be32 nvalid; /* count of valid entries */
529 __be32 nused; /* count of used entries */
530 } xfs_dir2_free_hdr_t;
531
532 typedef struct xfs_dir2_free {
533 xfs_dir2_free_hdr_t hdr; /* block header */
534 __be16 bests[]; /* best free counts */
535 /* unused entries are -1 */
536 } xfs_dir2_free_t;
537
538 struct xfs_dir3_free_hdr {
539 struct xfs_dir3_blk_hdr hdr;
540 __be32 firstdb; /* db of first entry */
541 __be32 nvalid; /* count of valid entries */
542 __be32 nused; /* count of used entries */
543 __be32 pad; /* 64 bit alignment */
544 };
545
546 struct xfs_dir3_free {
547 struct xfs_dir3_free_hdr hdr;
548 __be16 bests[]; /* best free counts */
549 /* unused entries are -1 */
550 };
551
/* Byte offset of the CRC field within a v3 free index block. */
#define XFS_DIR3_FREE_CRC_OFF  offsetof(struct xfs_dir3_free, hdr.hdr.crc)
553
/*
 * In core version of the free block header, abstracted away from on-disk format
 * differences. Use this in the code, and convert to/from the disk version using
 * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
 */
struct xfs_dir3_icfree_hdr {
	__uint32_t	magic;
	__uint32_t	firstdb;
	__uint32_t	nvalid;
	__uint32_t	nused;

};
566
567 /*
568 * Single block format.
569 *
570 * The single block format looks like the following drawing on disk:
571 *
572 * +-------------------------------------------------+
573 * | xfs_dir2_data_hdr_t |
574 * +-------------------------------------------------+
575 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
576 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
577 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
578 * | ... |
579 * +-------------------------------------------------+
580 * | unused space |
581 * +-------------------------------------------------+
582 * | ... |
583 * | xfs_dir2_leaf_entry_t |
584 * | xfs_dir2_leaf_entry_t |
585 * +-------------------------------------------------+
586 * | xfs_dir2_block_tail_t |
587 * +-------------------------------------------------+
588 *
589 * As all the entries are variable size structures the accessors below should
590 * be used to iterate over them.
591 */
592
593 typedef struct xfs_dir2_block_tail {
594 __be32 count; /* count of leaf entries */
595 __be32 stale; /* count of stale lf entries */
596 } xfs_dir2_block_tail_t;
597
598 /*
599 * Pointer to the leaf entries embedded in a data block (1-block format)
600 */
601 static inline struct xfs_dir2_leaf_entry *
xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail * btp)602 xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
603 {
604 return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
605 }
606
607
608 /*
609 * Attribute storage layout
610 *
611 * Attribute lists are structured around Btrees where all the data
612 * elements are in the leaf nodes. Attribute names are hashed into an int,
613 * then that int is used as the index into the Btree. Since the hashval
614 * of an attribute name may not be unique, we may have duplicate keys. The
615 * internal links in the Btree are logical block offsets into the file.
616 *
617 * Struct leaf_entry's are packed from the top. Name/values grow from the
618 * bottom but are not packed. The freemap contains run-length-encoded entries
619 * for the free bytes after the leaf_entry's, but only the N largest such,
620 * smaller runs are dropped. When the freemap doesn't show enough space
621 * for an allocation, we compact the name/value area and try again. If we
622 * still don't have enough space, then we have to split the block. The
623 * name/value structs (both local and remote versions) must be 32bit aligned.
624 *
625 * Since we have duplicate hash keys, for each key that matches, compare
626 * the actual name string. The root and intermediate node search always
627 * takes the first-in-the-block key match found, so we should only have
628 * to work "forw"ard. If none matches, continue with the "forw"ard leaf
629 * nodes until the hash key changes or the attribute name is found.
630 *
631 * We store the fact that an attribute is a ROOT/USER/SECURE attribute in
632 * the leaf_entry. The namespaces are independent only because we also look
633 * at the namespace bit when we are looking for a matching attribute name.
634 *
635 * We also store an "incomplete" bit in the leaf_entry. It shows that an
636 * attribute is in the middle of being created and should not be shown to
637 * the user if we crash during the time that the bit is set. We clear the
638 * bit when we have finished setting up the attribute. We do this because
639 * we cannot create some large attributes inside a single transaction, and we
640 * need some indication that we weren't finished if we crash in the middle.
641 */
#define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
643
644 typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
645 __be16 base; /* base of free region */
646 __be16 size; /* length of free region */
647 } xfs_attr_leaf_map_t;
648
649 typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */
650 xfs_da_blkinfo_t info; /* block type, links, etc. */
651 __be16 count; /* count of active leaf_entry's */
652 __be16 usedbytes; /* num bytes of names/values stored */
653 __be16 firstused; /* first used byte in name area */
654 __u8 holes; /* != 0 if blk needs compaction */
655 __u8 pad1;
656 xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
657 /* N largest free regions */
658 } xfs_attr_leaf_hdr_t;
659
660 typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
661 __be32 hashval; /* hash value of name */
662 __be16 nameidx; /* index into buffer of name/value */
663 __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */
664 __u8 pad2; /* unused pad byte */
665 } xfs_attr_leaf_entry_t;
666
667 typedef struct xfs_attr_leaf_name_local {
668 __be16 valuelen; /* number of bytes in value */
669 __u8 namelen; /* length of name bytes */
670 __u8 nameval[1]; /* name/value bytes */
671 } xfs_attr_leaf_name_local_t;
672
673 typedef struct xfs_attr_leaf_name_remote {
674 __be32 valueblk; /* block number of value bytes */
675 __be32 valuelen; /* number of bytes in value */
676 __u8 namelen; /* length of name bytes */
677 __u8 name[1]; /* name bytes */
678 } xfs_attr_leaf_name_remote_t;
679
680 typedef struct xfs_attr_leafblock {
681 xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
682 xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
683 /*
684 * The rest of the block contains the following structures after the
685 * leaf entries, growing from the bottom up. The variables are never
686 * referenced and definining them can actually make gcc optimize away
687 * accesses to the 'entries' array above index 0 so don't do that.
688 *
689 * xfs_attr_leaf_name_local_t namelist;
690 * xfs_attr_leaf_name_remote_t valuelist;
691 */
692 } xfs_attr_leafblock_t;
693
694 /*
695 * CRC enabled leaf structures. Called "version 3" structures to match the
696 * version number of the directory and dablk structures for this feature, and
697 * attr2 is already taken by the variable inode attribute fork size feature.
698 */
699 struct xfs_attr3_leaf_hdr {
700 struct xfs_da3_blkinfo info;
701 __be16 count;
702 __be16 usedbytes;
703 __be16 firstused;
704 __u8 holes;
705 __u8 pad1;
706 struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
707 __be32 pad2; /* 64 bit alignment */
708 };
709
/* Byte offset of the CRC field within a v3 attribute leaf header. */
#define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc))
711
712 struct xfs_attr3_leafblock {
713 struct xfs_attr3_leaf_hdr hdr;
714 struct xfs_attr_leaf_entry entries[1];
715
716 /*
717 * The rest of the block contains the following structures after the
718 * leaf entries, growing from the bottom up. The variables are never
719 * referenced, the locations accessed purely from helper functions.
720 *
721 * struct xfs_attr_leaf_name_local
722 * struct xfs_attr_leaf_name_remote
723 */
724 };
725
726 /*
727 * incore, neutral version of the attribute leaf header
728 */
729 struct xfs_attr3_icleaf_hdr {
730 __uint32_t forw;
731 __uint32_t back;
732 __uint16_t magic;
733 __uint16_t count;
734 __uint16_t usedbytes;
735 /*
736 * firstused is 32-bit here instead of 16-bit like the on-disk variant
737 * to support maximum fsb size of 64k without overflow issues throughout
738 * the attr code. Instead, the overflow condition is handled on
739 * conversion to/from disk.
740 */
741 __uint32_t firstused;
742 __u8 holes;
743 struct {
744 __uint16_t base;
745 __uint16_t size;
746 } freemap[XFS_ATTR_LEAF_MAPSIZE];
747 };
748
/*
 * Special value to represent fs block size in the leaf header firstused field.
 * Only used when block size overflows the 2-bytes available on disk.
 */
#define XFS_ATTR3_LEAF_NULLOFF	0
754
/*
 * Flags used in the leaf_entry[i].flags field.
 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
 * on the system call, they are "or"ed together for various operations.
 */
#define	XFS_ATTR_LOCAL_BIT	0	/* attr is stored locally */
#define	XFS_ATTR_ROOT_BIT	1	/* limit access to trusted attrs */
#define	XFS_ATTR_SECURE_BIT	2	/* limit access to secure attrs */
#define	XFS_ATTR_INCOMPLETE_BIT	7	/* attr in middle of create/delete */
#define XFS_ATTR_LOCAL		(1 << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT		(1 << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE		(1 << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_INCOMPLETE	(1 << XFS_ATTR_INCOMPLETE_BIT)

/*
 * Conversion macros for converting namespace bits from argument flags
 * to ondisk flags.
 */
#define XFS_ATTR_NSP_ARGS_MASK		(ATTR_ROOT | ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | XFS_ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK(flags)	((flags) & XFS_ATTR_NSP_ONDISK_MASK)
#define XFS_ATTR_NSP_ARGS(flags)	((flags) & XFS_ATTR_NSP_ARGS_MASK)
#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x)	(((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
					 ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x)	(((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
					 ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
781
/*
 * Alignment for namelist and valuelist entries (since they are mixed
 * there can be only one alignment value)
 */
#define	XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t))
787
788 static inline int
xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock * leafp)789 xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
790 {
791 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
792 return sizeof(struct xfs_attr3_leaf_hdr);
793 return sizeof(struct xfs_attr_leaf_hdr);
794 }
795
796 static inline struct xfs_attr_leaf_entry *
xfs_attr3_leaf_entryp(xfs_attr_leafblock_t * leafp)797 xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
798 {
799 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
800 return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
801 return &leafp->entries[0];
802 }
803
804 /*
805 * Cast typed pointers for "local" and "remote" name/value structs.
806 */
807 static inline char *
xfs_attr3_leaf_name(xfs_attr_leafblock_t * leafp,int idx)808 xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
809 {
810 struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
811
812 return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
813 }
814
815 static inline xfs_attr_leaf_name_remote_t *
xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t * leafp,int idx)816 xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
817 {
818 return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
819 }
820
821 static inline xfs_attr_leaf_name_local_t *
xfs_attr3_leaf_name_local(xfs_attr_leafblock_t * leafp,int idx)822 xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
823 {
824 return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
825 }
826
827 /*
828 * Calculate total bytes used (including trailing pad for alignment) for
829 * a "local" name/value structure, a "remote" name/value structure, and
830 * a pointer which might be either.
831 */
xfs_attr_leaf_entsize_remote(int nlen)832 static inline int xfs_attr_leaf_entsize_remote(int nlen)
833 {
834 return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
835 XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
836 }
837
xfs_attr_leaf_entsize_local(int nlen,int vlen)838 static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
839 {
840 return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
841 XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
842 }
843
/*
 * Return three quarters of @bsize, computed as bsize/2 + bsize/4 with shifts.
 * The name indicates this is the size limit for a "local" entry.
 */
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
{
	return (((bsize) >> 1) + ((bsize) >> 2));
}
848
849
850
851 /*
852 * Remote attribute block format definition
853 *
854 * There is one of these headers per filesystem block in a remote attribute.
855 * This is done to ensure there is a 1:1 mapping between the attribute value
856 * length and the number of blocks needed to store the attribute. This makes the
857 * verification of a buffer a little more complex, but greatly simplifies the
858 * allocation, reading and writing of these attributes as we don't have to guess
859 * the number of blocks needed to store the attribute data.
860 */
#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */
862
863 struct xfs_attr3_rmt_hdr {
864 __be32 rm_magic;
865 __be32 rm_offset;
866 __be32 rm_bytes;
867 __be32 rm_crc;
868 uuid_t rm_uuid;
869 __be64 rm_owner;
870 __be64 rm_blkno;
871 __be64 rm_lsn;
872 };
873
/* Byte offset of the CRC field within a remote attribute block header. */
#define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
875
/*
 * Bytes of a @bufsize sized buffer left after the header: the remote
 * attribute header size is subtracted only when xfs_sb_version_hascrc()
 * reports true for the mount's superblock.
 */
#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\
	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
			sizeof(struct xfs_attr3_rmt_hdr) : 0))
879
880 #endif /* __XFS_DA_FORMAT_H__ */
881