• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3  *  GRUB  --  GRand Unified Bootloader
4  *  Copyright (C) 2000, 2001  Free Software Foundation, Inc.
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19  */
20 
21 #ifdef FSYS_REISERFS
22 #include "shared.h"
23 #include "filesys.h"
24 
25 #undef REISERDEBUG
26 
27 /* Some parts of this code (mainly the structures and defines) are
28  * from the original reiser fs code, as found in the linux kernel.
29  */
30 
31 /* include/asm-i386/types.h */
32 typedef __signed__ char __s8;
33 typedef unsigned char __u8;
34 typedef __signed__ short __s16;
35 typedef unsigned short __u16;
36 typedef __signed__ int __s32;
37 typedef unsigned int __u32;
38 typedef unsigned long long __u64;
39 
40 /* linux/posix_type.h */
41 typedef long linux_off_t;
42 
43 /* linux/little_endian.h */
44 #define __cpu_to_le64(x) ((__u64) (x))
45 #define __le64_to_cpu(x) ((__u64) (x))
46 #define __cpu_to_le32(x) ((__u32) (x))
47 #define __le32_to_cpu(x) ((__u32) (x))
48 #define __cpu_to_le16(x) ((__u16) (x))
49 #define __le16_to_cpu(x) ((__u16) (x))
50 
51 /* include/linux/reiser_fs.h */
52 /* This is the new super block of a journaling reiserfs system */
53 struct reiserfs_super_block
54 {
55   __u32 s_block_count;			/* blocks count         */
56   __u32 s_free_blocks;                  /* free blocks count    */
57   __u32 s_root_block;           	/* root block number    */
58   __u32 s_journal_block;           	/* journal block number    */
59   __u32 s_journal_dev;           	/* journal device number  */
60   __u32 s_journal_size; 		/* size of the journal on FS creation.  used to make sure they don't overflow it */
61   __u32 s_journal_trans_max;            /* max number of blocks in a transaction.  */
62   __u32 s_journal_magic;                /* random value made on fs creation */
63   __u32 s_journal_max_batch;            /* max number of blocks to batch into a trans */
64   __u32 s_journal_max_commit_age;       /* in seconds, how old can an async commit be */
65   __u32 s_journal_max_trans_age;        /* in seconds, how old can a transaction be */
66   __u16 s_blocksize;                   	/* block size           */
67   __u16 s_oid_maxsize;			/* max size of object id array  */
68   __u16 s_oid_cursize;			/* current size of object id array */
69   __u16 s_state;                       	/* valid or error       */
70   char s_magic[16];                     /* reiserfs magic string indicates that file system is reiserfs */
71   __u16 s_tree_height;                  /* height of disk tree */
72   __u16 s_bmap_nr;                      /* amount of bitmap blocks needed to address each block of file system */
73   __u16 s_version;
74   char s_unused[128];			/* zero filled by mkreiserfs */
75 };
76 
77 #define REISERFS_MAX_SUPPORTED_VERSION 2
78 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
79 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
80 #define REISER3FS_SUPER_MAGIC_STRING "ReIsEr3Fs"
81 
82 #define MAX_HEIGHT 7
83 
84 /* must be correct to keep the desc and commit structs at 4k */
85 #define JOURNAL_TRANS_HALF 1018
86 
87 /* first block written in a commit.  */
88 struct reiserfs_journal_desc {
89   __u32 j_trans_id;			/* id of commit */
90   __u32 j_len;				/* length of commit. len +1 is the commit block */
91   __u32 j_mount_id;			/* mount id of this trans*/
92   __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
93   char j_magic[12];
94 };
95 
96 /* last block written in a commit */
97 struct reiserfs_journal_commit {
98   __u32 j_trans_id;			/* must match j_trans_id from the desc block */
99   __u32 j_len;			/* ditto */
100   __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
101   char j_digest[16];			/* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
102 };
103 
104 /* this header block gets written whenever a transaction is considered
105    fully flushed, and is more recent than the last fully flushed
106    transaction.
107    fully flushed means all the log blocks and all the real blocks are
108    on disk, and this transaction does not need to be replayed.
109 */
110 struct reiserfs_journal_header {
111   /* id of last fully flushed transaction */
112   __u32 j_last_flush_trans_id;
113   /* offset in the log of where to start replay after a crash */
114   __u32 j_first_unflushed_offset;
115   /* mount id to detect very old transactions */
116   __u32 j_mount_id;
117 };
118 
119 /* magic string to find desc blocks in the journal */
120 #define JOURNAL_DESC_MAGIC "ReIsErLB"
121 
122 
123 /*
124  * directories use this key as well as old files
125  */
126 struct offset_v1
127 {
128   /*
129    * for regular files this is the offset to the first byte of the
130    * body, contained in the object-item, as measured from the start of
131    * the entire body of the object.
132    *
133    * for directory entries, k_offset consists of hash derived from
134    * hashing the name and using few bits (23 or more) of the resulting
135    * hash, and generation number that allows distinguishing names with
136    * hash collisions. If number of collisions overflows generation
137    * number, we return EEXIST.  High order bit is 0 always
138    */
139   __u32 k_offset;
140   __u32 k_uniqueness;
141 };
142 
143 struct offset_v2
144 {
145   /*
146    * for regular files this is the offset to the first byte of the
147    * body, contained in the object-item, as measured from the start of
148    * the entire body of the object.
149    *
150    * for directory entries, k_offset consists of hash derived from
151    * hashing the name and using few bits (23 or more) of the resulting
152    * hash, and generation number that allows distinguishing names with
153    * hash collisions. If number of collisions overflows generation
154    * number, we return EEXIST.  High order bit is 0 always
155    */
156   __u64 k_offset:60;
157   __u64 k_type: 4;
158 };
159 
160 
161 struct key
162 {
163   /* packing locality: by default parent directory object id */
164   __u32 k_dir_id;
165   /* object identifier */
166   __u32 k_objectid;
167   /* the offset and node type (old and new form) */
168   union
169   {
170     struct offset_v1 v1;
171     struct offset_v2 v2;
172   }
173   u;
174 };
175 
176 #define KEY_SIZE (sizeof (struct key))
177 
178 /* Header of a disk block.  More precisely, header of a formatted leaf
179    or internal node, and not the header of an unformatted node. */
180 struct block_head
181 {
182   __u16 blk_level;        /* Level of a block in the tree. */
183   __u16 blk_nr_item;      /* Number of keys/items in a block. */
184   __u16 blk_free_space;   /* Block free space in bytes. */
185   struct key  blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
186 				      only) */
187 };
188 #define BLKH_SIZE (sizeof (struct block_head))
189 #define DISK_LEAF_NODE_LEVEL  1 /* Leaf node level.                       */
190 
191 struct item_head
192 {
193   struct key ih_key; 	/* Everything in the tree is found by searching for it based on its key.*/
194 
195   union
196   {
197     __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
198 			    is an indirect item.  This equals 0xFFFF iff this is a direct item or
199 			    stat data item. Note that the key, not this field, is used to determine
200 			    the item type, and thus which field this union contains. */
201     __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
202 			     entries in the directory item. */
203   }
204   u;
205   __u16 ih_item_len;           /* total size of the item body                  */
206   __u16 ih_item_location;      /* an offset to the item body within the block  */
207   __u16 ih_version;	       /* ITEM_VERSION_1 for all old items,
208 				  ITEM_VERSION_2 for new ones.
209 				  Highest bit is set by fsck
210                                   temporary, cleaned after all done */
211 };
212 /* size of item header     */
213 #define IH_SIZE (sizeof (struct item_head))
214 
215 #define ITEM_VERSION_1 0
216 #define ITEM_VERSION_2 1
217 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
218 			   ? (ih)->ih_key.u.v1.k_offset \
219 			   : (ih)->ih_key.u.v2.k_offset)
220 
221 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
222 				 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
223 				 : (ih)->ih_key.u.v2.k_type == V2_##type)
224 
225 struct disk_child
226 {
227   unsigned long       dc_block_number;              /* Disk child's block number. */
228   unsigned short      dc_size;		            /* Disk child's used space.   */
229 };
230 
231 #define DC_SIZE (sizeof (struct disk_child))
232 
233 /* Stat Data on disk.
234  *
235  * Note that reiserfs has two different forms of stat data.  Luckily
236  * the fields needed by grub are at the same position.
237  */
238 struct stat_data
239 {
240   __u16 sd_mode;	/* file type, permissions */
241   __u16 sd_notused1[3]; /* fields not needed by reiserfs */
242   __u32 sd_size;	/* file size */
243   __u32 sd_size_hi;	/* file size high 32 bits (since version 2) */
244 };
245 
246 struct reiserfs_de_head
247 {
248   __u32 deh_offset;  /* third component of the directory entry key */
249   __u32 deh_dir_id;  /* objectid of the parent directory of the
250 			object, that is referenced by directory entry */
251   __u32 deh_objectid;/* objectid of the object, that is referenced by
252                         directory entry */
253   __u16 deh_location;/* offset of name in the whole item */
254   __u16 deh_state;   /* whether 1) entry contains stat data (for
255 			future), and 2) whether entry is hidden
256 			(unlinked) */
257 };
258 
259 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
260 
261 #define DEH_Statdata (1 << 0)			/* not used now */
262 #define DEH_Visible  (1 << 2)
263 
264 #define SD_OFFSET  0
265 #define SD_UNIQUENESS 0
266 #define DOT_OFFSET 1
267 #define DOT_DOT_OFFSET 2
268 #define DIRENTRY_UNIQUENESS 500
269 
270 #define V1_TYPE_STAT_DATA 0x0
271 #define V1_TYPE_DIRECT 0xffffffff
272 #define V1_TYPE_INDIRECT 0xfffffffe
273 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
274 #define V2_TYPE_STAT_DATA 0
275 #define V2_TYPE_INDIRECT 1
276 #define V2_TYPE_DIRECT 2
277 #define V2_TYPE_DIRENTRY 3
278 
279 #define REISERFS_ROOT_OBJECTID 2
280 #define REISERFS_ROOT_PARENT_OBJECTID 1
281 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
282 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
283 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
284 #define REISERFS_OLD_BLOCKSIZE 4096
285 
286 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
287 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
288 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
289 
290 #define PATH_MAX       1024	/* include/linux/limits.h */
291 #define MAX_LINK_COUNT    5	/* number of symbolic links to follow */
292 
293 /* The size of the node cache */
294 #define FSYSREISER_CACHE_SIZE 24*1024
295 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
296 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
297 
298 /* Info about currently opened file */
299 struct fsys_reiser_fileinfo
300 {
301   __u32 k_dir_id;
302   __u32 k_objectid;
303 };
304 
305 /* In memory info about the currently mounted filesystem */
306 struct fsys_reiser_info
307 {
308   /* The last read item head */
309   struct item_head *current_ih;
310   /* The last read item */
311   char *current_item;
312   /* The information for the currently opened file */
313   struct fsys_reiser_fileinfo fileinfo;
314   /* The start of the journal */
315   __u32 journal_block;
316   /* The size of the journal */
317   __u32 journal_block_count;
318   /* The first valid descriptor block in journal
319      (relative to journal_block) */
320   __u32 journal_first_desc;
321 
322   /* The ReiserFS version. */
323   __u16 version;
324   /* The current depth of the reiser tree. */
325   __u16 tree_depth;
326   /* SECTOR_SIZE << blocksize_shift == blocksize. */
327   __u8  blocksize_shift;
328   /* 1 << full_blocksize_shift == blocksize. */
329   __u8  fullblocksize_shift;
330   /* The reiserfs block size  (must be a power of 2) */
331   __u16 blocksize;
332   /* The number of cached tree nodes */
333   __u16 cached_slots;
334   /* The number of valid transactions in journal */
335   __u16 journal_transactions;
336 
337   unsigned int blocks[MAX_HEIGHT];
338   unsigned int next_key_nr[MAX_HEIGHT];
339 };
340 
341 /* The cached s+tree blocks in FSYS_BUF,  see below
342  * for a more detailed description.
343  */
344 #define ROOT     ((char *) ((int) FSYS_BUF))
345 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
346 #define LEAF     CACHE (DISK_LEAF_NODE_LEVEL)
347 
348 #define BLOCKHEAD(cache) ((struct block_head *) cache)
349 #define ITEMHEAD         ((struct item_head  *) ((int) LEAF + BLKH_SIZE))
350 #define KEY(cache)       ((struct key        *) ((int) cache + BLKH_SIZE))
351 #define DC(cache)        ((struct disk_child *) \
352 			  ((int) cache + BLKH_SIZE + KEY_SIZE * nr_item))
353 /* The fsys_reiser_info block.
354  */
355 #define INFO \
356     ((struct fsys_reiser_info *) ((int) FSYS_BUF + FSYSREISER_CACHE_SIZE))
357 /*
358  * The journal cache.  For each transaction it contains the number of
359  * blocks followed by the real block numbers of this transaction.
360  *
361  * If the block numbers of some transaction won't fit in this space,
362  * this list is stopped with a 0xffffffff marker and the remaining
363  * uncommitted transactions aren't cached.
364  */
365 #define JOURNAL_START    ((__u32 *) (INFO + 1))
366 #define JOURNAL_END      ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
367 
368 
369 static __inline__ unsigned long
log2(unsigned long word)370 log2 (unsigned long word)
371 {
372   __asm__ ("bsfl %1,%0"
373 	   : "=r" (word)
374 	   : "r" (word));
375   return word;
376 }
377 
378 static __inline__ int
is_power_of_two(unsigned long word)379 is_power_of_two (unsigned long word)
380 {
381   return (word & -word) == word;
382 }
383 
384 static int
journal_read(int block,int len,char * buffer)385 journal_read (int block, int len, char *buffer)
386 {
387   return devread ((INFO->journal_block + block) << INFO->blocksize_shift,
388 		  0, len, buffer);
389 }
390 
391 /* Read a block from ReiserFS file system, taking the journal into
392  * account.  If the block nr is in the journal, the block from the
393  * journal taken.
394  */
395 static int
block_read(int blockNr,int start,int len,char * buffer)396 block_read (int blockNr, int start, int len, char *buffer)
397 {
398   int transactions = INFO->journal_transactions;
399   int desc_block = INFO->journal_first_desc;
400   int journal_mask = INFO->journal_block_count - 1;
401   int translatedNr = blockNr;
402   __u32 *journal_table = JOURNAL_START;
403   while (transactions-- > 0)
404     {
405       int i = 0;
406       int j_len;
407       if (*journal_table != 0xffffffff)
408 	{
409 	  /* Search for the blockNr in cached journal */
410 	  j_len = *journal_table++;
411 	  while (i++ < j_len)
412 	    {
413 	      if (*journal_table++ == blockNr)
414 		{
415 		  journal_table += j_len - i;
416 		  goto found;
417 		}
418 	    }
419 	}
420       else
421 	{
422 	  /* This is the end of cached journal marker.  The remaining
423 	   * transactions are still on disk.
424 	   */
425 	  struct reiserfs_journal_desc   desc;
426 	  struct reiserfs_journal_commit commit;
427 
428 	  if (! journal_read (desc_block, sizeof (desc), (char *) &desc))
429 	    return 0;
430 
431 	  j_len = desc.j_len;
432 	  while (i < j_len && i < JOURNAL_TRANS_HALF)
433 	    if (desc.j_realblock[i++] == blockNr)
434 	      goto found;
435 
436 	  if (j_len >= JOURNAL_TRANS_HALF)
437 	    {
438 	      int commit_block = (desc_block + 1 + j_len) & journal_mask;
439 	      if (! journal_read (commit_block,
440 				  sizeof (commit), (char *) &commit))
441 		return 0;
442 	      while (i < j_len)
443 		if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
444 		  goto found;
445 	    }
446 	}
447       goto not_found;
448 
449     found:
450       translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
451 #ifdef REISERDEBUG
452       printf ("block_read: block %d is mapped to journal block %d.\n",
453 	      blockNr, translatedNr - INFO->journal_block);
454 #endif
455       /* We must continue the search, as this block may be overwritten
456        * in later transactions.
457        */
458     not_found:
459       desc_block = (desc_block + 2 + j_len) & journal_mask;
460     }
461   return devread (translatedNr << INFO->blocksize_shift, start, len, buffer);
462 }
463 
464 /* Init the journal data structure.  We try to cache as much as
465  * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
466  * we can still read the rest from the disk on demand.
467  *
468  * The first number of valid transactions and the descriptor block of the
469  * first valid transaction are held in INFO.  The transactions are all
470  * adjacent, but we must take care of the journal wrap around.
471  */
472 static int
journal_init(void)473 journal_init (void)
474 {
475   unsigned int block_count = INFO->journal_block_count;
476   unsigned int desc_block;
477   unsigned int commit_block;
478   unsigned int next_trans_id;
479   struct reiserfs_journal_header header;
480   struct reiserfs_journal_desc   desc;
481   struct reiserfs_journal_commit commit;
482   __u32 *journal_table = JOURNAL_START;
483 
484   journal_read (block_count, sizeof (header), (char *) &header);
485   desc_block = header.j_first_unflushed_offset;
486   if (desc_block >= block_count)
487     return 0;
488 
489   INFO->journal_first_desc = desc_block;
490   next_trans_id = header.j_last_flush_trans_id + 1;
491 
492 #ifdef REISERDEBUG
493   printf ("journal_init: last flushed %d\n",
494 	  header.j_last_flush_trans_id);
495 #endif
496 
497   while (1)
498     {
499       journal_read (desc_block, sizeof (desc), (char *) &desc);
500       if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0
501 	  || desc.j_trans_id != next_trans_id
502 	  || desc.j_mount_id != header.j_mount_id)
503 	/* no more valid transactions */
504 	break;
505 
506       commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
507       journal_read (commit_block, sizeof (commit), (char *) &commit);
508       if (desc.j_trans_id != commit.j_trans_id
509 	  || desc.j_len != commit.j_len)
510 	/* no more valid transactions */
511 	break;
512 
513 #ifdef REISERDEBUG
514       printf ("Found valid transaction %d/%d at %d.\n",
515 	      desc.j_trans_id, desc.j_mount_id, desc_block);
516 #endif
517 
518       next_trans_id++;
519       if (journal_table < JOURNAL_END)
520 	{
521 	  if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
522 	    {
523 	      /* The table is almost full; mark the end of the cached
524 	       * journal.*/
525 	      *journal_table = 0xffffffff;
526 	      journal_table = JOURNAL_END;
527 	    }
528 	  else
529 	    {
530 	      int i;
531 	      /* Cache the length and the realblock numbers in the table.
532 	       * The block number of descriptor can easily be computed.
533 	       * and need not to be stored here.
534 	       */
535 	      *journal_table++ = desc.j_len;
536 	      for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
537 		{
538 		  *journal_table++ = desc.j_realblock[i];
539 #ifdef REISERDEBUG
540 		  printf ("block %d is in journal %d.\n",
541 			  desc.j_realblock[i], desc_block);
542 #endif
543 		}
544 	      for (     ; i < desc.j_len; i++)
545 		{
546 		  *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
547 #ifdef REISERDEBUG
548 		  printf ("block %d is in journal %d.\n",
549 			  commit.j_realblock[i-JOURNAL_TRANS_HALF],
550 			  desc_block);
551 #endif
552 		}
553 	    }
554 	}
555       desc_block = (commit_block + 1) & (block_count - 1);
556     }
557 #ifdef REISERDEBUG
558   printf ("Transaction %d/%d at %d isn't valid.\n",
559 	  desc.j_trans_id, desc.j_mount_id, desc_block);
560 #endif
561 
562   INFO->journal_transactions
563     = next_trans_id - header.j_last_flush_trans_id - 1;
564   return errnum == 0;
565 }
566 
567 /* check filesystem types and read superblock into memory buffer */
568 int
reiserfs_mount(void)569 reiserfs_mount (void)
570 {
571   struct reiserfs_super_block super;
572   int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
573 
574   if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
575       || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
576 		(char *) &super)
577       || (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
578 	  && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
579 	  && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
580       || (/* check that this is not a copy inside the journal log */
581 	  super.s_journal_block * super.s_blocksize
582 	  <= REISERFS_DISK_OFFSET_IN_BYTES))
583     {
584       /* Try old super block position */
585       superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
586       if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
587 	  || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
588 			(char *) &super))
589 	return 0;
590 
591       if (substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) > 0
592 	  && substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
593 	  && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
594 	{
595 	  /* pre journaling super block ? */
596 	  if (substring (REISERFS_SUPER_MAGIC_STRING,
597 			 (char*) ((int) &super + 20)) > 0)
598 	    return 0;
599 
600 	  super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
601 	  super.s_journal_block = 0;
602 	  super.s_version = 0;
603 	}
604     }
605 
606   /* check the version number.  */
607   if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
608     return 0;
609 
610   INFO->version = super.s_version;
611   INFO->blocksize = super.s_blocksize;
612   INFO->fullblocksize_shift = log2 (super.s_blocksize);
613   INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
614   INFO->cached_slots =
615     (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
616 
617 #ifdef REISERDEBUG
618   printf ("reiserfs_mount: version=%d, blocksize=%d\n",
619 	  INFO->version, INFO->blocksize);
620 #endif /* REISERDEBUG */
621 
622   /* Clear node cache. */
623   memset (INFO->blocks, 0, sizeof (INFO->blocks));
624 
625   if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
626       || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
627       || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
628     return 0;
629 
630   /* Initialize journal code.  If something fails we end with zero
631    * journal_transactions, so we don't access the journal at all.
632    */
633   INFO->journal_transactions = 0;
634   if (super.s_journal_block != 0 && super.s_journal_dev == 0)
635     {
636       INFO->journal_block = super.s_journal_block;
637       INFO->journal_block_count = super.s_journal_size;
638       if (is_power_of_two (INFO->journal_block_count))
639 	journal_init ();
640 
641       /* Read in super block again, maybe it is in the journal */
642       block_read (superblock >> INFO->blocksize_shift,
643 		  0, sizeof (struct reiserfs_super_block), (char *) &super);
644     }
645 
646   if (! block_read (super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
647     return 0;
648 
649   INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
650 
651 #ifdef REISERDEBUG
652   printf ("root read_in: block=%d, depth=%d\n",
653 	  super.s_root_block, INFO->tree_depth);
654 #endif /* REISERDEBUG */
655 
656   if (INFO->tree_depth >= MAX_HEIGHT)
657     return 0;
658   if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
659     {
660       /* There is only one node in the whole filesystem,
661        * which is simultanously leaf and root */
662       memcpy (LEAF, ROOT, INFO->blocksize);
663     }
664   return 1;
665 }
666 
667 /***************** TREE ACCESSING METHODS *****************************/
668 
669 /* I assume you are familiar with the ReiserFS tree, if not go to
670  * http://www.namesys.com/content_table.html
671  *
672  * My tree node cache is organized as following
673  *   0   ROOT node
674  *   1   LEAF node  (if the ROOT is also a LEAF it is copied here
675  *   2-n other nodes on current path from bottom to top.
676  *       if there is not enough space in the cache, the top most are
677  *       omitted.
678  *
679  * I have only two methods to find a key in the tree:
680  *   search_stat(dir_id, objectid) searches for the stat entry (always
681  *       the first entry) of an object.
682  *   next_key() gets the next key in tree order.
683  *
684  * This means, that I can only sequential reads of files are
685  * efficient, but this really doesn't hurt for grub.
686  */
687 
688 /* Read in the node at the current path and depth into the node cache.
689  * You must set INFO->blocks[depth] before.
690  */
691 static char *
read_tree_node(unsigned int blockNr,int depth)692 read_tree_node (unsigned int blockNr, int depth)
693 {
694   char* cache = CACHE(depth);
695   int num_cached = INFO->cached_slots;
696   if (depth < num_cached)
697     {
698       /* This is the cached part of the path.  Check if same block is
699        * needed.
700        */
701       if (blockNr == INFO->blocks[depth])
702 	return cache;
703     }
704   else
705     cache = CACHE(num_cached);
706 
707 #ifdef REISERDEBUG
708   printf ("  next read_in: block=%d (depth=%d)\n",
709 	  blockNr, depth);
710 #endif /* REISERDEBUG */
711   if (! block_read (blockNr, 0, INFO->blocksize, cache))
712     return 0;
713   /* Make sure it has the right node level */
714   if (BLOCKHEAD (cache)->blk_level != depth)
715     {
716       errnum = ERR_FSYS_CORRUPT;
717       return 0;
718     }
719 
720   INFO->blocks[depth] = blockNr;
721   return cache;
722 }
723 
724 /* Get the next key, i.e. the key following the last retrieved key in
725  * tree order.  INFO->current_ih and
726  * INFO->current_info are adapted accordingly.  */
727 static int
next_key(void)728 next_key (void)
729 {
730   int depth;
731   struct item_head *ih = INFO->current_ih + 1;
732   char *cache;
733 
734 #ifdef REISERDEBUG
735   printf ("next_key:\n  old ih: key %d:%d:%d:%d version:%d\n",
736 	  INFO->current_ih->ih_key.k_dir_id,
737 	  INFO->current_ih->ih_key.k_objectid,
738 	  INFO->current_ih->ih_key.u.v1.k_offset,
739 	  INFO->current_ih->ih_key.u.v1.k_uniqueness,
740 	  INFO->current_ih->ih_version);
741 #endif /* REISERDEBUG */
742 
743   if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
744     {
745       depth = DISK_LEAF_NODE_LEVEL;
746       /* The last item, was the last in the leaf node.
747        * Read in the next block
748        */
749       do
750 	{
751 	  if (depth == INFO->tree_depth)
752 	    {
753 	      /* There are no more keys at all.
754 	       * Return a dummy item with MAX_KEY */
755 	      ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
756 	      goto found;
757 	    }
758 	  depth++;
759 #ifdef REISERDEBUG
760 	  printf ("  depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
761 #endif /* REISERDEBUG */
762 	}
763       while (INFO->next_key_nr[depth] == 0);
764 
765       if (depth == INFO->tree_depth)
766 	cache = ROOT;
767       else if (depth <= INFO->cached_slots)
768 	cache = CACHE (depth);
769       else
770 	{
771 	  cache = read_tree_node (INFO->blocks[depth], depth);
772 	  if (! cache)
773 	    return 0;
774 	}
775 
776       do
777 	{
778 	  int nr_item = BLOCKHEAD (cache)->blk_nr_item;
779 	  int key_nr = INFO->next_key_nr[depth]++;
780 #ifdef REISERDEBUG
781 	  printf ("  depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
782 #endif /* REISERDEBUG */
783 	  if (key_nr == nr_item)
784 	    /* This is the last item in this block, set the next_key_nr to 0 */
785 	    INFO->next_key_nr[depth] = 0;
786 
787 	  cache = read_tree_node (DC (cache)[key_nr].dc_block_number, --depth);
788 	  if (! cache)
789 	    return 0;
790 	}
791       while (depth > DISK_LEAF_NODE_LEVEL);
792 
793       ih = ITEMHEAD;
794     }
795  found:
796   INFO->current_ih   = ih;
797   INFO->current_item = &LEAF[ih->ih_item_location];
798 #ifdef REISERDEBUG
799   printf ("  new ih: key %d:%d:%d:%d version:%d\n",
800 	  INFO->current_ih->ih_key.k_dir_id,
801 	  INFO->current_ih->ih_key.k_objectid,
802 	  INFO->current_ih->ih_key.u.v1.k_offset,
803 	  INFO->current_ih->ih_key.u.v1.k_uniqueness,
804 	  INFO->current_ih->ih_version);
805 #endif /* REISERDEBUG */
806   return 1;
807 }
808 
809 /* preconditions: reiserfs_mount already executed, therefore
810  *   INFO block is valid
811  * returns: 0 if error (errnum is set),
812  *   nonzero iff we were able to find the key successfully.
813  * postconditions: on a nonzero return, the current_ih and
814  *   current_item fields describe the key that equals the
815  *   searched key.  INFO->next_key contains the next key after
816  *   the searched key.
817  * side effects: messes around with the cache.
818  */
819 static int
search_stat(__u32 dir_id,__u32 objectid)820 search_stat (__u32 dir_id, __u32 objectid)
821 {
822   char *cache;
823   int depth;
824   int nr_item;
825   int i;
826   struct item_head *ih;
827 #ifdef REISERDEBUG
828   printf ("search_stat:\n  key %d:%d:0:0\n", dir_id, objectid);
829 #endif /* REISERDEBUG */
830 
831   depth = INFO->tree_depth;
832   cache = ROOT;
833 
834   while (depth > DISK_LEAF_NODE_LEVEL)
835     {
836       struct key *key;
837       nr_item = BLOCKHEAD (cache)->blk_nr_item;
838 
839       key = KEY (cache);
840 
841       for (i = 0; i < nr_item; i++)
842 	{
843 	  if (key->k_dir_id > dir_id
844 	      || (key->k_dir_id == dir_id
845 		  && (key->k_objectid > objectid
846 		      || (key->k_objectid == objectid
847 			  && (key->u.v1.k_offset
848 			      | key->u.v1.k_uniqueness) > 0))))
849 	    break;
850 	  key++;
851 	}
852 
853 #ifdef REISERDEBUG
854       printf ("  depth=%d, i=%d/%d\n", depth, i, nr_item);
855 #endif /* REISERDEBUG */
856       INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
857       cache = read_tree_node (DC (cache)[i].dc_block_number, --depth);
858       if (! cache)
859 	return 0;
860     }
861 
862   /* cache == LEAF */
863   nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
864   ih = ITEMHEAD;
865   for (i = 0; i < nr_item; i++)
866     {
867       if (ih->ih_key.k_dir_id == dir_id
868 	  && ih->ih_key.k_objectid == objectid
869 	  && ih->ih_key.u.v1.k_offset == 0
870 	  && ih->ih_key.u.v1.k_uniqueness == 0)
871 	{
872 #ifdef REISERDEBUG
873 	  printf ("  depth=%d, i=%d/%d\n", depth, i, nr_item);
874 #endif /* REISERDEBUG */
875 	  INFO->current_ih   = ih;
876 	  INFO->current_item = &LEAF[ih->ih_item_location];
877 	  return 1;
878 	}
879       ih++;
880     }
881   errnum = ERR_FSYS_CORRUPT;
882   return 0;
883 }
884 
885 int
reiserfs_read(char * buf,int len)886 reiserfs_read (char *buf, int len)
887 {
888   unsigned int blocksize;
889   unsigned int offset;
890   unsigned int to_read;
891   char *prev_buf = buf;
892 
893 #ifdef REISERDEBUG
894   printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
895 	  filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
896 #endif /* REISERDEBUG */
897 
898   if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
899       || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
900     {
901       search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
902       goto get_next_key;
903     }
904 
905   while (! errnum)
906     {
907       if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
908 	break;
909 
910       offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
911       blocksize = INFO->current_ih->ih_item_len;
912 
913 #ifdef REISERDEBUG
914       printf ("  loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
915 	      filepos, len, offset, blocksize);
916 #endif /* REISERDEBUG */
917 
918       if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
919 	  && offset < blocksize)
920 	{
921 #ifdef REISERDEBUG
922 	  printf ("direct_read: offset=%d, blocksize=%d\n",
923 		  offset, blocksize);
924 #endif /* REISERDEBUG */
925 	  to_read = blocksize - offset;
926 	  if (to_read > len)
927 	    to_read = len;
928 
929 	  if (disk_read_hook != NULL)
930 	    {
931 	      disk_read_func = disk_read_hook;
932 
933 	      block_read (INFO->blocks[DISK_LEAF_NODE_LEVEL],
934 			  (INFO->current_item - LEAF + offset), to_read, buf);
935 
936 	      disk_read_func = NULL;
937 	    }
938 	  else
939 	    memcpy (buf, INFO->current_item + offset, to_read);
940 	  goto update_buf_len;
941 	}
942       else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
943 	{
944 	  blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
945 #ifdef REISERDEBUG
946 	  printf ("indirect_read: offset=%d, blocksize=%d\n",
947 		  offset, blocksize);
948 #endif /* REISERDEBUG */
949 
950 	  while (offset < blocksize)
951 	    {
952 	      __u32 blocknr = ((__u32 *) INFO->current_item)
953 		[offset >> INFO->fullblocksize_shift];
954 	      int blk_offset = offset & (INFO->blocksize-1);
955 
956 	      to_read = INFO->blocksize - blk_offset;
957 	      if (to_read > len)
958 		to_read = len;
959 
960 	      disk_read_func = disk_read_hook;
961 
962 	      /* Journal is only for meta data.  Data blocks can be read
963 	       * directly without using block_read
964 	       */
965 	      devread (blocknr << INFO->blocksize_shift,
966 		       blk_offset, to_read, buf);
967 
968 	      disk_read_func = NULL;
969 	    update_buf_len:
970 	      len -= to_read;
971 	      buf += to_read;
972 	      offset += to_read;
973 	      filepos += to_read;
974 	      if (len == 0)
975 		goto done;
976 	    }
977 	}
978     get_next_key:
979       next_key ();
980     }
981  done:
982   return errnum ? 0 : buf - prev_buf;
983 }
984 
985 
986 /* preconditions: reiserfs_mount already executed, therefore
987  *   INFO block is valid
988  * returns: 0 if error, nonzero iff we were able to find the file successfully
989  * postconditions: on a nonzero return, INFO->fileinfo contains the info
990  *   of the file we were trying to look up, filepos is 0 and filemax is
991  *   the size of the file.
992  */
993 int
reiserfs_dir(char * dirname)994 reiserfs_dir (char *dirname)
995 {
996   struct reiserfs_de_head *de_head;
997   char *rest, ch;
998   __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
999 #ifndef STAGE1_5
1000   int do_possibilities = 0;
1001 #endif /* ! STAGE1_5 */
1002   char linkbuf[PATH_MAX];	/* buffer for following symbolic links */
1003   int link_count = 0;
1004   int mode;
1005 
1006   dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1007   objectid = REISERFS_ROOT_OBJECTID;
1008 
1009   while (1)
1010     {
1011 #ifdef REISERDEBUG
1012       printf ("dirname=%s\n", dirname);
1013 #endif /* REISERDEBUG */
1014 
1015       /* Search for the stat info first. */
1016       if (! search_stat (dir_id, objectid))
1017 	return 0;
1018 
1019 #ifdef REISERDEBUG
1020       printf ("sd_mode=%x sd_size=%d\n",
1021 	      ((struct stat_data *) INFO->current_item)->sd_mode,
1022 	      ((struct stat_data *) INFO->current_item)->sd_size);
1023 #endif /* REISERDEBUG */
1024 
1025       mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1026 
1027       /* If we've got a symbolic link, then chase it. */
1028       if (S_ISLNK (mode))
1029 	{
1030 	  int len;
1031 	  if (++link_count > MAX_LINK_COUNT)
1032 	    {
1033 	      errnum = ERR_SYMLINK_LOOP;
1034 	      return 0;
1035 	    }
1036 
1037 	  /* Get the symlink size. */
1038 	  filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1039 
1040 	  /* Find out how long our remaining name is. */
1041 	  len = 0;
1042 	  while (dirname[len] && !isspace (dirname[len]))
1043 	    len++;
1044 
1045 	  if (filemax + len > sizeof (linkbuf) - 1)
1046 	    {
1047 	      errnum = ERR_FILELENGTH;
1048 	      return 0;
1049 	    }
1050 
1051 	  /* Copy the remaining name to the end of the symlink data.
1052 	     Note that DIRNAME and LINKBUF may overlap! */
1053 	  grub_memmove (linkbuf + filemax, dirname, len+1);
1054 
1055 	  INFO->fileinfo.k_dir_id = dir_id;
1056 	  INFO->fileinfo.k_objectid = objectid;
1057   	  filepos = 0;
1058 	  if (! next_key ()
1059 	      || reiserfs_read (linkbuf, filemax) != filemax)
1060 	    {
1061 	      if (! errnum)
1062 		errnum = ERR_FSYS_CORRUPT;
1063 	      return 0;
1064 	    }
1065 
1066 #ifdef REISERDEBUG
1067 	  printf ("symlink=%s\n", linkbuf);
1068 #endif /* REISERDEBUG */
1069 
1070 	  dirname = linkbuf;
1071 	  if (*dirname == '/')
1072 	    {
1073 	      /* It's an absolute link, so look it up in root. */
1074 	      dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1075 	      objectid = REISERFS_ROOT_OBJECTID;
1076 	    }
1077 	  else
1078 	    {
1079 	      /* Relative, so look it up in our parent directory. */
1080 	      dir_id   = parent_dir_id;
1081 	      objectid = parent_objectid;
1082 	    }
1083 
1084 	  /* Now lookup the new name. */
1085 	  continue;
1086 	}
1087 
1088       /* if we have a real file (and we're not just printing possibilities),
1089 	 then this is where we want to exit */
1090 
1091       if (! *dirname || isspace (*dirname))
1092 	{
1093 	  if (! S_ISREG (mode))
1094 	    {
1095 	      errnum = ERR_BAD_FILETYPE;
1096 	      return 0;
1097 	    }
1098 
1099 	  filepos = 0;
1100 	  filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1101 
1102 	  /* If this is a new stat data and size is > 4GB set filemax to
1103 	   * maximum
1104 	   */
1105 	  if (INFO->current_ih->ih_version == ITEM_VERSION_2
1106 	      && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1107 	    filemax = 0xffffffff;
1108 
1109 	  INFO->fileinfo.k_dir_id = dir_id;
1110 	  INFO->fileinfo.k_objectid = objectid;
1111 	  return next_key ();
1112 	}
1113 
1114       /* continue with the file/directory name interpretation */
1115       while (*dirname == '/')
1116 	dirname++;
1117       if (! S_ISDIR (mode))
1118 	{
1119 	  errnum = ERR_BAD_FILETYPE;
1120 	  return 0;
1121 	}
1122       for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++);
1123       *rest = 0;
1124 
1125 # ifndef STAGE1_5
1126       if (print_possibilities && ch != '/')
1127 	do_possibilities = 1;
1128 # endif /* ! STAGE1_5 */
1129 
1130       while (1)
1131 	{
1132 	  char *name_end;
1133 	  int num_entries;
1134 
1135 	  if (! next_key ())
1136 	    return 0;
1137 #ifdef REISERDEBUG
1138 	  printf ("ih: key %d:%d:%d:%d version:%d\n",
1139 		  INFO->current_ih->ih_key.k_dir_id,
1140 		  INFO->current_ih->ih_key.k_objectid,
1141 		  INFO->current_ih->ih_key.u.v1.k_offset,
1142 		  INFO->current_ih->ih_key.u.v1.k_uniqueness,
1143 		  INFO->current_ih->ih_version);
1144 #endif /* REISERDEBUG */
1145 
1146 	  if (INFO->current_ih->ih_key.k_objectid != objectid)
1147 	    break;
1148 
1149 	  name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1150 	  de_head = (struct reiserfs_de_head *) INFO->current_item;
1151 	  num_entries = INFO->current_ih->u.ih_entry_count;
1152 	  while (num_entries > 0)
1153 	    {
1154 	      char *filename = INFO->current_item + de_head->deh_location;
1155 	      char  tmp = *name_end;
1156 	      if ((de_head->deh_state & DEH_Visible))
1157 		{
1158 		  int cmp;
1159 		  /* Directory names in ReiserFS are not null
1160 		   * terminated.  We write a temporary 0 behind it.
1161 		   * NOTE: that this may overwrite the first block in
1162 		   * the tree cache.  That doesn't hurt as long as we
1163 		   * don't call next_key () in between.
1164 		   */
1165 		  *name_end = 0;
1166 		  cmp = substring (dirname, filename);
1167 		  *name_end = tmp;
1168 # ifndef STAGE1_5
1169 		  if (do_possibilities)
1170 		    {
1171 		      if (cmp <= 0)
1172 			{
1173 			  if (print_possibilities > 0)
1174 			    print_possibilities = -print_possibilities;
1175 			  *name_end = 0;
1176 			  print_a_completion (filename);
1177 			  *name_end = tmp;
1178 			}
1179 		    }
1180 		  else
1181 # endif /* ! STAGE1_5 */
1182 		    if (cmp == 0)
1183 		      goto found;
1184 		}
1185 	      /* The beginning of this name marks the end of the next name.
1186 	       */
1187 	      name_end = filename;
1188 	      de_head++;
1189 	      num_entries--;
1190 	    }
1191 	}
1192 
1193 # ifndef STAGE1_5
1194       if (print_possibilities < 0)
1195 	return 1;
1196 # endif /* ! STAGE1_5 */
1197 
1198       errnum = ERR_FILE_NOT_FOUND;
1199       *rest = ch;
1200       return 0;
1201 
1202     found:
1203 
1204       *rest = ch;
1205       dirname = rest;
1206 
1207       parent_dir_id = dir_id;
1208       parent_objectid = objectid;
1209       dir_id = de_head->deh_dir_id;
1210       objectid = de_head->deh_objectid;
1211     }
1212 }
1213 
1214 int
reiserfs_embed(int * start_sector,int needed_sectors)1215 reiserfs_embed (int *start_sector, int needed_sectors)
1216 {
1217   struct reiserfs_super_block super;
1218   int num_sectors;
1219 
1220   if (! devread (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1221 		 sizeof (struct reiserfs_super_block), (char *) &super))
1222     return 0;
1223 
1224   *start_sector = 1; /* reserve first sector for stage1 */
1225   if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1226        || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1227        || substring (REISER3FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1228       && (/* check that this is not a super block copy inside
1229 	   * the journal log */
1230 	  super.s_journal_block * super.s_blocksize
1231 	  > REISERFS_DISK_OFFSET_IN_BYTES))
1232     num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1233   else
1234     num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1235 
1236   return (needed_sectors <= num_sectors);
1237 }
1238 #endif /* FSYS_REISERFS */
1239