• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style license that can be
3  // found in the LICENSE file.
4  
5  // The cache is stored on disk as a collection of block-files, plus an index
6  // plus a collection of external files.
7  //
8  // Any data blob bigger than kMaxBlockSize (disk_cache/addr.h) will be stored in
9  // a separate file named f_xxx where x is a hexadecimal number. Shorter data
10  // will be stored as a series of blocks on a block-file. In any case, CacheAddr
11  // represents the address of the data inside the cache.
12  //
13  // The index is actually a collection of four files that store a hash table with
14  // allocation bitmaps and backup data. Hash collisions are handled directly by
15  // the table, which from some point of view behaves like a 4-way associative
16  // cache with overflow buckets (so not really open addressing).
17  //
18  // Basically the hash table is a collection of buckets. The first part of the
19  // table has a fixed number of buckets and it is directly addressed by the hash,
20  // while the second part of the table (stored on a second file) has a variable
21  // number of buckets. Each bucket stores up to four cells (each cell represents
22  // a possible entry). The index bitmap tracks the state of individual cells.
23  //
24  // The last element of the cache is the block-file. A block file is a file
25  // designed to store blocks of data of a given size. For more details see
26  // disk_cache/disk_format_base.h
27  //
28  // A new cache is initialized with a set of block files (named data_0 through
29  // data_6), each one dedicated to store blocks of a given size or function. The
30  // number at the end of the file name is the block file number (in decimal).
31  //
32  // There are three "special" types of blocks: normal entries, evicted entries
33  // and control data for external files.
34  //
35  // The files that store internal information for the cache (blocks and index)
36  // are memory mapped. They have a location that is signaled every time the
37  // internal structures are modified, so it is possible to detect (most of the
38  // time) when the process dies in the middle of an update. There are dedicated
39  // backup files for cache bitmaps, used to detect entries out of date.
40  //
41  // Although cache files are to be consumed on the same machine that creates
42  // them, if files are to be moved across machines, little endian storage is
43  // assumed.
44  
45  #ifndef NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
46  #define NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
47  
48  #include "base/basictypes.h"
49  #include "net/disk_cache/blockfile/disk_format_base.h"
50  
51  namespace disk_cache {
52  
53  const int kBaseTableLen = 0x400;
54  const uint32 kIndexMagicV3 = 0xC103CAC3;
55  const uint32 kVersion3 = 0x30000;  // Version 3.0.
56  
// Bit flags that describe a given cache. Each enumerator occupies its own
// bit, so values may be OR-ed together.
enum CacheFlags {
  SMALL_CACHE = 0x1,       // Small-cache cell layout; see IndexCell.
  CACHE_EVICTION_2 = 0x2,  // Multiple lists are kept for eviction.
  CACHE_EVICTED = 0x4      // At least one entry has already been evicted.
};
63  
64  // Header for the master index file.
65  struct IndexHeaderV3 {
66    uint32      magic;
67    uint32      version;
68    int32       num_entries;   // Number of entries currently stored.
69    int32       num_bytes;     // Total size of the stored data.
70    int32       last_file;     // Last external file created.
71    int32       reserved1;
72    CacheAddr   stats;         // Storage for usage data.
73    int32       table_len;     // Actual size of the table.
74    int32       crash;         // Signals a previous crash.
75    int32       experiment;    // Id of an ongoing test.
76    int32       max_bytes;     // Total maximum size of the stored data.
77    uint32      flags;
78    int32       used_cells;
79    int32       max_bucket;
80    uint64      create_time;   // Creation time for this set of files.
81    uint64      base_time;     // Current base for timestamps.
82    uint64      old_time;      // Previous time used for timestamps.
83    int32       max_block_file;
84    int32       num_no_use_entries;
85    int32       num_low_use_entries;
86    int32       num_high_use_entries;
87    int32       reserved;
88    int32       num_evicted_entries;
89    int32       pad[6];
90  };
91  
92  const int kBaseBitmapBytes = 3968;
93  // The IndexBitmap is directly saved to a file named index. The file grows in
94  // page increments (4096 bytes), but all bits don't have to be in use at any
95  // given time. The required file size can be computed from header.table_len.
96  struct IndexBitmap {
97    IndexHeaderV3   header;
98    uint32          bitmap[kBaseBitmapBytes / 4];  // First page of the bitmap.
99  };
100  COMPILE_ASSERT(sizeof(IndexBitmap) == 4096, bad_IndexHeader);
101  
102  // Possible states for a given entry.
103  enum EntryState {
104    ENTRY_FREE = 0,   // Available slot.
105    ENTRY_NEW,        // The entry is being created.
106    ENTRY_OPEN,       // The entry is being accessed.
107    ENTRY_MODIFIED,   // The entry is being modified.
108    ENTRY_DELETED,    // The entry is being deleted.
109    ENTRY_FIXING,     // Inconsistent state. The entry is being verified.
110    ENTRY_USED        // The slot is in use (entry is present).
111  };
112  COMPILE_ASSERT(ENTRY_USED <= 7, state_uses_3_bits);
113  
114  enum EntryGroup {
115    ENTRY_NO_USE = 0,   // The entry has not been reused.
116    ENTRY_LOW_USE,      // The entry has low reuse.
117    ENTRY_HIGH_USE,     // The entry has high reuse.
118    ENTRY_RESERVED,     // Reserved for future use.
119    ENTRY_EVICTED       // The entry was deleted.
120  };
121  COMPILE_ASSERT(ENTRY_USED <= 7, group_uses_3_bits);
122  
123  #pragma pack(push, 1)
124  struct IndexCell {
ClearIndexCell125    void Clear() { memset(this, 0, sizeof(*this)); }
126  
127    // A cell is a 9 byte bit-field that stores 7 values:
128    //   location : 22 bits
129    //   id : 18 bits
130    //   timestamp : 20 bits
131    //   reuse : 4 bits
132    //   state : 3 bits
133    //   group : 3 bits
134    //   sum : 2 bits
135    // The id is derived from the full hash of the entry.
136    //
137    // The actual layout is as follows:
138    //
139    // first_part (low order 32 bits):
140    //   0000 0000 0011 1111 1111 1111 1111 1111 : location
141    //   1111 1111 1100 0000 0000 0000 0000 0000 : id
142    //
143    // first_part (high order 32 bits):
144    //   0000 0000 0000 0000 0000 0000 1111 1111 : id
145    //   0000 1111 1111 1111 1111 1111 0000 0000 : timestamp
146    //   1111 0000 0000 0000 0000 0000 0000 0000 : reuse
147    //
148    // last_part:
149    //   0000 0111 : state
150    //   0011 1000 : group
151    //   1100 0000 : sum
152    //
153    // The small-cache version of the format moves some bits from the location to
154    // the id fileds, like so:
155    //   location : 16 bits
156    //   id : 24 bits
157    //
158    // first_part (low order 32 bits):
159    //   0000 0000 0000 0000 1111 1111 1111 1111 : location
160    //   1111 1111 1111 1111 0000 0000 0000 0000 : id
161    //
162    // The actual bit distribution between location and id is determined by the
163    // table size (IndexHeaderV3.table_len). Tables smaller than 65536 entries
164    // use the small-cache version; after that size, caches should have the
165    // SMALL_CACHE flag cleared.
166    //
167    // To locate a given entry after recovering the location from the cell, the
168    // file type and file number are appended (see disk_cache/addr.h). For a large
169    // table only the file type is implied; for a small table, the file number
170    // is also implied, and it should be the first file for that type of entry,
171    // as determined by the EntryGroup (two files in total, one for active entries
172    // and another one for evicted entries).
173    //
174    // For example, a small table may store something like 0x1234 as the location
175    // field. That means it stores the entry number 0x1234. If that record belongs
176    // to a deleted entry, the regular cache address may look something like
177    //     BLOCK_EVICTED + 1 block + file number 6 + entry number 0x1234
178    //     so Addr = 0xf0061234
179    //
180    // If that same Addr is stored on a large table, the location field would be
181    // 0x61234
182  
183    uint64      first_part;
184    uint8       last_part;
185  };
186  COMPILE_ASSERT(sizeof(IndexCell) == 9, bad_IndexCell);
187  
188  const int kCellsPerBucket = 4;
189  struct IndexBucket {
190    IndexCell   cells[kCellsPerBucket];
191    int32       next;
192    uint32      hash;  // The high order byte is reserved (should be zero).
193  };
194  COMPILE_ASSERT(sizeof(IndexBucket) == 44, bad_IndexBucket);
195  const int kBytesPerCell = 44 / kCellsPerBucket;
196  
197  // The main cache index. Backed by a file named index_tb1.
198  // The extra table (index_tb2) has a similar format, but different size.
199  struct Index {
200    // Default size. Actual size controlled by header.table_len.
201    IndexBucket table[kBaseTableLen / kCellsPerBucket];
202  };
203  #pragma pack(pop)
204  
// Per-entry bit flags. Each enumerator occupies its own bit.
enum EntryFlags {
  PARENT_ENTRY = 1 << 0,  // Entry that has children (sparse) entries.
  CHILD_ENTRY = 1 << 1    // Child entry holding sparse data.
};
210  
211  struct EntryRecord {
212    uint32      hash;
213    uint32      pad1;
214    uint8       reuse_count;
215    uint8       refetch_count;
216    int8        state;              // Current EntryState.
217    uint8       flags;              // Any combination of EntryFlags.
218    int32       key_len;
219    int32       data_size[4];       // We can store up to 4 data streams for each
220    CacheAddr   data_addr[4];       // entry.
221    uint32      data_hash[4];
222    uint64      creation_time;
223    uint64      last_modified_time;
224    uint64      last_access_time;
225    int32       pad[3];
226    uint32      self_hash;
227  };
228  COMPILE_ASSERT(sizeof(EntryRecord) == 104, bad_EntryRecord);
229  
230  struct ShortEntryRecord {
231    uint32      hash;
232    uint32      pad1;
233    uint8       reuse_count;
234    uint8       refetch_count;
235    int8        state;              // Current EntryState.
236    uint8       flags;
237    int32       key_len;
238    uint64      last_access_time;
239    uint32      long_hash[5];
240    uint32      self_hash;
241  };
242  COMPILE_ASSERT(sizeof(ShortEntryRecord) == 48, bad_ShortEntryRecord);
243  
244  }  // namespace disk_cache
245  
246  #endif  // NET_DISK_CACHE_BLOCKFILE_DISK_FORMAT_V3_H_
247