#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mbcache2.h>

/*
 * Mbcache is a simple key-value store. Keys need not be unique; however,
 * key-value pairs are expected to be unique (we use this fact in
 * mb2_cache_entry_delete_block()).
 *
 * Ext2 and ext4 use this cache for deduplication of extended attribute
 * blocks. They use a hash of the block contents as the key and the block
 * number as the value. That's why keys need not be unique (different xattr
 * blocks may end up with the same hash). However, the block number always
 * uniquely identifies a cache entry.
 *
 * We provide functions for creation and removal of entries, search by key,
 * and a special "delete entry with given key-value pair" operation. A
 * fixed-size hash table is used for fast key lookups.
 */
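
/*
 * Illustrative sketch (not compiled here) of how a deduplicating user such
 * as ext4 might drive the cache; "cache", "hash" and "blocknr" are
 * hypothetical names and the block-content comparison is elided:
 *
 *	struct mb2_cache_entry *ce;
 *
 *	ce = mb2_cache_entry_find_first(cache, hash);
 *	if (ce) {
 *		// candidate block with the same content hash found;
 *		// drop our reference once we are done with it
 *		mb2_cache_entry_put(cache, ce);
 *	} else {
 *		mb2_cache_entry_create(cache, GFP_NOFS, hash, blocknr);
 *	}
 */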

struct mb2_cache {
	/* Hash table of entries */
	struct hlist_bl_head	*c_hash;
	/* log2 of hash table size */
	int			c_bucket_bits;
	/* Protects c_lru_list and c_entry_count */
	spinlock_t		c_lru_list_lock;
	/* LRU list of cache entries, used by the shrinker for reclaim */
	struct list_head	c_lru_list;
	/* Number of entries in cache */
	unsigned long		c_entry_count;
	/* Shrinker reclaiming cache entries under memory pressure */
	struct shrinker		c_shrink;
};

static struct kmem_cache *mb2_entry_cache;

/*
 * mb2_cache_entry_create - create entry in cache
 * @cache: cache where the entry should be created
 * @mask: gfp mask with which the entry should be allocated
 * @key: key of the entry
 * @block: block that contains data
 *
 * Creates an entry in @cache with key @key and records that the data is
 * stored in block @block. Returns 0 on success, -EBUSY if an entry with the
 * same key and block already exists in the cache, and -ENOMEM if the entry
 * cannot be allocated.
 */
int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
			   sector_t block)
{
	struct mb2_cache_entry *entry, *dup;
	struct hlist_bl_node *dup_node;
	struct hlist_bl_head *head;

	entry = kmem_cache_alloc(mb2_entry_cache, mask);
	if (!entry)
		return -ENOMEM;

	INIT_LIST_HEAD(&entry->e_lru_list);
	/* One ref for hash list */
	atomic_set(&entry->e_refcnt, 1);
	entry->e_key = key;
	entry->e_block = block;
	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
	entry->e_hash_list_head = head;
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
		if (dup->e_key == key && dup->e_block == block) {
			hlist_bl_unlock(head);
			kmem_cache_free(mb2_entry_cache, entry);
			return -EBUSY;
		}
	}
	hlist_bl_add_head(&entry->e_hash_list, head);
	hlist_bl_unlock(head);

	spin_lock(&cache->c_lru_list_lock);
	list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
	/* Grab ref for LRU list */
	atomic_inc(&entry->e_refcnt);
	cache->c_entry_count++;
	spin_unlock(&cache->c_lru_list_lock);

	return 0;
}
EXPORT_SYMBOL(mb2_cache_entry_create);
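
/*
 * Illustrative sketch: a deduplicating caller can usually treat -EBUSY as
 * success, since it only means the key-block pair is already cached. The
 * names "hash" and "bh" are hypothetical:
 *
 *	int err = mb2_cache_entry_create(cache, GFP_NOFS, hash,
 *					 bh->b_blocknr);
 *	if (err && err != -EBUSY)
 *		return err;	// -ENOMEM: allocation failed
 */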

/* Final free of an entry; called once its last reference has been dropped */
void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
{
	kmem_cache_free(mb2_entry_cache, entry);
}
EXPORT_SYMBOL(__mb2_cache_entry_free);

static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
					    struct mb2_cache_entry *entry,
					    u32 key)
{
	struct mb2_cache_entry *old_entry = entry;
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;

	if (entry)
		head = entry->e_hash_list_head;
	else
		head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
	hlist_bl_lock(head);
	/* Resume after @entry unless it got unhashed under us */
	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
		node = entry->e_hash_list.next;
	else
		node = hlist_bl_first(head);
	while (node) {
		entry = hlist_bl_entry(node, struct mb2_cache_entry,
				       e_hash_list);
		if (entry->e_key == key) {
			atomic_inc(&entry->e_refcnt);
			goto out;
		}
		node = node->next;
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	if (old_entry)
		mb2_cache_entry_put(cache, old_entry);

	return entry;
}

/*
 * mb2_cache_entry_find_first - find the first entry in cache with given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for an entry with key @key. Grabs a reference to the
 * first entry found and returns it, or NULL if no such entry exists.
 */
struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache,
						   u32 key)
{
	return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb2_cache_entry_find_first);

/*
 * mb2_cache_entry_find_next - find next entry in cache with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds the next entry in the hash chain which has the same key as @entry.
 * If @entry is unhashed (which can happen when deletion of the entry races
 * with the search), finds the first entry in the hash chain. The function
 * drops the reference to @entry and returns with a reference to the found
 * entry, or NULL if there is none.
 */
struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache,
						  struct mb2_cache_entry *entry)
{
	return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb2_cache_entry_find_next);
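
/*
 * Illustrative sketch of iterating all entries with a given key. The loop
 * holds exactly one entry reference at a time: mb2_cache_entry_find_next()
 * drops the reference to the previous entry, so only the entry we stop at
 * (if any) must be put explicitly. "cache", "key" and try_to_reuse() are
 * hypothetical:
 *
 *	struct mb2_cache_entry *ce;
 *
 *	ce = mb2_cache_entry_find_first(cache, key);
 *	while (ce) {
 *		if (try_to_reuse(ce->e_block))
 *			break;
 *		ce = mb2_cache_entry_find_next(cache, ce);
 *	}
 *	if (ce)
 *		mb2_cache_entry_put(cache, ce);
 */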

/*
 * mb2_cache_entry_delete_block - remove information about block from cache
 * @cache: cache we work with
 * @key: key of the entry to remove
 * @block: block containing data for @key
 *
 * Remove the entry from cache @cache with key @key whose data is stored in
 * block @block. Does nothing if no such entry exists.
 */
void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
				  sector_t block)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb2_cache_entry *entry;

	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_block == block) {
			/* We keep hash list reference to keep entry alive */
			hlist_bl_del_init(&entry->e_hash_list);
			hlist_bl_unlock(head);
			spin_lock(&cache->c_lru_list_lock);
			if (!list_empty(&entry->e_lru_list)) {
				list_del_init(&entry->e_lru_list);
				cache->c_entry_count--;
				atomic_dec(&entry->e_refcnt);
			}
			spin_unlock(&cache->c_lru_list_lock);
			mb2_cache_entry_put(cache, entry);
			return;
		}
	}
	hlist_bl_unlock(head);
}
EXPORT_SYMBOL(mb2_cache_entry_delete_block);
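
/*
 * Illustrative sketch: when the block for @key is freed, its entry must be
 * removed so a stale block number is never handed out ("hash" and "bh" are
 * hypothetical):
 *
 *	mb2_cache_entry_delete_block(cache, hash, bh->b_blocknr);
 */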

/*
 * mb2_cache_entry_touch - cache entry got used
 * @cache: cache the entry belongs to
 * @entry: entry that got used
 *
 * Move the entry to the tail of the LRU list to reflect the fact that it
 * was recently used; reclaim scans the list from the head.
 */
void mb2_cache_entry_touch(struct mb2_cache *cache,
			   struct mb2_cache_entry *entry)
{
	spin_lock(&cache->c_lru_list_lock);
	/* Entry may have been removed from the LRU by reclaim or deletion */
	if (!list_empty(&entry->e_lru_list))
		list_move_tail(&entry->e_lru_list, &cache->c_lru_list);
	spin_unlock(&cache->c_lru_list_lock);
}
EXPORT_SYMBOL(mb2_cache_entry_touch);

static unsigned long mb2_cache_count(struct shrinker *shrink,
				     struct shrink_control *sc)
{
	struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
					       c_shrink);

	return cache->c_entry_count;
}

/* Shrink number of entries in cache */
static unsigned long mb2_cache_scan(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	int nr_to_scan = sc->nr_to_scan;
	struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
					       c_shrink);
	struct mb2_cache_entry *entry;
	struct hlist_bl_head *head;
	unsigned int shrunk = 0;

	spin_lock(&cache->c_lru_list_lock);
	while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
		entry = list_first_entry(&cache->c_lru_list,
					 struct mb2_cache_entry, e_lru_list);
		list_del_init(&entry->e_lru_list);
		cache->c_entry_count--;
		/*
		 * We keep LRU list reference so that entry doesn't go away
		 * from under us.
		 */
		spin_unlock(&cache->c_lru_list_lock);
		head = entry->e_hash_list_head;
		hlist_bl_lock(head);
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		}
		hlist_bl_unlock(head);
		if (mb2_cache_entry_put(cache, entry))
			shrunk++;
		cond_resched();
		spin_lock(&cache->c_lru_list_lock);
	}
	spin_unlock(&cache->c_lru_list_lock);

	return shrunk;
}

/*
 * mb2_cache_create - create cache
 * @bucket_bits: log2 of the hash table size
 *
 * Create a cache for keys with a hash table of 2^@bucket_bits buckets.
 * Returns the new cache or NULL on failure.
 */
struct mb2_cache *mb2_cache_create(int bucket_bits)
{
	struct mb2_cache *cache;
	int bucket_count = 1 << bucket_bits;
	int i;

	if (!try_module_get(THIS_MODULE))
		return NULL;

	cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
	if (!cache)
		goto err_out;
	cache->c_bucket_bits = bucket_bits;
	INIT_LIST_HEAD(&cache->c_lru_list);
	spin_lock_init(&cache->c_lru_list_lock);
	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
				GFP_KERNEL);
	if (!cache->c_hash) {
		kfree(cache);
		goto err_out;
	}
	for (i = 0; i < bucket_count; i++)
		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);

	cache->c_shrink.count_objects = mb2_cache_count;
	cache->c_shrink.scan_objects = mb2_cache_scan;
	cache->c_shrink.seeks = DEFAULT_SEEKS;
	register_shrinker(&cache->c_shrink);

	return cache;

err_out:
	module_put(THIS_MODULE);
	return NULL;
}
EXPORT_SYMBOL(mb2_cache_create);
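
/*
 * Illustrative sketch of the cache lifecycle: 2^10 = 1024 hash buckets;
 * shrinker registration and unregistration happen internally:
 *
 *	struct mb2_cache *cache = mb2_cache_create(10);
 *
 *	if (!cache)
 *		return -ENOMEM;
 *	// ... use the cache ...
 *	mb2_cache_destroy(cache);
 */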

/*
 * mb2_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in the cache and the cache itself. The caller must make
 * sure nobody (except the shrinker) can reach @cache when calling this.
 */
void mb2_cache_destroy(struct mb2_cache *cache)
{
	struct mb2_cache_entry *entry, *next;

	unregister_shrinker(&cache->c_shrink);

	/*
	 * We don't bother with any locking. Cache must not be used at this
	 * point.
	 */
	list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		} else
			WARN_ON(1);
		list_del(&entry->e_lru_list);
		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
		mb2_cache_entry_put(cache, entry);
	}
	kfree(cache->c_hash);
	kfree(cache);
	module_put(THIS_MODULE);
}
EXPORT_SYMBOL(mb2_cache_destroy);

static int __init mb2cache_init(void)
{
	mb2_entry_cache = kmem_cache_create("mbcache",
				sizeof(struct mb2_cache_entry), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
	BUG_ON(!mb2_entry_cache);
	return 0;
}

static void __exit mb2cache_exit(void)
{
	kmem_cache_destroy(mb2_entry_cache);
}

module_init(mb2cache_init)
module_exit(mb2cache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");