/*
 * xvmalloc memory allocator
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>

#include "xvmalloc.h"
#include "xvmalloc_int.h"

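/*
 * Block layout (a summary of the helpers and code below): every block
 * begins with a struct block_header (defined in xvmalloc_int.h). The
 * 'prev' field packs the in-page offset of the previous block
 * (PREV_MASK) together with the BLOCK_FREE/PREV_FREE status bits
 * (FLAGS_MASK), and 'size' holds the block size. The freelist link
 * (block->link) is only meaningful while a block is free. Allocated
 * blocks pay XV_ALIGN bytes of header overhead in front of the
 * payload (see BLOCK_NEXT() and the final '*offset += XV_ALIGN' in
 * xv_malloc()).
 */
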
static void stat_inc(u64 *value)
{
        *value = *value + 1;
}

static void stat_dec(u64 *value)
{
        *value = *value - 1;
}

static int test_flag(struct block_header *block, enum blockflags flag)
{
        return block->prev & BIT(flag);
}

static void set_flag(struct block_header *block, enum blockflags flag)
{
        block->prev |= BIT(flag);
}

static void clear_flag(struct block_header *block, enum blockflags flag)
{
        block->prev &= ~BIT(flag);
}

/*
 * Given a <page, offset> pair, provide a dereferenceable pointer.
 * This is called from the xv_malloc/xv_free path, so it
 * needs to be fast.
 */
static void *get_ptr_atomic(struct page *page, u16 offset)
{
        unsigned char *base;

        base = kmap_atomic(page);
        return base + offset;
}

static void put_ptr_atomic(void *ptr)
{
        kunmap_atomic(ptr);
}

static u32 get_blockprev(struct block_header *block)
{
        return block->prev & PREV_MASK;
}

static void set_blockprev(struct block_header *block, u16 new_offset)
{
        block->prev = new_offset | (block->prev & FLAGS_MASK);
}

static struct block_header *BLOCK_NEXT(struct block_header *block)
{
        return (struct block_header *)
                ((char *)block + block->size + XV_ALIGN);
}

/*
 * Get the index of the free list for the largest size class that is
 * less than or equal to the given size.
 */
static u32 get_index_for_insert(u32 size)
{
        if (unlikely(size > XV_MAX_ALLOC_SIZE))
                size = XV_MAX_ALLOC_SIZE;
        size &= ~FL_DELTA_MASK;
        return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

/*
 * Get the index of a free list holding blocks of size greater than
 * or equal to the requested size.
 */
static u32 get_index(u32 size)
{
        if (unlikely(size < XV_MIN_ALLOC_SIZE))
                size = XV_MIN_ALLOC_SIZE;
        size = ALIGN(size, FL_DELTA);
        return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
}

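/*
 * Note on the pairing above: get_index_for_insert() rounds a block's
 * size *down* to an FL_DELTA boundary while get_index() rounds a
 * request *up*, so any block found on the list returned by
 * get_index(size) is large enough to satisfy the request.
 *
 * Free-list occupancy is tracked with a two-level bitmap: bit i of
 * pool->slbitmap[n] is set when freelist (n * BITS_PER_LONG + i) is
 * non-empty, and bit n of pool->flbitmap is set when slbitmap[n] has
 * any bit set.
 */
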
/**
 * find_block - find block of at least given size
 * @pool: memory pool to search from
 * @size: size of block required
 * @page: page containing required block
 * @offset: offset within the page where block is located.
 *
 * Searches the two-level bitmap to locate a block of at least
 * the given size. If such a block is found, it provides
 * <page, offset> to identify this block and returns the index
 * in the freelist where we found this block.
 * Otherwise, returns 0 and the <page, offset> params are not touched.
 */
static u32 find_block(struct xv_pool *pool, u32 size,
                        struct page **page, u32 *offset)
{
        ulong flbitmap, slbitmap;
        u32 flindex, slindex, slbitstart;

        /* There are no free blocks in this pool */
        if (!pool->flbitmap)
                return 0;

        /* Get freelist index corresponding to this size */
        slindex = get_index(size);
        slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
        slbitstart = slindex % BITS_PER_LONG;

        /*
         * If the freelist is not empty at this index, we found the
         * block - the head of this list. This is an approximate best-fit match.
         */
        if (test_bit(slbitstart, &slbitmap)) {
                *page = pool->freelist[slindex].page;
                *offset = pool->freelist[slindex].offset;
                return slindex;
        }

        /*
         * No best-fit found. Search a bit further in the bitmap for a free
         * block. The second-level bitmap consists of a series of
         * BITS_PER_LONG-sized chunks. Search further in the chunk where we
         * expected a best-fit, starting from the index location found above.
         */
        slbitstart++;
        slbitmap >>= slbitstart;

        /* Skip this search if we were already at the end of this bitmap chunk */
        if ((slbitstart != BITS_PER_LONG) && slbitmap) {
                slindex += __ffs(slbitmap) + 1;
                *page = pool->freelist[slindex].page;
                *offset = pool->freelist[slindex].offset;
                return slindex;
        }

        /* Now do a full two-level bitmap search to find the next nearest fit */
        flindex = slindex / BITS_PER_LONG;

        flbitmap = (pool->flbitmap) >> (flindex + 1);
        if (!flbitmap)
                return 0;

        flindex += __ffs(flbitmap) + 1;
        slbitmap = pool->slbitmap[flindex];
        slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
        *page = pool->freelist[slindex].page;
        *offset = pool->freelist[slindex].offset;

        return slindex;
}

/*
 * Insert block at <page, offset> in the freelist of the given pool.
 * The freelist used depends on the block size.
 */
static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
                        struct block_header *block)
{
        u32 flindex, slindex;
        struct block_header *nextblock;

        slindex = get_index_for_insert(block->size);
        flindex = slindex / BITS_PER_LONG;

        block->link.prev_page = NULL;
        block->link.prev_offset = 0;
        block->link.next_page = pool->freelist[slindex].page;
        block->link.next_offset = pool->freelist[slindex].offset;
        pool->freelist[slindex].page = page;
        pool->freelist[slindex].offset = offset;

        if (block->link.next_page) {
                nextblock = get_ptr_atomic(block->link.next_page,
                                        block->link.next_offset);
                nextblock->link.prev_page = page;
                nextblock->link.prev_offset = offset;
                put_ptr_atomic(nextblock);
                /* If there was a next page then the free bits are set. */
                return;
        }

        __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
        __set_bit(flindex, &pool->flbitmap);
}

/*
 * Remove block from freelist. Index 'slindex' identifies the freelist.
 */
static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
                        struct block_header *block, u32 slindex)
{
        u32 flindex = slindex / BITS_PER_LONG;
        struct block_header *tmpblock;

        if (block->link.prev_page) {
                tmpblock = get_ptr_atomic(block->link.prev_page,
                                block->link.prev_offset);
                tmpblock->link.next_page = block->link.next_page;
                tmpblock->link.next_offset = block->link.next_offset;
                put_ptr_atomic(tmpblock);
        }

        if (block->link.next_page) {
                tmpblock = get_ptr_atomic(block->link.next_page,
                                block->link.next_offset);
                tmpblock->link.prev_page = block->link.prev_page;
                tmpblock->link.prev_offset = block->link.prev_offset;
                put_ptr_atomic(tmpblock);
        }

        /* Is this block at the head of the freelist? */
        if (pool->freelist[slindex].page == page
           && pool->freelist[slindex].offset == offset) {

                pool->freelist[slindex].page = block->link.next_page;
                pool->freelist[slindex].offset = block->link.next_offset;

                if (pool->freelist[slindex].page) {
                        struct block_header *tmpblock;
                        tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
                                        pool->freelist[slindex].offset);
                        tmpblock->link.prev_page = NULL;
                        tmpblock->link.prev_offset = 0;
                        put_ptr_atomic(tmpblock);
                } else {
                        /* This freelist bucket is empty */
                        __clear_bit(slindex % BITS_PER_LONG,
                                    &pool->slbitmap[flindex]);
                        if (!pool->slbitmap[flindex])
                                __clear_bit(flindex, &pool->flbitmap);
                }
        }

        block->link.prev_page = NULL;
        block->link.prev_offset = 0;
        block->link.next_page = NULL;
        block->link.next_offset = 0;
}

/*
 * Allocate a page and add it to freelist of given pool.
 */
static int grow_pool(struct xv_pool *pool, gfp_t flags)
{
        struct page *page;
        struct block_header *block;

        page = alloc_page(flags);
        if (unlikely(!page))
                return -ENOMEM;

        stat_inc(&pool->total_pages);

        spin_lock(&pool->lock);
        block = get_ptr_atomic(page, 0);

        block->size = PAGE_SIZE - XV_ALIGN;
        set_flag(block, BLOCK_FREE);
        clear_flag(block, PREV_FREE);
        set_blockprev(block, 0);

        insert_block(pool, page, 0, block);

        put_ptr_atomic(block);
        spin_unlock(&pool->lock);

        return 0;
}

/*
 * Create a memory pool. Allocates freelist, bitmaps and other
 * per-pool metadata.
 */
struct xv_pool *xv_create_pool(void)
{
        u32 ovhd_size;
        struct xv_pool *pool;

        ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
        pool = kzalloc(ovhd_size, GFP_KERNEL);
        if (!pool)
                return NULL;

        spin_lock_init(&pool->lock);

        return pool;
}
EXPORT_SYMBOL_GPL(xv_create_pool);

void xv_destroy_pool(struct xv_pool *pool)
{
        kfree(pool);
}
EXPORT_SYMBOL_GPL(xv_destroy_pool);

/**
 * xv_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 * @page: page that holds the object
 * @offset: location of object within page
 *
 * On success, <page, offset> identifies the block allocated
 * and 0 is returned. On failure, <page, offset> is set to
 * 0 and -ENOMEM is returned.
 *
 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
 */
int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
                u32 *offset, gfp_t flags)
{
        int error;
        u32 index, tmpsize, origsize, tmpoffset;
        struct block_header *block, *tmpblock;

        *page = NULL;
        *offset = 0;
        origsize = size;

        if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
                return -ENOMEM;

        size = ALIGN(size, XV_ALIGN);

        spin_lock(&pool->lock);

        index = find_block(pool, size, page, offset);

        if (!*page) {
                spin_unlock(&pool->lock);
                if (flags & GFP_NOWAIT)
                        return -ENOMEM;
                error = grow_pool(pool, flags);
                if (unlikely(error))
                        return error;

                spin_lock(&pool->lock);
                index = find_block(pool, size, page, offset);
        }

        if (!*page) {
                spin_unlock(&pool->lock);
                return -ENOMEM;
        }

        block = get_ptr_atomic(*page, *offset);

        remove_block(pool, *page, *offset, block, index);

        /* Split the block if required */
        tmpoffset = *offset + size + XV_ALIGN;
        tmpsize = block->size - size;
        tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
        if (tmpsize) {
                tmpblock->size = tmpsize - XV_ALIGN;
                set_flag(tmpblock, BLOCK_FREE);
                clear_flag(tmpblock, PREV_FREE);

                set_blockprev(tmpblock, *offset);
                if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
                        insert_block(pool, *page, tmpoffset, tmpblock);

                if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
                        tmpblock = BLOCK_NEXT(tmpblock);
                        set_blockprev(tmpblock, tmpoffset);
                }
        } else {
                /* This block is an exact fit */
                if (tmpoffset != PAGE_SIZE)
                        clear_flag(tmpblock, PREV_FREE);
        }

        block->size = origsize;
        clear_flag(block, BLOCK_FREE);

        put_ptr_atomic(block);
        spin_unlock(&pool->lock);

        *offset += XV_ALIGN;

        return 0;
}
EXPORT_SYMBOL_GPL(xv_malloc);

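/*
 * Example usage (an illustrative sketch only; 'pool', 'len' and 'src'
 * are assumed to be set up by the caller):
 *
 *      struct page *page;
 *      u32 offset;
 *      void *obj;
 *
 *      if (xv_malloc(pool, len, &page, &offset, GFP_NOIO))
 *              return -ENOMEM;
 *      obj = kmap_atomic(page) + offset;
 *      memcpy(obj, src, len);
 *      kunmap_atomic(obj - offset);
 *      ...
 *      xv_free(pool, page, offset);
 */
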
/*
 * Free block identified with <page, offset>
 */
void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
{
        void *page_start;
        struct block_header *block, *tmpblock;

        offset -= XV_ALIGN;

        spin_lock(&pool->lock);

        page_start = get_ptr_atomic(page, 0);
        block = (struct block_header *)((char *)page_start + offset);

        /* Catch double free bugs */
        BUG_ON(test_flag(block, BLOCK_FREE));

        block->size = ALIGN(block->size, XV_ALIGN);

        tmpblock = BLOCK_NEXT(block);
        if (offset + block->size + XV_ALIGN == PAGE_SIZE)
                tmpblock = NULL;

        /* Merge next block if it's free */
        if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
                /*
                 * Blocks smaller than XV_MIN_ALLOC_SIZE
                 * are not inserted in any free list.
                 */
                if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
                        remove_block(pool, page,
                                    offset + block->size + XV_ALIGN, tmpblock,
                                    get_index_for_insert(tmpblock->size));
                }
                block->size += tmpblock->size + XV_ALIGN;
        }

        /* Merge previous block if it's free */
        if (test_flag(block, PREV_FREE)) {
                tmpblock = (struct block_header *)((char *)(page_start) +
                                                get_blockprev(block));
                offset = offset - tmpblock->size - XV_ALIGN;

                if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
                        remove_block(pool, page, offset, tmpblock,
                                    get_index_for_insert(tmpblock->size));

                tmpblock->size += block->size + XV_ALIGN;
                block = tmpblock;
        }

        /* No used objects in this page. Free it. */
        if (block->size == PAGE_SIZE - XV_ALIGN) {
                put_ptr_atomic(page_start);
                spin_unlock(&pool->lock);

                __free_page(page);
                stat_dec(&pool->total_pages);
                return;
        }

        set_flag(block, BLOCK_FREE);
        if (block->size >= XV_MIN_ALLOC_SIZE)
                insert_block(pool, page, offset, block);

        if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
                tmpblock = BLOCK_NEXT(block);
                set_flag(tmpblock, PREV_FREE);
                set_blockprev(tmpblock, offset);
        }

        put_ptr_atomic(page_start);
        spin_unlock(&pool->lock);
}
EXPORT_SYMBOL_GPL(xv_free);

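/*
 * Note: xv_malloc() records the caller's original (unaligned) request
 * size in the block header, so this returns the size that was asked
 * for rather than the rounded-up block size. The 'obj' pointer is
 * expected to be the mapped address of an allocated object, i.e. the
 * page address plus the offset returned by xv_malloc().
 */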
u32 xv_get_object_size(void *obj)
{
        struct block_header *blk;

        blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
        return blk->size;
}
EXPORT_SYMBOL_GPL(xv_get_object_size);

/*
 * Returns total memory used by allocator (userdata + metadata)
 */
u64 xv_get_total_size_bytes(struct xv_pool *pool)
{
        return pool->total_pages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);