1 /*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
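
/*
 * Typical userspace setup (a sketch only; the device name, size and
 * algorithm below are examples, see
 * Documentation/admin-guide/blockdev/zram.rst for the full interface):
 *
 *   modprobe zram num_devices=1
 *   echo lzo-rle > /sys/block/zram0/comp_algorithm
 *   echo 1G > /sys/block/zram0/disksize
 *   mkswap /dev/zram0 && swapon -p 100 /dev/zram0
 */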
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/backing-dev.h>
29 #include <linux/string.h>
30 #include <linux/vmalloc.h>
31 #include <linux/err.h>
32 #include <linux/idr.h>
33 #include <linux/sysfs.h>
34 #include <linux/debugfs.h>
35 #include <linux/cpuhotplug.h>
36 #include <linux/part_stat.h>
37
38 #ifdef CONFIG_ZRAM_GROUP
39 #include <linux/memcontrol.h>
40 #endif
41
42 #include "zram_drv.h"
43
44 static DEFINE_IDR(zram_index_idr);
45 /* idr index must be protected */
46 static DEFINE_MUTEX(zram_index_mutex);
47
48 static int zram_major;
49 static const char *default_compressor = "lzo-rle";
50
51 /* Module params (documentation at end) */
52 static unsigned int num_devices = 1;
53 /*
54 * Pages that compress to sizes equal to or greater than this are stored
55 * uncompressed in memory.
56 */
57 static size_t huge_class_size;
58
59 static const struct block_device_operations zram_devops;
60 static const struct block_device_operations zram_wb_devops;
61
62 static void zram_free_page(struct zram *zram, size_t index);
63 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
64 u32 index, int offset, struct bio *bio);
65
66 static inline bool init_done(struct zram *zram)
67 {
68 return zram->disksize;
69 }
70
71 static inline struct zram *dev_to_zram(struct device *dev)
72 {
73 return (struct zram *)dev_to_disk(dev)->private_data;
74 }
75
76 static inline void zram_set_element(struct zram *zram, u32 index,
77 unsigned long element)
78 {
79 zram->table[index].element = element;
80 }
81
82 static unsigned long zram_get_element(struct zram *zram, u32 index)
83 {
84 return zram->table[index].element;
85 }
86
87 static inline bool zram_allocated(struct zram *zram, u32 index)
88 {
89 return zram_get_obj_size(zram, index) ||
90 zram_test_flag(zram, index, ZRAM_SAME) ||
91 zram_test_flag(zram, index, ZRAM_WB);
92 }
93
94 #if PAGE_SIZE != 4096
95 static inline bool is_partial_io(struct bio_vec *bvec)
96 {
97 return bvec->bv_len != PAGE_SIZE;
98 }
99 #else
100 static inline bool is_partial_io(struct bio_vec *bvec)
101 {
102 return false;
103 }
104 #endif
105
106 /*
107 * Check if request is within bounds and aligned on zram logical blocks.
108 */
109 static inline bool valid_io_request(struct zram *zram,
110 sector_t start, unsigned int size)
111 {
112 u64 end, bound;
113
114 /* unaligned request */
115 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
116 return false;
117 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
118 return false;
119
120 end = start + (size >> SECTOR_SHIFT);
121 bound = zram->disksize >> SECTOR_SHIFT;
122 /* out of range */
123 if (unlikely(start >= bound || end > bound || start > end))
124 return false;
125
126 /* I/O request is valid */
127 return true;
128 }
129
130 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
131 {
132 *index += (*offset + bvec->bv_len) / PAGE_SIZE;
133 *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
134 }
135
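/*
 * Lock-free high-watermark update: retry the cmpxchg until the stored
 * maximum is at least @pages or we successfully raise it to @pages.
 */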
136 static inline void update_used_max(struct zram *zram,
137 const unsigned long pages)
138 {
139 unsigned long old_max, cur_max;
140
141 old_max = atomic_long_read(&zram->stats.max_used_pages);
142
143 do {
144 cur_max = old_max;
145 if (pages > cur_max)
146 old_max = atomic_long_cmpxchg(
147 &zram->stats.max_used_pages, cur_max, pages);
148 } while (old_max != cur_max);
149 }
150
151 static inline void zram_fill_page(void *ptr, unsigned long len,
152 unsigned long value)
153 {
154 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
155 memset_l(ptr, value, len / sizeof(unsigned long));
156 }
157
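/*
 * Detect pages whose content is one repeating machine word (e.g. an
 * all-zero page). Such pages are recorded as ZRAM_SAME with just that word
 * in the table entry instead of being handed to the compressor.
 */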
158 static bool page_same_filled(void *ptr, unsigned long *element)
159 {
160 unsigned long *page;
161 unsigned long val;
162 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
163
164 page = (unsigned long *)ptr;
165 val = page[0];
166
167 if (val != page[last_pos])
168 return false;
169
170 for (pos = 1; pos < last_pos; pos++) {
171 if (val != page[pos])
172 return false;
173 }
174
175 *element = val;
176
177 return true;
178 }
179
180 static ssize_t initstate_show(struct device *dev,
181 struct device_attribute *attr, char *buf)
182 {
183 u32 val;
184 struct zram *zram = dev_to_zram(dev);
185
186 down_read(&zram->init_lock);
187 val = init_done(zram);
188 up_read(&zram->init_lock);
189
190 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
191 }
192
193 static ssize_t disksize_show(struct device *dev,
194 struct device_attribute *attr, char *buf)
195 {
196 struct zram *zram = dev_to_zram(dev);
197
198 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
199 }
200
201 static ssize_t mem_limit_store(struct device *dev,
202 struct device_attribute *attr, const char *buf, size_t len)
203 {
204 u64 limit;
205 char *tmp;
206 struct zram *zram = dev_to_zram(dev);
207
208 limit = memparse(buf, &tmp);
209 if (buf == tmp) /* no chars parsed, invalid input */
210 return -EINVAL;
211
212 down_write(&zram->init_lock);
213 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
214 up_write(&zram->init_lock);
215
216 return len;
217 }
218
219 static ssize_t mem_used_max_store(struct device *dev,
220 struct device_attribute *attr, const char *buf, size_t len)
221 {
222 int err;
223 unsigned long val;
224 struct zram *zram = dev_to_zram(dev);
225
226 err = kstrtoul(buf, 10, &val);
227 if (err || val != 0)
228 return -EINVAL;
229
230 down_read(&zram->init_lock);
231 if (init_done(zram)) {
232 atomic_long_set(&zram->stats.max_used_pages,
233 zs_get_total_pages(zram->mem_pool));
234 }
235 up_read(&zram->init_lock);
236
237 return len;
238 }
239
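/*
 * Mark all allocated slots ZRAM_IDLE so a later "idle" writeback can pick
 * them up; only "all" is accepted by this implementation.
 */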
240 static ssize_t idle_store(struct device *dev,
241 struct device_attribute *attr, const char *buf, size_t len)
242 {
243 struct zram *zram = dev_to_zram(dev);
244 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
245 int index;
246
247 if (!sysfs_streq(buf, "all"))
248 return -EINVAL;
249
250 down_read(&zram->init_lock);
251 if (!init_done(zram)) {
252 up_read(&zram->init_lock);
253 return -EINVAL;
254 }
255
256 for (index = 0; index < nr_pages; index++) {
257 /*
258 * Do not mark a ZRAM_UNDER_WB slot as ZRAM_IDLE, to close the race.
259 * See the comment in writeback_store.
260 */
261 zram_slot_lock(zram, index);
262 if (zram_allocated(zram, index) &&
263 !zram_test_flag(zram, index, ZRAM_UNDER_WB))
264 zram_set_flag(zram, index, ZRAM_IDLE);
265 zram_slot_unlock(zram, index);
266 }
267
268 up_read(&zram->init_lock);
269
270 return len;
271 }
272
273 #ifdef CONFIG_ZRAM_WRITEBACK
274 static ssize_t writeback_limit_enable_store(struct device *dev,
275 struct device_attribute *attr, const char *buf, size_t len)
276 {
277 struct zram *zram = dev_to_zram(dev);
278 u64 val;
279 ssize_t ret = -EINVAL;
280
281 if (kstrtoull(buf, 10, &val))
282 return ret;
283
284 down_read(&zram->init_lock);
285 spin_lock(&zram->wb_limit_lock);
286 zram->wb_limit_enable = val;
287 spin_unlock(&zram->wb_limit_lock);
288 up_read(&zram->init_lock);
289 ret = len;
290
291 return ret;
292 }
293
294 static ssize_t writeback_limit_enable_show(struct device *dev,
295 struct device_attribute *attr, char *buf)
296 {
297 bool val;
298 struct zram *zram = dev_to_zram(dev);
299
300 down_read(&zram->init_lock);
301 spin_lock(&zram->wb_limit_lock);
302 val = zram->wb_limit_enable;
303 spin_unlock(&zram->wb_limit_lock);
304 up_read(&zram->init_lock);
305
306 return scnprintf(buf, PAGE_SIZE, "%d\n", val);
307 }
308
309 static ssize_t writeback_limit_store(struct device *dev,
310 struct device_attribute *attr, const char *buf, size_t len)
311 {
312 struct zram *zram = dev_to_zram(dev);
313 u64 val;
314 ssize_t ret = -EINVAL;
315
316 if (kstrtoull(buf, 10, &val))
317 return ret;
318
319 down_read(&zram->init_lock);
320 spin_lock(&zram->wb_limit_lock);
321 zram->bd_wb_limit = val;
322 spin_unlock(&zram->wb_limit_lock);
323 up_read(&zram->init_lock);
324 ret = len;
325
326 return ret;
327 }
328
329 static ssize_t writeback_limit_show(struct device *dev,
330 struct device_attribute *attr, char *buf)
331 {
332 u64 val;
333 struct zram *zram = dev_to_zram(dev);
334
335 down_read(&zram->init_lock);
336 spin_lock(&zram->wb_limit_lock);
337 val = zram->bd_wb_limit;
338 spin_unlock(&zram->wb_limit_lock);
339 up_read(&zram->init_lock);
340
341 return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
342 }
343
344 static void reset_bdev(struct zram *zram)
345 {
346 struct block_device *bdev;
347
348 if (!zram->backing_dev)
349 return;
350
351 bdev = zram->bdev;
352 if (zram->old_block_size)
353 set_blocksize(bdev, zram->old_block_size);
354 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
355 /* hope filp_close flushes all of the IO */
356 filp_close(zram->backing_dev, NULL);
357 zram->backing_dev = NULL;
358 zram->old_block_size = 0;
359 zram->bdev = NULL;
360 zram->disk->fops = &zram_devops;
361 kvfree(zram->bitmap);
362 zram->bitmap = NULL;
363 }
364
365 static ssize_t backing_dev_show(struct device *dev,
366 struct device_attribute *attr, char *buf)
367 {
368 struct file *file;
369 struct zram *zram = dev_to_zram(dev);
370 char *p;
371 ssize_t ret;
372
373 down_read(&zram->init_lock);
374 file = zram->backing_dev;
375 if (!file) {
376 memcpy(buf, "none\n", 5);
377 up_read(&zram->init_lock);
378 return 5;
379 }
380
381 p = file_path(file, buf, PAGE_SIZE - 1);
382 if (IS_ERR(p)) {
383 ret = PTR_ERR(p);
384 goto out;
385 }
386
387 ret = strlen(p);
388 memmove(buf, p, ret);
389 buf[ret++] = '\n';
390 out:
391 up_read(&zram->init_lock);
392 return ret;
393 }
394
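/*
 * Attach a backing block device for writeback, e.g. (a sketch, assuming
 * zram0 and an otherwise unused /dev/sdX):
 *
 *   echo /dev/sdX > /sys/block/zram0/backing_dev
 *
 * Only allowed while the zram device is not yet initialized.
 */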
395 static ssize_t backing_dev_store(struct device *dev,
396 struct device_attribute *attr, const char *buf, size_t len)
397 {
398 char *file_name;
399 size_t sz;
400 struct file *backing_dev = NULL;
401 struct inode *inode;
402 struct address_space *mapping;
403 unsigned int bitmap_sz, old_block_size = 0;
404 unsigned long nr_pages, *bitmap = NULL;
405 struct block_device *bdev = NULL;
406 int err;
407 struct zram *zram = dev_to_zram(dev);
408
409 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
410 if (!file_name)
411 return -ENOMEM;
412
413 down_write(&zram->init_lock);
414 if (init_done(zram)) {
415 pr_info("Can't setup backing device for initialized device\n");
416 err = -EBUSY;
417 goto out;
418 }
419
420 strlcpy(file_name, buf, PATH_MAX);
421 /* ignore trailing newline */
422 sz = strlen(file_name);
423 if (sz > 0 && file_name[sz - 1] == '\n')
424 file_name[sz - 1] = 0x00;
425
426 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
427 if (IS_ERR(backing_dev)) {
428 err = PTR_ERR(backing_dev);
429 backing_dev = NULL;
430 goto out;
431 }
432
433 mapping = backing_dev->f_mapping;
434 inode = mapping->host;
435
436 /* Only block devices are supported at the moment */
437 if (!S_ISBLK(inode->i_mode)) {
438 err = -ENOTBLK;
439 goto out;
440 }
441
442 bdev = blkdev_get_by_dev(inode->i_rdev,
443 FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
444 if (IS_ERR(bdev)) {
445 err = PTR_ERR(bdev);
446 bdev = NULL;
447 goto out;
448 }
449
450 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
451 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
452 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
453 if (!bitmap) {
454 err = -ENOMEM;
455 goto out;
456 }
457
458 old_block_size = block_size(bdev);
459 err = set_blocksize(bdev, PAGE_SIZE);
460 if (err)
461 goto out;
462
463 reset_bdev(zram);
464
465 zram->old_block_size = old_block_size;
466 zram->bdev = bdev;
467 zram->backing_dev = backing_dev;
468 zram->bitmap = bitmap;
469 zram->nr_pages = nr_pages;
470 /*
471 * With the writeback feature, zram does asynchronous IO, so it is no
472 * longer a synchronous device; remove the synchronous io flag. Otherwise,
473 * the upper layer (e.g., swap) could wait for IO completion rather than
474 * (submit and return), which will make the system sluggish.
475 * Furthermore, when the IO function returns (e.g., swap_readpage), the
476 * upper layer expects the IO was done so it could deallocate the page
477 * freely, but in fact the IO is still in flight, which could finally cause
478 * a use-after-free when the IO is really done.
479 */
480 zram->disk->fops = &zram_wb_devops;
481 up_write(&zram->init_lock);
482
483 pr_info("setup backing device %s\n", file_name);
484 kfree(file_name);
485
486 return len;
487 out:
488 if (bitmap)
489 kvfree(bitmap);
490
491 if (bdev)
492 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
493
494 if (backing_dev)
495 filp_close(backing_dev, NULL);
496
497 up_write(&zram->init_lock);
498
499 kfree(file_name);
500
501 return err;
502 }
503
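/*
 * Reserve one free PAGE_SIZE-sized slot in the backing device bitmap;
 * returns the slot index, or 0 if the backing device is full.
 */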
504 static unsigned long alloc_block_bdev(struct zram *zram)
505 {
506 unsigned long blk_idx = 1;
507 retry:
508 /* skip bit 0 to avoid confusion with zram.handle == 0 */
509 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
510 if (blk_idx == zram->nr_pages)
511 return 0;
512
513 if (test_and_set_bit(blk_idx, zram->bitmap))
514 goto retry;
515
516 atomic64_inc(&zram->stats.bd_count);
517 return blk_idx;
518 }
519
520 static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
521 {
522 int was_set;
523
524 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
525 WARN_ON_ONCE(!was_set);
526 atomic64_dec(&zram->stats.bd_count);
527 }
528
529 static void zram_page_end_io(struct bio *bio)
530 {
531 struct page *page = bio_first_page_all(bio);
532
533 page_endio(page, op_is_write(bio_op(bio)),
534 blk_status_to_errno(bio->bi_status));
535 bio_put(bio);
536 }
537
538 /*
539 * Returns 1 if the submission is successful.
540 */
541 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
542 unsigned long entry, struct bio *parent)
543 {
544 struct bio *bio;
545
546 bio = bio_alloc(GFP_ATOMIC, 1);
547 if (!bio)
548 return -ENOMEM;
549
550 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
551 bio_set_dev(bio, zram->bdev);
552 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
553 bio_put(bio);
554 return -EIO;
555 }
556
557 if (!parent) {
558 bio->bi_opf = REQ_OP_READ;
559 bio->bi_end_io = zram_page_end_io;
560 } else {
561 bio->bi_opf = parent->bi_opf;
562 bio_chain(bio, parent);
563 }
564
565 submit_bio(bio);
566 return 1;
567 }
568
569 #define HUGE_WRITEBACK 1
570 #define IDLE_WRITEBACK 2
571
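/*
 * Write idle or huge compressed pages out to the backing device, freeing
 * the zsmalloc memory they occupied. Triggered from userspace, e.g.
 * (a sketch, assuming zram0):
 *
 *   echo all > /sys/block/zram0/idle
 *   echo idle > /sys/block/zram0/writeback
 */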
572 static ssize_t writeback_store(struct device *dev,
573 struct device_attribute *attr, const char *buf, size_t len)
574 {
575 struct zram *zram = dev_to_zram(dev);
576 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
577 unsigned long index;
578 struct bio bio;
579 struct bio_vec bio_vec;
580 struct page *page;
581 ssize_t ret = len;
582 int mode, err;
583 unsigned long blk_idx = 0;
584
585 if (sysfs_streq(buf, "idle"))
586 mode = IDLE_WRITEBACK;
587 else if (sysfs_streq(buf, "huge"))
588 mode = HUGE_WRITEBACK;
589 else
590 return -EINVAL;
591
592 down_read(&zram->init_lock);
593 if (!init_done(zram)) {
594 ret = -EINVAL;
595 goto release_init_lock;
596 }
597
598 if (!zram->backing_dev) {
599 ret = -ENODEV;
600 goto release_init_lock;
601 }
602
603 page = alloc_page(GFP_KERNEL);
604 if (!page) {
605 ret = -ENOMEM;
606 goto release_init_lock;
607 }
608
609 for (index = 0; index < nr_pages; index++) {
610 struct bio_vec bvec;
611
612 bvec.bv_page = page;
613 bvec.bv_len = PAGE_SIZE;
614 bvec.bv_offset = 0;
615
616 spin_lock(&zram->wb_limit_lock);
617 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
618 spin_unlock(&zram->wb_limit_lock);
619 ret = -EIO;
620 break;
621 }
622 spin_unlock(&zram->wb_limit_lock);
623
624 if (!blk_idx) {
625 blk_idx = alloc_block_bdev(zram);
626 if (!blk_idx) {
627 ret = -ENOSPC;
628 break;
629 }
630 }
631
632 zram_slot_lock(zram, index);
633 if (!zram_allocated(zram, index))
634 goto next;
635
636 if (zram_test_flag(zram, index, ZRAM_WB) ||
637 zram_test_flag(zram, index, ZRAM_SAME) ||
638 zram_test_flag(zram, index, ZRAM_UNDER_WB))
639 goto next;
640
641 if (mode == IDLE_WRITEBACK &&
642 !zram_test_flag(zram, index, ZRAM_IDLE))
643 goto next;
644 if (mode == HUGE_WRITEBACK &&
645 !zram_test_flag(zram, index, ZRAM_HUGE))
646 goto next;
647 /*
648 * Clearing ZRAM_UNDER_WB is the caller's duty.
649 * IOW, zram_free_page never clears it.
650 */
651 zram_set_flag(zram, index, ZRAM_UNDER_WB);
652 /* Needed to close the hugepage writeback race */
653 zram_set_flag(zram, index, ZRAM_IDLE);
654 zram_slot_unlock(zram, index);
655 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
656 zram_slot_lock(zram, index);
657 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
658 zram_clear_flag(zram, index, ZRAM_IDLE);
659 zram_slot_unlock(zram, index);
660 continue;
661 }
662
663 bio_init(&bio, &bio_vec, 1);
664 bio_set_dev(&bio, zram->bdev);
665 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
666 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
667
668 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
669 bvec.bv_offset);
670 /*
671 * XXX: A single page IO would be inefficient for write,
672 * but it is not bad as a starter.
673 */
674 err = submit_bio_wait(&bio);
675 if (err) {
676 zram_slot_lock(zram, index);
677 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
678 zram_clear_flag(zram, index, ZRAM_IDLE);
679 zram_slot_unlock(zram, index);
680 /*
681 * Return the last IO error unless every IO
682 * succeeded.
683 */
684 ret = err;
685 continue;
686 }
687
688 atomic64_inc(&zram->stats.bd_writes);
689 /*
690 * We released zram_slot_lock so need to check if the slot was
691 * changed. If there is freeing for the slot, we can catch it
692 * easily by zram_allocated.
693 * A subtle case is the slot is freed/reallocated/marked as
694 * ZRAM_IDLE again. To close the race, idle_store doesn't
695 * mark ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
696 * Thus, we could close the race by checking ZRAM_IDLE bit.
697 */
698 zram_slot_lock(zram, index);
699 if (!zram_allocated(zram, index) ||
700 !zram_test_flag(zram, index, ZRAM_IDLE)) {
701 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
702 zram_clear_flag(zram, index, ZRAM_IDLE);
703 goto next;
704 }
705
706 zram_free_page(zram, index);
707 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
708 zram_set_flag(zram, index, ZRAM_WB);
709 zram_set_element(zram, index, blk_idx);
710 blk_idx = 0;
711 atomic64_inc(&zram->stats.pages_stored);
712 spin_lock(&zram->wb_limit_lock);
713 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
714 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
715 spin_unlock(&zram->wb_limit_lock);
716 next:
717 zram_slot_unlock(zram, index);
718 }
719
720 if (blk_idx)
721 free_block_bdev(zram, blk_idx);
722 __free_page(page);
723 release_init_lock:
724 up_read(&zram->init_lock);
725
726 return ret;
727 }
728
729 struct zram_work {
730 struct work_struct work;
731 struct zram *zram;
732 unsigned long entry;
733 struct bio *bio;
734 struct bio_vec bvec;
735 };
736
737 #if PAGE_SIZE != 4096
738 static void zram_sync_read(struct work_struct *work)
739 {
740 struct zram_work *zw = container_of(work, struct zram_work, work);
741 struct zram *zram = zw->zram;
742 unsigned long entry = zw->entry;
743 struct bio *bio = zw->bio;
744
745 read_from_bdev_async(zram, &zw->bvec, entry, bio);
746 }
747
748 /*
749 * The block layer wants one ->submit_bio to be active at a time, so if we
750 * use chained IO with the parent IO in the same context, it's a deadlock. To avoid that,
751 * use a worker thread context.
752 */
753 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
754 unsigned long entry, struct bio *bio)
755 {
756 struct zram_work work;
757
758 work.bvec = *bvec;
759 work.zram = zram;
760 work.entry = entry;
761 work.bio = bio;
762
763 INIT_WORK_ONSTACK(&work.work, zram_sync_read);
764 queue_work(system_unbound_wq, &work.work);
765 flush_work(&work.work);
766 destroy_work_on_stack(&work.work);
767
768 return 1;
769 }
770 #else
771 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
772 unsigned long entry, struct bio *bio)
773 {
774 WARN_ON(1);
775 return -EIO;
776 }
777 #endif
778
779 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
780 unsigned long entry, struct bio *parent, bool sync)
781 {
782 atomic64_inc(&zram->stats.bd_reads);
783 if (sync)
784 return read_from_bdev_sync(zram, bvec, entry, parent);
785 else
786 return read_from_bdev_async(zram, bvec, entry, parent);
787 }
788 #else
789 static inline void reset_bdev(struct zram *zram) {};
790 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
791 unsigned long entry, struct bio *parent, bool sync)
792 {
793 return -EIO;
794 }
795
796 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
797 #endif
798
799 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
800
801 static struct dentry *zram_debugfs_root;
802
803 static void zram_debugfs_create(void)
804 {
805 zram_debugfs_root = debugfs_create_dir("zram", NULL);
806 }
807
808 static void zram_debugfs_destroy(void)
809 {
810 debugfs_remove_recursive(zram_debugfs_root);
811 }
812
813 static void zram_accessed(struct zram *zram, u32 index)
814 {
815 zram_clear_flag(zram, index, ZRAM_IDLE);
816 zram->table[index].ac_time = ktime_get_boottime();
817 }
818
819 static ssize_t read_block_state(struct file *file, char __user *buf,
820 size_t count, loff_t *ppos)
821 {
822 char *kbuf;
823 ssize_t index, written = 0;
824 struct zram *zram = file->private_data;
825 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
826 struct timespec64 ts;
827
828 kbuf = kvmalloc(count, GFP_KERNEL);
829 if (!kbuf)
830 return -ENOMEM;
831
832 down_read(&zram->init_lock);
833 if (!init_done(zram)) {
834 up_read(&zram->init_lock);
835 kvfree(kbuf);
836 return -EINVAL;
837 }
838
839 for (index = *ppos; index < nr_pages; index++) {
840 int copied;
841
842 zram_slot_lock(zram, index);
843 if (!zram_allocated(zram, index))
844 goto next;
845
846 ts = ktime_to_timespec64(zram->table[index].ac_time);
847 copied = snprintf(kbuf + written, count,
848 "%12zd %12lld.%06lu %c%c%c%c\n",
849 index, (s64)ts.tv_sec,
850 ts.tv_nsec / NSEC_PER_USEC,
851 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
852 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
853 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
854 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
855
856 if (count < copied) {
857 zram_slot_unlock(zram, index);
858 break;
859 }
860 written += copied;
861 count -= copied;
862 next:
863 zram_slot_unlock(zram, index);
864 *ppos += 1;
865 }
866
867 up_read(&zram->init_lock);
868 if (copy_to_user(buf, kbuf, written))
869 written = -EFAULT;
870 kvfree(kbuf);
871
872 return written;
873 }
874
875 static const struct file_operations proc_zram_block_state_op = {
876 .open = simple_open,
877 .read = read_block_state,
878 .llseek = default_llseek,
879 };
880
881 static void zram_debugfs_register(struct zram *zram)
882 {
883 if (!zram_debugfs_root)
884 return;
885
886 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
887 zram_debugfs_root);
888 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
889 zram, &proc_zram_block_state_op);
890 }
891
892 static void zram_debugfs_unregister(struct zram *zram)
893 {
894 debugfs_remove_recursive(zram->debugfs_dir);
895 }
896 #else
897 static void zram_debugfs_create(void) {};
898 static void zram_debugfs_destroy(void) {};
899 static void zram_accessed(struct zram *zram, u32 index)
900 {
901 zram_clear_flag(zram, index, ZRAM_IDLE);
902 };
903 static void zram_debugfs_register(struct zram *zram) {};
904 static void zram_debugfs_unregister(struct zram *zram) {};
905 #endif
906
907 /*
908 * We switched to per-cpu streams and this attr is not needed anymore.
909 * However, we will keep it around for some time, because:
910 * a) we may revert per-cpu streams in the future
911 * b) it's visible to user space and we need to follow our 2 years
912 * retirement rule; but we already have a number of 'soon to be
913 * altered' attrs, so max_comp_streams needs to wait for the next
914 * layoff cycle.
915 */
916 static ssize_t max_comp_streams_show(struct device *dev,
917 struct device_attribute *attr, char *buf)
918 {
919 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
920 }
921
922 static ssize_t max_comp_streams_store(struct device *dev,
923 struct device_attribute *attr, const char *buf, size_t len)
924 {
925 return len;
926 }
927
928 static ssize_t comp_algorithm_show(struct device *dev,
929 struct device_attribute *attr, char *buf)
930 {
931 size_t sz;
932 struct zram *zram = dev_to_zram(dev);
933
934 down_read(&zram->init_lock);
935 sz = zcomp_available_show(zram->compressor, buf);
936 up_read(&zram->init_lock);
937
938 return sz;
939 }
940
941 static ssize_t comp_algorithm_store(struct device *dev,
942 struct device_attribute *attr, const char *buf, size_t len)
943 {
944 struct zram *zram = dev_to_zram(dev);
945 char compressor[ARRAY_SIZE(zram->compressor)];
946 size_t sz;
947
948 strlcpy(compressor, buf, sizeof(compressor));
949 /* ignore trailing newline */
950 sz = strlen(compressor);
951 if (sz > 0 && compressor[sz - 1] == '\n')
952 compressor[sz - 1] = 0x00;
953
954 if (!zcomp_available_algorithm(compressor))
955 return -EINVAL;
956
957 down_write(&zram->init_lock);
958 if (init_done(zram)) {
959 up_write(&zram->init_lock);
960 pr_info("Can't change algorithm for initialized device\n");
961 return -EBUSY;
962 }
963
964 strcpy(zram->compressor, compressor);
965 up_write(&zram->init_lock);
966 return len;
967 }
968
969 static ssize_t compact_store(struct device *dev,
970 struct device_attribute *attr, const char *buf, size_t len)
971 {
972 struct zram *zram = dev_to_zram(dev);
973
974 down_read(&zram->init_lock);
975 if (!init_done(zram)) {
976 up_read(&zram->init_lock);
977 return -EINVAL;
978 }
979
980 zs_compact(zram->mem_pool);
981 up_read(&zram->init_lock);
982
983 return len;
984 }
985
986 static ssize_t io_stat_show(struct device *dev,
987 struct device_attribute *attr, char *buf)
988 {
989 struct zram *zram = dev_to_zram(dev);
990 ssize_t ret;
991
992 down_read(&zram->init_lock);
993 ret = scnprintf(buf, PAGE_SIZE,
994 "%8llu %8llu %8llu %8llu\n",
995 (u64)atomic64_read(&zram->stats.failed_reads),
996 (u64)atomic64_read(&zram->stats.failed_writes),
997 (u64)atomic64_read(&zram->stats.invalid_io),
998 (u64)atomic64_read(&zram->stats.notify_free));
999 up_read(&zram->init_lock);
1000
1001 return ret;
1002 }
1003
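/*
 * mm_stat columns (matching the scnprintf below): orig_data_size,
 * compr_data_size, mem_used_total, mem_limit and mem_used_max in bytes;
 * same_pages, pages_compacted and huge_pages as page counts.
 */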
1004 static ssize_t mm_stat_show(struct device *dev,
1005 struct device_attribute *attr, char *buf)
1006 {
1007 struct zram *zram = dev_to_zram(dev);
1008 struct zs_pool_stats pool_stats;
1009 u64 orig_size, mem_used = 0;
1010 long max_used;
1011 ssize_t ret;
1012
1013 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1014
1015 down_read(&zram->init_lock);
1016 if (init_done(zram)) {
1017 mem_used = zs_get_total_pages(zram->mem_pool);
1018 zs_pool_stats(zram->mem_pool, &pool_stats);
1019 }
1020
1021 orig_size = atomic64_read(&zram->stats.pages_stored);
1022 max_used = atomic_long_read(&zram->stats.max_used_pages);
1023
1024 ret = scnprintf(buf, PAGE_SIZE,
1025 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
1026 orig_size << PAGE_SHIFT,
1027 (u64)atomic64_read(&zram->stats.compr_data_size),
1028 mem_used << PAGE_SHIFT,
1029 zram->limit_pages << PAGE_SHIFT,
1030 max_used << PAGE_SHIFT,
1031 (u64)atomic64_read(&zram->stats.same_pages),
1032 atomic_long_read(&pool_stats.pages_compacted),
1033 (u64)atomic64_read(&zram->stats.huge_pages));
1034 up_read(&zram->init_lock);
1035
1036 return ret;
1037 }
1038
1039 #ifdef CONFIG_ZRAM_WRITEBACK
1040 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1041 static ssize_t bd_stat_show(struct device *dev,
1042 struct device_attribute *attr, char *buf)
1043 {
1044 struct zram *zram = dev_to_zram(dev);
1045 ssize_t ret;
1046
1047 down_read(&zram->init_lock);
1048 ret = scnprintf(buf, PAGE_SIZE,
1049 "%8llu %8llu %8llu\n",
1050 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1051 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1052 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1053 up_read(&zram->init_lock);
1054
1055 return ret;
1056 }
1057 #endif
1058
1059 static ssize_t debug_stat_show(struct device *dev,
1060 struct device_attribute *attr, char *buf)
1061 {
1062 int version = 1;
1063 struct zram *zram = dev_to_zram(dev);
1064 ssize_t ret;
1065
1066 down_read(&zram->init_lock);
1067 ret = scnprintf(buf, PAGE_SIZE,
1068 "version: %d\n%8llu %8llu\n",
1069 version,
1070 (u64)atomic64_read(&zram->stats.writestall),
1071 (u64)atomic64_read(&zram->stats.miss_free));
1072 up_read(&zram->init_lock);
1073
1074 return ret;
1075 }
1076
1077 static DEVICE_ATTR_RO(io_stat);
1078 static DEVICE_ATTR_RO(mm_stat);
1079 #ifdef CONFIG_ZRAM_WRITEBACK
1080 static DEVICE_ATTR_RO(bd_stat);
1081 #endif
1082 static DEVICE_ATTR_RO(debug_stat);
1083
1084 #ifdef CONFIG_ZRAM_GROUP
1085 static ssize_t group_show(struct device *dev, struct device_attribute *attr, char *buf)
1086 {
1087 struct zram *zram = dev_to_zram(dev);
1088
1089 down_read(&zram->init_lock);
1090 if (zram->zgrp_ctrl == ZGRP_NONE)
1091 strcpy(buf, "disable\n");
1092 else if (zram->zgrp_ctrl == ZGRP_TRACK)
1093 strcpy(buf, "readonly\n");
1094 #ifdef CONFIG_ZRAM_GROUP_WRITEBACK
1095 else if (zram->zgrp_ctrl == ZGRP_WRITE)
1096 strcpy(buf, "readwrite\n");
1097 #endif
1098 up_read(&zram->init_lock);
1099
1100 return strlen(buf);
1101 }
1102
1103 static ssize_t group_store(struct device *dev, struct device_attribute *attr,
1104 const char *buf, size_t len)
1105 {
1106 struct zram *zram = dev_to_zram(dev);
1107 int ret;
1108 #ifdef CONFIG_ZRAM_GROUP_DEBUG
1109 u32 op, gid, index;
1110
1111 ret = sscanf(buf, "%u %u %u", &op, &index, &gid);
1112 if (ret == 3) {
1113 pr_info("op[%u] index[%u] gid[%u].\n", op, index, gid);
1114 group_debug(zram, op, index, gid);
1115 return len;
1116 }
1117 #endif
1118
1119 ret = len;
1120 down_write(&zram->init_lock);
1121 if (init_done(zram)) {
1122 pr_info("Can't setup group ctrl for initialized device!\n");
1123 ret = -EBUSY;
1124 goto out;
1125 }
1126 if (!strcmp(buf, "disable\n"))
1127 zram->zgrp_ctrl = ZGRP_NONE;
1128 else if (!strcmp(buf, "readonly\n"))
1129 zram->zgrp_ctrl = ZGRP_TRACK;
1130 #ifdef CONFIG_ZRAM_GROUP_WRITEBACK
1131 else if (!strcmp(buf, "readwrite\n"))
1132 zram->zgrp_ctrl = ZGRP_WRITE;
1133 #endif
1134 else
1135 ret = -EINVAL;
1136 out:
1137 up_write(&zram->init_lock);
1138
1139 return ret;
1140 }
1141 #endif
1142
1143 static void zram_meta_free(struct zram *zram, u64 disksize)
1144 {
1145 size_t num_pages = disksize >> PAGE_SHIFT;
1146 size_t index;
1147
1148 /* Free all pages that are still in this zram device */
1149 for (index = 0; index < num_pages; index++)
1150 zram_free_page(zram, index);
1151
1152 zs_destroy_pool(zram->mem_pool);
1153 vfree(zram->table);
1154 #ifdef CONFIG_ZRAM_GROUP
1155 zram_group_deinit(zram);
1156 #endif
1157 }
1158
1159 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1160 {
1161 size_t num_pages;
1162
1163 num_pages = disksize >> PAGE_SHIFT;
1164 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1165 if (!zram->table)
1166 return false;
1167
1168 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1169 if (!zram->mem_pool) {
1170 vfree(zram->table);
1171 return false;
1172 }
1173
1174 if (!huge_class_size)
1175 huge_class_size = zs_huge_class_size(zram->mem_pool);
1176 #ifdef CONFIG_ZRAM_GROUP
1177 zram_group_init(zram, num_pages);
1178 #endif
1179
1180 return true;
1181 }
1182
1183 /*
1184 * To protect concurrent access to the same index entry,
1185 * caller should hold this table index entry's bit_spinlock to
1186 * indicate this index entry is being accessed.
1187 */
1188 static void zram_free_page(struct zram *zram, size_t index)
1189 {
1190 unsigned long handle;
1191
1192 #ifdef CONFIG_ZRAM_GROUP
1193 zram_group_untrack_obj(zram, index);
1194 #endif
1195
1196 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
1197 zram->table[index].ac_time = 0;
1198 #endif
1199 if (zram_test_flag(zram, index, ZRAM_IDLE))
1200 zram_clear_flag(zram, index, ZRAM_IDLE);
1201
1202 if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1203 zram_clear_flag(zram, index, ZRAM_HUGE);
1204 atomic64_dec(&zram->stats.huge_pages);
1205 }
1206
1207 if (zram_test_flag(zram, index, ZRAM_WB)) {
1208 zram_clear_flag(zram, index, ZRAM_WB);
1209 free_block_bdev(zram, zram_get_element(zram, index));
1210 goto out;
1211 }
1212
1213 /*
1214 * No memory is allocated for same element filled pages.
1215 * Simply clear the same-page flag.
1216 */
1217 if (zram_test_flag(zram, index, ZRAM_SAME)) {
1218 zram_clear_flag(zram, index, ZRAM_SAME);
1219 atomic64_dec(&zram->stats.same_pages);
1220 goto out;
1221 }
1222
1223 handle = zram_get_handle(zram, index);
1224 if (!handle)
1225 return;
1226
1227 zs_free(zram->mem_pool, handle);
1228
1229 atomic64_sub(zram_get_obj_size(zram, index),
1230 &zram->stats.compr_data_size);
1231 out:
1232 atomic64_dec(&zram->stats.pages_stored);
1233 zram_set_handle(zram, index, 0);
1234 zram_set_obj_size(zram, index, 0);
1235 WARN_ON_ONCE(zram->table[index].flags &
1236 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1237 }
1238
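/*
 * Read the slot at @index into @page: ZRAM_WB slots are read back from the
 * backing device, ZRAM_SAME slots are filled directly, and everything else
 * is decompressed (or copied verbatim for PAGE_SIZE objects) from zsmalloc.
 */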
1239 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1240 struct bio *bio, bool partial_io)
1241 {
1242 struct zcomp_strm *zstrm;
1243 unsigned long handle;
1244 unsigned int size;
1245 void *src, *dst;
1246 int ret;
1247
1248 zram_slot_lock(zram, index);
1249 if (zram_test_flag(zram, index, ZRAM_WB)) {
1250 struct bio_vec bvec;
1251
1252 zram_slot_unlock(zram, index);
1253
1254 bvec.bv_page = page;
1255 bvec.bv_len = PAGE_SIZE;
1256 bvec.bv_offset = 0;
1257 return read_from_bdev(zram, &bvec,
1258 zram_get_element(zram, index),
1259 bio, partial_io);
1260 }
1261 #ifdef CONFIG_ZRAM_GROUP_WRITEBACK
1262 if (!bio) {
1263 ret = zram_group_fault_obj(zram, index);
1264 if (ret) {
1265 zram_slot_unlock(zram, index);
1266 return ret;
1267 }
1268 }
1269
1270 if (zram_test_flag(zram, index, ZRAM_GWB)) {
1271 zram_slot_unlock(zram, index);
1272 return -EIO;
1273 }
1274 #endif
1275 handle = zram_get_handle(zram, index);
1276 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1277 unsigned long value;
1278 void *mem;
1279
1280 value = handle ? zram_get_element(zram, index) : 0;
1281 mem = kmap_atomic(page);
1282 zram_fill_page(mem, PAGE_SIZE, value);
1283 kunmap_atomic(mem);
1284 zram_slot_unlock(zram, index);
1285 return 0;
1286 }
1287
1288 size = zram_get_obj_size(zram, index);
1289
1290 if (size != PAGE_SIZE)
1291 zstrm = zcomp_stream_get(zram->comp);
1292
1293 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1294 if (size == PAGE_SIZE) {
1295 dst = kmap_atomic(page);
1296 memcpy(dst, src, PAGE_SIZE);
1297 kunmap_atomic(dst);
1298 ret = 0;
1299 } else {
1300 dst = kmap_atomic(page);
1301 ret = zcomp_decompress(zstrm, src, size, dst);
1302 kunmap_atomic(dst);
1303 zcomp_stream_put(zram->comp);
1304 }
1305 zs_unmap_object(zram->mem_pool, handle);
1306 zram_slot_unlock(zram, index);
1307
1308 /* Should NEVER happen. Return bio error if it does. */
1309 if (WARN_ON(ret))
1310 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1311
1312 return ret;
1313 }
1314
1315 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1316 u32 index, int offset, struct bio *bio)
1317 {
1318 int ret;
1319 struct page *page;
1320
1321 page = bvec->bv_page;
1322 if (is_partial_io(bvec)) {
1323 /* Use a temporary buffer to decompress the page */
1324 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1325 if (!page)
1326 return -ENOMEM;
1327 }
1328
1329 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1330 if (unlikely(ret))
1331 goto out;
1332
1333 if (is_partial_io(bvec)) {
1334 void *dst = kmap_atomic(bvec->bv_page);
1335 void *src = kmap_atomic(page);
1336
1337 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1338 kunmap_atomic(src);
1339 kunmap_atomic(dst);
1340 }
1341 out:
1342 if (is_partial_io(bvec))
1343 __free_page(page);
1344
1345 return ret;
1346 }
1347
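/*
 * Compress @bvec's page and store it at @index: same-filled pages become
 * ZRAM_SAME entries, pages that don't compress below huge_class_size are
 * stored uncompressed and flagged ZRAM_HUGE, everything else is stored
 * compressed in zsmalloc.
 */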
1348 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1349 u32 index, struct bio *bio)
1350 {
1351 int ret = 0;
1352 unsigned long alloced_pages;
1353 unsigned long handle = 0;
1354 unsigned int comp_len = 0;
1355 void *src, *dst, *mem;
1356 struct zcomp_strm *zstrm;
1357 struct page *page = bvec->bv_page;
1358 unsigned long element = 0;
1359 enum zram_pageflags flags = 0;
1360
1361 mem = kmap_atomic(page);
1362 if (page_same_filled(mem, &element)) {
1363 kunmap_atomic(mem);
1364 /* Free memory associated with this sector now. */
1365 flags = ZRAM_SAME;
1366 atomic64_inc(&zram->stats.same_pages);
1367 goto out;
1368 }
1369 kunmap_atomic(mem);
1370
1371 compress_again:
1372 zstrm = zcomp_stream_get(zram->comp);
1373 src = kmap_atomic(page);
1374 ret = zcomp_compress(zstrm, src, &comp_len);
1375 kunmap_atomic(src);
1376
1377 if (unlikely(ret)) {
1378 zcomp_stream_put(zram->comp);
1379 pr_err("Compression failed! err=%d\n", ret);
1380 zs_free(zram->mem_pool, handle);
1381 return ret;
1382 }
1383
1384 if (comp_len >= huge_class_size)
1385 comp_len = PAGE_SIZE;
1386 /*
1387 * handle allocation has 2 paths:
1388 * a) fast path is executed with preemption disabled (for
1389 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1390 * since we can't sleep;
1391 * b) slow path enables preemption and attempts to allocate
1392 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
1393 * put per-cpu compression stream and, thus, to re-do
1394 * the compression once handle is allocated.
1395 *
1396 * if we have a 'non-null' handle here then we are coming
1397 * from the slow path and handle has already been allocated.
1398 */
1399 if (!handle)
1400 handle = zs_malloc(zram->mem_pool, comp_len,
1401 __GFP_KSWAPD_RECLAIM |
1402 __GFP_NOWARN |
1403 __GFP_HIGHMEM |
1404 __GFP_MOVABLE);
1405 if (!handle) {
1406 zcomp_stream_put(zram->comp);
1407 atomic64_inc(&zram->stats.writestall);
1408 handle = zs_malloc(zram->mem_pool, comp_len,
1409 GFP_NOIO | __GFP_HIGHMEM |
1410 __GFP_MOVABLE);
1411 if (handle)
1412 goto compress_again;
1413 return -ENOMEM;
1414 }
1415
1416 alloced_pages = zs_get_total_pages(zram->mem_pool);
1417 update_used_max(zram, alloced_pages);
1418
1419 if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1420 zcomp_stream_put(zram->comp);
1421 zs_free(zram->mem_pool, handle);
1422 return -ENOMEM;
1423 }
1424
1425 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1426
1427 src = zstrm->buffer;
1428 if (comp_len == PAGE_SIZE)
1429 src = kmap_atomic(page);
1430 memcpy(dst, src, comp_len);
1431 if (comp_len == PAGE_SIZE)
1432 kunmap_atomic(src);
1433
1434 zcomp_stream_put(zram->comp);
1435 zs_unmap_object(zram->mem_pool, handle);
1436 atomic64_add(comp_len, &zram->stats.compr_data_size);
1437 out:
1438 /*
1439 * Free memory associated with this sector
1440 * before overwriting unused sectors.
1441 */
1442 zram_slot_lock(zram, index);
1443 zram_free_page(zram, index);
1444
1445 if (comp_len == PAGE_SIZE) {
1446 zram_set_flag(zram, index, ZRAM_HUGE);
1447 atomic64_inc(&zram->stats.huge_pages);
1448 }
1449
1450 if (flags) {
1451 zram_set_flag(zram, index, flags);
1452 zram_set_element(zram, index, element);
1453 } else {
1454 zram_set_handle(zram, index, handle);
1455 zram_set_obj_size(zram, index, comp_len);
1456 }
1457 #ifdef CONFIG_ZRAM_GROUP
1458 zram_group_track_obj(zram, index, page->mem_cgroup);
1459 #endif
1460 zram_slot_unlock(zram, index);
1461
1462 /* Update stats */
1463 atomic64_inc(&zram->stats.pages_stored);
1464 return ret;
1465 }
1466
1467 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1468 u32 index, int offset, struct bio *bio)
1469 {
1470 int ret;
1471 struct page *page = NULL;
1472 void *src;
1473 struct bio_vec vec;
1474
1475 vec = *bvec;
1476 if (is_partial_io(bvec)) {
1477 void *dst;
1478 /*
1479 * This is a partial IO. We need to read the full page
1480 * before writing the changes.
1481 */
1482 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1483 if (!page)
1484 return -ENOMEM;
1485
1486 ret = __zram_bvec_read(zram, page, index, bio, true);
1487 if (ret)
1488 goto out;
1489
1490 src = kmap_atomic(bvec->bv_page);
1491 dst = kmap_atomic(page);
1492 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1493 kunmap_atomic(dst);
1494 kunmap_atomic(src);
1495
1496 vec.bv_page = page;
1497 vec.bv_len = PAGE_SIZE;
1498 vec.bv_offset = 0;
1499 }
1500
1501 ret = __zram_bvec_write(zram, &vec, index, bio);
1502 out:
1503 if (is_partial_io(bvec))
1504 __free_page(page);
1505 return ret;
1506 }
1507
1508 /*
1509 * zram_bio_discard - handler on discard request
1510 * @index: physical block index in PAGE_SIZE units
1511 * @offset: byte offset within physical block
1512 */
1513 static void zram_bio_discard(struct zram *zram, u32 index,
1514 int offset, struct bio *bio)
1515 {
1516 size_t n = bio->bi_iter.bi_size;
1517
1518 /*
1519 * zram manages data in physical block size units. Because logical block
1520 * size isn't identical to the physical block size on some arch, we
1521 * could get a discard request pointing to a specific offset within a
1522 * certain physical block. Although we can handle this request by
1523 * reading that physical block and decompressing and partially zeroing
1524 * and re-compressing and then re-storing it, this isn't reasonable
1525 * because our intent with a discard request is to save memory. So
1526 * skipping this logical block is appropriate here.
1527 */
1528 if (offset) {
1529 if (n <= (PAGE_SIZE - offset))
1530 return;
1531
1532 n -= (PAGE_SIZE - offset);
1533 index++;
1534 }
1535
1536 while (n >= PAGE_SIZE) {
1537 zram_slot_lock(zram, index);
1538 zram_free_page(zram, index);
1539 zram_slot_unlock(zram, index);
1540 atomic64_inc(&zram->stats.notify_free);
1541 index++;
1542 n -= PAGE_SIZE;
1543 }
1544 }
1545
1546 /*
1547 * Returns a negative errno if it has a problem. Otherwise returns 0 or 1.
1548 * Returns 0 if IO request was done synchronously
1549 * Returns 1 if IO request was successfully submitted.
1550 */
1551 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1552 int offset, unsigned int op, struct bio *bio)
1553 {
1554 int ret;
1555
1556 if (!op_is_write(op)) {
1557 atomic64_inc(&zram->stats.num_reads);
1558 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1559 flush_dcache_page(bvec->bv_page);
1560 } else {
1561 atomic64_inc(&zram->stats.num_writes);
1562 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1563 }
1564
1565 zram_slot_lock(zram, index);
1566 zram_accessed(zram, index);
1567 zram_slot_unlock(zram, index);
1568
1569 if (unlikely(ret < 0)) {
1570 if (!op_is_write(op))
1571 atomic64_inc(&zram->stats.failed_reads);
1572 else
1573 atomic64_inc(&zram->stats.failed_writes);
1574 }
1575
1576 return ret;
1577 }
1578
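/*
 * Split every segment at PAGE_SIZE boundaries and service the pieces one
 * zram page at a time; a failure on any piece fails the whole bio.
 */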
1579 static void __zram_make_request(struct zram *zram, struct bio *bio)
1580 {
1581 int offset;
1582 u32 index;
1583 struct bio_vec bvec;
1584 struct bvec_iter iter;
1585 unsigned long start_time;
1586
1587 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1588 offset = (bio->bi_iter.bi_sector &
1589 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1590
1591 switch (bio_op(bio)) {
1592 case REQ_OP_DISCARD:
1593 case REQ_OP_WRITE_ZEROES:
1594 zram_bio_discard(zram, index, offset, bio);
1595 bio_endio(bio);
1596 return;
1597 default:
1598 break;
1599 }
1600
1601 start_time = bio_start_io_acct(bio);
1602 bio_for_each_segment(bvec, bio, iter) {
1603 struct bio_vec bv = bvec;
1604 unsigned int unwritten = bvec.bv_len;
1605
1606 do {
1607 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1608 unwritten);
1609 if (zram_bvec_rw(zram, &bv, index, offset,
1610 bio_op(bio), bio) < 0) {
1611 bio->bi_status = BLK_STS_IOERR;
1612 break;
1613 }
1614
1615 bv.bv_offset += bv.bv_len;
1616 unwritten -= bv.bv_len;
1617
1618 update_position(&index, &offset, &bv);
1619 } while (unwritten);
1620 }
1621 bio_end_io_acct(bio, start_time);
1622 bio_endio(bio);
1623 }
1624
1625 /*
1626 * Handler function for all zram I/O requests.
1627 */
1628 static blk_qc_t zram_submit_bio(struct bio *bio)
1629 {
1630 struct zram *zram = bio->bi_disk->private_data;
1631
1632 if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1633 bio->bi_iter.bi_size)) {
1634 atomic64_inc(&zram->stats.invalid_io);
1635 goto error;
1636 }
1637
1638 __zram_make_request(zram, bio);
1639 return BLK_QC_T_NONE;
1640
1641 error:
1642 bio_io_error(bio);
1643 return BLK_QC_T_NONE;
1644 }
1645
1646 static void zram_slot_free_notify(struct block_device *bdev,
1647 unsigned long index)
1648 {
1649 struct zram *zram;
1650
1651 zram = bdev->bd_disk->private_data;
1652
1653 atomic64_inc(&zram->stats.notify_free);
1654 if (!zram_slot_trylock(zram, index)) {
1655 atomic64_inc(&zram->stats.miss_free);
1656 return;
1657 }
1658
1659 zram_free_page(zram, index);
1660 zram_slot_unlock(zram, index);
1661 }
1662
1663 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1664 struct page *page, unsigned int op)
1665 {
1666 int offset, ret;
1667 u32 index;
1668 struct zram *zram;
1669 struct bio_vec bv;
1670 unsigned long start_time;
1671
1672 if (PageTransHuge(page))
1673 return -ENOTSUPP;
1674 zram = bdev->bd_disk->private_data;
1675
1676 if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1677 atomic64_inc(&zram->stats.invalid_io);
1678 ret = -EINVAL;
1679 goto out;
1680 }
1681
1682 index = sector >> SECTORS_PER_PAGE_SHIFT;
1683 offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1684
1685 bv.bv_page = page;
1686 bv.bv_len = PAGE_SIZE;
1687 bv.bv_offset = 0;
1688
1689 start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
1690 ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
1691 disk_end_io_acct(bdev->bd_disk, op, start_time);
1692 out:
1693 /*
1694 * If the I/O fails, just return an error (i.e., non-zero) without
1695 * calling page_endio.
1696 * That makes the upper callers of rw_page (e.g., swap_readpage,
1697 * __swap_writepage) resubmit the I/O as a bio request, and
1698 * bio->bi_end_io does things to handle the error
1699 * (e.g., SetPageError, set_page_dirty and extra works).
1700 */
1701 if (unlikely(ret < 0))
1702 return ret;
1703
1704 switch (ret) {
1705 case 0:
1706 page_endio(page, op_is_write(op), 0);
1707 break;
1708 case 1:
1709 ret = 0;
1710 break;
1711 default:
1712 WARN_ON(1);
1713 }
1714 return ret;
1715 }
1716
1717 static void zram_reset_device(struct zram *zram)
1718 {
1719 struct zcomp *comp;
1720 u64 disksize;
1721
1722 down_write(&zram->init_lock);
1723
1724 zram->limit_pages = 0;
1725
1726 if (!init_done(zram)) {
1727 up_write(&zram->init_lock);
1728 return;
1729 }
1730
1731 comp = zram->comp;
1732 disksize = zram->disksize;
1733 zram->disksize = 0;
1734
1735 set_capacity(zram->disk, 0);
1736 part_stat_set_all(&zram->disk->part0, 0);
1737
1738 up_write(&zram->init_lock);
1739 /* I/O operations on all CPUs are done, so let's free */
1740 zram_meta_free(zram, disksize);
1741 memset(&zram->stats, 0, sizeof(zram->stats));
1742 zcomp_destroy(comp);
1743 reset_bdev(zram);
1744 }
1745
1746 static ssize_t disksize_store(struct device *dev,
1747 struct device_attribute *attr, const char *buf, size_t len)
1748 {
1749 u64 disksize;
1750 struct zcomp *comp;
1751 struct zram *zram = dev_to_zram(dev);
1752 int err;
1753
1754 disksize = memparse(buf, NULL);
1755 if (!disksize)
1756 return -EINVAL;
1757
1758 down_write(&zram->init_lock);
1759 if (init_done(zram)) {
1760 pr_info("Cannot change disksize for initialized device\n");
1761 err = -EBUSY;
1762 goto out_unlock;
1763 }
1764
1765 disksize = PAGE_ALIGN(disksize);
1766 if (!zram_meta_alloc(zram, disksize)) {
1767 err = -ENOMEM;
1768 goto out_unlock;
1769 }
1770
1771 comp = zcomp_create(zram->compressor);
1772 if (IS_ERR(comp)) {
1773 pr_err("Cannot initialise %s compressing backend\n",
1774 zram->compressor);
1775 err = PTR_ERR(comp);
1776 goto out_free_meta;
1777 }
1778
1779 zram->comp = comp;
1780 zram->disksize = disksize;
1781 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1782
1783 revalidate_disk_size(zram->disk, true);
1784 up_write(&zram->init_lock);
1785
1786 return len;
1787
1788 out_free_meta:
1789 zram_meta_free(zram, disksize);
1790 out_unlock:
1791 up_write(&zram->init_lock);
1792 return err;
1793 }
1794
1795 static ssize_t reset_store(struct device *dev,
1796 struct device_attribute *attr, const char *buf, size_t len)
1797 {
1798 int ret;
1799 unsigned short do_reset;
1800 struct zram *zram;
1801 struct block_device *bdev;
1802
1803 ret = kstrtou16(buf, 10, &do_reset);
1804 if (ret)
1805 return ret;
1806
1807 if (!do_reset)
1808 return -EINVAL;
1809
1810 zram = dev_to_zram(dev);
1811 bdev = bdget_disk(zram->disk, 0);
1812 if (!bdev)
1813 return -ENOMEM;
1814
1815 mutex_lock(&bdev->bd_mutex);
1816 /* Do not reset an active device or claimed device */
1817 if (bdev->bd_openers || zram->claim) {
1818 mutex_unlock(&bdev->bd_mutex);
1819 bdput(bdev);
1820 return -EBUSY;
1821 }
1822
1823 /* From now on, no one can open /dev/zram[0-9] */
1824 zram->claim = true;
1825 mutex_unlock(&bdev->bd_mutex);
1826
1827 /* Make sure all the pending I/O is finished */
1828 fsync_bdev(bdev);
1829 zram_reset_device(zram);
1830 revalidate_disk_size(zram->disk, true);
1831 bdput(bdev);
1832
1833 mutex_lock(&bdev->bd_mutex);
1834 zram->claim = false;
1835 mutex_unlock(&bdev->bd_mutex);
1836
1837 return len;
1838 }
1839
1840 static int zram_open(struct block_device *bdev, fmode_t mode)
1841 {
1842 int ret = 0;
1843 struct zram *zram;
1844
1845 WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1846
1847 zram = bdev->bd_disk->private_data;
1848 /* zram was claimed for reset, so the open request fails */
1849 if (zram->claim)
1850 ret = -EBUSY;
1851
1852 return ret;
1853 }
1854
1855 static const struct block_device_operations zram_devops = {
1856 .open = zram_open,
1857 .submit_bio = zram_submit_bio,
1858 .swap_slot_free_notify = zram_slot_free_notify,
1859 .rw_page = zram_rw_page,
1860 .owner = THIS_MODULE
1861 };
1862
1863 static const struct block_device_operations zram_wb_devops = {
1864 .open = zram_open,
1865 .submit_bio = zram_submit_bio,
1866 .swap_slot_free_notify = zram_slot_free_notify,
1867 .owner = THIS_MODULE
1868 };

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
#ifdef CONFIG_ZRAM_GROUP
static DEVICE_ATTR_RW(group);
#endif

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
#ifdef CONFIG_ZRAM_GROUP
	&dev_attr_group.attr,
#endif
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static const struct attribute_group *zram_disk_attr_groups[] = {
	&zram_disk_attr_group,
	NULL,
};
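
/*
 * Example (descriptive sketch, not part of the original source): the
 * per-device attributes above appear under /sys/block/zram<id>/ and a
 * device is typically configured by writing them before first use, e.g.
 *
 *	echo lzo-rle > /sys/block/zram0/comp_algorithm
 *	echo 1G > /sys/block/zram0/disksize
 *	mkswap /dev/zram0 && swapon /dev/zram0
 *
 * The compression algorithm must be chosen before disksize is set, since
 * disksize_store() initializes the compression backend and rejects further
 * comp_algorithm changes on an initialized device.
 */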

/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif
	queue = blk_alloc_queue(NUMA_NO_NODE);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if the logical
	 * block size is identical to the physical block size (PAGE_SIZE).
	 * But if it is different, we will skip discarding some parts of
	 * logical blocks in the part of the request range which isn't
	 * aligned to the physical block size. So we can't ensure that all
	 * discarded logical blocks are zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
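
	/*
	 * Descriptive note (not part of the original source):
	 * ZRAM_LOGICAL_BLOCK_SIZE is 4 KiB (see zram_drv.h), so write-zeroes
	 * support is only advertised on 4K-page systems; with larger pages
	 * (e.g. 64K) a partially aligned discard could leave data behind,
	 * as explained in the comment above.
	 */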

	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
	device_add_disk(NULL, zram->disk, zram_disk_attr_groups);

	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	zram_debugfs_unregister(zram);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);
	blk_cleanup_queue(zram->disk->queue);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns that
 * device's device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);
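
/*
 * Example (descriptive sketch, not part of the original source): reading
 * the attribute both creates a device and reports its id, e.g.
 *
 *	cat /sys/class/zram-control/hot_add
 *	2
 *
 * after which /dev/zram2 exists and can be configured through its sysfs
 * attributes. The 0400 mode restricts the read to root, since a plain
 * read has side effects.
 */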

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
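
/*
 * Example (descriptive sketch, not part of the original source): writing a
 * device id removes the corresponding idle device, e.g.
 *
 *	echo 2 > /sys/class/zram-control/hot_remove
 *
 * The write fails with -EBUSY if the device is still open or claimed (see
 * zram_remove() above) and with -ENODEV if no such device exists.
 */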

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name = "zram-control",
	.owner = THIS_MODULE,
	.class_groups = zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
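
/*
 * Example (descriptive sketch, not part of the original source): the module
 * parameter above controls how many devices are created at load time, e.g.
 *
 *	modprobe zram num_devices=4
 *
 * creates /dev/zram0 .. /dev/zram3; additional devices can still be added
 * later through /sys/class/zram-control/hot_add.
 */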

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");