// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * This code builds two trees of free cluster extents.
 * The trees are sorted by start of extent and by length of extent.
 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in the trees.
 * In the extreme case, the code reads the on-disk bitmap to find free clusters.
 *
 */

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/kernel.h>

#include "ntfs.h"
#include "ntfs_fs.h"

/*
 * Maximum number of extents in tree.
 */
#define NTFS_MAX_WND_EXTENTS (32u * 1024u)

struct rb_node_key {
	struct rb_node node;
	size_t key;
};

struct e_node {
	struct rb_node_key start; /* Tree sorted by start. */
	struct rb_node_key count; /* Tree sorted by len. */
};

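/*
 * Each free extent is a member of both trees at once: 'start' keys the
 * position-ordered tree and 'count' keys the length-ordered tree. Any
 * insert, resize or removal therefore has to update both rb_node_key
 * members, or the two views of the free space diverge.
 */
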
static int wnd_rescan(struct wnd_bitmap *wnd);
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);

static struct kmem_cache *ntfs_enode_cachep;

int __init ntfs3_init_bitmap(void)
{
	ntfs_enode_cachep =
		kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	return ntfs_enode_cachep ? 0 : -ENOMEM;
}

void ntfs3_exit_bitmap(void)
{
	kmem_cache_destroy(ntfs_enode_cachep);
}

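/*
 * wnd_bits - Number of valid bits in window @i.
 *
 * Every window spans one fs block; only the last window may be partial.
 * E.g. with 4 KiB blocks each full window holds 32768 bits.
 */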
static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i)
{
	return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8;
}

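/*
 * A run of free bits may cross a window (fs block) boundary. 'prev_tail'
 * carries the length of the free run at the tail of the previous window,
 * so wnd_scan() can report fragments that start before @buf.
 */
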
/*
 * wnd_scan
 *
 * @b_pos and @b_len track the biggest free fragment found so far.
 * Scan range [wpos, wend) of window @buf.
 *
 * Return: Absolute bit position of a fragment of at least @to_alloc
 * bits, or -1 if none was found.
 */
static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
		       size_t to_alloc, size_t *prev_tail, size_t *b_pos,
		       size_t *b_len)
{
	while (wpos < wend) {
		size_t free_len;
		u32 free_bits, end;
		u32 used = find_next_zero_bit(buf, wend, wpos);

		if (used >= wend) {
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
			return -1;
		}

		if (used > wpos) {
			wpos = used;
			if (*b_len < *prev_tail) {
				*b_pos = wbit - *prev_tail;
				*b_len = *prev_tail;
			}

			*prev_tail = 0;
		}

		/*
		 * Now we have a fragment [wpos, wend) starting with a zero bit.
		 */
		end = wpos + to_alloc - *prev_tail;
		free_bits = find_next_bit(buf, min(end, wend), wpos);

		free_len = *prev_tail + free_bits - wpos;

		if (*b_len < free_len) {
			*b_pos = wbit + wpos - *prev_tail;
			*b_len = free_len;
		}

		if (free_len >= to_alloc)
			return wbit + wpos - *prev_tail;

		if (free_bits >= wend) {
			*prev_tail += free_bits - wpos;
			return -1;
		}

		wpos = free_bits + 1;

		*prev_tail = 0;
	}

	return -1;
}

/*
 * wnd_close - Frees all resources.
 */
void wnd_close(struct wnd_bitmap *wnd)
{
	struct rb_node *node, *next;

	kvfree(wnd->free_bits);
	run_close(&wnd->run);

	node = rb_first(&wnd->start_tree);

	while (node) {
		next = rb_next(node);
		rb_erase(node, &wnd->start_tree);
		kmem_cache_free(ntfs_enode_cachep,
				rb_entry(node, struct e_node, start.node));
		node = next;
	}
}

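/*
 * rb_lookup - Floor lookup in a tree of rb_node_key.
 *
 * Return: The node with key equal to @v if present, otherwise the node
 * with the largest key smaller than @v, or NULL if every key is greater.
 */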
static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *r = NULL;

	while (*p) {
		struct rb_node_key *k;

		k = rb_entry(*p, struct rb_node_key, node);
		if (v < k->key) {
			p = &(*p)->rb_left;
		} else if (v > k->key) {
			r = &k->node;
			p = &(*p)->rb_right;
		} else {
			return &k->node;
		}
	}

	return r;
}

/*
 * rb_insert_count - Helper function to insert into the 'count' tree.
 *
 * The tree is ordered by descending length (ties broken by ascending
 * start), so rb_first() always yields the biggest free extent.
 */
static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_ckey = e->count.key;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k =
			rb_entry(parent = *p, struct e_node, count.node);

		if (e_ckey > k->count.key) {
			p = &(*p)->rb_left;
		} else if (e_ckey < k->count.key) {
			p = &(*p)->rb_right;
		} else if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->count.node, parent, p);
	rb_insert_color(&e->count.node, root);
	return true;
}

/*
 * rb_insert_start - Helper function to insert into the 'start' tree.
 */
static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	size_t e_skey = e->start.key;

	while (*p) {
		struct e_node *k;

		parent = *p;

		k = rb_entry(parent, struct e_node, start.node);
		if (e_skey < k->start.key) {
			p = &(*p)->rb_left;
		} else if (e_skey > k->start.key) {
			p = &(*p)->rb_right;
		} else {
			WARN_ON(1);
			return false;
		}
	}

	rb_link_node(&e->start.node, parent, p);
	rb_insert_color(&e->start.node, root);
	return true;
}

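/*
 * A new free extent is merged with cached neighbours on both sides. If
 * the trees are already at NTFS_MAX_WND_EXTENTS, the smallest cached
 * extent is evicted instead and 'uptodated' drops to -1: from then on
 * the trees describe only a subset of the free space and the on-disk
 * bitmap remains the authority.
 */
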
/*
 * wnd_add_free_ext - Adds a new extent of free space.
 * @build: True when building the tree.
 */
static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
			     bool build)
{
	struct e_node *e, *e0 = NULL;
	size_t ib, end_in = bit + len;
	struct rb_node *n;

	if (build) {
		/* Use extent_min to filter too short extents. */
		if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
		    len <= wnd->extent_min) {
			wnd->uptodated = -1;
			return;
		}
	} else {
		/* Try to find extent before 'bit'. */
		n = rb_lookup(&wnd->start_tree, bit);

		if (!n) {
			n = rb_first(&wnd->start_tree);
		} else {
			e = rb_entry(n, struct e_node, start.node);
			n = rb_next(n);
			if (e->start.key + e->count.key == bit) {
				/* Remove left. */
				bit = e->start.key;
				len += e->count.key;
				rb_erase(&e->start.node, &wnd->start_tree);
				rb_erase(&e->count.node, &wnd->count_tree);
				wnd->count -= 1;
				e0 = e;
			}
		}

		while (n) {
			size_t next_end;

			e = rb_entry(n, struct e_node, start.node);
			next_end = e->start.key + e->count.key;
			if (e->start.key > end_in)
				break;

			/* Remove right. */
			n = rb_next(n);
			len += next_end - end_in;
			end_in = next_end;
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;

			if (!e0)
				e0 = e;
			else
				kmem_cache_free(ntfs_enode_cachep, e);
		}

		if (wnd->uptodated != 1) {
			/* Check bits before 'bit'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     bit < wnd->zone_end ? 0 : wnd->zone_end;

			while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
				bit -= 1;
				len += 1;
			}

			/* Check bits after 'end_in'. */
			ib = wnd->zone_bit == wnd->zone_end ||
			     end_in > wnd->zone_bit ? wnd->nbits : wnd->zone_bit;

			while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
				end_in += 1;
				len += 1;
			}
		}
	}
	/* Insert new fragment. */
	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		if (e0)
			kmem_cache_free(ntfs_enode_cachep, e0);

		wnd->uptodated = -1;

		/* Compare with smallest fragment. */
		n = rb_last(&wnd->count_tree);
		e = rb_entry(n, struct e_node, count.node);
		if (len <= e->count.key)
			goto out; /* Do not insert small fragments. */

		if (build) {
			struct e_node *e2;

			n = rb_prev(n);
			e2 = rb_entry(n, struct e_node, count.node);
			/* Smallest fragment will be 'e2->count.key'. */
			wnd->extent_min = e2->count.key;
		}

		/* Replace smallest fragment by new one. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e) {
			wnd->uptodated = -1;
			goto out;
		}

		if (build && len <= wnd->extent_min)
			wnd->extent_min = len;
	}
	e->start.key = bit;
	e->count.key = len;
	if (len > wnd->extent_max)
		wnd->extent_max = len;

	rb_insert_start(&wnd->start_tree, e);
	rb_insert_count(&wnd->count_tree, e);
	wnd->count += 1;

out:;
}

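/*
 * Removing a used range may split a cached extent in two; when the trees
 * are full, the split-off tail can be dropped instead of inserted, and
 * 'uptodated' falls back to -1.
 */
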
/*
 * wnd_remove_free_ext - Remove a run from the cached free space.
 */
static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
{
	struct rb_node *n, *n3;
	struct e_node *e, *e3;
	size_t end_in = bit + len;
	size_t end3, end, new_key, new_len, max_new_len;

	/* Try to find extent before 'bit'. */
	n = rb_lookup(&wnd->start_tree, bit);

	if (!n)
		return;

	e = rb_entry(n, struct e_node, start.node);
	end = e->start.key + e->count.key;

	new_key = new_len = 0;
	len = e->count.key;

	/* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
	if (e->start.key > bit)
		;
	else if (end_in <= end) {
		/* Range [bit,end_in) inside 'e'. */
		new_key = end_in;
		new_len = end - end_in;
		len = bit - e->start.key;
	} else if (bit > end) {
		bool bmax = false;

		n3 = rb_next(n);

		while (n3) {
			e3 = rb_entry(n3, struct e_node, start.node);
			if (e3->start.key >= end_in)
				break;

			if (e3->count.key == wnd->extent_max)
				bmax = true;

			end3 = e3->start.key + e3->count.key;
			if (end3 > end_in) {
				e3->start.key = end_in;
				rb_erase(&e3->count.node, &wnd->count_tree);
				e3->count.key = end3 - end_in;
				rb_insert_count(&wnd->count_tree, e3);
				break;
			}

			n3 = rb_next(n3);
			rb_erase(&e3->start.node, &wnd->start_tree);
			rb_erase(&e3->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e3);
		}
		if (!bmax)
			return;
		n3 = rb_first(&wnd->count_tree);
		wnd->extent_max =
			n3 ? rb_entry(n3, struct e_node, count.node)->count.key
			   : 0;
		return;
	}

	if (e->count.key != wnd->extent_max) {
		;
	} else if (rb_prev(&e->count.node)) {
		;
	} else {
		n3 = rb_next(&e->count.node);
		max_new_len = max(len, new_len);
		if (!n3) {
			wnd->extent_max = max_new_len;
		} else {
			e3 = rb_entry(n3, struct e_node, count.node);
			wnd->extent_max = max(e3->count.key, max_new_len);
		}
	}

	if (!len) {
		if (new_len) {
			e->start.key = new_key;
			rb_erase(&e->count.node, &wnd->count_tree);
			e->count.key = new_len;
			rb_insert_count(&wnd->count_tree, e);
		} else {
			rb_erase(&e->start.node, &wnd->start_tree);
			rb_erase(&e->count.node, &wnd->count_tree);
			wnd->count -= 1;
			kmem_cache_free(ntfs_enode_cachep, e);
		}
		goto out;
	}
	rb_erase(&e->count.node, &wnd->count_tree);
	e->count.key = len;
	rb_insert_count(&wnd->count_tree, e);

	if (!new_len)
		goto out;

	if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
		wnd->uptodated = -1;

		/* Get minimal extent. */
		e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
			     count.node);
		if (e->count.key > new_len)
			goto out;

		/* Replace minimum. */
		rb_erase(&e->start.node, &wnd->start_tree);
		rb_erase(&e->count.node, &wnd->count_tree);
		wnd->count -= 1;
	} else {
		e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
		if (!e)
			wnd->uptodated = -1;
	}

	if (e) {
		e->start.key = new_key;
		e->count.key = new_len;
		rb_insert_start(&wnd->start_tree, e);
		rb_insert_count(&wnd->count_tree, e);
		wnd->count += 1;
	}

out:
	if (!wnd->count && wnd->uptodated != 1)
		wnd_rescan(wnd);
}

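/*
 * The rescan walks every bitmap block once, counting free bits per
 * window and feeding each free run into wnd_add_free_ext(..., true).
 * Runs that cross a window boundary are stitched together via
 * 'prev_tail'.
 */
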
/*
 * wnd_rescan - Scan the whole bitmap. Used during initialization.
 */
static int wnd_rescan(struct wnd_bitmap *wnd)
{
	int err = 0;
	size_t prev_tail = 0;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u64 lbo, len = 0;
	u32 blocksize = sb->s_blocksize;
	u8 cluster_bits = sbi->cluster_bits;
	u32 wbits = 8 * sb->s_blocksize;
	u32 used, frb;
	const ulong *buf;
	size_t wpos, wbit, iw, vbo;
	struct buffer_head *bh = NULL;
	CLST lcn, clen;

	wnd->uptodated = 0;
	wnd->extent_max = 0;
	wnd->extent_min = MINUS_ONE_T;
	wnd->total_zeroes = 0;

	vbo = 0;

	for (iw = 0; iw < wnd->nwnd; iw++) {
		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (wnd->inited) {
			if (!wnd->free_bits[iw]) {
				/* All ones. */
				if (prev_tail) {
					wnd_add_free_ext(wnd,
							 vbo * 8 - prev_tail,
							 prev_tail, true);
					prev_tail = 0;
				}
				goto next_wnd;
			}
			if (wbits == wnd->free_bits[iw]) {
				/* All zeroes. */
				prev_tail += wbits;
				wnd->total_zeroes += wbits;
				goto next_wnd;
			}
		}

		if (!len) {
			u32 off = vbo & sbi->cluster_mask;

			if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
					      &lcn, &clen, NULL)) {
				err = -ENOENT;
				goto out;
			}

			lbo = ((u64)lcn << cluster_bits) + off;
			len = ((u64)clen << cluster_bits) - off;
		}

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh) {
			err = -EIO;
			goto out;
		}

		buf = (ulong *)bh->b_data;

		used = __bitmap_weight(buf, wbits);
		if (used < wbits) {
			frb = wbits - used;
			wnd->free_bits[iw] = frb;
			wnd->total_zeroes += frb;
		}

		wpos = 0;
		wbit = vbo * 8;

		if (wbit + wbits > wnd->nbits)
			wbits = wnd->nbits - wbit;

		do {
			used = find_next_zero_bit(buf, wbits, wpos);

			if (used > wpos && prev_tail) {
				wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
						 prev_tail, true);
				prev_tail = 0;
			}

			wpos = used;

			if (wpos >= wbits) {
				/* No free blocks. */
				prev_tail = 0;
				break;
			}

			frb = find_next_bit(buf, wbits, wpos);
			if (frb >= wbits) {
				/* Keep last free block. */
				prev_tail += frb - wpos;
				break;
			}

			wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
					 frb + prev_tail - wpos, true);

			/* Skip free block and first '1'. */
			wpos = frb + 1;
			/* Reset previous tail. */
			prev_tail = 0;
		} while (wpos < wbits);

next_wnd:

		if (bh)
			put_bh(bh);
		bh = NULL;

		vbo += blocksize;
		if (len) {
			len -= blocksize;
			lbo += blocksize;
		}
	}

	/* Add last block. */
	if (prev_tail)
		wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);

	/*
	 * Before the init cycle 'uptodated' was 0.
	 * If any errors or limits occur during initialization then
	 * 'uptodated' will be -1.
	 * If 'uptodated' is still 0 then the tree is really updated.
	 */
	if (!wnd->uptodated)
		wnd->uptodated = 1;

	if (wnd->zone_bit != wnd->zone_end) {
		size_t zlen = wnd->zone_end - wnd->zone_bit;

		wnd->zone_end = wnd->zone_bit;
		wnd_zone_set(wnd, wnd->zone_bit, zlen);
	}

out:
	return err;
}

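/*
 * A minimal usage sketch, assuming the caller has already unpacked the
 * bitmap's runlist into wnd->run (as the mount path does for $Bitmap
 * and the $MFT bitmap) before calling wnd_init():
 *
 *	err = wnd_init(&sbi->used.bitmap, sb, total_clusters);
 *	if (err)
 *		return err;
 */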
int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
{
	int err;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;

	init_rwsem(&wnd->rw_lock);

	wnd->sb = sb;
	wnd->nbits = nbits;
	wnd->total_zeroes = nbits;
	wnd->extent_max = MINUS_ONE_T;
	wnd->zone_bit = wnd->zone_end = 0;
	wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
	wnd->bits_last = nbits & (wbits - 1);
	if (!wnd->bits_last)
		wnd->bits_last = wbits;

	wnd->free_bits =
		kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO);

	if (!wnd->free_bits)
		return -ENOMEM;

	err = wnd_rescan(wnd);
	if (err)
		return err;

	wnd->inited = true;

	return 0;
}

/*
 * wnd_map - Read the requested bitmap window via ntfs_bread().
 */
static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
{
	size_t vbo;
	CLST lcn, clen;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi;
	struct buffer_head *bh;
	u64 lbo;

	sbi = sb->s_fs_info;
	vbo = (u64)iw << sb->s_blocksize_bits;

	if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
			      NULL)) {
		return ERR_PTR(-ENOENT);
	}

	lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);

	bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
	if (!bh)
		return ERR_PTR(-EIO);

	return bh;
}

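/*
 * Bit 'bit' lives in window 'bit >> (sb->s_blocksize_bits + 3)' at
 * offset 'bit & (8 * sb->s_blocksize - 1)'. E.g. with 4 KiB blocks
 * (32768 bits per window), cluster 40000 is bit 7232 of window 1.
 */
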
/*
 * wnd_set_free - Mark the range [bit, bit + bits) as free.
 */
int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	u32 wbits = 8 * sb->s_blocksize;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_clear(buf, wbit, op);

		wnd->free_bits[iw] += op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes += op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	wnd_add_free_ext(wnd, bit, bits0, false);

	return err;
}

/*
 * wnd_set_used - Mark the range [bit, bit + bits) as used.
 */
int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	int err = 0;
	struct super_block *sb = wnd->sb;
	size_t bits0 = bits;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	struct buffer_head *bh;

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;
		ulong *buf;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}
		buf = (ulong *)bh->b_data;

		lock_buffer(bh);

		__bitmap_set(buf, wbit, op);
		wnd->free_bits[iw] -= op;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		put_bh(bh);

		wnd->total_zeroes -= op;
		bits -= op;
		wbit = 0;
		iw += 1;
	}

	if (!RB_EMPTY_ROOT(&wnd->start_tree))
		wnd_remove_free_ext(wnd, bit, bits0);

	return err;
}

/*
 * wnd_is_free_hlp
 *
 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
 */
static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);

	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		if (wbits != wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				return false;

			ret = are_bits_clear((ulong *)bh->b_data, wbit, op);

			put_bh(bh);
			if (!ret)
				return false;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}

	return true;
}

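/*
 * Fast path: if a single cached extent covers [bit, bit + bits), the
 * answer is known without touching the on-disk bitmap; otherwise fall
 * back to wnd_is_free_hlp().
 */
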
/*
 * wnd_is_free
 *
 * Return: True if all clusters [bit, bit+bits) are free.
 */
bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret;
	struct rb_node *n;
	size_t end;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	n = rb_lookup(&wnd->start_tree, bit);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);

	end = e->start.key + e->count.key;

	if (bit < end && bit + bits <= end)
		return true;

use_wnd:
	ret = wnd_is_free_hlp(wnd, bit, bits);

	return ret;
}

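/*
 * Fast path: if any cached free extent overlaps [bit, bit + bits), the
 * range cannot be fully used; only otherwise is the bitmap read.
 */
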
/*
 * wnd_is_used
 *
 * Return: True if all clusters [bit, bit+bits) are used.
 */
bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
{
	bool ret = false;
	struct super_block *sb = wnd->sb;
	size_t iw = bit >> (sb->s_blocksize_bits + 3);
	u32 wbits = 8 * sb->s_blocksize;
	u32 wbit = bit & (wbits - 1);
	size_t end;
	struct rb_node *n;
	struct e_node *e;

	if (RB_EMPTY_ROOT(&wnd->start_tree))
		goto use_wnd;

	end = bit + bits;
	n = rb_lookup(&wnd->start_tree, end - 1);
	if (!n)
		goto use_wnd;

	e = rb_entry(n, struct e_node, start.node);
	if (e->start.key + e->count.key > bit)
		return false;

use_wnd:
	while (iw < wnd->nwnd && bits) {
		u32 tail, op;

		if (unlikely(iw + 1 == wnd->nwnd))
			wbits = wnd->bits_last;

		tail = wbits - wbit;
		op = min_t(u32, tail, bits);

		if (wnd->free_bits[iw]) {
			bool ret;
			struct buffer_head *bh = wnd_map(wnd, iw);

			if (IS_ERR(bh))
				goto out;

			ret = are_bits_set((ulong *)bh->b_data, wbit, op);
			put_bh(bh);
			if (!ret)
				goto out;
		}

		bits -= op;
		wbit = 0;
		iw += 1;
	}
	ret = true;

out:
	return ret;
}

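/*
 * Allocation strategy: try the cached extents first (an exact hit at
 * 'hint', then the biggest cached extent); only when the trees are
 * known to be incomplete is the on-disk bitmap scanned window by
 * window, wrapping around from 'hint' to the start.
 */
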
/*
 * wnd_find - Look for free space.
 *
 * - flags - BITMAP_FIND_XXX flags
 *
 * Return: 0 if not found.
 */
size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
		size_t flags, size_t *allocated)
{
	struct super_block *sb;
	u32 wbits, wpos, wzbit, wzend;
	size_t fnd, max_alloc, b_len, b_pos;
	size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
	size_t to_alloc0 = to_alloc;
	const ulong *buf;
	const struct e_node *e;
	const struct rb_node *pr, *cr;
	u8 log2_bits;
	bool fbits_valid;
	struct buffer_head *bh;

	/* Fast checking for available free space. */
	if (flags & BITMAP_FIND_FULL) {
		size_t zeroes = wnd_zeroes(wnd);

		zeroes -= wnd->zone_end - wnd->zone_bit;
		if (zeroes < to_alloc0)
			goto no_space;

		if (to_alloc0 > wnd->extent_max)
			goto no_space;
	} else {
		if (to_alloc > wnd->extent_max)
			to_alloc = wnd->extent_max;
	}

	if (wnd->zone_bit <= hint && hint < wnd->zone_end)
		hint = wnd->zone_end;

	max_alloc = wnd->nbits;
	b_len = b_pos = 0;

	if (hint >= max_alloc)
		hint = 0;

	if (RB_EMPTY_ROOT(&wnd->start_tree)) {
		if (wnd->uptodated == 1) {
			/* Extents tree is updated -> No free space. */
			goto no_space;
		}
		goto scan_bitmap;
	}

	e = NULL;
	if (!hint)
		goto allocate_biggest;

	/* Use hint: Enumerate extents by start >= hint. */
	pr = NULL;
	cr = wnd->start_tree.rb_node;

	for (;;) {
		e = rb_entry(cr, struct e_node, start.node);

		if (e->start.key == hint)
			break;

		if (e->start.key < hint) {
			pr = cr;
			cr = cr->rb_right;
			if (!cr)
				break;
			continue;
		}

		cr = cr->rb_left;
		if (!cr) {
			e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
			break;
		}
	}

	if (!e)
		goto allocate_biggest;

	if (e->start.key + e->count.key > hint) {
		/* We have found an extent with 'hint' inside. */
		size_t len = e->start.key + e->count.key - hint;

		if (len >= to_alloc && hint + to_alloc <= max_alloc) {
			fnd = hint;
			goto found;
		}

		if (!(flags & BITMAP_FIND_FULL)) {
			if (len > to_alloc)
				len = to_alloc;

			if (hint + len <= max_alloc) {
				fnd = hint;
				to_alloc = len;
				goto found;
			}
		}
	}

allocate_biggest:
	/* Allocate from biggest free extent. */
	e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
	if (e->count.key != wnd->extent_max)
		wnd->extent_max = e->count.key;

	if (e->count.key < max_alloc) {
		if (e->count.key >= to_alloc) {
			;
		} else if (flags & BITMAP_FIND_FULL) {
			if (e->count.key < to_alloc0) {
				/* Biggest free block is less than requested. */
				goto no_space;
			}
			to_alloc = e->count.key;
		} else if (-1 != wnd->uptodated) {
			to_alloc = e->count.key;
		} else {
			/* Check if we can use more bits. */
			size_t op, max_check;
			struct rb_root start_tree;

			memcpy(&start_tree, &wnd->start_tree,
			       sizeof(struct rb_root));
			memset(&wnd->start_tree, 0, sizeof(struct rb_root));

			max_check = e->start.key + to_alloc;
			if (max_check > max_alloc)
				max_check = max_alloc;
			for (op = e->start.key + e->count.key; op < max_check;
			     op++) {
				if (!wnd_is_free(wnd, op, 1))
					break;
			}
			memcpy(&wnd->start_tree, &start_tree,
			       sizeof(struct rb_root));
			to_alloc = op - e->start.key;
		}

		/* Prepare to return. */
		fnd = e->start.key;
		if (e->start.key + to_alloc > max_alloc)
			to_alloc = max_alloc - e->start.key;
		goto found;
	}

	if (wnd->uptodated == 1) {
		/* Extents tree is updated -> no free space. */
		goto no_space;
	}

	b_len = e->count.key;
	b_pos = e->start.key;

scan_bitmap:
	sb = wnd->sb;
	log2_bits = sb->s_blocksize_bits + 3;

	/* At most two ranges [hint, max_alloc) + [0, hint). */
Again:

	/* TODO: Optimize request for case nbits > wbits. */
	iw = hint >> log2_bits;
	wbits = sb->s_blocksize * 8;
	wpos = hint & (wbits - 1);
	prev_tail = 0;
	fbits_valid = true;

	if (max_alloc == wnd->nbits) {
		nwnd = wnd->nwnd;
	} else {
		size_t t = max_alloc + wbits - 1;

		nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
	}

	/* Enumerate all windows. */
	for (; iw < nwnd; iw++) {
		wbit = iw << log2_bits;

		if (!wnd->free_bits[iw]) {
			if (prev_tail > b_len) {
				b_pos = wbit - prev_tail;
				b_len = prev_tail;
			}

			/* Skip fully used window. */
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		if (unlikely(iw + 1 == nwnd)) {
			if (max_alloc == wnd->nbits) {
				wbits = wnd->bits_last;
			} else {
				size_t t = max_alloc & (wbits - 1);

				if (t) {
					wbits = t;
					fbits_valid = false;
				}
			}
		}

		if (wnd->zone_end > wnd->zone_bit) {
			ebit = wbit + wbits;
			zbit = max(wnd->zone_bit, wbit);
			zend = min(wnd->zone_end, ebit);

			/* Here we have a window [wbit, ebit) and zone [zbit, zend). */
			if (zend <= zbit) {
				/* Zone does not overlap window. */
			} else {
				wzbit = zbit - wbit;
				wzend = zend - wbit;

				/* Zone overlaps window. */
				if (wnd->free_bits[iw] == wzend - wzbit) {
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				/* Scan two ranges window: [wbit, zbit) and [zend, ebit). */
				bh = wnd_map(wnd, iw);

				if (IS_ERR(bh)) {
					/* TODO: Error */
					prev_tail = 0;
					wpos = 0;
					continue;
				}

				buf = (ulong *)bh->b_data;

				/* Scan range [wbit, zbit). */
				if (wpos < wzbit) {
					/* Scan range [wpos, zbit). */
					fnd = wnd_scan(buf, wbit, wpos, wzbit,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				prev_tail = 0;

				/* Scan range [zend, ebit). */
				if (wzend < wbits) {
					fnd = wnd_scan(buf, wbit,
						       max(wzend, wpos), wbits,
						       to_alloc, &prev_tail,
						       &b_pos, &b_len);
					if (fnd != MINUS_ONE_T) {
						put_bh(bh);
						goto found;
					}
				}

				wpos = 0;
				put_bh(bh);
				continue;
			}
		}

		/* Current window does not overlap zone. */
		if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
			/* Window is empty. */
			if (prev_tail + wbits >= to_alloc) {
				fnd = wbit + wpos - prev_tail;
				goto found;
			}

			/* Increase 'prev_tail' and process next window. */
			prev_tail += wbits;
			wpos = 0;
			continue;
		}

		/* Read window. */
		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			// TODO: Error.
			prev_tail = 0;
			wpos = 0;
			continue;
		}

		buf = (ulong *)bh->b_data;

		/* Scan range [wpos, wbits). */
		fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
			       &b_pos, &b_len);
		put_bh(bh);
		if (fnd != MINUS_ONE_T)
			goto found;
	}

	if (b_len < prev_tail) {
		/* The last fragment. */
		b_len = prev_tail;
		b_pos = max_alloc - prev_tail;
	}

	if (hint) {
		/*
		 * We have scanned range [hint, max_alloc).
		 * Prepare to scan range [0, hint + to_alloc).
		 */
		size_t nextmax = hint + to_alloc;

		if (likely(nextmax >= hint) && nextmax < max_alloc)
			max_alloc = nextmax;
		hint = 0;
		goto Again;
	}

	if (!b_len)
		goto no_space;

	wnd->extent_max = b_len;

	if (flags & BITMAP_FIND_FULL)
		goto no_space;

	fnd = b_pos;
	to_alloc = b_len;

found:
	if (flags & BITMAP_FIND_MARK_AS_USED) {
		/* TODO: Optimize remove extent (pass 'e'?). */
		if (wnd_set_used(wnd, fnd, to_alloc))
			goto no_space;
	} else if (wnd->extent_max != MINUS_ONE_T &&
		   to_alloc > wnd->extent_max) {
		wnd->extent_max = to_alloc;
	}

	*allocated = fnd;
	return to_alloc;

no_space:
	return 0;
}

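/*
 * Used when the $MFT bitmap grows: the new tail [old_bits, new_bits) is
 * zeroed (marked free) one block at a time and then published to the
 * trees as a single free extent.
 */
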
/*
 * wnd_extend - Extend bitmap ($MFT bitmap).
 */
int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
{
	int err;
	struct super_block *sb = wnd->sb;
	struct ntfs_sb_info *sbi = sb->s_fs_info;
	u32 blocksize = sb->s_blocksize;
	u32 wbits = blocksize * 8;
	u32 b0, new_last;
	size_t bits, iw, new_wnd;
	size_t old_bits = wnd->nbits;
	u16 *new_free;

	if (new_bits <= old_bits)
		return -EINVAL;

	/* Align to 8 byte boundary. */
	new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
	new_last = new_bits & (wbits - 1);
	if (!new_last)
		new_last = wbits;

	if (new_wnd != wnd->nwnd) {
		new_free = kvmalloc_array(new_wnd, sizeof(u16), GFP_NOFS);
		if (!new_free)
			return -ENOMEM;

		if (new_free != wnd->free_bits)
			memcpy(new_free, wnd->free_bits,
			       wnd->nwnd * sizeof(u16));
		memset(new_free + wnd->nwnd, 0,
		       (new_wnd - wnd->nwnd) * sizeof(u16));
		kvfree(wnd->free_bits);
		wnd->free_bits = new_free;
	}

	/* Zero bits [old_bits,new_bits). */
	bits = new_bits - old_bits;
	b0 = old_bits & (wbits - 1);

	for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
		u32 op;
		size_t frb;
		u64 vbo, lbo, bytes;
		struct buffer_head *bh;
		ulong *buf;

		if (iw + 1 == new_wnd)
			wbits = new_last;

		op = b0 + bits > wbits ? wbits - b0 : bits;
		vbo = (u64)iw * blocksize;

		err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
		if (err)
			break;

		bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
		if (!bh)
			return -EIO;

		lock_buffer(bh);
		buf = (ulong *)bh->b_data;

		__bitmap_clear(buf, b0, blocksize * 8 - b0);
		frb = wbits - __bitmap_weight(buf, wbits);
		wnd->total_zeroes += frb - wnd->free_bits[iw];
		wnd->free_bits[iw] = frb;

		set_buffer_uptodate(bh);
		mark_buffer_dirty(bh);
		unlock_buffer(bh);
		/* err = sync_dirty_buffer(bh); */
		put_bh(bh);

		b0 = 0;
		bits -= op;
	}

	wnd->nbits = new_bits;
	wnd->nwnd = new_wnd;
	wnd->bits_last = new_last;

	wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);

	return 0;
}

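/*
 * The zone is a reserved range of clusters (e.g. the MFT zone) that
 * wnd_find() skips during normal allocation. Setting a new zone returns
 * the old one to the free-extent cache and removes the new one from it.
 */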
void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
{
	size_t zlen;

	zlen = wnd->zone_end - wnd->zone_bit;
	if (zlen)
		wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);

	if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
		wnd_remove_free_ext(wnd, lcn, len);

	wnd->zone_bit = lcn;
	wnd->zone_end = lcn + len;
}

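/*
 * Backend for the FITRIM ioctl: walks the cluster bitmap and issues a
 * discard for every free run of at least range->minlen bytes.
 */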
int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
{
	int err = 0;
	struct super_block *sb = sbi->sb;
	struct wnd_bitmap *wnd = &sbi->used.bitmap;
	u32 wbits = 8 * sb->s_blocksize;
	CLST len = 0, lcn = 0, done = 0;
	CLST minlen = bytes_to_cluster(sbi, range->minlen);
	CLST lcn_from = bytes_to_cluster(sbi, range->start);
	size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
	u32 wbit = lcn_from & (wbits - 1);
	const ulong *buf;
	CLST lcn_to;

	if (!minlen)
		minlen = 1;

	if (range->len == (u64)-1)
		lcn_to = wnd->nbits;
	else
		lcn_to = bytes_to_cluster(sbi, range->start + range->len);

	down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);

	for (; iw < wnd->nwnd; iw++, wbit = 0) {
		CLST lcn_wnd = iw * wbits;
		struct buffer_head *bh;

		if (lcn_wnd > lcn_to)
			break;

		if (!wnd->free_bits[iw])
			continue;

		if (iw + 1 == wnd->nwnd)
			wbits = wnd->bits_last;

		if (lcn_wnd + wbits > lcn_to)
			wbits = lcn_to - lcn_wnd;

		bh = wnd_map(wnd, iw);
		if (IS_ERR(bh)) {
			err = PTR_ERR(bh);
			break;
		}

		buf = (ulong *)bh->b_data;

		for (; wbit < wbits; wbit++) {
			if (!test_bit(wbit, buf)) {
				if (!len)
					lcn = lcn_wnd + wbit;
				len += 1;
				continue;
			}
			if (len >= minlen) {
				err = ntfs_discard(sbi, lcn, len);
				if (err) {
					put_bh(bh);
					goto out;
				}
				done += len;
			}
			len = 0;
		}
		put_bh(bh);
	}

	/* Process the last fragment. */
	if (len >= minlen) {
		err = ntfs_discard(sbi, lcn, len);
		if (err)
			goto out;
		done += len;
	}

out:
	range->len = (u64)done << sbi->cluster_bits;

	up_read(&wnd->rw_lock);

	return err;
}