• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * lcnalloc.c - Cluster (de)allocation code.  Part of the Linux-NTFS project.
4   *
5   * Copyright (c) 2004-2005 Anton Altaparmakov
6   */
7  
8  #ifdef NTFS_RW
9  
10  #include <linux/pagemap.h>
11  
12  #include "lcnalloc.h"
13  #include "debug.h"
14  #include "bitmap.h"
15  #include "inode.h"
16  #include "volume.h"
17  #include "attrib.h"
18  #include "malloc.h"
19  #include "aops.h"
20  #include "ntfs.h"
21  
22  /**
23   * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
24   * @vol:	mounted ntfs volume on which to free the clusters
25   * @rl:		runlist describing the clusters to free
26   *
27   * Free all the clusters described by the runlist @rl on the volume @vol.  In
28   * the case of an error being returned, at least some of the clusters were not
29   * freed.
30   *
31   * Return 0 on success and -errno on error.
32   *
33   * Locking: - The volume lcn bitmap must be locked for writing on entry and is
34   *	      left locked on return.
35   */
ntfs_cluster_free_from_rl_nolock(ntfs_volume * vol,const runlist_element * rl)36  int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
37  		const runlist_element *rl)
38  {
39  	struct inode *lcnbmp_vi = vol->lcnbmp_ino;
40  	int ret = 0;
41  
42  	ntfs_debug("Entering.");
43  	if (!rl)
44  		return 0;
45  	for (; rl->length; rl++) {
46  		int err;
47  
48  		if (rl->lcn < 0)
49  			continue;
50  		err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
51  		if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
52  			ret = err;
53  	}
54  	ntfs_debug("Done.");
55  	return ret;
56  }
57  
58  /**
59   * ntfs_cluster_alloc - allocate clusters on an ntfs volume
60   * @vol:	mounted ntfs volume on which to allocate the clusters
61   * @start_vcn:	vcn to use for the first allocated cluster
62   * @count:	number of clusters to allocate
63   * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
64   * @zone:	zone from which to allocate the clusters
65   * @is_extension:	if 'true', this is an attribute extension
66   *
67   * Allocate @count clusters preferably starting at cluster @start_lcn or at the
68   * current allocator position if @start_lcn is -1, on the mounted ntfs volume
69   * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
70   * MFT_ZONE for allocation of clusters for the master file table, i.e. the
71   * $MFT/$DATA attribute.
72   *
73   * @start_vcn specifies the vcn of the first allocated cluster.  This makes
74   * merging the resulting runlist with the old runlist easier.
75   *
76   * If @is_extension is 'true', the caller is allocating clusters to extend an
77   * attribute and if it is 'false', the caller is allocating clusters to fill a
78   * hole in an attribute.  Practically the difference is that if @is_extension
79   * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
80   * @is_extension is 'false' the runlist will be terminated with
81   * LCN_RL_NOT_MAPPED.
82   *
83   * You need to check the return value with IS_ERR().  If this is false, the
84   * function was successful and the return value is a runlist describing the
85   * allocated cluster(s).  If IS_ERR() is true, the function failed and
86   * PTR_ERR() gives you the error code.
87   *
88   * Notes on the allocation algorithm
89   * =================================
90   *
91   * There are two data zones.  First is the area between the end of the mft zone
92   * and the end of the volume, and second is the area between the start of the
93   * volume and the start of the mft zone.  On unmodified/standard NTFS 1.x
94   * volumes, the second data zone does not exist due to the mft zone being
95   * expanded to cover the start of the volume in order to reserve space for the
96   * mft bitmap attribute.
97   *
98   * This is not the prettiest function but the complexity stems from the need of
99   * implementing the mft vs data zoned approach and from the fact that we have
100   * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
101   * need to cope with crossing over boundaries of two buffers.  Further, the
102   * fact that the allocator allows for caller supplied hints as to the location
103   * of where allocation should begin and the fact that the allocator keeps track
104   * of where in the data zones the next natural allocation should occur,
105   * contribute to the complexity of the function.  But it should all be
106   * worthwhile, because this allocator should: 1) be a full implementation of
107   * the MFT zone approach used by Windows NT, 2) cause reduction in
108   * fragmentation, and 3) be speedy in allocations (the code is not optimized
109   * for speed, but the algorithm is, so further speed improvements are probably
110   * possible).
111   *
112   * FIXME: We should be monitoring cluster allocation and increment the MFT zone
113   * size dynamically but this is something for the future.  We will just cause
114   * heavier fragmentation by not doing it and I am not even sure Windows would
115   * grow the MFT zone dynamically, so it might even be correct not to do this.
116   * The overhead in doing dynamic MFT zone expansion would be very large and
117   * unlikely worth the effort. (AIA)
118   *
119   * TODO: I have added in double the required zone position pointer wrap around
120   * logic which can be optimized to having only one of the two logic sets.
121   * However, having the double logic will work fine, but if we have only one of
122   * the sets and we get it wrong somewhere, then we get into trouble, so
123   * removing the duplicate logic requires _very_ careful consideration of _all_
124   * possible code paths.  So at least for now, I am leaving the double logic -
125   * better safe than sorry... (AIA)
126   *
127   * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked
128   *	      on return.
129   *	    - This function takes the volume lcn bitmap lock for writing and
130   *	      modifies the bitmap contents.
131   */
ntfs_cluster_alloc(ntfs_volume * vol,const VCN start_vcn,const s64 count,const LCN start_lcn,const NTFS_CLUSTER_ALLOCATION_ZONES zone,const bool is_extension)132  runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
133  		const s64 count, const LCN start_lcn,
134  		const NTFS_CLUSTER_ALLOCATION_ZONES zone,
135  		const bool is_extension)
136  {
137  	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
138  	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
139  	s64 clusters;
140  	loff_t i_size;
141  	struct inode *lcnbmp_vi;
142  	runlist_element *rl = NULL;
143  	struct address_space *mapping;
144  	struct page *page = NULL;
145  	u8 *buf, *byte;
146  	int err = 0, rlpos, rlsize, buf_size;
147  	u8 pass, done_zones, search_zone, need_writeback = 0, bit;
148  
149  	ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn "
150  			"0x%llx, zone %s_ZONE.", (unsigned long long)start_vcn,
151  			(unsigned long long)count,
152  			(unsigned long long)start_lcn,
153  			zone == MFT_ZONE ? "MFT" : "DATA");
154  	BUG_ON(!vol);
155  	lcnbmp_vi = vol->lcnbmp_ino;
156  	BUG_ON(!lcnbmp_vi);
157  	BUG_ON(start_vcn < 0);
158  	BUG_ON(count < 0);
159  	BUG_ON(start_lcn < -1);
160  	BUG_ON(zone < FIRST_ZONE);
161  	BUG_ON(zone > LAST_ZONE);
162  
163  	/* Return NULL if @count is zero. */
164  	if (!count)
165  		return NULL;
166  	/* Take the lcnbmp lock for writing. */
167  	down_write(&vol->lcnbmp_lock);
168  	/*
169  	 * If no specific @start_lcn was requested, use the current data zone
170  	 * position, otherwise use the requested @start_lcn but make sure it
171  	 * lies outside the mft zone.  Also set done_zones to 0 (no zones done)
172  	 * and pass depending on whether we are starting inside a zone (1) or
173  	 * at the beginning of a zone (2).  If requesting from the MFT_ZONE,
174  	 * we either start at the current position within the mft zone or at
175  	 * the specified position.  If the latter is out of bounds then we start
176  	 * at the beginning of the MFT_ZONE.
177  	 */
178  	done_zones = 0;
179  	pass = 1;
180  	/*
181  	 * zone_start and zone_end are the current search range.  search_zone
182  	 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
183  	 * volume) and 4 for data zone 2 (start of volume till start of mft
184  	 * zone).
185  	 */
186  	zone_start = start_lcn;
187  	if (zone_start < 0) {
188  		if (zone == DATA_ZONE)
189  			zone_start = vol->data1_zone_pos;
190  		else
191  			zone_start = vol->mft_zone_pos;
192  		if (!zone_start) {
193  			/*
194  			 * Zone starts at beginning of volume which means a
195  			 * single pass is sufficient.
196  			 */
197  			pass = 2;
198  		}
199  	} else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start &&
200  			zone_start < vol->mft_zone_end) {
201  		zone_start = vol->mft_zone_end;
202  		/*
203  		 * Starting at beginning of data1_zone which means a single
204  		 * pass in this zone is sufficient.
205  		 */
206  		pass = 2;
207  	} else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start ||
208  			zone_start >= vol->mft_zone_end)) {
209  		zone_start = vol->mft_lcn;
210  		if (!vol->mft_zone_end)
211  			zone_start = 0;
212  		/*
213  		 * Starting at beginning of volume which means a single pass
214  		 * is sufficient.
215  		 */
216  		pass = 2;
217  	}
218  	if (zone == MFT_ZONE) {
219  		zone_end = vol->mft_zone_end;
220  		search_zone = 1;
221  	} else /* if (zone == DATA_ZONE) */ {
222  		/* Skip searching the mft zone. */
223  		done_zones |= 1;
224  		if (zone_start >= vol->mft_zone_end) {
225  			zone_end = vol->nr_clusters;
226  			search_zone = 2;
227  		} else {
228  			zone_end = vol->mft_zone_start;
229  			search_zone = 4;
230  		}
231  	}
232  	/*
233  	 * bmp_pos is the current bit position inside the bitmap.  We use
234  	 * bmp_initial_pos to determine whether or not to do a zone switch.
235  	 */
236  	bmp_pos = bmp_initial_pos = zone_start;
237  
238  	/* Loop until all clusters are allocated, i.e. clusters == 0. */
239  	clusters = count;
240  	rlpos = rlsize = 0;
241  	mapping = lcnbmp_vi->i_mapping;
242  	i_size = i_size_read(lcnbmp_vi);
243  	while (1) {
244  		ntfs_debug("Start of outer while loop: done_zones 0x%x, "
245  				"search_zone %i, pass %i, zone_start 0x%llx, "
246  				"zone_end 0x%llx, bmp_initial_pos 0x%llx, "
247  				"bmp_pos 0x%llx, rlpos %i, rlsize %i.",
248  				done_zones, search_zone, pass,
249  				(unsigned long long)zone_start,
250  				(unsigned long long)zone_end,
251  				(unsigned long long)bmp_initial_pos,
252  				(unsigned long long)bmp_pos, rlpos, rlsize);
253  		/* Loop until we run out of free clusters. */
254  		last_read_pos = bmp_pos >> 3;
255  		ntfs_debug("last_read_pos 0x%llx.",
256  				(unsigned long long)last_read_pos);
257  		if (last_read_pos > i_size) {
258  			ntfs_debug("End of attribute reached.  "
259  					"Skipping to zone_pass_done.");
260  			goto zone_pass_done;
261  		}
262  		if (likely(page)) {
263  			if (need_writeback) {
264  				ntfs_debug("Marking page dirty.");
265  				flush_dcache_page(page);
266  				set_page_dirty(page);
267  				need_writeback = 0;
268  			}
269  			ntfs_unmap_page(page);
270  		}
271  		page = ntfs_map_page(mapping, last_read_pos >>
272  				PAGE_SHIFT);
273  		if (IS_ERR(page)) {
274  			err = PTR_ERR(page);
275  			ntfs_error(vol->sb, "Failed to map page.");
276  			goto out;
277  		}
278  		buf_size = last_read_pos & ~PAGE_MASK;
279  		buf = page_address(page) + buf_size;
280  		buf_size = PAGE_SIZE - buf_size;
281  		if (unlikely(last_read_pos + buf_size > i_size))
282  			buf_size = i_size - last_read_pos;
283  		buf_size <<= 3;
284  		lcn = bmp_pos & 7;
285  		bmp_pos &= ~(LCN)7;
286  		ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
287  				"bmp_pos 0x%llx, need_writeback %i.", buf_size,
288  				(unsigned long long)lcn,
289  				(unsigned long long)bmp_pos, need_writeback);
290  		while (lcn < buf_size && lcn + bmp_pos < zone_end) {
291  			byte = buf + (lcn >> 3);
292  			ntfs_debug("In inner while loop: buf_size %i, "
293  					"lcn 0x%llx, bmp_pos 0x%llx, "
294  					"need_writeback %i, byte ofs 0x%x, "
295  					"*byte 0x%x.", buf_size,
296  					(unsigned long long)lcn,
297  					(unsigned long long)bmp_pos,
298  					need_writeback,
299  					(unsigned int)(lcn >> 3),
300  					(unsigned int)*byte);
301  			/* Skip full bytes. */
302  			if (*byte == 0xff) {
303  				lcn = (lcn + 8) & ~(LCN)7;
304  				ntfs_debug("Continuing while loop 1.");
305  				continue;
306  			}
307  			bit = 1 << (lcn & 7);
308  			ntfs_debug("bit 0x%x.", bit);
309  			/* If the bit is already set, go onto the next one. */
310  			if (*byte & bit) {
311  				lcn++;
312  				ntfs_debug("Continuing while loop 2.");
313  				continue;
314  			}
315  			/*
316  			 * Allocate more memory if needed, including space for
317  			 * the terminator element.
318  			 * ntfs_malloc_nofs() operates on whole pages only.
319  			 */
320  			if ((rlpos + 2) * sizeof(*rl) > rlsize) {
321  				runlist_element *rl2;
322  
323  				ntfs_debug("Reallocating memory.");
324  				if (!rl)
325  					ntfs_debug("First free bit is at LCN "
326  							"0x%llx.",
327  							(unsigned long long)
328  							(lcn + bmp_pos));
329  				rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
330  				if (unlikely(!rl2)) {
331  					err = -ENOMEM;
332  					ntfs_error(vol->sb, "Failed to "
333  							"allocate memory.");
334  					goto out;
335  				}
336  				memcpy(rl2, rl, rlsize);
337  				ntfs_free(rl);
338  				rl = rl2;
339  				rlsize += PAGE_SIZE;
340  				ntfs_debug("Reallocated memory, rlsize 0x%x.",
341  						rlsize);
342  			}
343  			/* Allocate the bitmap bit. */
344  			*byte |= bit;
345  			/* We need to write this bitmap page to disk. */
346  			need_writeback = 1;
347  			ntfs_debug("*byte 0x%x, need_writeback is set.",
348  					(unsigned int)*byte);
349  			/*
350  			 * Coalesce with previous run if adjacent LCNs.
351  			 * Otherwise, append a new run.
352  			 */
353  			ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), "
354  					"prev_lcn 0x%llx, lcn 0x%llx, "
355  					"bmp_pos 0x%llx, prev_run_len 0x%llx, "
356  					"rlpos %i.",
357  					(unsigned long long)(lcn + bmp_pos),
358  					1ULL, (unsigned long long)prev_lcn,
359  					(unsigned long long)lcn,
360  					(unsigned long long)bmp_pos,
361  					(unsigned long long)prev_run_len,
362  					rlpos);
363  			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
364  				ntfs_debug("Coalescing to run (lcn 0x%llx, "
365  						"len 0x%llx).",
366  						(unsigned long long)
367  						rl[rlpos - 1].lcn,
368  						(unsigned long long)
369  						rl[rlpos - 1].length);
370  				rl[rlpos - 1].length = ++prev_run_len;
371  				ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), "
372  						"prev_run_len 0x%llx.",
373  						(unsigned long long)
374  						rl[rlpos - 1].lcn,
375  						(unsigned long long)
376  						rl[rlpos - 1].length,
377  						(unsigned long long)
378  						prev_run_len);
379  			} else {
380  				if (likely(rlpos)) {
381  					ntfs_debug("Adding new run, (previous "
382  							"run lcn 0x%llx, "
383  							"len 0x%llx).",
384  							(unsigned long long)
385  							rl[rlpos - 1].lcn,
386  							(unsigned long long)
387  							rl[rlpos - 1].length);
388  					rl[rlpos].vcn = rl[rlpos - 1].vcn +
389  							prev_run_len;
390  				} else {
391  					ntfs_debug("Adding new run, is first "
392  							"run.");
393  					rl[rlpos].vcn = start_vcn;
394  				}
395  				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
396  				rl[rlpos].length = prev_run_len = 1;
397  				rlpos++;
398  			}
399  			/* Done? */
400  			if (!--clusters) {
401  				LCN tc;
402  				/*
403  				 * Update the current zone position.  Positions
404  				 * of already scanned zones have been updated
405  				 * during the respective zone switches.
406  				 */
407  				tc = lcn + bmp_pos + 1;
408  				ntfs_debug("Done. Updating current zone "
409  						"position, tc 0x%llx, "
410  						"search_zone %i.",
411  						(unsigned long long)tc,
412  						search_zone);
413  				switch (search_zone) {
414  				case 1:
415  					ntfs_debug("Before checks, "
416  							"vol->mft_zone_pos "
417  							"0x%llx.",
418  							(unsigned long long)
419  							vol->mft_zone_pos);
420  					if (tc >= vol->mft_zone_end) {
421  						vol->mft_zone_pos =
422  								vol->mft_lcn;
423  						if (!vol->mft_zone_end)
424  							vol->mft_zone_pos = 0;
425  					} else if ((bmp_initial_pos >=
426  							vol->mft_zone_pos ||
427  							tc > vol->mft_zone_pos)
428  							&& tc >= vol->mft_lcn)
429  						vol->mft_zone_pos = tc;
430  					ntfs_debug("After checks, "
431  							"vol->mft_zone_pos "
432  							"0x%llx.",
433  							(unsigned long long)
434  							vol->mft_zone_pos);
435  					break;
436  				case 2:
437  					ntfs_debug("Before checks, "
438  							"vol->data1_zone_pos "
439  							"0x%llx.",
440  							(unsigned long long)
441  							vol->data1_zone_pos);
442  					if (tc >= vol->nr_clusters)
443  						vol->data1_zone_pos =
444  							     vol->mft_zone_end;
445  					else if ((bmp_initial_pos >=
446  						    vol->data1_zone_pos ||
447  						    tc > vol->data1_zone_pos)
448  						    && tc >= vol->mft_zone_end)
449  						vol->data1_zone_pos = tc;
450  					ntfs_debug("After checks, "
451  							"vol->data1_zone_pos "
452  							"0x%llx.",
453  							(unsigned long long)
454  							vol->data1_zone_pos);
455  					break;
456  				case 4:
457  					ntfs_debug("Before checks, "
458  							"vol->data2_zone_pos "
459  							"0x%llx.",
460  							(unsigned long long)
461  							vol->data2_zone_pos);
462  					if (tc >= vol->mft_zone_start)
463  						vol->data2_zone_pos = 0;
464  					else if (bmp_initial_pos >=
465  						      vol->data2_zone_pos ||
466  						      tc > vol->data2_zone_pos)
467  						vol->data2_zone_pos = tc;
468  					ntfs_debug("After checks, "
469  							"vol->data2_zone_pos "
470  							"0x%llx.",
471  							(unsigned long long)
472  							vol->data2_zone_pos);
473  					break;
474  				default:
475  					BUG();
476  				}
477  				ntfs_debug("Finished.  Going to out.");
478  				goto out;
479  			}
480  			lcn++;
481  		}
482  		bmp_pos += buf_size;
483  		ntfs_debug("After inner while loop: buf_size 0x%x, lcn "
484  				"0x%llx, bmp_pos 0x%llx, need_writeback %i.",
485  				buf_size, (unsigned long long)lcn,
486  				(unsigned long long)bmp_pos, need_writeback);
487  		if (bmp_pos < zone_end) {
488  			ntfs_debug("Continuing outer while loop, "
489  					"bmp_pos 0x%llx, zone_end 0x%llx.",
490  					(unsigned long long)bmp_pos,
491  					(unsigned long long)zone_end);
492  			continue;
493  		}
494  zone_pass_done:	/* Finished with the current zone pass. */
495  		ntfs_debug("At zone_pass_done, pass %i.", pass);
496  		if (pass == 1) {
497  			/*
498  			 * Now do pass 2, scanning the first part of the zone
499  			 * we omitted in pass 1.
500  			 */
501  			pass = 2;
502  			zone_end = zone_start;
503  			switch (search_zone) {
504  			case 1: /* mft_zone */
505  				zone_start = vol->mft_zone_start;
506  				break;
507  			case 2: /* data1_zone */
508  				zone_start = vol->mft_zone_end;
509  				break;
510  			case 4: /* data2_zone */
511  				zone_start = 0;
512  				break;
513  			default:
514  				BUG();
515  			}
516  			/* Sanity check. */
517  			if (zone_end < zone_start)
518  				zone_end = zone_start;
519  			bmp_pos = zone_start;
520  			ntfs_debug("Continuing outer while loop, pass 2, "
521  					"zone_start 0x%llx, zone_end 0x%llx, "
522  					"bmp_pos 0x%llx.",
523  					(unsigned long long)zone_start,
524  					(unsigned long long)zone_end,
525  					(unsigned long long)bmp_pos);
526  			continue;
527  		} /* pass == 2 */
528  done_zones_check:
529  		ntfs_debug("At done_zones_check, search_zone %i, done_zones "
530  				"before 0x%x, done_zones after 0x%x.",
531  				search_zone, done_zones,
532  				done_zones | search_zone);
533  		done_zones |= search_zone;
534  		if (done_zones < 7) {
535  			ntfs_debug("Switching zone.");
536  			/* Now switch to the next zone we haven't done yet. */
537  			pass = 1;
538  			switch (search_zone) {
539  			case 1:
540  				ntfs_debug("Switching from mft zone to data1 "
541  						"zone.");
542  				/* Update mft zone position. */
543  				if (rlpos) {
544  					LCN tc;
545  
546  					ntfs_debug("Before checks, "
547  							"vol->mft_zone_pos "
548  							"0x%llx.",
549  							(unsigned long long)
550  							vol->mft_zone_pos);
551  					tc = rl[rlpos - 1].lcn +
552  							rl[rlpos - 1].length;
553  					if (tc >= vol->mft_zone_end) {
554  						vol->mft_zone_pos =
555  								vol->mft_lcn;
556  						if (!vol->mft_zone_end)
557  							vol->mft_zone_pos = 0;
558  					} else if ((bmp_initial_pos >=
559  							vol->mft_zone_pos ||
560  							tc > vol->mft_zone_pos)
561  							&& tc >= vol->mft_lcn)
562  						vol->mft_zone_pos = tc;
563  					ntfs_debug("After checks, "
564  							"vol->mft_zone_pos "
565  							"0x%llx.",
566  							(unsigned long long)
567  							vol->mft_zone_pos);
568  				}
569  				/* Switch from mft zone to data1 zone. */
570  switch_to_data1_zone:		search_zone = 2;
571  				zone_start = bmp_initial_pos =
572  						vol->data1_zone_pos;
573  				zone_end = vol->nr_clusters;
574  				if (zone_start == vol->mft_zone_end)
575  					pass = 2;
576  				if (zone_start >= zone_end) {
577  					vol->data1_zone_pos = zone_start =
578  							vol->mft_zone_end;
579  					pass = 2;
580  				}
581  				break;
582  			case 2:
583  				ntfs_debug("Switching from data1 zone to "
584  						"data2 zone.");
585  				/* Update data1 zone position. */
586  				if (rlpos) {
587  					LCN tc;
588  
589  					ntfs_debug("Before checks, "
590  							"vol->data1_zone_pos "
591  							"0x%llx.",
592  							(unsigned long long)
593  							vol->data1_zone_pos);
594  					tc = rl[rlpos - 1].lcn +
595  							rl[rlpos - 1].length;
596  					if (tc >= vol->nr_clusters)
597  						vol->data1_zone_pos =
598  							     vol->mft_zone_end;
599  					else if ((bmp_initial_pos >=
600  						    vol->data1_zone_pos ||
601  						    tc > vol->data1_zone_pos)
602  						    && tc >= vol->mft_zone_end)
603  						vol->data1_zone_pos = tc;
604  					ntfs_debug("After checks, "
605  							"vol->data1_zone_pos "
606  							"0x%llx.",
607  							(unsigned long long)
608  							vol->data1_zone_pos);
609  				}
610  				/* Switch from data1 zone to data2 zone. */
611  				search_zone = 4;
612  				zone_start = bmp_initial_pos =
613  						vol->data2_zone_pos;
614  				zone_end = vol->mft_zone_start;
615  				if (!zone_start)
616  					pass = 2;
617  				if (zone_start >= zone_end) {
618  					vol->data2_zone_pos = zone_start =
619  							bmp_initial_pos = 0;
620  					pass = 2;
621  				}
622  				break;
623  			case 4:
624  				ntfs_debug("Switching from data2 zone to "
625  						"data1 zone.");
626  				/* Update data2 zone position. */
627  				if (rlpos) {
628  					LCN tc;
629  
630  					ntfs_debug("Before checks, "
631  							"vol->data2_zone_pos "
632  							"0x%llx.",
633  							(unsigned long long)
634  							vol->data2_zone_pos);
635  					tc = rl[rlpos - 1].lcn +
636  							rl[rlpos - 1].length;
637  					if (tc >= vol->mft_zone_start)
638  						vol->data2_zone_pos = 0;
639  					else if (bmp_initial_pos >=
640  						      vol->data2_zone_pos ||
641  						      tc > vol->data2_zone_pos)
642  						vol->data2_zone_pos = tc;
643  					ntfs_debug("After checks, "
644  							"vol->data2_zone_pos "
645  							"0x%llx.",
646  							(unsigned long long)
647  							vol->data2_zone_pos);
648  				}
649  				/* Switch from data2 zone to data1 zone. */
650  				goto switch_to_data1_zone;
651  			default:
652  				BUG();
653  			}
654  			ntfs_debug("After zone switch, search_zone %i, "
655  					"pass %i, bmp_initial_pos 0x%llx, "
656  					"zone_start 0x%llx, zone_end 0x%llx.",
657  					search_zone, pass,
658  					(unsigned long long)bmp_initial_pos,
659  					(unsigned long long)zone_start,
660  					(unsigned long long)zone_end);
661  			bmp_pos = zone_start;
662  			if (zone_start == zone_end) {
663  				ntfs_debug("Empty zone, going to "
664  						"done_zones_check.");
665  				/* Empty zone. Don't bother searching it. */
666  				goto done_zones_check;
667  			}
668  			ntfs_debug("Continuing outer while loop.");
669  			continue;
670  		} /* done_zones == 7 */
671  		ntfs_debug("All zones are finished.");
672  		/*
673  		 * All zones are finished!  If DATA_ZONE, shrink mft zone.  If
674  		 * MFT_ZONE, we have really run out of space.
675  		 */
676  		mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
677  		ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end "
678  				"0x%llx, mft_zone_size 0x%llx.",
679  				(unsigned long long)vol->mft_zone_start,
680  				(unsigned long long)vol->mft_zone_end,
681  				(unsigned long long)mft_zone_size);
682  		if (zone == MFT_ZONE || mft_zone_size <= 0) {
683  			ntfs_debug("No free clusters left, going to out.");
684  			/* Really no more space left on device. */
685  			err = -ENOSPC;
686  			goto out;
687  		} /* zone == DATA_ZONE && mft_zone_size > 0 */
688  		ntfs_debug("Shrinking mft zone.");
689  		zone_end = vol->mft_zone_end;
690  		mft_zone_size >>= 1;
691  		if (mft_zone_size > 0)
692  			vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
693  		else /* mft zone and data2 zone no longer exist. */
694  			vol->data2_zone_pos = vol->mft_zone_start =
695  					vol->mft_zone_end = 0;
696  		if (vol->mft_zone_pos >= vol->mft_zone_end) {
697  			vol->mft_zone_pos = vol->mft_lcn;
698  			if (!vol->mft_zone_end)
699  				vol->mft_zone_pos = 0;
700  		}
701  		bmp_pos = zone_start = bmp_initial_pos =
702  				vol->data1_zone_pos = vol->mft_zone_end;
703  		search_zone = 2;
704  		pass = 2;
705  		done_zones &= ~2;
706  		ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, "
707  				"vol->mft_zone_start 0x%llx, "
708  				"vol->mft_zone_end 0x%llx, "
709  				"vol->mft_zone_pos 0x%llx, search_zone 2, "
710  				"pass 2, dones_zones 0x%x, zone_start 0x%llx, "
711  				"zone_end 0x%llx, vol->data1_zone_pos 0x%llx, "
712  				"continuing outer while loop.",
713  				(unsigned long long)mft_zone_size,
714  				(unsigned long long)vol->mft_zone_start,
715  				(unsigned long long)vol->mft_zone_end,
716  				(unsigned long long)vol->mft_zone_pos,
717  				done_zones, (unsigned long long)zone_start,
718  				(unsigned long long)zone_end,
719  				(unsigned long long)vol->data1_zone_pos);
720  	}
721  	ntfs_debug("After outer while loop.");
722  out:
723  	ntfs_debug("At out.");
724  	/* Add runlist terminator element. */
725  	if (likely(rl)) {
726  		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
727  		rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
728  		rl[rlpos].length = 0;
729  	}
730  	if (likely(page && !IS_ERR(page))) {
731  		if (need_writeback) {
732  			ntfs_debug("Marking page dirty.");
733  			flush_dcache_page(page);
734  			set_page_dirty(page);
735  			need_writeback = 0;
736  		}
737  		ntfs_unmap_page(page);
738  	}
739  	if (likely(!err)) {
740  		up_write(&vol->lcnbmp_lock);
741  		ntfs_debug("Done.");
742  		return rl;
743  	}
744  	ntfs_error(vol->sb, "Failed to allocate clusters, aborting "
745  			"(error %i).", err);
746  	if (rl) {
747  		int err2;
748  
749  		if (err == -ENOSPC)
750  			ntfs_debug("Not enough space to complete allocation, "
751  					"err -ENOSPC, first free lcn 0x%llx, "
752  					"could allocate up to 0x%llx "
753  					"clusters.",
754  					(unsigned long long)rl[0].lcn,
755  					(unsigned long long)(count - clusters));
756  		/* Deallocate all allocated clusters. */
757  		ntfs_debug("Attempting rollback...");
758  		err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
759  		if (err2) {
760  			ntfs_error(vol->sb, "Failed to rollback (error %i).  "
761  					"Leaving inconsistent metadata!  "
762  					"Unmount and run chkdsk.", err2);
763  			NVolSetErrors(vol);
764  		}
765  		/* Free the runlist. */
766  		ntfs_free(rl);
767  	} else if (err == -ENOSPC)
768  		ntfs_debug("No space left at all, err = -ENOSPC, first free "
769  				"lcn = 0x%llx.",
770  				(long long)vol->data1_zone_pos);
771  	up_write(&vol->lcnbmp_lock);
772  	return ERR_PTR(err);
773  }
774  
775  /**
776   * __ntfs_cluster_free - free clusters on an ntfs volume
777   * @ni:		ntfs inode whose runlist describes the clusters to free
778   * @start_vcn:	vcn in the runlist of @ni at which to start freeing clusters
779   * @count:	number of clusters to free or -1 for all clusters
780   * @ctx:	active attribute search context if present or NULL if not
781   * @is_rollback:	true if this is a rollback operation
782   *
783   * Free @count clusters starting at the cluster @start_vcn in the runlist
784   * described by the vfs inode @ni.
785   *
786   * If @count is -1, all clusters from @start_vcn to the end of the runlist are
787   * deallocated.  Thus, to completely free all clusters in a runlist, use
788   * @start_vcn = 0 and @count = -1.
789   *
790   * If @ctx is specified, it is an active search context of @ni and its base mft
791   * record.  This is needed when __ntfs_cluster_free() encounters unmapped
792   * runlist fragments and allows their mapping.  If you do not have the mft
793   * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
794   * perform the necessary mapping and unmapping.
795   *
796   * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
797   * before returning.  Thus, @ctx will be left pointing to the same attribute on
798   * return as on entry.  However, the actual pointers in @ctx may point to
799   * different memory locations on return, so you must remember to reset any
800   * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
801   * you will probably want to do:
802   *	m = ctx->mrec;
803   *	a = ctx->attr;
804   * Assuming you cache ctx->attr in a variable @a of type ATTR_RECORD * and that
805   * you cache ctx->mrec in a variable @m of type MFT_RECORD *.
806   *
807   * @is_rollback should always be 'false', it is for internal use to rollback
808   * errors.  You probably want to use ntfs_cluster_free() instead.
809   *
810   * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
811   * remove from the runlist or mark sparse the freed runs later.
812   *
813   * Return the number of deallocated clusters (not counting sparse ones) on
814   * success and -errno on error.
815   *
816   * WARNING: If @ctx is supplied, regardless of whether success or failure is
817   *	    returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
818   *	    is no longer valid, i.e. you need to either call
819   *	    ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
820   *	    In that case PTR_ERR(@ctx->mrec) will give you the error code for
821   *	    why the mapping of the old inode failed.
822   *
823   * Locking: - The runlist described by @ni must be locked for writing on entry
824   *	      and is locked on return.  Note the runlist may be modified when
825   *	      needed runlist fragments need to be mapped.
826   *	    - The volume lcn bitmap must be unlocked on entry and is unlocked
827   *	      on return.
828   *	    - This function takes the volume lcn bitmap lock for writing and
829   *	      modifies the bitmap contents.
830   *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
831   *	      entry and it will be left unmapped on return.
832   *	    - If @ctx is not NULL, the base mft record must be mapped on entry
833   *	      and it will be left mapped on return.
834   */
__ntfs_cluster_free(ntfs_inode * ni,const VCN start_vcn,s64 count,ntfs_attr_search_ctx * ctx,const bool is_rollback)835  s64 __ntfs_cluster_free(ntfs_inode *ni, const VCN start_vcn, s64 count,
836  		ntfs_attr_search_ctx *ctx, const bool is_rollback)
837  {
838  	s64 delta, to_free, total_freed, real_freed;
839  	ntfs_volume *vol;
840  	struct inode *lcnbmp_vi;
841  	runlist_element *rl;
842  	int err;
843  
844  	BUG_ON(!ni);
845  	ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count "
846  			"0x%llx.%s", ni->mft_no, (unsigned long long)start_vcn,
847  			(unsigned long long)count,
848  			is_rollback ? " (rollback)" : "");
849  	vol = ni->vol;
850  	lcnbmp_vi = vol->lcnbmp_ino;
851  	BUG_ON(!lcnbmp_vi);
852  	BUG_ON(start_vcn < 0);
853  	BUG_ON(count < -1);
854  	/*
855  	 * Lock the lcn bitmap for writing but only if not rolling back.  We
856  	 * must hold the lock all the way including through rollback otherwise
857  	 * rollback is not possible because once we have cleared a bit and
858  	 * dropped the lock, anyone could have set the bit again, thus
859  	 * allocating the cluster for another use.
860  	 */
861  	if (likely(!is_rollback))
862  		down_write(&vol->lcnbmp_lock);
863  
864  	total_freed = real_freed = 0;
865  
866  	rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
867  	if (IS_ERR(rl)) {
868  		if (!is_rollback)
869  			ntfs_error(vol->sb, "Failed to find first runlist "
870  					"element (error %li), aborting.",
871  					PTR_ERR(rl));
872  		err = PTR_ERR(rl);
873  		goto err_out;
874  	}
875  	if (unlikely(rl->lcn < LCN_HOLE)) {
876  		if (!is_rollback)
877  			ntfs_error(vol->sb, "First runlist element has "
878  					"invalid lcn, aborting.");
879  		err = -EIO;
880  		goto err_out;
881  	}
882  	/* Find the starting cluster inside the run that needs freeing. */
883  	delta = start_vcn - rl->vcn;
884  
885  	/* The number of clusters in this run that need freeing. */
886  	to_free = rl->length - delta;
887  	if (count >= 0 && to_free > count)
888  		to_free = count;
889  
890  	if (likely(rl->lcn >= 0)) {
891  		/* Do the actual freeing of the clusters in this run. */
892  		err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta,
893  				to_free, likely(!is_rollback) ? 0 : 1);
894  		if (unlikely(err)) {
895  			if (!is_rollback)
896  				ntfs_error(vol->sb, "Failed to clear first run "
897  						"(error %i), aborting.", err);
898  			goto err_out;
899  		}
900  		/* We have freed @to_free real clusters. */
901  		real_freed = to_free;
902  	};
903  	/* Go to the next run and adjust the number of clusters left to free. */
904  	++rl;
905  	if (count >= 0)
906  		count -= to_free;
907  
908  	/* Keep track of the total "freed" clusters, including sparse ones. */
909  	total_freed = to_free;
910  	/*
911  	 * Loop over the remaining runs, using @count as a capping value, and
912  	 * free them.
913  	 */
914  	for (; rl->length && count != 0; ++rl) {
915  		if (unlikely(rl->lcn < LCN_HOLE)) {
916  			VCN vcn;
917  
918  			/* Attempt to map runlist. */
919  			vcn = rl->vcn;
920  			rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
921  			if (IS_ERR(rl)) {
922  				err = PTR_ERR(rl);
923  				if (!is_rollback)
924  					ntfs_error(vol->sb, "Failed to map "
925  							"runlist fragment or "
926  							"failed to find "
927  							"subsequent runlist "
928  							"element.");
929  				goto err_out;
930  			}
931  			if (unlikely(rl->lcn < LCN_HOLE)) {
932  				if (!is_rollback)
933  					ntfs_error(vol->sb, "Runlist element "
934  							"has invalid lcn "
935  							"(0x%llx).",
936  							(unsigned long long)
937  							rl->lcn);
938  				err = -EIO;
939  				goto err_out;
940  			}
941  		}
942  		/* The number of clusters in this run that need freeing. */
943  		to_free = rl->length;
944  		if (count >= 0 && to_free > count)
945  			to_free = count;
946  
947  		if (likely(rl->lcn >= 0)) {
948  			/* Do the actual freeing of the clusters in the run. */
949  			err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn,
950  					to_free, likely(!is_rollback) ? 0 : 1);
951  			if (unlikely(err)) {
952  				if (!is_rollback)
953  					ntfs_error(vol->sb, "Failed to clear "
954  							"subsequent run.");
955  				goto err_out;
956  			}
957  			/* We have freed @to_free real clusters. */
958  			real_freed += to_free;
959  		}
960  		/* Adjust the number of clusters left to free. */
961  		if (count >= 0)
962  			count -= to_free;
963  
964  		/* Update the total done clusters. */
965  		total_freed += to_free;
966  	}
967  	if (likely(!is_rollback))
968  		up_write(&vol->lcnbmp_lock);
969  
970  	BUG_ON(count > 0);
971  
972  	/* We are done.  Return the number of actually freed clusters. */
973  	ntfs_debug("Done.");
974  	return real_freed;
975  err_out:
976  	if (is_rollback)
977  		return err;
978  	/* If no real clusters were freed, no need to rollback. */
979  	if (!real_freed) {
980  		up_write(&vol->lcnbmp_lock);
981  		return err;
982  	}
983  	/*
984  	 * Attempt to rollback and if that succeeds just return the error code.
985  	 * If rollback fails, set the volume errors flag, emit an error
986  	 * message, and return the error code.
987  	 */
988  	delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
989  	if (delta < 0) {
990  		ntfs_error(vol->sb, "Failed to rollback (error %i).  Leaving "
991  				"inconsistent metadata!  Unmount and run "
992  				"chkdsk.", (int)delta);
993  		NVolSetErrors(vol);
994  	}
995  	up_write(&vol->lcnbmp_lock);
996  	ntfs_error(vol->sb, "Aborting (error %i).", err);
997  	return err;
998  }
999  
1000  #endif /* NTFS_RW */
1001